xref: /freebsd/sys/dev/e1000/if_em.c (revision b78ee15e9f04ae15c3e1200df974473167524d17)
/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.4.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t,
		    struct tx_ring *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static uint64_t	em_get_counter(if_t, ift_counter);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
static void	em_handle_que(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void	em_enable_vectors_82574(struct adapter *);
#endif

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))

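/*
 * A worked example of the conversions above (illustrative only): the
 * e1000 delay registers count in 1.024 usec ticks, so
 * EM_TICKS_TO_USECS(100) = (1024 * 100 + 500) / 1000 = 102 usecs.
 * DEFAULT_ITR works out to 1000000000 / (8000 * 256) = 488; the ITR
 * register counts 256 ns units, so 488 * 256 ns is roughly 125 usecs
 * between interrupts, i.e. about MAX_INTS_PER_SEC interrupts/second.
 */
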
/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store the last used CPU when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a CPU.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy Efficient Ethernet - default to OFF (non-zero disables EEE) */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

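/*
 * Example use of the tunables above, with hypothetical values; all of
 * them are CTLFLAG_RDTUN, so they are set from /boot/loader.conf and
 * are read-only through sysctl(8) afterwards:
 *
 *	hw.em.rxd="2048"
 *	hw.em.txd="2048"
 *	hw.em.rx_int_delay="32"
 *	hw.em.enable_msix="1"
 *	hw.em.rx_process_limit="200"
 *	hw.em.eee_setting="1"
 */
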
/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of that adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

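/*
 * Example of a probe match (illustrative): an 82574L reports vendor
 * 0x8086 and device E1000_DEV_ID_82574L; its table entry carries
 * PCI_ANY_ID for both sub-IDs, so any subvendor/subdevice pair
 * matches, and the device description becomes
 * "Intel(R) PRO/1000 Network Connection 7.4.2".
 */
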
/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified.
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	 * Setup MSI/X or MSI if PCI Express
	 */
	adapter->msix = em_setup_msix(adapter);

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  It
	 * must not exceed the hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

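	/*
	 * A worked example of the validation above, assuming the usual
	 * 16-byte legacy descriptor and an EM_DBA_ALIGN of 128: em_txd
	 * of 1024 gives 1024 * 16 = 16384 bytes, a multiple of 128, so
	 * the tunable is honored; 1020 gives 16320 bytes, which is not,
	 * so the driver falls back to EM_DEFAULT_TXD.  In effect the
	 * ring size must be a multiple of 8 descriptors.
	 */
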
	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is
	** important in reading the nvm and
	** mac from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != (void *)NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Mark the queue as having work */
		if (txr->busy == EM_TX_IDLE)
			txr->busy = EM_TX_BUSY;

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

	}

	return;
}

static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than do an immediate send.  That deferral, rather than
 *  simply having multiple tx queues, is the advantage of this scheme.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	unsigned int	i, error;

	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	error = drbr_enqueue(ifp, txr->br, m);
	if (error)
		return (error);

	if (EM_TX_TRYLOCK(txr)) {
		em_mq_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(txr->tq, &txr->tx_task);

	return (0);
}

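/*
 * Example of the dispatch above (illustrative): with num_queues = 2,
 * a flow the stack hashed to flowid 7 always lands on ring 1 (7 % 2),
 * while unhashed traffic spreads by curcpu.  If the ring lock is
 * contended, the mbuf stays in the buf_ring and the taskqueue drains
 * it later; that is what lets em_mq_start return without blocking.
 */
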
static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	EM_TX_LOCK_ASSERT(txr);

	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0) {
		return (ENETDOWN);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		ETHER_BPF_MTAP(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	/* Mark the queue as having work */
	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
		txr->busy = EM_TX_BUSY;

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
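	/*
	 * Example of the MTU bound above (illustrative): on an 82574
	 * max_frame_size is 9234, so the largest accepted MTU is
	 * 9234 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4) = 9216; on
	 * ICH8, which cannot do jumbo frames, the same arithmetic on
	 * ETHER_MAX_LEN (1518) yields the standard 1500.
	 */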
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp, IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp, IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}



/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure, and it
 *  is also used by the driver as a hw/sw initialization routine to
 *  get the hardware and software to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, user can use a LAA */
	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	if (if_getcapenable(ifp) & IFCAP_TSO4)
		if_sethwassistbits(ifp, CSUM_TSO, 0);

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

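	/*
	 * A worked example of the selection above (illustrative): at
	 * the standard MTU of 1500, max_frame_size is 1518, so 2K
	 * MCLBYTES clusters are used; an MTU of 4000 gives a 4018-byte
	 * frame and page-sized MJUMPAGESIZE clusters; a 9000-byte
	 * jumbo MTU gives 9018 and falls through to 9K MJUM9BYTES.
	 */
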
	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

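/*
 * Example of the filter returns above (illustrative): if this port
 * shares an INTx line and another device asserts it, the ICR read
 * returns 0 and FILTER_STRAY lets the kernel try the other handlers
 * on that line; when our cause bits are set, the handler schedules
 * que_task and returns FILTER_HANDLED, so all real work happens in
 * taskqueue context rather than in the interrupt filter itself.
 */
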
/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else {
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	}
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	/*
	** Because we must read the ICR for this interrupt,
	** it may clear other causes using autoclear; for
	** this reason we simply create a soft interrupt
	** for all these vectors.
	*/
	if (reg_icr) {
		E1000_WRITE_REG(&adapter->hw,
		    E1000_ICS, adapter->ims);
	}
	return;
}

1675 static void
1676 em_handle_rx(void *context, int pending)
1677 {
1678 	struct rx_ring	*rxr = context;
1679 	struct adapter	*adapter = rxr->adapter;
1680         bool            more;
1681 
1682 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1683 	if (more)
1684 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1685 	else {
1686 		/* Reenable this interrupt */
1687 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1688 	}
1689 }
1690 
1691 static void
1692 em_handle_tx(void *context, int pending)
1693 {
1694 	struct tx_ring	*txr = context;
1695 	struct adapter	*adapter = txr->adapter;
1696 	if_t ifp = adapter->ifp;
1697 
1698 	EM_TX_LOCK(txr);
1699 	em_txeof(txr);
1700 #ifdef EM_MULTIQUEUE
1701 	if (!drbr_empty(ifp, txr->br))
1702 		em_mq_start_locked(ifp, txr);
1703 #else
1704 	if (!if_sendq_empty(ifp))
1705 		em_start_locked(ifp, txr);
1706 #endif
1707 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1708 	EM_TX_UNLOCK(txr);
1709 }
1710 
1711 static void
1712 em_handle_link(void *context, int pending)
1713 {
1714 	struct adapter	*adapter = context;
1715 	struct tx_ring	*txr = adapter->tx_rings;
1716 	if_t ifp = adapter->ifp;
1717 
1718 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1719 		return;
1720 
1721 	EM_CORE_LOCK(adapter);
1722 	callout_stop(&adapter->timer);
1723 	em_update_link_status(adapter);
1724 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1725 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1726 	    EM_MSIX_LINK | E1000_IMS_LSC);
1727 	if (adapter->link_active) {
1728 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1729 			EM_TX_LOCK(txr);
1730 #ifdef EM_MULTIQUEUE
1731 			if (!drbr_empty(ifp, txr->br))
1732 				em_mq_start_locked(ifp, txr);
1733 #else
1734 			if (!if_sendq_empty(ifp))
1735 				em_start_locked(ifp, txr);
1736 #endif
1737 			EM_TX_UNLOCK(txr);
1738 		}
1739 	}
1740 	EM_CORE_UNLOCK(adapter);
1741 }
1742 
1743 
1744 /*********************************************************************
1745  *
1746  *  Media Ioctl callback
1747  *
1748  *  This routine is called whenever the user queries the status of
1749  *  the interface using ifconfig.
1750  *
1751  **********************************************************************/
1752 static void
1753 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1754 {
1755 	struct adapter *adapter = if_getsoftc(ifp);
1756 	u_char fiber_type = IFM_1000_SX;
1757 
1758 	INIT_DEBUGOUT("em_media_status: begin");
1759 
1760 	EM_CORE_LOCK(adapter);
1761 	em_update_link_status(adapter);
1762 
1763 	ifmr->ifm_status = IFM_AVALID;
1764 	ifmr->ifm_active = IFM_ETHER;
1765 
1766 	if (!adapter->link_active) {
1767 		EM_CORE_UNLOCK(adapter);
1768 		return;
1769 	}
1770 
1771 	ifmr->ifm_status |= IFM_ACTIVE;
1772 
1773 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1774 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1775 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1776 	} else {
1777 		switch (adapter->link_speed) {
1778 		case 10:
1779 			ifmr->ifm_active |= IFM_10_T;
1780 			break;
1781 		case 100:
1782 			ifmr->ifm_active |= IFM_100_TX;
1783 			break;
1784 		case 1000:
1785 			ifmr->ifm_active |= IFM_1000_T;
1786 			break;
1787 		}
1788 		if (adapter->link_duplex == FULL_DUPLEX)
1789 			ifmr->ifm_active |= IFM_FDX;
1790 		else
1791 			ifmr->ifm_active |= IFM_HDX;
1792 	}
1793 	EM_CORE_UNLOCK(adapter);
1794 }
1795 
1796 /*********************************************************************
1797  *
1798  *  Media Ioctl callback
1799  *
1800  *  This routine is called when the user changes speed/duplex using
1801  *  the media/mediaopt options of ifconfig.
1802  *
1803  **********************************************************************/
1804 static int
1805 em_media_change(if_t ifp)
1806 {
1807 	struct adapter *adapter = if_getsoftc(ifp);
1808 	struct ifmedia  *ifm = &adapter->media;
1809 
1810 	INIT_DEBUGOUT("em_media_change: begin");
1811 
1812 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1813 		return (EINVAL);
1814 
1815 	EM_CORE_LOCK(adapter);
1816 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1817 	case IFM_AUTO:
1818 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1819 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1820 		break;
1821 	case IFM_1000_LX:
1822 	case IFM_1000_SX:
1823 	case IFM_1000_T:
1824 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1825 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1826 		break;
1827 	case IFM_100_TX:
1828 		adapter->hw.mac.autoneg = FALSE;
1829 		adapter->hw.phy.autoneg_advertised = 0;
1830 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1831 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1832 		else
1833 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1834 		break;
1835 	case IFM_10_T:
1836 		adapter->hw.mac.autoneg = FALSE;
1837 		adapter->hw.phy.autoneg_advertised = 0;
1838 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1839 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1840 		else
1841 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1842 		break;
1843 	default:
1844 		device_printf(adapter->dev, "Unsupported media type\n");
1845 	}
1846 
1847 	em_init_locked(adapter);
1848 	EM_CORE_UNLOCK(adapter);
1849 
1850 	return (0);
1851 }
1852 
1853 /*********************************************************************
1854  *
1855  *  This routine maps the mbufs to tx descriptors.
1856  *
1857  *  return 0 on success, positive on failure
1858  **********************************************************************/
1859 
1860 static int
1861 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1862 {
1863 	struct adapter		*adapter = txr->adapter;
1864 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1865 	bus_dmamap_t		map;
1866 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1867 	struct e1000_tx_desc	*ctxd = NULL;
1868 	struct mbuf		*m_head;
1869 	struct ether_header	*eh;
1870 	struct ip		*ip = NULL;
1871 	struct tcphdr		*tp = NULL;
1872 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1873 	int			ip_off, poff;
1874 	int			nsegs, i, j, first, last = 0;
1875 	int			error, do_tso, tso_desc = 0, remap = 1;
1876 
1877 	m_head = *m_headp;
1878 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1879 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1880 	ip_off = poff = 0;
1881 
1882 	/*
1883 	 * Intel recommends entire IP/TCP header length reside in a single
1884 	 * buffer. If multiple descriptors are used to describe the IP and
1885 	 * TCP header, each descriptor should describe one or more
1886 	 * complete headers; descriptors referencing only parts of headers
1887 	 * are not supported. If all layer headers are not coalesced into
1888 	 * a single buffer, each buffer should not cross a 4KB boundary,
1889 	 * or be larger than the maximum read request size.
1890 	 * Controller also requires modifing IP/TCP header to make TSO work
1891 	 * The controller also requires modifying the IP/TCP header to
1892 	 * make TSO work, so we first get a writable mbuf chain, then
1893 	 * coalesce the ethernet/IP/TCP headers into a single buffer,
1894 	 * which likewise simplifies IP/TCP/UDP checksum offloading.
1895 	 */
1896 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1897 		if (do_tso || (m_head->m_next != NULL &&
1898 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1899 			if (M_WRITABLE(*m_headp) == 0) {
1900 				m_head = m_dup(*m_headp, M_NOWAIT);
1901 				m_freem(*m_headp);
1902 				if (m_head == NULL) {
1903 					*m_headp = NULL;
1904 					return (ENOBUFS);
1905 				}
1906 				*m_headp = m_head;
1907 			}
1908 		}
1909 		/*
1910 		 * XXX
1911 		 * Assume IPv4; we don't have TSO/checksum offload support
1912 		 * for IPv6 yet.
1913 		 */
1914 		ip_off = sizeof(struct ether_header);
1915 		m_head = m_pullup(m_head, ip_off);
1916 		if (m_head == NULL) {
1917 			*m_headp = NULL;
1918 			return (ENOBUFS);
1919 		}
1920 		eh = mtod(m_head, struct ether_header *);
1921 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1922 			ip_off = sizeof(struct ether_vlan_header);
1923 			m_head = m_pullup(m_head, ip_off);
1924 			if (m_head == NULL) {
1925 				*m_headp = NULL;
1926 				return (ENOBUFS);
1927 			}
1928 		}
1929 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1930 		if (m_head == NULL) {
1931 			*m_headp = NULL;
1932 			return (ENOBUFS);
1933 		}
1934 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1935 		poff = ip_off + (ip->ip_hl << 2);
1936 		if (do_tso) {
1937 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1938 			if (m_head == NULL) {
1939 				*m_headp = NULL;
1940 				return (ENOBUFS);
1941 			}
1942 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1943 			/*
1944 			 * TSO workaround:
1945 			 *   pull 4 more bytes of payload data into the chain.
1946 			 */
1947 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1948 			if (m_head == NULL) {
1949 				*m_headp = NULL;
1950 				return (ENOBUFS);
1951 			}
1952 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1953 			ip->ip_len = 0;
1954 			ip->ip_sum = 0;
1955 			/*
1956 			 * The TCP pseudo header checksum must not include
1957 			 * the TCP payload length, so the driver recomputes
1958 			 * it here as the hardware expects to see it, per
1959 			 * Microsoft's Large Send specification.
1960 			 */
1961 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1962 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1963 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
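			/*
			 * Illustrative note: in_pseudo() yields the folded
			 * 16-bit one's complement sum of the source address,
			 * destination address and htons(IPPROTO_TCP), i.e. a
			 * pseudo header checksum with a zero length field;
			 * the hardware folds in the real payload length
			 * during segmentation.
			 */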
1964 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1965 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1966 			if (m_head == NULL) {
1967 				*m_headp = NULL;
1968 				return (ENOBUFS);
1969 			}
1970 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1971 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1972 			if (m_head == NULL) {
1973 				*m_headp = NULL;
1974 				return (ENOBUFS);
1975 			}
1976 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1977 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1978 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1979 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1980 			if (m_head == NULL) {
1981 				*m_headp = NULL;
1982 				return (ENOBUFS);
1983 			}
1984 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1985 		}
1986 		*m_headp = m_head;
1987 	}
1988 
1989 	/*
1990 	 * Map the packet for DMA
1991 	 *
1992 	 * Capture the first descriptor index,
1993 	 * this descriptor will have the index
1994 	 * of the EOP which is the only one that
1995 	 * now gets a DONE bit writeback.
1996 	 */
1997 	first = txr->next_avail_desc;
1998 	tx_buffer = &txr->tx_buffers[first];
1999 	tx_buffer_mapped = tx_buffer;
2000 	map = tx_buffer->map;
2001 
2002 retry:
2003 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2004 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2005 
2006 	/*
2007 	 * There are two types of errors we can (try) to handle:
2008 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2009 	 *   out of segments.  Defragment the mbuf chain and try again.
2010 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2011 	 *   at this point in time.  Defer sending and try again later.
2012 	 * All other errors, in particular EINVAL, are fatal and prevent the
2013 	 * mbuf chain from ever going through.  Drop it and report error.
2014 	 */
2015 	if (error == EFBIG && remap) {
2016 		struct mbuf *m;
2017 
2018 		m = m_defrag(*m_headp, M_NOWAIT);
2019 		if (m == NULL) {
2020 			adapter->mbuf_alloc_failed++;
2021 			m_freem(*m_headp);
2022 			*m_headp = NULL;
2023 			return (ENOBUFS);
2024 		}
2025 		*m_headp = m;
2026 
2027 		/* Try it again, but only once */
2028 		remap = 0;
2029 		goto retry;
2030 	} else if (error == ENOMEM) {
2031 		adapter->no_tx_dma_setup++;
2032 		return (error);
2033 	} else if (error != 0) {
2034 		adapter->no_tx_dma_setup++;
2035 		m_freem(*m_headp);
2036 		*m_headp = NULL;
2037 		return (error);
2038 	}
2039 
2040 	/*
2041 	 * TSO hardware workaround: if this packet is not
2042 	 * TSO, and is only a single descriptor long, and
2043 	 * it follows a TSO burst, then we need to add a
2044 	 * sentinel descriptor to prevent premature writeback.
2045 	 */
2046 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
2047 		if (nsegs == 1)
2048 			tso_desc = TRUE;
2049 		txr->tx_tso = FALSE;
2050 	}
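	/*
	** (txr->tx_tso latches whether the previous packet used TSO, so
	** the first single-segment, non-TSO packet after a TSO burst is
	** the one that receives the sentinel treatment below.)
	*/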
2051 
2052 	if (nsegs > (txr->tx_avail - 2)) {
2053 		txr->no_desc_avail++;
2054 		bus_dmamap_unload(txr->txtag, map);
2055 		return (ENOBUFS);
2056 	}
2057 	m_head = *m_headp;
2058 
2059 	/* Do hardware assists */
2060 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2061 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2062 		    &txd_upper, &txd_lower);
2063 		/* we need to make a final sentinel transmit desc */
2064 		tso_desc = TRUE;
2065 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2066 		em_transmit_checksum_setup(txr, m_head,
2067 		    ip_off, ip, &txd_upper, &txd_lower);
2068 
2069 	if (m_head->m_flags & M_VLANTAG) {
2070 		/* Set the vlan id. */
2071 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2072 		/* Tell hardware to add tag */
2073 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2074 	}
2075 
2076 	i = txr->next_avail_desc;
2077 
2078 	/* Set up our transmit descriptors */
2079 	for (j = 0; j < nsegs; j++) {
2080 		bus_size_t seg_len;
2081 		bus_addr_t seg_addr;
2082 
2083 		tx_buffer = &txr->tx_buffers[i];
2084 		ctxd = &txr->tx_base[i];
2085 		seg_addr = segs[j].ds_addr;
2086 		seg_len  = segs[j].ds_len;
2087 		/*
2088 		** TSO Workaround:
2089 		** If this is the last descriptor, we want to
2090 		** split it so we end with a small final sentinel.
2091 		*/
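		/*
		** Example: a final segment of 1000 bytes is emitted as a
		** 996 byte descriptor followed by a 4 byte sentinel at the
		** next index, so the DONE writeback lands on the sentinel.
		*/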
2092 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2093 			seg_len -= 4;
2094 			ctxd->buffer_addr = htole64(seg_addr);
2095 			ctxd->lower.data = htole32(
2096 			    adapter->txd_cmd | txd_lower | seg_len);
2097 			ctxd->upper.data =
2098 			    htole32(txd_upper);
2099 			if (++i == adapter->num_tx_desc)
2100 				i = 0;
2101 			/* Now make the sentinel */
2102 			++txd_used; /* using an extra txd */
2103 			ctxd = &txr->tx_base[i];
2104 			tx_buffer = &txr->tx_buffers[i];
2105 			ctxd->buffer_addr =
2106 			    htole64(seg_addr + seg_len);
2107 			ctxd->lower.data = htole32(
2108 			    adapter->txd_cmd | txd_lower | 4);
2109 			ctxd->upper.data =
2110 			    htole32(txd_upper);
2111 			last = i;
2112 			if (++i == adapter->num_tx_desc)
2113 				i = 0;
2114 		} else {
2115 			ctxd->buffer_addr = htole64(seg_addr);
2116 			ctxd->lower.data = htole32(
2117 			    adapter->txd_cmd | txd_lower | seg_len);
2118 			ctxd->upper.data =
2119 			    htole32(txd_upper);
2120 			last = i;
2121 			if (++i == adapter->num_tx_desc)
2122 				i = 0;
2123 		}
2124 		tx_buffer->m_head = NULL;
2125 		tx_buffer->next_eop = -1;
2126 	}
2127 
2128 	txr->next_avail_desc = i;
2129 	txr->tx_avail -= nsegs;
2130 	if (tso_desc) /* TSO used an extra for sentinel */
2131 		txr->tx_avail -= txd_used;
2132 
2133 	tx_buffer->m_head = m_head;
2134 	/*
2135 	** Swap the maps so that the last descriptor,
2136 	** which gets the completion interrupt, holds the
2137 	** map that was actually loaded, while the first
2138 	** descriptor inherits that buffer's unused map.
2139 	*/
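	/*
	** (Presumably the completion path unloads whichever map it finds
	** at the EOP slot, so the loaded map must end up there.)
	*/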
2140 	tx_buffer_mapped->map = tx_buffer->map;
2141 	tx_buffer->map = map;
2142 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2143 
2144 	/*
2145 	 * Last Descriptor of Packet
2146 	 * needs End Of Packet (EOP)
2147 	 * and Report Status (RS)
2148 	 */
2149 	ctxd->lower.data |=
2150 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2151 	/*
2152 	 * Keep track in the first buffer which
2153 	 * descriptor will be written back
2154 	 */
2155 	tx_buffer = &txr->tx_buffers[first];
2156 	tx_buffer->next_eop = last;
2157 
2158 	/*
2159 	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2160 	 * that this frame is available to transmit.
2161 	 */
2162 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2163 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2164 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
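	/*
	** Descriptors [first, last] now belong to the hardware; em_txeof()
	** reclaims them once the DONE writeback for the EOP descriptor
	** appears.
	*/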
2165 
2166 	return (0);
2167 }
2168 
2169 static void
2170 em_set_promisc(struct adapter *adapter)
2171 {
2172 	if_t ifp = adapter->ifp;
2173 	u32		reg_rctl;
2174 
2175 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2176 
2177 	if (if_getflags(ifp) & IFF_PROMISC) {
2178 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2179 		/* Turn this on if you want to see bad packets */
2180 		if (em_debug_sbp)
2181 			reg_rctl |= E1000_RCTL_SBP;
2182 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2183 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2184 		reg_rctl |= E1000_RCTL_MPE;
2185 		reg_rctl &= ~E1000_RCTL_UPE;
2186 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2187 	}
2188 }
2189 
2190 static void
2191 em_disable_promisc(struct adapter *adapter)
2192 {
2193 	if_t		ifp = adapter->ifp;
2194 	u32		reg_rctl;
2195 	int		mcnt = 0;
2196 
2197 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2198 	reg_rctl &=  (~E1000_RCTL_UPE);
2199 	if (if_getflags(ifp) & IFF_ALLMULTI)
2200 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2201 	else
2202 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2203 	/* Don't disable if in MAX groups */
2204 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2205 		reg_rctl &=  (~E1000_RCTL_MPE);
2206 	reg_rctl &=  (~E1000_RCTL_SBP);
2207 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2208 }
2209 
2210 
2211 /*********************************************************************
2212  *  Multicast Update
2213  *
2214  *  This routine is called whenever the multicast address list is updated.
2215  *
2216  **********************************************************************/
2217 
2218 static void
2219 em_set_multi(struct adapter *adapter)
2220 {
2221 	if_t ifp = adapter->ifp;
2222 	u32 reg_rctl = 0;
2223 	u8  *mta; /* Multicast array memory */
2224 	int mcnt = 0;
2225 
2226 	IOCTL_DEBUGOUT("em_set_multi: begin");
2227 
2228 	mta = adapter->mta;
2229 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2230 
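	/*
	** (The 82542 rev 2.0 part apparently requires the receiver to be
	** held in reset, with MWI off, while the multicast table array is
	** rewritten; both are restored after the update below.)
	*/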
2231 	if (adapter->hw.mac.type == e1000_82542 &&
2232 	    adapter->hw.revision_id == E1000_REVISION_2) {
2233 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2234 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2235 			e1000_pci_clear_mwi(&adapter->hw);
2236 		reg_rctl |= E1000_RCTL_RST;
2237 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2238 		msec_delay(5);
2239 	}
2240 
2241 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2242 
2243 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2244 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2245 		reg_rctl |= E1000_RCTL_MPE;
2246 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2247 	} else
2248 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2249 
2250 	if (adapter->hw.mac.type == e1000_82542 &&
2251 	    adapter->hw.revision_id == E1000_REVISION_2) {
2252 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2253 		reg_rctl &= ~E1000_RCTL_RST;
2254 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2255 		msec_delay(5);
2256 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2257 			e1000_pci_set_mwi(&adapter->hw);
2258 	}
2259 }
2260 
2261 
2262 /*********************************************************************
2263  *  Timer routine
2264  *
2265  *  This routine checks for link status and updates statistics.
2266  *
2267  **********************************************************************/
2268 
2269 static void
2270 em_local_timer(void *arg)
2271 {
2272 	struct adapter	*adapter = arg;
2273 	if_t ifp = adapter->ifp;
2274 	struct tx_ring	*txr = adapter->tx_rings;
2275 	struct rx_ring	*rxr = adapter->rx_rings;
2276 	u32		trigger = 0;
2277 
2278 	EM_CORE_LOCK_ASSERT(adapter);
2279 
2280 	em_update_link_status(adapter);
2281 	em_update_stats_counters(adapter);
2282 
2283 	/* Reset LAA into RAR[0] on 82571 */
2284 	if ((adapter->hw.mac.type == e1000_82571) &&
2285 	    e1000_get_laa_state_82571(&adapter->hw))
2286 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2287 
2288 	/* Mask to use in the irq trigger */
2289 	if (adapter->msix_mem) {
2290 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2291 			trigger |= rxr->ims;
2292 		rxr = adapter->rx_rings;
2293 	} else
2294 		trigger = E1000_ICS_RXDMT0;
2295 
2296 	/*
2297 	** Check on the state of the TX queue(s); this
2298 	** can be done without the lock because it's RO
2299 	** and the HUNG state will be static if set.
2300 	*/
2301 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2302 		if (txr->busy == EM_TX_HUNG)
2303 			goto hung;
2304 		if (txr->busy >= EM_TX_MAXTRIES)
2305 			txr->busy = EM_TX_HUNG;
2306 		/* Schedule a TX tasklet if needed */
2307 		if (txr->tx_avail <= EM_MAX_SCATTER)
2308 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2309 	}
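	/*
	** (txr->busy is presumably advanced by the TX paths while work
	** is pending; EM_TX_MAXTRIES consecutive busy ticks escalate to
	** EM_TX_HUNG, which the next tick treats as a watchdog timeout.)
	*/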
2310 
2311 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2312 #ifndef DEVICE_POLLING
2313 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2314 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2315 #endif
2316 	return;
2317 hung:
2318 	/* Looks like we're hung */
2319 	device_printf(adapter->dev, "Watchdog timeout Queue[%d] -- resetting\n",
2320 			txr->me);
2321 	em_print_debug_info(adapter);
2322 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2323 	adapter->watchdog_events++;
2324 	em_init_locked(adapter);
2325 }
2326 
2327 
2328 static void
2329 em_update_link_status(struct adapter *adapter)
2330 {
2331 	struct e1000_hw *hw = &adapter->hw;
2332 	if_t ifp = adapter->ifp;
2333 	device_t dev = adapter->dev;
2334 	struct tx_ring *txr = adapter->tx_rings;
2335 	u32 link_check = 0;
2336 
2337 	/* Get the cached link value or read phy for real */
2338 	switch (hw->phy.media_type) {
2339 	case e1000_media_type_copper:
2340 		if (hw->mac.get_link_status) {
2341 			/* Do the work to read phy */
2342 			e1000_check_for_link(hw);
2343 			link_check = !hw->mac.get_link_status;
2344 			if (link_check) /* ESB2 fix */
2345 				e1000_cfg_on_link_up(hw);
2346 		} else
2347 			link_check = TRUE;
2348 		break;
2349 	case e1000_media_type_fiber:
2350 		e1000_check_for_link(hw);
2351 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2352 		    E1000_STATUS_LU);
2353 		break;
2354 	case e1000_media_type_internal_serdes:
2355 		e1000_check_for_link(hw);
2356 		link_check = adapter->hw.mac.serdes_has_link;
2357 		break;
2358 	default:
2359 	case e1000_media_type_unknown:
2360 		break;
2361 	}
2362 
2363 	/* Now check for a transition */
2364 	if (link_check && (adapter->link_active == 0)) {
2365 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2366 		    &adapter->link_duplex);
2367 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2368 		if ((adapter->link_speed != SPEED_1000) &&
2369 		    ((hw->mac.type == e1000_82571) ||
2370 		    (hw->mac.type == e1000_82572))) {
2371 			int tarc0;
2372 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2373 			tarc0 &= ~TARC_SPEED_MODE_BIT;
2374 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2375 		}
2376 		if (bootverbose)
2377 			device_printf(dev, "Link is up %d Mbps %s\n",
2378 			    adapter->link_speed,
2379 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2380 			    "Full Duplex" : "Half Duplex"));
2381 		adapter->link_active = 1;
2382 		adapter->smartspeed = 0;
2383 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2384 		if_link_state_change(ifp, LINK_STATE_UP);
2385 	} else if (!link_check && (adapter->link_active == 1)) {
2386 		if_setbaudrate(ifp, 0);
2387 		adapter->link_speed = 0;
2388 		adapter->link_duplex = 0;
2389 		if (bootverbose)
2390 			device_printf(dev, "Link is Down\n");
2391 		adapter->link_active = 0;
2392 		/* Link down, disable hang detection */
2393 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2394 			txr->busy = EM_TX_IDLE;
2395 		if_link_state_change(ifp, LINK_STATE_DOWN);
2396 	}
2397 }
2398 
2399 /*********************************************************************
2400  *
2401  *  This routine disables all traffic on the adapter by issuing a
2402  *  global reset on the MAC and deallocates TX/RX buffers.
2403  *
2404  *  This routine should always be called with BOTH the CORE
2405  *  and TX locks.
2406  **********************************************************************/
2407 
2408 static void
2409 em_stop(void *arg)
2410 {
2411 	struct adapter	*adapter = arg;
2412 	if_t ifp = adapter->ifp;
2413 	struct tx_ring	*txr = adapter->tx_rings;
2414 
2415 	EM_CORE_LOCK_ASSERT(adapter);
2416 
2417 	INIT_DEBUGOUT("em_stop: begin");
2418 
2419 	em_disable_intr(adapter);
2420 	callout_stop(&adapter->timer);
2421 
2422 	/* Tell the stack that the interface is no longer active */
2423 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2424 
2425 	/* Disarm Hang Detection. */
2426 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2427 		EM_TX_LOCK(txr);
2428 		txr->busy = EM_TX_IDLE;
2429 		EM_TX_UNLOCK(txr);
2430 	}
2431 
2432 	e1000_reset_hw(&adapter->hw);
2433 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2434 
2435 	e1000_led_off(&adapter->hw);
2436 	e1000_cleanup_led(&adapter->hw);
2437 }
2438 
2439 
2440 /*********************************************************************
2441  *
2442  *  Determine hardware revision.
2443  *
2444  **********************************************************************/
2445 static void
2446 em_identify_hardware(struct adapter *adapter)
2447 {
2448 	device_t dev = adapter->dev;
2449 
2450 	/* Make sure our PCI config space has the necessary stuff set */
2451 	pci_enable_busmaster(dev);
2452 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2453 
2454 	/* Save off the information about this board */
2455 	adapter->hw.vendor_id = pci_get_vendor(dev);
2456 	adapter->hw.device_id = pci_get_device(dev);
2457 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2458 	adapter->hw.subsystem_vendor_id =
2459 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2460 	adapter->hw.subsystem_device_id =
2461 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2462 
2463 	/* Do Shared Code Init and Setup */
2464 	if (e1000_set_mac_type(&adapter->hw)) {
2465 		device_printf(dev, "Setup init failure\n");
2466 		return;
2467 	}
2468 }
2469 
2470 static int
2471 em_allocate_pci_resources(struct adapter *adapter)
2472 {
2473 	device_t	dev = adapter->dev;
2474 	int		rid;
2475 
2476 	rid = PCIR_BAR(0);
2477 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2478 	    &rid, RF_ACTIVE);
2479 	if (adapter->memory == NULL) {
2480 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2481 		return (ENXIO);
2482 	}
2483 	adapter->osdep.mem_bus_space_tag =
2484 	    rman_get_bustag(adapter->memory);
2485 	adapter->osdep.mem_bus_space_handle =
2486 	    rman_get_bushandle(adapter->memory);
2487 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2488 
2489 	adapter->hw.back = &adapter->osdep;
2490 
2491 	return (0);
2492 }
2493 
2494 /*********************************************************************
2495  *
2496  *  Setup the Legacy or MSI Interrupt handler
2497  *
2498  **********************************************************************/
2499 int
2500 em_allocate_legacy(struct adapter *adapter)
2501 {
2502 	device_t dev = adapter->dev;
2503 	struct tx_ring	*txr = adapter->tx_rings;
2504 	int error, rid = 0;
2505 
2506 	/* Manually turn off all interrupts */
2507 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2508 
2509 	if (adapter->msix == 1) /* using MSI */
2510 		rid = 1;
2511 	/* We allocate a single interrupt resource */
2512 	adapter->res = bus_alloc_resource_any(dev,
2513 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2514 	if (adapter->res == NULL) {
2515 		device_printf(dev, "Unable to allocate bus resource: "
2516 		    "interrupt\n");
2517 		return (ENXIO);
2518 	}
2519 
2520 	/*
2521 	 * Allocate a fast interrupt and the associated
2522 	 * deferred processing contexts.
2523 	 */
2524 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2525 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2526 	    taskqueue_thread_enqueue, &adapter->tq);
2527 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2528 	    device_get_nameunit(adapter->dev));
2529 	/* Use a TX only tasklet for local timer */
2530 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2531 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2532 	    taskqueue_thread_enqueue, &txr->tq);
2533 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2534 	    device_get_nameunit(adapter->dev));
2535 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2536 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2537 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2538 		device_printf(dev, "Failed to register fast interrupt "
2539 			    "handler: %d\n", error);
2540 		taskqueue_free(adapter->tq);
2541 		adapter->tq = NULL;
2542 		return (error);
2543 	}
2544 
2545 	return (0);
2546 }
2547 
2548 /*********************************************************************
2549  *
2550  *  Setup the MSIX Interrupt handlers
2551  *   This is not really Multiqueue, rather
2552  *   it's just separate interrupt vectors
2553  *   for TX, RX, and Link.
2554  *
2555  **********************************************************************/
2556 int
2557 em_allocate_msix(struct adapter *adapter)
2558 {
2559 	device_t	dev = adapter->dev;
2560 	struct		tx_ring *txr = adapter->tx_rings;
2561 	struct		rx_ring *rxr = adapter->rx_rings;
2562 	int		error, rid, vector = 0;
2563 	int		cpu_id = 0;
2564 
2565 
2566 	/* Make sure all interrupts are disabled */
2567 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2568 
2569 	/* First set up ring resources */
2570 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2571 
2572 		/* RX ring */
2573 		rid = vector + 1;
2574 
2575 		rxr->res = bus_alloc_resource_any(dev,
2576 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2577 		if (rxr->res == NULL) {
2578 			device_printf(dev,
2579 			    "Unable to allocate bus resource: "
2580 			    "RX MSIX Interrupt %d\n", i);
2581 			return (ENXIO);
2582 		}
2583 		if ((error = bus_setup_intr(dev, rxr->res,
2584 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2585 		    rxr, &rxr->tag)) != 0) {
2586 			device_printf(dev, "Failed to register RX handler\n");
2587 			return (error);
2588 		}
2589 #if __FreeBSD_version >= 800504
2590 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2591 #endif
2592 		rxr->msix = vector;
2593 
2594 		if (em_last_bind_cpu < 0)
2595 			em_last_bind_cpu = CPU_FIRST();
2596 		cpu_id = em_last_bind_cpu;
2597 		bus_bind_intr(dev, rxr->res, cpu_id);
2598 
2599 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2600 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2601 		    taskqueue_thread_enqueue, &rxr->tq);
2602 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2603 		    device_get_nameunit(adapter->dev), cpu_id);
2604 		/*
2605 		** Set the bit to enable interrupt
2606 		** in E1000_IMS -- bits 20 and 21
2607 		** are for RX0 and RX1, note this has
2608 		** NOTHING to do with the MSIX vector
2609 		*/
2610 		rxr->ims = 1 << (20 + i);
2611 		adapter->ims |= rxr->ims;
2612 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
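		/*
		** IVAR layout sketch, as implied by the shifts used here
		** and below: each interrupt cause gets a 4-bit field whose
		** low bits select the MSIX vector and whose high bit (the
		** 8) marks the entry valid; RX queues occupy bits 0-7, TX
		** queues bits 8-15, and the link cause bits 16-19.
		*/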
2613 
2614 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2615 	}
2616 
2617 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2618 		/* TX ring */
2619 		rid = vector + 1;
2620 		txr->res = bus_alloc_resource_any(dev,
2621 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2622 		if (txr->res == NULL) {
2623 			device_printf(dev,
2624 			    "Unable to allocate bus resource: "
2625 			    "TX MSIX Interrupt %d\n", i);
2626 			return (ENXIO);
2627 		}
2628 		if ((error = bus_setup_intr(dev, txr->res,
2629 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2630 		    txr, &txr->tag)) != 0) {
2631 			device_printf(dev, "Failed to register TX handler\n");
2632 			return (error);
2633 		}
2634 #if __FreeBSD_version >= 800504
2635 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2636 #endif
2637 		txr->msix = vector;
2638 
2639 		if (em_last_bind_cpu < 0)
2640 			em_last_bind_cpu = CPU_FIRST();
2641 		cpu_id = em_last_bind_cpu;
2642 		bus_bind_intr(dev, txr->res, cpu_id);
2643 
2644 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2645 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2646 		    taskqueue_thread_enqueue, &txr->tq);
2647 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2648 		    device_get_nameunit(adapter->dev), cpu_id);
2649 		/*
2650 		** Set the bit to enable interrupt
2651 		** in E1000_IMS -- bits 22 and 23
2652 		** are for TX0 and TX1, note this has
2653 		** NOTHING to do with the MSIX vector
2654 		*/
2655 		txr->ims = 1 << (22 + i);
2656 		adapter->ims |= txr->ims;
2657 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2658 
2659 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2660 	}
2661 
2662 	/* Link interrupt */
2663 	rid = vector + 1;
2664 	adapter->res = bus_alloc_resource_any(dev,
2665 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2666 	if (!adapter->res) {
2667 		device_printf(dev, "Unable to allocate "
2668 		    "bus resource: Link interrupt [%d]\n", rid);
2669 		return (ENXIO);
2670 	}
2671 	/* Set the link handler function */
2672 	error = bus_setup_intr(dev, adapter->res,
2673 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2674 	    em_msix_link, adapter, &adapter->tag);
2675 	if (error) {
2676 		adapter->res = NULL;
2677 		device_printf(dev, "Failed to register LINK handler\n");
2678 		return (error);
2679 	}
2680 #if __FreeBSD_version >= 800504
2681 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2682 #endif
2683 	adapter->linkvec = vector;
2684 	adapter->ivars |=  (8 | vector) << 16;
2685 	adapter->ivars |= 0x80000000;
2686 
2687 	return (0);
2688 }
2689 
2690 
2691 static void
2692 em_free_pci_resources(struct adapter *adapter)
2693 {
2694 	device_t	dev = adapter->dev;
2695 	struct tx_ring	*txr;
2696 	struct rx_ring	*rxr;
2697 	int		rid;
2698 
2699 
2700 	/*
2701 	** Release all the queue interrupt resources:
2702 	*/
2703 	for (int i = 0; i < adapter->num_queues; i++) {
2704 		txr = &adapter->tx_rings[i];
2705 		/* an early abort? */
2706 		if (txr == NULL)
2707 			break;
2708 		rid = txr->msix + 1;
2709 		if (txr->tag != NULL) {
2710 			bus_teardown_intr(dev, txr->res, txr->tag);
2711 			txr->tag = NULL;
2712 		}
2713 		if (txr->res != NULL)
2714 			bus_release_resource(dev, SYS_RES_IRQ,
2715 			    rid, txr->res);
2716 
2717 		rxr = &adapter->rx_rings[i];
2718 		/* an early abort? */
2719 		if (rxr == NULL)
2720 			break;
2721 		rid = rxr->msix + 1;
2722 		if (rxr->tag != NULL) {
2723 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2724 			rxr->tag = NULL;
2725 		}
2726 		if (rxr->res != NULL)
2727 			bus_release_resource(dev, SYS_RES_IRQ,
2728 			    rid, rxr->res);
2729 	}
2730 
2731 	if (adapter->linkvec) /* we are doing MSIX */
2732 		rid = adapter->linkvec + 1;
2733 	else
2734 		rid = (adapter->msix != 0) ? 1 : 0;
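	/*
	** (This mirrors the allocations above: under MSIX the link
	** interrupt used rid linkvec + 1, MSI used rid 1, and legacy
	** INTx used rid 0.)
	*/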
2735 
2736 	if (adapter->tag != NULL) {
2737 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2738 		adapter->tag = NULL;
2739 	}
2740 
2741 	if (adapter->res != NULL)
2742 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2743 
2744 
2745 	if (adapter->msix)
2746 		pci_release_msi(dev);
2747 
2748 	if (adapter->msix_mem != NULL)
2749 		bus_release_resource(dev, SYS_RES_MEMORY,
2750 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2751 
2752 	if (adapter->memory != NULL)
2753 		bus_release_resource(dev, SYS_RES_MEMORY,
2754 		    PCIR_BAR(0), adapter->memory);
2755 
2756 	if (adapter->flash != NULL)
2757 		bus_release_resource(dev, SYS_RES_MEMORY,
2758 		    EM_FLASH, adapter->flash);
2759 }
2760 
2761 /*
2762  * Setup MSI or MSI/X
2763  */
2764 static int
2765 em_setup_msix(struct adapter *adapter)
2766 {
2767 	device_t dev = adapter->dev;
2768 	int val;
2769 
2770 	/* Nearly always going to use one queue */
2771 	adapter->num_queues = 1;
2772 
2773 	/*
2774 	** Try using MSI-X for Hartwell adapters
2775 	*/
2776 	if ((adapter->hw.mac.type == e1000_82574) &&
2777 	    (em_enable_msix == TRUE)) {
2778 #ifdef EM_MULTIQUEUE
2779 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2780 		if (adapter->num_queues > 1)
2781 			em_enable_vectors_82574(adapter);
2782 #endif
2783 		/* Map the MSIX BAR */
2784 		int rid = PCIR_BAR(EM_MSIX_BAR);
2785 		adapter->msix_mem = bus_alloc_resource_any(dev,
2786 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2787 		if (adapter->msix_mem == NULL) {
2788 			/* May not be enabled */
2789 			device_printf(adapter->dev,
2790 			    "Unable to map MSIX table\n");
2791 			goto msi;
2792 		}
2793 		val = pci_msix_count(dev);
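		/*
		** Vector budget, as checked below: two queues need five
		** vectors (2 RX + 2 TX + link); a single queue needs
		** three (RX + TX + link).
		*/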
2794 
2795 #ifdef EM_MULTIQUEUE
2796 		/* We need 5 vectors in the multiqueue case */
2797 		if (adapter->num_queues > 1) {
2798 			if (val >= 5)
2799 				val = 5;
2800 			else {
2801 				adapter->num_queues = 1;
2802 				device_printf(adapter->dev,
2803 				    "Insufficient MSIX vectors for >1 queue, "
2804 				    "using single queue...\n");
2805 				goto msix_one;
2806 			}
2807 		} else {
2808 msix_one:
2809 #endif
2810 			if (val >= 3)
2811 				val = 3;
2812 			else {
2813 				device_printf(adapter->dev,
2814 				    "Insufficient MSIX vectors, using MSI\n");
2815 				goto msi;
2816 			}
2817 #ifdef EM_MULTIQUEUE
2818 		}
2819 #endif
2820 
2821 		if (pci_alloc_msix(dev, &val) == 0) {
2822 			device_printf(adapter->dev,
2823 			    "Using MSIX interrupts "
2824 			    "with %d vectors\n", val);
2825 			return (val);
2826 		}
2827 
2828 		/*
2829 		** If MSIX alloc failed or provided us with
2830 		** less than needed, free and fall through to MSI
2831 		*/
2832 		pci_release_msi(dev);
2833 	}
2834 msi:
2835 	if (adapter->msix_mem != NULL) {
2836 		bus_release_resource(dev, SYS_RES_MEMORY,
2837 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2838 		adapter->msix_mem = NULL;
2839 	}
2840 	val = 1;
2841 	if (pci_alloc_msi(dev, &val) == 0) {
2842 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2843 		return (val);
2844 	}
2845 	/* Should only happen due to manual configuration */
2846 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2847 	return (0);
2848 }
2849 
2850 
2851 /*********************************************************************
2852  *
2853  *  Initialize the hardware to a configuration
2854  *  as specified by the adapter structure.
2855  *
2856  **********************************************************************/
2857 static void
2858 em_reset(struct adapter *adapter)
2859 {
2860 	device_t	dev = adapter->dev;
2861 	if_t ifp = adapter->ifp;
2862 	struct e1000_hw	*hw = &adapter->hw;
2863 	u16		rx_buffer_size;
2864 	u32		pba;
2865 
2866 	INIT_DEBUGOUT("em_reset: begin");
2867 
2868 	/* Set up smart power down as default off on newer adapters. */
2869 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2870 	    hw->mac.type == e1000_82572)) {
2871 		u16 phy_tmp = 0;
2872 
2873 		/* Speed up time to link by disabling smart power down. */
2874 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2875 		phy_tmp &= ~IGP02E1000_PM_SPD;
2876 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2877 	}
2878 
2879 	/*
2880 	 * Packet Buffer Allocation (PBA)
2881 	 * Writing PBA sets the receive portion of the buffer;
2882 	 * the remainder is used for the transmit buffer.
2883 	 */
2884 	switch (hw->mac.type) {
2885 	/* Total Packet Buffer on these is 48K */
2886 	case e1000_82571:
2887 	case e1000_82572:
2888 	case e1000_80003es2lan:
2889 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2890 		break;
2891 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2892 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2893 		break;
2894 	case e1000_82574:
2895 	case e1000_82583:
2896 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2897 		break;
2898 	case e1000_ich8lan:
2899 		pba = E1000_PBA_8K;
2900 		break;
2901 	case e1000_ich9lan:
2902 	case e1000_ich10lan:
2903 		/* Boost Receive side for jumbo frames */
2904 		if (adapter->hw.mac.max_frame_size > 4096)
2905 			pba = E1000_PBA_14K;
2906 		else
2907 			pba = E1000_PBA_10K;
2908 		break;
2909 	case e1000_pchlan:
2910 	case e1000_pch2lan:
2911 	case e1000_pch_lpt:
2912 		pba = E1000_PBA_26K;
2913 		break;
2914 	default:
2915 		if (adapter->hw.mac.max_frame_size > 8192)
2916 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2917 		else
2918 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2919 	}
2920 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
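	/*
	** (The PBA register's low 16 bits are in KB units, which is why
	** rx_buffer_size below is computed as (PBA & 0xffff) << 10.)
	*/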
2921 
2922 	/*
2923 	 * These parameters control the automatic generation (Tx) and
2924 	 * response (Rx) to Ethernet PAUSE frames.
2925 	 * - High water mark should allow for at least two frames to be
2926 	 *   received after sending an XOFF.
2927 	 * - Low water mark works best when it is very near the high water mark.
2928 	 *   This allows the receiver to restart by sending XON when it has
2929 	 *   drained a bit. Here we use an arbitrary value of 1500, which will
2930 	 *   restart after one full frame is pulled from the buffer. There
2931 	 *   could be several smaller frames in the buffer and if so they will
2932 	 *   not trigger the XON until their total number reduces the buffer
2933 	 *   by 1500.
2934 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2935 	 */
2936 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2937 	hw->fc.high_water = rx_buffer_size -
2938 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2939 	hw->fc.low_water = hw->fc.high_water - 1500;
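	/*
	 * Worked example (illustrative): with a 32KB receive allocation
	 * and a 1522 byte max frame, high_water = 32768 -
	 * roundup2(1522, 1024) = 30720 bytes, and low_water = 29220.
	 */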
2940 
2941 	if (adapter->fc) /* locally set flow control value? */
2942 		hw->fc.requested_mode = adapter->fc;
2943 	else
2944 		hw->fc.requested_mode = e1000_fc_full;
2945 
2946 	if (hw->mac.type == e1000_80003es2lan)
2947 		hw->fc.pause_time = 0xFFFF;
2948 	else
2949 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2950 
2951 	hw->fc.send_xon = TRUE;
2952 
2953 	/* Device specific overrides/settings */
2954 	switch (hw->mac.type) {
2955 	case e1000_pchlan:
2956 		/* Workaround: no TX flow ctrl for PCH */
2957 		hw->fc.requested_mode = e1000_fc_rx_pause;
2958 		hw->fc.pause_time = 0xFFFF; /* override */
2959 		if (if_getmtu(ifp) > ETHERMTU) {
2960 			hw->fc.high_water = 0x3500;
2961 			hw->fc.low_water = 0x1500;
2962 		} else {
2963 			hw->fc.high_water = 0x5000;
2964 			hw->fc.low_water = 0x3000;
2965 		}
2966 		hw->fc.refresh_time = 0x1000;
2967 		break;
2968 	case e1000_pch2lan:
2969 	case e1000_pch_lpt:
2970 		hw->fc.high_water = 0x5C20;
2971 		hw->fc.low_water = 0x5048;
2972 		hw->fc.pause_time = 0x0650;
2973 		hw->fc.refresh_time = 0x0400;
2974 		/* Jumbos need adjusted PBA */
2975 		if (if_getmtu(ifp) > ETHERMTU)
2976 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2977 		else
2978 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2979 		break;
2980 	case e1000_ich9lan:
2981 	case e1000_ich10lan:
2982 		if (if_getmtu(ifp) > ETHERMTU) {
2983 			hw->fc.high_water = 0x2800;
2984 			hw->fc.low_water = hw->fc.high_water - 8;
2985 			break;
2986 		}
2987 		/* else fall thru */
2988 	default:
2989 		if (hw->mac.type == e1000_80003es2lan)
2990 			hw->fc.pause_time = 0xFFFF;
2991 		break;
2992 	}
2993 
2994 	/* Issue a global reset */
2995 	e1000_reset_hw(hw);
2996 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2997 	em_disable_aspm(adapter);
2998 	/* and a re-init */
2999 	if (e1000_init_hw(hw) < 0) {
3000 		device_printf(dev, "Hardware Initialization Failed\n");
3001 		return;
3002 	}
3003 
3004 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3005 	e1000_get_phy_info(hw);
3006 	e1000_check_for_link(hw);
3007 	return;
3008 }
3009 
3010 /*********************************************************************
3011  *
3012  *  Setup networking device structure and register an interface.
3013  *
3014  **********************************************************************/
3015 static int
3016 em_setup_interface(device_t dev, struct adapter *adapter)
3017 {
3018 	if_t ifp;
3019 
3020 	INIT_DEBUGOUT("em_setup_interface: begin");
3021 
3022 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3023 	if (ifp == NULL) {
3024 		device_printf(dev, "can not allocate ifnet structure\n");
3025 		return (-1);
3026 	}
3027 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3028 	if_setdev(ifp, dev);
3029 	if_setinitfn(ifp, em_init);
3030 	if_setsoftc(ifp, adapter);
3031 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3032 	if_setioctlfn(ifp, em_ioctl);
3033 	if_setgetcounterfn(ifp, em_get_counter);
3034 #ifdef EM_MULTIQUEUE
3035 	/* Multiqueue stack interface */
3036 	if_settransmitfn(ifp, em_mq_start);
3037 	if_setqflushfn(ifp, em_qflush);
3038 #else
3039 	if_setstartfn(ifp, em_start);
3040 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3041 	if_setsendqready(ifp);
3042 #endif
3043 
3044 	ether_ifattach(ifp, adapter->hw.mac.addr);
3045 
3046 	if_setcapabilities(ifp, 0);
3047 	if_setcapenable(ifp, 0);
3048 
3049 
3050 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3051 	    IFCAP_TSO4, 0);
3052 	/*
3053 	 * Tell the upper layer(s) we
3054 	 * support full VLAN capability
3055 	 */
3056 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3057 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3058 	    IFCAP_VLAN_MTU, 0);
3059 	if_setcapenable(ifp, if_getcapabilities(ifp));
3060 
3061 	/*
3062 	** Don't turn this on by default: if vlans are
3063 	** created on another pseudo device (e.g. lagg),
3064 	** vlan events are not passed through, breaking
3065 	** operation, whereas with HW FILTER off it works.
3066 	** If using vlans directly on the em driver you
3067 	** can enable this for full hardware tag filtering.
3068 	*/
3069 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3070 
3071 #ifdef DEVICE_POLLING
3072 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3073 #endif
3074 
3075 	/* Enable only WOL MAGIC by default */
3076 	if (adapter->wol) {
3077 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3078 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3079 	}
3080 
3081 	/*
3082 	 * Specify the media types supported by this adapter and register
3083 	 * callbacks to update media and link information
3084 	 */
3085 	ifmedia_init(&adapter->media, IFM_IMASK,
3086 	    em_media_change, em_media_status);
3087 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3088 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3089 		u_char fiber_type = IFM_1000_SX;	/* default type */
3090 
3091 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3092 			    0, NULL);
3093 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3094 	} else {
3095 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3096 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3097 			    0, NULL);
3098 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3099 			    0, NULL);
3100 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3101 			    0, NULL);
3102 		if (adapter->hw.phy.type != e1000_phy_ife) {
3103 			ifmedia_add(&adapter->media,
3104 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3105 			ifmedia_add(&adapter->media,
3106 				IFM_ETHER | IFM_1000_T, 0, NULL);
3107 		}
3108 	}
3109 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3110 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3111 	return (0);
3112 }
3113 
3114 
3115 /*
3116  * Manage DMA'able memory.
3117  */
3118 static void
3119 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3120 {
3121 	if (error)
3122 		return;
3123 	*(bus_addr_t *) arg = segs[0].ds_addr;
3124 }
3125 
3126 static int
3127 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3128         struct em_dma_alloc *dma, int mapflags)
3129 {
3130 	int error;
3131 
3132 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3133 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3134 				BUS_SPACE_MAXADDR,	/* lowaddr */
3135 				BUS_SPACE_MAXADDR,	/* highaddr */
3136 				NULL, NULL,		/* filter, filterarg */
3137 				size,			/* maxsize */
3138 				1,			/* nsegments */
3139 				size,			/* maxsegsize */
3140 				0,			/* flags */
3141 				NULL,			/* lockfunc */
3142 				NULL,			/* lockarg */
3143 				&dma->dma_tag);
3144 	if (error) {
3145 		device_printf(adapter->dev,
3146 		    "%s: bus_dma_tag_create failed: %d\n",
3147 		    __func__, error);
3148 		goto fail_0;
3149 	}
3150 
3151 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3152 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3153 	if (error) {
3154 		device_printf(adapter->dev,
3155 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3156 		    __func__, (uintmax_t)size, error);
3157 		goto fail_2;
3158 	}
3159 
3160 	dma->dma_paddr = 0;
3161 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3162 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3163 	if (error || dma->dma_paddr == 0) {
3164 		device_printf(adapter->dev,
3165 		    "%s: bus_dmamap_load failed: %d\n",
3166 		    __func__, error);
3167 		goto fail_3;
3168 	}
3169 
3170 	return (0);
3171 
3172 fail_3:
3173 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3174 fail_2:
3175 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3176 	bus_dma_tag_destroy(dma->dma_tag);
3177 fail_0:
3178 	dma->dma_tag = NULL;
3179 
3180 	return (error);
3181 }
3182 
3183 static void
3184 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3185 {
3186 	if (dma->dma_tag == NULL)
3187 		return;
3188 	if (dma->dma_paddr != 0) {
3189 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3190 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3191 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3192 		dma->dma_paddr = 0;
3193 	}
3194 	if (dma->dma_vaddr != NULL) {
3195 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3196 		dma->dma_vaddr = NULL;
3197 	}
3198 	bus_dma_tag_destroy(dma->dma_tag);
3199 	dma->dma_tag = NULL;
3200 }
3201 
3202 
3203 /*********************************************************************
3204  *
3205  *  Allocate memory for the transmit and receive rings, and then
3206  *  the descriptors associated with each, called only once at attach.
3207  *
3208  **********************************************************************/
3209 static int
3210 em_allocate_queues(struct adapter *adapter)
3211 {
3212 	device_t		dev = adapter->dev;
3213 	struct tx_ring		*txr = NULL;
3214 	struct rx_ring		*rxr = NULL;
3215 	int rsize, tsize, error = E1000_SUCCESS;
3216 	int txconf = 0, rxconf = 0;
3217 
3218 
3219 	/* Allocate the TX ring struct memory */
3220 	if (!(adapter->tx_rings =
3221 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3222 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3223 		device_printf(dev, "Unable to allocate TX ring memory\n");
3224 		error = ENOMEM;
3225 		goto fail;
3226 	}
3227 
3228 	/* Now allocate the RX */
3229 	if (!(adapter->rx_rings =
3230 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3231 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3232 		device_printf(dev, "Unable to allocate RX ring memory\n");
3233 		error = ENOMEM;
3234 		goto rx_fail;
3235 	}
3236 
3237 	tsize = roundup2(adapter->num_tx_desc *
3238 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3239 	/*
3240 	 * Now set up the TX queues, txconf is needed to handle the
3241 	 * possibility that things fail midcourse and we need to
3242 	 * unwind the allocated memory gracefully.
3243 	 */
3244 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3245 		/* Set up some basics */
3246 		txr = &adapter->tx_rings[i];
3247 		txr->adapter = adapter;
3248 		txr->me = i;
3249 
3250 		/* Initialize the TX lock */
3251 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3252 		    device_get_nameunit(dev), txr->me);
3253 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3254 
3255 		if (em_dma_malloc(adapter, tsize,
3256 			&txr->txdma, BUS_DMA_NOWAIT)) {
3257 			device_printf(dev,
3258 			    "Unable to allocate TX Descriptor memory\n");
3259 			error = ENOMEM;
3260 			goto err_tx_desc;
3261 		}
3262 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3263 		bzero((void *)txr->tx_base, tsize);
3264 
3265 		if (em_allocate_transmit_buffers(txr)) {
3266 			device_printf(dev,
3267 			    "Critical Failure setting up transmit buffers\n");
3268 			error = ENOMEM;
3269 			goto err_tx_desc;
3270 		}
3271 #if __FreeBSD_version >= 800000
3272 		/* Allocate a buf ring */
3273 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3274 		    M_WAITOK, &txr->tx_mtx);
3275 #endif
3276 	}
3277 
3278 	/*
3279 	 * Next the RX queues...
3280 	 */
3281 	rsize = roundup2(adapter->num_rx_desc *
3282 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3283 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3284 		rxr = &adapter->rx_rings[i];
3285 		rxr->adapter = adapter;
3286 		rxr->me = i;
3287 
3288 		/* Initialize the RX lock */
3289 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3290 		    device_get_nameunit(dev), rxr->me);
3291 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3292 
3293 		if (em_dma_malloc(adapter, rsize,
3294 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3295 			device_printf(dev,
3296 			    "Unable to allocate RxDescriptor memory\n");
3297 			error = ENOMEM;
3298 			goto err_rx_desc;
3299 		}
3300 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3301 		bzero((void *)rxr->rx_base, rsize);
3302 
3303 		/* Allocate receive buffers for the ring */
3304 		if (em_allocate_receive_buffers(rxr)) {
3305 			device_printf(dev,
3306 			    "Critical Failure setting up receive buffers\n");
3307 			error = ENOMEM;
3308 			goto err_rx_desc;
3309 		}
3310 	}
3311 
3312 	return (0);
3313 
3314 err_rx_desc:
3315 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3316 		em_dma_free(adapter, &rxr->rxdma);
3317 err_tx_desc:
3318 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3319 		em_dma_free(adapter, &txr->txdma);
3320 	free(adapter->rx_rings, M_DEVBUF);
3321 rx_fail:
3322 #if __FreeBSD_version >= 800000
3323 	buf_ring_free(txr->br, M_DEVBUF);
3324 #endif
3325 	free(adapter->tx_rings, M_DEVBUF);
3326 fail:
3327 	return (error);
3328 }
3329 
3330 
3331 /*********************************************************************
3332  *
3333  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3334  *  the information needed to transmit a packet on the wire. This is
3335  *  called only once at attach; setup is done on every reset.
3336  *
3337  **********************************************************************/
3338 static int
3339 em_allocate_transmit_buffers(struct tx_ring *txr)
3340 {
3341 	struct adapter *adapter = txr->adapter;
3342 	device_t dev = adapter->dev;
3343 	struct em_buffer *txbuf;
3344 	int error, i;
3345 
3346 	/*
3347 	 * Setup DMA descriptor areas.
3348 	 */
3349 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3350 			       1, 0,			/* alignment, bounds */
3351 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3352 			       BUS_SPACE_MAXADDR,	/* highaddr */
3353 			       NULL, NULL,		/* filter, filterarg */
3354 			       EM_TSO_SIZE,		/* maxsize */
3355 			       EM_MAX_SCATTER,		/* nsegments */
3356 			       PAGE_SIZE,		/* maxsegsize */
3357 			       0,			/* flags */
3358 			       NULL,			/* lockfunc */
3359 			       NULL,			/* lockfuncarg */
3360 			       &txr->txtag))) {
3361 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3362 		goto fail;
3363 	}
3364 
3365 	if (!(txr->tx_buffers =
3366 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3367 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3368 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3369 		error = ENOMEM;
3370 		goto fail;
3371 	}
3372 
3373 	/* Create the descriptor buffer dma maps */
3374 	txbuf = txr->tx_buffers;
3375 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3376 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3377 		if (error != 0) {
3378 			device_printf(dev, "Unable to create TX DMA map\n");
3379 			goto fail;
3380 		}
3381 	}
3382 
3383 	return (0);
3384 fail:
3385 	/* We free all, it handles case where we are in the middle */
3386 	em_free_transmit_structures(adapter);
3387 	return (error);
3388 }
3389 
3390 /*********************************************************************
3391  *
3392  *  Initialize a transmit ring.
3393  *
3394  **********************************************************************/
3395 static void
3396 em_setup_transmit_ring(struct tx_ring *txr)
3397 {
3398 	struct adapter *adapter = txr->adapter;
3399 	struct em_buffer *txbuf;
3400 	int i;
3401 #ifdef DEV_NETMAP
3402 	struct netmap_slot *slot;
3403 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3404 #endif /* DEV_NETMAP */
3405 
3406 	/* Clear the old descriptor contents */
3407 	EM_TX_LOCK(txr);
3408 #ifdef DEV_NETMAP
3409 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3410 #endif /* DEV_NETMAP */
3411 
3412 	bzero((void *)txr->tx_base,
3413 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3414 	/* Reset indices */
3415 	txr->next_avail_desc = 0;
3416 	txr->next_to_clean = 0;
3417 
3418 	/* Free any existing tx buffers. */
3419 	txbuf = txr->tx_buffers;
3420 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3421 		if (txbuf->m_head != NULL) {
3422 			bus_dmamap_sync(txr->txtag, txbuf->map,
3423 			    BUS_DMASYNC_POSTWRITE);
3424 			bus_dmamap_unload(txr->txtag, txbuf->map);
3425 			m_freem(txbuf->m_head);
3426 			txbuf->m_head = NULL;
3427 		}
3428 #ifdef DEV_NETMAP
3429 		if (slot) {
3430 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3431 			uint64_t paddr;
3432 			void *addr;
3433 
3434 			addr = PNMB(na, slot + si, &paddr);
3435 			txr->tx_base[i].buffer_addr = htole64(paddr);
3436 			/* reload the map for netmap mode */
3437 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3438 		}
3439 #endif /* DEV_NETMAP */
3440 
3441 		/* clear the watch index */
3442 		txbuf->next_eop = -1;
3443 	}
3444 
3445 	/* Set number of descriptors available */
3446 	txr->tx_avail = adapter->num_tx_desc;
3447 	txr->busy = EM_TX_IDLE;
3448 
3449 	/* Clear checksum offload context. */
3450 	txr->last_hw_offload = 0;
3451 	txr->last_hw_ipcss = 0;
3452 	txr->last_hw_ipcso = 0;
3453 	txr->last_hw_tucss = 0;
3454 	txr->last_hw_tucso = 0;
3455 
3456 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3457 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3458 	EM_TX_UNLOCK(txr);
3459 }
3460 
3461 /*********************************************************************
3462  *
3463  *  Initialize all transmit rings.
3464  *
3465  **********************************************************************/
3466 static void
3467 em_setup_transmit_structures(struct adapter *adapter)
3468 {
3469 	struct tx_ring *txr = adapter->tx_rings;
3470 
3471 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3472 		em_setup_transmit_ring(txr);
3473 
3474 	return;
3475 }
3476 
3477 /*********************************************************************
3478  *
3479  *  Enable transmit unit.
3480  *
3481  **********************************************************************/
3482 static void
3483 em_initialize_transmit_unit(struct adapter *adapter)
3484 {
3485 	struct tx_ring	*txr = adapter->tx_rings;
3486 	struct e1000_hw	*hw = &adapter->hw;
3487 	u32	tctl, txdctl = 0, tarc, tipg = 0;
3488 
3489 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3490 
3491 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3492 		u64 bus_addr = txr->txdma.dma_paddr;
3493 		/* Base and Len of TX Ring */
3494 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3495 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3496 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3497 	    	    (u32)(bus_addr >> 32));
3498 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3499 	    	    (u32)bus_addr);
3500 		/* Init the HEAD/TAIL indices */
3501 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3502 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3503 
3504 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3505 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3506 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3507 
3508 		txr->busy = EM_TX_IDLE;
3509 		txdctl = 0; /* clear txdctl */
3510 		txdctl |= 0x1f; /* PTHRESH */
3511 		txdctl |= 1 << 8; /* HTHRESH */
3512 		txdctl |= 1 << 16; /* WTHRESH */
3513 		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3514 		txdctl |= E1000_TXDCTL_GRAN;
3515 		txdctl |= 1 << 25; /* LWTHRESH */
3516 
3517 		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
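		/*
		 * Worked example: the value written above is
		 * 0x1f | (1 << 8) | (1 << 16) | (1 << 22) | (1 << 25)
		 * plus E1000_TXDCTL_GRAN; assuming GRAN is bit 24
		 * (thresholds counted in descriptors), txdctl ends up
		 * as 0x0341011f.
		 */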
3518 	}
3519 
3520 	/* Set the default values for the Tx Inter Packet Gap timer */
3521 	switch (adapter->hw.mac.type) {
3522 	case e1000_80003es2lan:
3523 		tipg = DEFAULT_82543_TIPG_IPGR1;
3524 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3525 		    E1000_TIPG_IPGR2_SHIFT;
3526 		break;
3527 	default:
3528 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3529 		    (adapter->hw.phy.media_type ==
3530 		    e1000_media_type_internal_serdes))
3531 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3532 		else
3533 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3534 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3535 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3536 	}
3537 
3538 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3539 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3540 
3541 	if (adapter->hw.mac.type >= e1000_82540)
3542 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3543 		    adapter->tx_abs_int_delay.value);
3544 
3545 	if ((adapter->hw.mac.type == e1000_82571) ||
3546 	    (adapter->hw.mac.type == e1000_82572)) {
3547 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3548 		tarc |= TARC_SPEED_MODE_BIT;
3549 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3550 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3551 		/* errata: program both queues to unweighted RR */
3552 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3553 		tarc |= 1;
3554 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3555 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3556 		tarc |= 1;
3557 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3558 	} else if (adapter->hw.mac.type == e1000_82574) {
3559 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3560 		tarc |= TARC_ERRATA_BIT;
3561 		if (adapter->num_queues > 1) {
3562 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3563 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3564 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3565 		} else
3566 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3567 	}
3568 
3569 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3570 	if (adapter->tx_int_delay.value > 0)
3571 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3572 
3573 	/* Program the Transmit Control Register */
3574 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3575 	tctl &= ~E1000_TCTL_CT;
3576 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3577 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3578 
3579 	if (adapter->hw.mac.type >= e1000_82571)
3580 		tctl |= E1000_TCTL_MULR;
3581 
3582 	/* This write will effectively turn on the transmit unit. */
3583 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3584 
3585 }
3586 
3587 
3588 /*********************************************************************
3589  *
3590  *  Free all transmit rings.
3591  *
3592  **********************************************************************/
3593 static void
3594 em_free_transmit_structures(struct adapter *adapter)
3595 {
3596 	struct tx_ring *txr = adapter->tx_rings;
3597 
3598 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3599 		EM_TX_LOCK(txr);
3600 		em_free_transmit_buffers(txr);
3601 		em_dma_free(adapter, &txr->txdma);
3602 		EM_TX_UNLOCK(txr);
3603 		EM_TX_LOCK_DESTROY(txr);
3604 	}
3605 
3606 	free(adapter->tx_rings, M_DEVBUF);
3607 }
3608 
3609 /*********************************************************************
3610  *
3611  *  Free transmit ring related data structures.
3612  *
3613  **********************************************************************/
3614 static void
3615 em_free_transmit_buffers(struct tx_ring *txr)
3616 {
3617 	struct adapter		*adapter = txr->adapter;
3618 	struct em_buffer	*txbuf;
3619 
3620 	INIT_DEBUGOUT("free_transmit_ring: begin");
3621 
3622 	if (txr->tx_buffers == NULL)
3623 		return;
3624 
3625 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3626 		txbuf = &txr->tx_buffers[i];
3627 		if (txbuf->m_head != NULL) {
3628 			bus_dmamap_sync(txr->txtag, txbuf->map,
3629 			    BUS_DMASYNC_POSTWRITE);
3630 			bus_dmamap_unload(txr->txtag,
3631 			    txbuf->map);
3632 			m_freem(txbuf->m_head);
3633 			txbuf->m_head = NULL;
3634 			if (txbuf->map != NULL) {
3635 				bus_dmamap_destroy(txr->txtag,
3636 				    txbuf->map);
3637 				txbuf->map = NULL;
3638 			}
3639 		} else if (txbuf->map != NULL) {
3640 			bus_dmamap_unload(txr->txtag,
3641 			    txbuf->map);
3642 			bus_dmamap_destroy(txr->txtag,
3643 			    txbuf->map);
3644 			txbuf->map = NULL;
3645 		}
3646 	}
3647 #if __FreeBSD_version >= 800000
3648 	if (txr->br != NULL)
3649 		buf_ring_free(txr->br, M_DEVBUF);
3650 #endif
3651 	if (txr->tx_buffers != NULL) {
3652 		free(txr->tx_buffers, M_DEVBUF);
3653 		txr->tx_buffers = NULL;
3654 	}
3655 	if (txr->txtag != NULL) {
3656 		bus_dma_tag_destroy(txr->txtag);
3657 		txr->txtag = NULL;
3658 	}
3659 	return;
3660 }
3661 
3662 
3663 /*********************************************************************
3664  *  The offload context is protocol specific (TCP/UDP) and thus
3665  *  only needs to be set when the protocol changes. A context
3666  *  change can be a performance detriment, however, and it might
3667  *  be better just disabled. The reason lies in the way the
3668  *  controller supports pipelined requests from the Tx data DMA.
3669  *  Up to four requests can be pipelined, and they may belong to
3670  *  the same packet or to multiple packets. However, all requests
3671  *  for one packet are issued before a request is issued for a
3672  *  subsequent packet, and if a request for the next packet
3673  *  requires a context change, that request is stalled until the
3674  *  previous request completes. Setting up a new context thus
3675  *  effectively disables pipelined Tx data DMA, which in turn
3676  *  greatly slows down performance when sending small-sized
3677  *  frames.
3678  **********************************************************************/
3679 static void
3680 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3681     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3682 {
3683 	struct adapter			*adapter = txr->adapter;
3684 	struct e1000_context_desc	*TXD = NULL;
3685 	struct em_buffer		*tx_buffer;
3686 	int				cur, hdr_len;
3687 	u32				cmd = 0;
3688 	u16				offload = 0;
3689 	u8				ipcso, ipcss, tucso, tucss;
3690 
3691 	ipcss = ipcso = tucss = tucso = 0;
3692 	hdr_len = ip_off + (ip->ip_hl << 2);
3693 	cur = txr->next_avail_desc;
3694 
3695 	/* Setup of IP header checksum. */
3696 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3697 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3698 		offload |= CSUM_IP;
3699 		ipcss = ip_off;
3700 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3701 		/*
3702 		 * Start offset for header checksum calculation.
3703 		 * End offset for header checksum calculation.
3704 		 * Offset of place to put the checksum.
3705 		 */
3706 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3707 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3708 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3709 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3710 		cmd |= E1000_TXD_CMD_IP;
3711 	}
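	/*
	 * Example (untagged Ethernet, minimal IP header): ip_off = 14,
	 * so ipcss = 14, ipcso = 14 + 10 = 24 (offsetof(struct ip,
	 * ip_sum) is 10) and hdr_len = 14 + 20 = 34.
	 */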
3712 
3713 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3714  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3715  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3716  		offload |= CSUM_TCP;
3717  		tucss = hdr_len;
3718  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3719  		/*
3720  		 * Setting up a new checksum offload context for every frame
3721  		 * takes a lot of processing time for the hardware. This also
3722  		 * reduces performance a lot for small-sized frames, so avoid
3723  		 * it if the driver can reuse a previously configured checksum
3724  		 * offload context.
3725  		 */
3726  		if (txr->last_hw_offload == offload) {
3727  			if (offload & CSUM_IP) {
3728  				if (txr->last_hw_ipcss == ipcss &&
3729  				    txr->last_hw_ipcso == ipcso &&
3730  				    txr->last_hw_tucss == tucss &&
3731  				    txr->last_hw_tucso == tucso)
3732  					return;
3733  			} else {
3734  				if (txr->last_hw_tucss == tucss &&
3735  				    txr->last_hw_tucso == tucso)
3736  					return;
3737  			}
3738   		}
3739  		txr->last_hw_offload = offload;
3740  		txr->last_hw_tucss = tucss;
3741  		txr->last_hw_tucso = tucso;
3742  		/*
3743  		 * Start offset for payload checksum calculation.
3744  		 * End offset for payload checksum calculation.
3745  		 * Offset of place to put the checksum.
3746  		 */
3747 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3748  		TXD->upper_setup.tcp_fields.tucss = tucss;
3749  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3750  		TXD->upper_setup.tcp_fields.tucso = tucso;
3751  		cmd |= E1000_TXD_CMD_TCP;
3752  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3753  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3754  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3755  		tucss = hdr_len;
3756  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3757  		/*
3758  		 * Setting up a new checksum offload context for every frame
3759  		 * takes a lot of processing time for the hardware. This also
3760  		 * reduces performance a lot for small-sized frames, so avoid
3761  		 * it if the driver can reuse a previously configured checksum
3762  		 * offload context.
3763  		 */
3764  		if (txr->last_hw_offload == offload) {
3765  			if (offload & CSUM_IP) {
3766  				if (txr->last_hw_ipcss == ipcss &&
3767  				    txr->last_hw_ipcso == ipcso &&
3768  				    txr->last_hw_tucss == tucss &&
3769  				    txr->last_hw_tucso == tucso)
3770  					return;
3771  			} else {
3772  				if (txr->last_hw_tucss == tucss &&
3773  				    txr->last_hw_tucso == tucso)
3774  					return;
3775  			}
3776  		}
3777  		txr->last_hw_offload = offload;
3778  		txr->last_hw_tucss = tucss;
3779  		txr->last_hw_tucso = tucso;
3780  		/*
3781  		 * Start offset for payload checksum calculation.
3782  		 * End offset for payload checksum calculation.
3783  		 * Offset of place to put the checksum.
3784  		 */
3785 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3786  		TXD->upper_setup.tcp_fields.tucss = tucss;
3787  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3788  		TXD->upper_setup.tcp_fields.tucso = tucso;
3789   	}
3790 
3791  	if (offload & CSUM_IP) {
3792  		txr->last_hw_ipcss = ipcss;
3793  		txr->last_hw_ipcso = ipcso;
3794   	}
3795 
3796 	TXD->tcp_seg_setup.data = htole32(0);
3797 	TXD->cmd_and_length =
3798 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3799 	tx_buffer = &txr->tx_buffers[cur];
3800 	tx_buffer->m_head = NULL;
3801 	tx_buffer->next_eop = -1;
3802 
3803 	if (++cur == adapter->num_tx_desc)
3804 		cur = 0;
3805 
3806 	txr->tx_avail--;
3807 	txr->next_avail_desc = cur;
3808 }
3809 
3810 
3811 /**********************************************************************
3812  *
3813  *  Setup work for hardware segmentation offload (TSO)
3814  *
3815  **********************************************************************/
3816 static void
3817 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3818     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3819 {
3820 	struct adapter			*adapter = txr->adapter;
3821 	struct e1000_context_desc	*TXD;
3822 	struct em_buffer		*tx_buffer;
3823 	int cur, hdr_len;
3824 
3825 	/*
3826 	 * In theory we could reuse the same TSO context if and only
3827 	 * if the frame is the same type (IP/TCP) and has the same
3828 	 * MSS. However, checking whether a frame has the same IP/TCP
3829 	 * structure is hard, so just ignore that and always establish
3830 	 * a new TSO context.
3831 	 */
3832 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
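	/*
	 * Example: for an untagged frame with minimal IP and TCP
	 * headers, hdr_len = 14 + 20 + 20 = 54 bytes.
	 */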
3833 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3834 		      E1000_TXD_DTYP_D |	/* Data descr type */
3835 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3836 
3837 	/* IP and/or TCP header checksum calculation and insertion. */
3838 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3839 
3840 	cur = txr->next_avail_desc;
3841 	tx_buffer = &txr->tx_buffers[cur];
3842 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3843 
3844 	/*
3845 	 * Start offset for header checksum calculation.
3846 	 * End offset for header checksum calculation.
3847 	 * Offset of place to put the checksum.
3848 	 */
3849 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3850 	TXD->lower_setup.ip_fields.ipcse =
3851 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3852 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3853 	/*
3854 	 * Start offset for payload checksum calculation.
3855 	 * End offset for payload checksum calculation.
3856 	 * Offset of place to put the checksum.
3857 	 */
3858 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3859 	TXD->upper_setup.tcp_fields.tucse = 0;
3860 	TXD->upper_setup.tcp_fields.tucso =
3861 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3862 	/*
3863 	 * Payload size per packet w/o any headers.
3864 	 * Length of all headers up to payload.
3865 	 */
3866 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3867 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3868 
3869 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3870 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3871 				E1000_TXD_CMD_TSE |	/* TSE context */
3872 				E1000_TXD_CMD_IP |	/* Do IP csum */
3873 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3874 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3875 
3876 	tx_buffer->m_head = NULL;
3877 	tx_buffer->next_eop = -1;
3878 
3879 	if (++cur == adapter->num_tx_desc)
3880 		cur = 0;
3881 
3882 	txr->tx_avail--;
3883 	txr->next_avail_desc = cur;
3884 	txr->tx_tso = TRUE;
3885 }
3886 
3887 
3888 /**********************************************************************
3889  *
3890  *  Examine each tx_buffer in the used queue. If the hardware is done
3891  *  processing the packet then free associated resources. The
3892  *  tx_buffer is put back on the free queue.
3893  *
3894  **********************************************************************/
3895 static void
3896 em_txeof(struct tx_ring *txr)
3897 {
3898 	struct adapter	*adapter = txr->adapter;
3899 	int first, last, done, processed;
3900 	struct em_buffer *tx_buffer;
3901 	struct e1000_tx_desc *tx_desc, *eop_desc;
3902 	if_t ifp = adapter->ifp;
3903 
3904 	EM_TX_LOCK_ASSERT(txr);
3905 #ifdef DEV_NETMAP
3906 	if (netmap_tx_irq(ifp, txr->me))
3907 		return;
3908 #endif /* DEV_NETMAP */
3909 
3910 	/* No work, make sure hang detection is disabled */
3911 	if (txr->tx_avail == adapter->num_tx_desc) {
3912 		txr->busy = EM_TX_IDLE;
3913 		return;
3914 	}
3915 
3916 	processed = 0;
3917 	first = txr->next_to_clean;
3918 	tx_desc = &txr->tx_base[first];
3919 	tx_buffer = &txr->tx_buffers[first];
3920 	last = tx_buffer->next_eop;
3921 	eop_desc = &txr->tx_base[last];
3922 
3923 	/*
3924 	 * Get the index of the first descriptor AFTER
3925 	 * the EOP of the first packet, so that we can
3926 	 * do a simple comparison in the inner while
3927 	 * loop below.
3928 	 */
3929 	if (++last == adapter->num_tx_desc)
3930 		last = 0;
3931 	done = last;
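	/*
	 * Example: a packet occupying descriptors 5..7 has
	 * next_eop = 7, so done becomes 8 and the inner loop
	 * below cleans descriptors 5, 6 and 7.
	 */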
3932 
3933 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3934 	    BUS_DMASYNC_POSTREAD);
3935 
3936 	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3937 		/* We clean the range of the packet */
3938 		while (first != done) {
3939 			tx_desc->upper.data = 0;
3940 			tx_desc->lower.data = 0;
3941 			tx_desc->buffer_addr = 0;
3942 			++txr->tx_avail;
3943 			++processed;
3944 
3945 			if (tx_buffer->m_head) {
3946 				bus_dmamap_sync(txr->txtag,
3947 				    tx_buffer->map,
3948 				    BUS_DMASYNC_POSTWRITE);
3949 				bus_dmamap_unload(txr->txtag,
3950 				    tx_buffer->map);
3951 				m_freem(tx_buffer->m_head);
3952 				tx_buffer->m_head = NULL;
3953 			}
3954 			tx_buffer->next_eop = -1;
3955 
3956 			if (++first == adapter->num_tx_desc)
3957 				first = 0;
3958 
3959 			tx_buffer = &txr->tx_buffers[first];
3960 			tx_desc = &txr->tx_base[first];
3961 		}
3962 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
3963 		/* See if we can continue to the next packet */
3964 		last = tx_buffer->next_eop;
3965 		if (last != -1) {
3966         		eop_desc = &txr->tx_base[last];
3967 			/* Get new done point */
3968 			if (++last == adapter->num_tx_desc) last = 0;
3969 			done = last;
3970 		} else
3971 			break;
3972 	}
3973 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3974 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3975 
3976 	txr->next_to_clean = first;
3977 
3978 	/*
3979 	** Hang detection: we know there's work outstanding
3980 	** or the early return above would have been taken, so
3981 	** no descriptor processed here indicates a potential hang.
3982 	** The local timer will examine this and do a reset if needed.
3983 	*/
3984 	if (processed == 0) {
3985 		if (txr->busy != EM_TX_HUNG)
3986 			++txr->busy;
3987 	} else /* At least one descriptor was cleaned */
3988 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
3989 
3990 	/*
3991 	 * If we have a minimum free, clear IFF_DRV_OACTIVE
3992 	 * to tell the stack that it is OK to send packets.
3993 	 * Notice that all writes of OACTIVE happen under the
3994 	 * TX lock which, with a single queue, guarantees
3995 	 * sanity.
3996 	 */
3997 	if (txr->tx_avail >= EM_MAX_SCATTER) {
3998 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
3999 	}
4000 
4001 	/* Disable hang detection if all clean */
4002 	if (txr->tx_avail == adapter->num_tx_desc)
4003 		txr->busy = EM_TX_IDLE;
4004 }
4005 
4006 
4007 /*********************************************************************
4008  *
4009  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4010  *
4011  **********************************************************************/
4012 static void
4013 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4014 {
4015 	struct adapter		*adapter = rxr->adapter;
4016 	struct mbuf		*m;
4017 	bus_dma_segment_t	segs[1];
4018 	struct em_buffer	*rxbuf;
4019 	int			i, j, error, nsegs;
4020 	bool			cleaned = FALSE;
4021 
4022 	i = j = rxr->next_to_refresh;
4023 	/*
4024 	** Get one descriptor beyond
4025 	** our work mark to control
4026 	** the loop.
4027 	*/
4028 	if (++j == adapter->num_rx_desc)
4029 		j = 0;
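	/*
	 * Example: with next_to_refresh = 255 and 256 descriptors,
	 * i starts at 255 while j wraps to 0; refreshing then
	 * proceeds until j catches up with the caller-supplied limit.
	 */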
4030 
4031 	while (j != limit) {
4032 		rxbuf = &rxr->rx_buffers[i];
4033 		if (rxbuf->m_head == NULL) {
4034 			m = m_getjcl(M_NOWAIT, MT_DATA,
4035 			    M_PKTHDR, adapter->rx_mbuf_sz);
4036 			/*
4037 			** If we have a temporary resource shortage
4038 			** that causes a failure, just abort the refresh
4039 			** for now; we will return to this point when
4040 			** reinvoked from em_rxeof.
4041 			*/
4042 			if (m == NULL)
4043 				goto update;
4044 		} else
4045 			m = rxbuf->m_head;
4046 
4047 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4048 		m->m_flags |= M_PKTHDR;
4049 		m->m_data = m->m_ext.ext_buf;
4050 
4051 		/* Use bus_dma machinery to set up the memory mapping */
4052 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4053 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
4054 		if (error != 0) {
4055 			printf("Refresh mbufs: hdr dmamap load"
4056 			    " failure - %d\n", error);
4057 			m_free(m);
4058 			rxbuf->m_head = NULL;
4059 			goto update;
4060 		}
4061 		rxbuf->m_head = m;
4062 		bus_dmamap_sync(rxr->rxtag,
4063 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4064 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
4065 		cleaned = TRUE;
4066 
4067 		i = j; /* Next is precalculated for us */
4068 		rxr->next_to_refresh = i;
4069 		/* Calculate next controlling index */
4070 		if (++j == adapter->num_rx_desc)
4071 			j = 0;
4072 	}
4073 update:
4074 	/*
4075 	** Update the tail pointer only if we refreshed,
4076 	** and only as far as we have refreshed.
4077 	*/
4078 	if (cleaned)
4079 		E1000_WRITE_REG(&adapter->hw,
4080 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4081 
4082 	return;
4083 }
4084 
4085 
4086 /*********************************************************************
4087  *
4088  *  Allocate memory for rx_buffer structures. Since we use one
4089  *  rx_buffer per received packet, the maximum number of rx_buffer's
4090  *  rx_buffer per received packet, the maximum number of rx_buffers
4091  *  that we've allocated.
4092  *
4093  **********************************************************************/
4094 static int
4095 em_allocate_receive_buffers(struct rx_ring *rxr)
4096 {
4097 	struct adapter		*adapter = rxr->adapter;
4098 	device_t		dev = adapter->dev;
4099 	struct em_buffer	*rxbuf;
4100 	int			error;
4101 
4102 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4103 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4104 	if (rxr->rx_buffers == NULL) {
4105 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4106 		return (ENOMEM);
4107 	}
4108 
4109 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4110 				1, 0,			/* alignment, bounds */
4111 				BUS_SPACE_MAXADDR,	/* lowaddr */
4112 				BUS_SPACE_MAXADDR,	/* highaddr */
4113 				NULL, NULL,		/* filter, filterarg */
4114 				MJUM9BYTES,		/* maxsize */
4115 				1,			/* nsegments */
4116 				MJUM9BYTES,		/* maxsegsize */
4117 				0,			/* flags */
4118 				NULL,			/* lockfunc */
4119 				NULL,			/* lockarg */
4120 				&rxr->rxtag);
4121 	if (error) {
4122 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4123 		    __func__, error);
4124 		goto fail;
4125 	}
4126 
4127 	rxbuf = rxr->rx_buffers;
4128 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4130 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4131 		if (error) {
4132 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4133 			    __func__, error);
4134 			goto fail;
4135 		}
4136 	}
4137 
4138 	return (0);
4139 
4140 fail:
4141 	em_free_receive_structures(adapter);
4142 	return (error);
4143 }
4144 
4145 
4146 /*********************************************************************
4147  *
4148  *  Initialize a receive ring and its buffers.
4149  *
4150  **********************************************************************/
4151 static int
4152 em_setup_receive_ring(struct rx_ring *rxr)
4153 {
4154 	struct	adapter 	*adapter = rxr->adapter;
4155 	struct em_buffer	*rxbuf;
4156 	bus_dma_segment_t	seg[1];
4157 	int			rsize, nsegs, error = 0;
4158 #ifdef DEV_NETMAP
4159 	struct netmap_slot *slot;
4160 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4161 #endif
4162 
4163 
4164 	/* Clear the ring contents */
4165 	EM_RX_LOCK(rxr);
4166 	rsize = roundup2(adapter->num_rx_desc *
4167 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4168 	bzero((void *)rxr->rx_base, rsize);
4169 #ifdef DEV_NETMAP
4170 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4171 #endif
4172 
4173 	/*
4174 	** Free current RX buffer structs and their mbufs
4175 	*/
4176 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4177 		rxbuf = &rxr->rx_buffers[i];
4178 		if (rxbuf->m_head != NULL) {
4179 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4180 			    BUS_DMASYNC_POSTREAD);
4181 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4182 			m_freem(rxbuf->m_head);
4183 			rxbuf->m_head = NULL; /* mark as freed */
4184 		}
4185 	}
4186 
4187 	/* Now replenish the mbufs */
4188 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4189 		rxbuf = &rxr->rx_buffers[j];
4190 #ifdef DEV_NETMAP
4191 		if (slot) {
4192 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4193 			uint64_t paddr;
4194 			void *addr;
4195 
4196 			addr = PNMB(na, slot + si, &paddr);
4197 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4198 			/* Update descriptor */
4199 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4200 			continue;
4201 		}
4202 #endif /* DEV_NETMAP */
4203 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4204 		    M_PKTHDR, adapter->rx_mbuf_sz);
4205 		if (rxbuf->m_head == NULL) {
4206 			error = ENOBUFS;
4207 			goto fail;
4208 		}
4209 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4210 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4211 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4212 
4213 		/* Get the memory mapping */
4214 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4215 		    rxbuf->map, rxbuf->m_head, seg,
4216 		    &nsegs, BUS_DMA_NOWAIT);
4217 		if (error != 0) {
4218 			m_freem(rxbuf->m_head);
4219 			rxbuf->m_head = NULL;
4220 			goto fail;
4221 		}
4222 		bus_dmamap_sync(rxr->rxtag,
4223 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4224 
4225 		/* Update descriptor */
4226 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4227 	}
4228 	rxr->next_to_check = 0;
4229 	rxr->next_to_refresh = 0;
4230 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4231 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4232 
4233 fail:
4234 	EM_RX_UNLOCK(rxr);
4235 	return (error);
4236 }
4237 
4238 /*********************************************************************
4239  *
4240  *  Initialize all receive rings.
4241  *
4242  **********************************************************************/
4243 static int
4244 em_setup_receive_structures(struct adapter *adapter)
4245 {
4246 	struct rx_ring *rxr = adapter->rx_rings;
4247 	int q;
4248 
4249 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4250 		if (em_setup_receive_ring(rxr))
4251 			goto fail;
4252 
4253 	return (0);
4254 fail:
4255 	/*
4256 	 * Free RX buffers allocated so far; we will only handle
4257 	 * the rings that completed, since the failing ring will have
4258 	 * cleaned up after itself. 'q' failed, so it's the terminus.
4259 	 */
4260 	for (int i = 0; i < q; ++i) {
4261 		rxr = &adapter->rx_rings[i];
4262 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4263 			struct em_buffer *rxbuf;
4264 			rxbuf = &rxr->rx_buffers[n];
4265 			if (rxbuf->m_head != NULL) {
4266 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4267 				    BUS_DMASYNC_POSTREAD);
4268 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4269 				m_freem(rxbuf->m_head);
4270 				rxbuf->m_head = NULL;
4271 			}
4272 		}
4273 		rxr->next_to_check = 0;
4274 		rxr->next_to_refresh = 0;
4275 	}
4276 
4277 	return (ENOBUFS);
4278 }
4279 
4280 /*********************************************************************
4281  *
4282  *  Free all receive rings.
4283  *
4284  **********************************************************************/
4285 static void
4286 em_free_receive_structures(struct adapter *adapter)
4287 {
4288 	struct rx_ring *rxr = adapter->rx_rings;
4289 
4290 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4291 		em_free_receive_buffers(rxr);
4292 		/* Free the ring memory as well */
4293 		em_dma_free(adapter, &rxr->rxdma);
4294 		EM_RX_LOCK_DESTROY(rxr);
4295 	}
4296 
4297 	free(adapter->rx_rings, M_DEVBUF);
4298 }
4299 
4300 
4301 /*********************************************************************
4302  *
4303  *  Free receive ring data structures
4304  *
4305  **********************************************************************/
4306 static void
4307 em_free_receive_buffers(struct rx_ring *rxr)
4308 {
4309 	struct adapter		*adapter = rxr->adapter;
4310 	struct em_buffer	*rxbuf = NULL;
4311 
4312 	INIT_DEBUGOUT("free_receive_buffers: begin");
4313 
4314 	if (rxr->rx_buffers != NULL) {
4315 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4316 			rxbuf = &rxr->rx_buffers[i];
4317 			if (rxbuf->map != NULL) {
4318 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4319 				    BUS_DMASYNC_POSTREAD);
4320 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4321 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4322 			}
4323 			if (rxbuf->m_head != NULL) {
4324 				m_freem(rxbuf->m_head);
4325 				rxbuf->m_head = NULL;
4326 			}
4327 		}
4328 		free(rxr->rx_buffers, M_DEVBUF);
4329 		rxr->rx_buffers = NULL;
4330 		rxr->next_to_check = 0;
4331 		rxr->next_to_refresh = 0;
4332 	}
4333 
4334 	if (rxr->rxtag != NULL) {
4335 		bus_dma_tag_destroy(rxr->rxtag);
4336 		rxr->rxtag = NULL;
4337 	}
4338 
4339 	return;
4340 }
4341 
4342 
4343 /*********************************************************************
4344  *
4345  *  Enable receive unit.
4346  *
4347  **********************************************************************/
4348 
4349 static void
4350 em_initialize_receive_unit(struct adapter *adapter)
4351 {
4352 	struct rx_ring	*rxr = adapter->rx_rings;
4353 	if_t ifp = adapter->ifp;
4354 	struct e1000_hw	*hw = &adapter->hw;
4355 	u64	bus_addr;
4356 	u32	rctl, rxcsum;
4357 
4358 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4359 
4360 	/*
4361 	 * Make sure receives are disabled while setting
4362 	 * up the descriptor ring
4363 	 */
4364 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4365 	/* Do not disable if ever enabled on this hardware */
4366 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4367 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4368 
4369 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4370 	    adapter->rx_abs_int_delay.value);
4371 
4372 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4373 	    adapter->rx_int_delay.value);
4374 	/*
4375 	 * Set the interrupt throttling rate. Value is calculated
4376 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4377 	 */
4378 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
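	/*
	 * Worked example, assuming MAX_INTS_PER_SEC is 8000: the
	 * interrupt interval is 1/8000 s = 125000 ns, and
	 * 125000 / 256 ~= 488 (0x1e8) in the 256 ns units of ITR.
	 */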
4379 
4380 	/*
4381 	** When using MSIX interrupts we need to throttle
4382 	** using the EITR register (82574 only)
4383 	*/
4384 	if (hw->mac.type == e1000_82574) {
4385 		u32 rfctl;
4386 		for (int i = 0; i < 4; i++)
4387 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4388 			    DEFAULT_ITR);
4389 		/* Disable accelerated acknowledge */
4390 		rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4391 		rfctl |= E1000_RFCTL_ACK_DIS;
4392 		E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4393 	}
4394 
4395 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4396 	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4397 #ifdef EM_MULTIQUEUE
4398 		rxcsum |= E1000_RXCSUM_TUOFL |
4399 			  E1000_RXCSUM_IPOFL |
4400 			  E1000_RXCSUM_PCSD;
4401 #else
4402 		rxcsum |= E1000_RXCSUM_TUOFL;
4403 #endif
4404 	} else
4405 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4406 
4407 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4408 
4409 #ifdef EM_MULTIQUEUE
4410 	if (adapter->num_queues > 1) {
4411 		uint32_t rss_key[10];
4412 		uint32_t reta;
4413 		int i;
4414 
4415 		/*
4416 		 * Configure the RSS key
4417 		 */
4418 		arc4rand(rss_key, sizeof(rss_key), 0);
4419 		for (i = 0; i < 10; ++i)
4420 			E1000_WRITE_REG_ARRAY(hw,E1000_RSSRK(0), i, rss_key[i]);
4421 
4422 		/*
4423 		 * Configure the RSS redirect table in the following fashion:
4424 		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4425 		 */
4426 		reta = 0;
4427 		for (i = 0; i < 4; ++i) {
4428 			uint32_t q;
4429 			q = (i % adapter->num_queues) << 7;
4430 			reta |= q << (8 * i);
4431 		}
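		/*
		 * Example: with num_queues = 2 the four bytes built
		 * above are 0x00, 0x80, 0x00, 0x80 (queue index in
		 * bit 7 of each byte), so reta = 0x80008000 is
		 * replicated across all 32 RETA registers.
		 */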
4432 		for (i = 0; i < 32; ++i)
4433 			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4434 
4435 		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4436 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4437 				E1000_MRQC_RSS_FIELD_IPV4 |
4438 				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4439 				E1000_MRQC_RSS_FIELD_IPV6_EX |
4440 				E1000_MRQC_RSS_FIELD_IPV6 |
4441 				E1000_MRQC_RSS_FIELD_IPV6_TCP);
4442 	}
4443 #endif
4444 	/*
4445 	** XXX TEMPORARY WORKAROUND: on some systems with the 82573,
4446 	** such as the Lenovo X60, long latencies are observed. This
4447 	** change eliminates the problem, but since having positive
4448 	** values in RDTR is a known source of problems on other
4449 	** platforms, another solution is being sought.
4450 	*/
4451 	if (hw->mac.type == e1000_82573)
4452 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4453 
4454 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4455 		/* Setup the Base and Length of the Rx Descriptor Ring */
4456 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4457 
4458 		bus_addr = rxr->rxdma.dma_paddr;
4459 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4460 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4461 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4462 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4463 		/* Setup the Head and Tail Descriptor Pointers */
4464 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4465 #ifdef DEV_NETMAP
4466 		/*
4467 		 * An init() while a netmap client is active must
4468 		 * preserve the RX buffers passed to userspace.
4469 		 */
4470 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4471 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4472 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4473 		}
4474 #endif /* DEV_NETMAP */
4475 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4476 	}
4477 
4478 	/*
4479 	 * Set PTHRESH for improved jumbo performance
4480 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4481 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4482 	 * Only write to RXDCTL(1) if there is a need for different
4483 	 * settings.
4484 	 */
4485 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4486 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4487 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4488 	    (if_getmtu(ifp) > ETHERMTU)) {
4489 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4490 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4491 	} else if ((adapter->hw.mac.type == e1000_82574) &&
4492 		  (if_getmtu(ifp) > ETHERMTU)) {
4493 		for (int i = 0; i < adapter->num_queues; i++) {
4494 			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4495 
4496 			rxdctl |= 0x20; /* PTHRESH */
4497 			rxdctl |= 4 << 8; /* HTHRESH */
4498 			rxdctl |= 4 << 16; /* WTHRESH */
4499 			rxdctl |= 1 << 24; /* Switch to granularity */
4500 			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4501 		}
4502 	}
4503 
4504 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4505 		if (if_getmtu(ifp) > ETHERMTU)
4506 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4507 		else
4508 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4509 	}
4510 
4511 	/* Setup the Receive Control Register */
4512 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4513 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4514 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4515 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4516 
4517 	/* Strip the CRC */
4518 	rctl |= E1000_RCTL_SECRC;
4519 
4520 	/* Make sure VLAN filters are off */
4521 	rctl &= ~E1000_RCTL_VFE;
4522 	rctl &= ~E1000_RCTL_SBP;
4523 
4524 	if (adapter->rx_mbuf_sz == MCLBYTES)
4525 		rctl |= E1000_RCTL_SZ_2048;
4526 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4527 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4528 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4529 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
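	/*
	 * The mapping: 2K clusters (MCLBYTES) select SZ_2048;
	 * page-sized clusters (MJUMPAGESIZE, typically 4K) select
	 * SZ_4096 with BSEX; anything larger (e.g. MJUM9BYTES)
	 * selects SZ_8192 with BSEX, the largest size used here.
	 */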
4530 
4531 	if (if_getmtu(ifp) > ETHERMTU)
4532 		rctl |= E1000_RCTL_LPE;
4533 	else
4534 		rctl &= ~E1000_RCTL_LPE;
4535 
4536 	/* Write out the settings */
4537 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4538 
4539 	return;
4540 }
4541 
4542 
4543 /*********************************************************************
4544  *
4545  *  the mbufs in the descriptor ring and sends data which has
4546  *  been DMA'ed into host memory up to the stack.
4547  *  dma'ed into host memory to upper layer.
4548  *
4549  *  We loop at most count times if count is > 0, or until done if
4550  *  count < 0.
4551  *
4552  *  For polling we also now return the number of cleaned packets
4553  *********************************************************************/
4554 static bool
4555 em_rxeof(struct rx_ring *rxr, int count, int *done)
4556 {
4557 	struct adapter		*adapter = rxr->adapter;
4558 	if_t ifp = adapter->ifp;
4559 	struct mbuf		*mp, *sendmp;
4560 	u8			status = 0;
4561 	u16 			len;
4562 	int			i, processed, rxdone = 0;
4563 	bool			eop;
4564 	struct e1000_rx_desc	*cur;
4565 
4566 	EM_RX_LOCK(rxr);
4567 
4568 	/* Sync the ring */
4569 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4570 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4571 
4572 
4573 #ifdef DEV_NETMAP
4574 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4575 		EM_RX_UNLOCK(rxr);
4576 		return (FALSE);
4577 	}
4578 #endif /* DEV_NETMAP */
4579 
4580 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4581 
4582 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4583 			break;
4584 
4585 		cur = &rxr->rx_base[i];
4586 		status = cur->status;
4587 		mp = sendmp = NULL;
4588 
4589 		if ((status & E1000_RXD_STAT_DD) == 0)
4590 			break;
4591 
4592 		len = le16toh(cur->length);
4593 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4594 
4595 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4596 		    (rxr->discard == TRUE)) {
4597 			adapter->dropped_pkts++;
4598 			++rxr->rx_discarded;
4599 			if (!eop) /* Catch subsequent segs */
4600 				rxr->discard = TRUE;
4601 			else
4602 				rxr->discard = FALSE;
4603 			em_rx_discard(rxr, i);
4604 			goto next_desc;
4605 		}
4606 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4607 
4608 		/* Assign correct length to the current fragment */
4609 		mp = rxr->rx_buffers[i].m_head;
4610 		mp->m_len = len;
4611 
4612 		/* Trigger for refresh */
4613 		rxr->rx_buffers[i].m_head = NULL;
4614 
4615 		/* First segment? */
4616 		if (rxr->fmp == NULL) {
4617 			mp->m_pkthdr.len = len;
4618 			rxr->fmp = rxr->lmp = mp;
4619 		} else {
4620 			/* Chain mbufs together */
4621 			mp->m_flags &= ~M_PKTHDR;
4622 			rxr->lmp->m_next = mp;
4623 			rxr->lmp = mp;
4624 			rxr->fmp->m_pkthdr.len += len;
4625 		}
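		/*
		 * Example: a 5000-byte frame received into 2048-byte
		 * buffers spans three descriptors (2048 + 2048 + 904);
		 * only the last has EOP set, and fmp accumulates
		 * m_pkthdr.len = 5000 across the chain.
		 */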
4626 
4627 		if (eop) {
4628 			--count;
4629 			sendmp = rxr->fmp;
4630 			if_setrcvif(sendmp, ifp);
4631 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4632 			em_receive_checksum(cur, sendmp);
4633 #ifndef __NO_STRICT_ALIGNMENT
4634 			if (adapter->hw.mac.max_frame_size >
4635 			    (MCLBYTES - ETHER_ALIGN) &&
4636 			    em_fixup_rx(rxr) != 0)
4637 				goto skip;
4638 #endif
4639 			if (status & E1000_RXD_STAT_VP) {
4640 				if_setvtag(sendmp,
4641 				    le16toh(cur->special));
4642 				sendmp->m_flags |= M_VLANTAG;
4643 			}
4644 #ifndef __NO_STRICT_ALIGNMENT
4645 skip:
4646 #endif
4647 			rxr->fmp = rxr->lmp = NULL;
4648 		}
4649 next_desc:
4650 		/* Sync the ring */
4651 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4652 	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4653 
4654 		/* Zero out the receive descriptors status. */
4655 		cur->status = 0;
4656 		++rxdone;	/* cumulative for POLL */
4657 		++processed;
4658 
4659 		/* Advance our pointers to the next descriptor. */
4660 		if (++i == adapter->num_rx_desc)
4661 			i = 0;
4662 
4663 		/* Send to the stack */
4664 		if (sendmp != NULL) {
4665 			rxr->next_to_check = i;
4666 			EM_RX_UNLOCK(rxr);
4667 			if_input(ifp, sendmp);
4668 			EM_RX_LOCK(rxr);
4669 			i = rxr->next_to_check;
4670 		}
4671 
4672 		/* Only refresh mbufs every 8 descriptors */
4673 		if (processed == 8) {
4674 			em_refresh_mbufs(rxr, i);
4675 			processed = 0;
4676 		}
4677 	}
4678 
4679 	/* Catch any remaining refresh work */
4680 	if (e1000_rx_unrefreshed(rxr))
4681 		em_refresh_mbufs(rxr, i);
4682 
4683 	rxr->next_to_check = i;
4684 	if (done != NULL)
4685 		*done = rxdone;
4686 	EM_RX_UNLOCK(rxr);
4687 
4688 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4689 }
4690 
4691 static __inline void
4692 em_rx_discard(struct rx_ring *rxr, int i)
4693 {
4694 	struct em_buffer	*rbuf;
4695 
4696 	rbuf = &rxr->rx_buffers[i];
4697 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4698 
4699 	/* Free any previous pieces */
4700 	if (rxr->fmp != NULL) {
4701 		rxr->fmp->m_flags |= M_PKTHDR;
4702 		m_freem(rxr->fmp);
4703 		rxr->fmp = NULL;
4704 		rxr->lmp = NULL;
4705 	}
4706 	/*
4707 	** Free the buffer and allow em_refresh_mbufs()
4708 	** to clean up and recharge it.
4709 	*/
4710 	if (rbuf->m_head) {
4711 		m_free(rbuf->m_head);
4712 		rbuf->m_head = NULL;
4713 	}
4714 	return;
4715 }
4716 
4717 #ifndef __NO_STRICT_ALIGNMENT
4718 /*
4719  * When jumbo frames are enabled we should realign the entire payload on
4720  * architectures with strict alignment. This is a serious design mistake of
4721  * the 8254x, as it nullifies the benefit of DMA. The 8254x only allows RX
4722  * buffer sizes of 2048/4096/8192/16384; what we really want is
4723  * 2048 - ETHER_ALIGN, to align the payload. On architectures without strict
4724  * alignment restrictions the 8254x still performs unaligned memory accesses,
4725  * which reduce performance too. To avoid copying an entire frame to realign
4726  * it, we allocate a new mbuf and copy the Ethernet header into it. The new
4727  * mbuf is prepended onto the existing mbuf chain.
4728  *
4729  * Be aware, the best performance of the 8254x is achieved only when jumbo
4730  * frames are not used at all on architectures with strict alignment.
4731  */
4732 static int
4733 em_fixup_rx(struct rx_ring *rxr)
4734 {
4735 	struct adapter *adapter = rxr->adapter;
4736 	struct mbuf *m, *n;
4737 	int error;
4738 
4739 	error = 0;
4740 	m = rxr->fmp;
4741 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4742 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4743 		m->m_data += ETHER_HDR_LEN;
4744 	} else {
4745 		MGETHDR(n, M_NOWAIT, MT_DATA);
4746 		if (n != NULL) {
4747 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4748 			m->m_data += ETHER_HDR_LEN;
4749 			m->m_len -= ETHER_HDR_LEN;
4750 			n->m_len = ETHER_HDR_LEN;
4751 			M_MOVE_PKTHDR(n, m);
4752 			n->m_next = m;
4753 			rxr->fmp = n;
4754 		} else {
4755 			adapter->dropped_pkts++;
4756 			m_freem(rxr->fmp);
4757 			rxr->fmp = NULL;
4758 			error = ENOMEM;
4759 		}
4760 	}
4761 
4762 	return (error);
4763 }
4764 #endif
4765 
4766 /*********************************************************************
4767  *
4768  *  Verify that the hardware indicated that the checksum is valid.
4769  *  Inform the stack about the status of checksum so that stack
4770  *  doesn't spend time verifying the checksum.
4771  *
4772  *********************************************************************/
4773 static void
4774 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4775 {
4776 	mp->m_pkthdr.csum_flags = 0;
4777 
4778 	/* Ignore Checksum bit is set */
4779 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4780 		return;
4781 
4782 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4783 		return;
4784 
4785 	/* IP Checksum Good? */
4786 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4787 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4788 
4789 	/* TCP or UDP checksum */
4790 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4791 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4792 		mp->m_pkthdr.csum_data = htons(0xffff);
4793 	}
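	/*
	 * The 0xffff in csum_data, together with CSUM_PSEUDO_HDR,
	 * tells the stack the hardware verified the full TCP/UDP
	 * checksum including the pseudo-header.
	 */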
4794 }
4795 
4796 /*
4797  * This routine is run via a VLAN
4798  * config EVENT
4799  */
4800 static void
4801 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4802 {
4803 	struct adapter	*adapter = if_getsoftc(ifp);
4804 	u32		index, bit;
4805 
4806 	if ((void *)adapter != arg)	/* Not our event */
4807 		return;
4808 
4809 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4810 		return;
4811 
4812 	EM_CORE_LOCK(adapter);
4813 	index = (vtag >> 5) & 0x7F;
4814 	bit = vtag & 0x1F;
4815 	adapter->shadow_vfta[index] |= (1 << bit);
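	/*
	 * Example: vtag 1234 gives index = (1234 >> 5) & 0x7f = 38
	 * and bit = 1234 & 0x1f = 18, i.e. bit 18 of shadow_vfta[38].
	 */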
4816 	++adapter->num_vlans;
4817 	/* Re-init to load the changes */
4818 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4819 		em_init_locked(adapter);
4820 	EM_CORE_UNLOCK(adapter);
4821 }
4822 
4823 /*
4824  * This routine is run via a VLAN
4825  * unconfig EVENT
4826  */
4827 static void
4828 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4829 {
4830 	struct adapter	*adapter = if_getsoftc(ifp);
4831 	u32		index, bit;
4832 
4833 	if (adapter != arg)
4834 		return;
4835 
4836 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4837 		return;
4838 
4839 	EM_CORE_LOCK(adapter);
4840 	index = (vtag >> 5) & 0x7F;
4841 	bit = vtag & 0x1F;
4842 	adapter->shadow_vfta[index] &= ~(1 << bit);
4843 	--adapter->num_vlans;
4844 	/* Re-init to load the changes */
4845 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4846 		em_init_locked(adapter);
4847 	EM_CORE_UNLOCK(adapter);
4848 }
4849 
4850 static void
4851 em_setup_vlan_hw_support(struct adapter *adapter)
4852 {
4853 	struct e1000_hw *hw = &adapter->hw;
4854 	u32             reg;
4855 
4856 	/*
4857 	** We get here via init_locked, meaning a soft
4858 	** reset, which has already cleared the VFTA and
4859 	** other state; if no VLANs have been registered,
4860 	** there is nothing to do.
4861 	*/
4862 	if (adapter->num_vlans == 0)
4863 		return;
4864 
4865 	/*
4866 	** A soft reset zeroes out the VFTA, so
4867 	** we need to repopulate it now.
4868 	*/
4869 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4870 		if (adapter->shadow_vfta[i] != 0)
4871 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4872 			    i, adapter->shadow_vfta[i]);
4873 
4874 	reg = E1000_READ_REG(hw, E1000_CTRL);
4875 	reg |= E1000_CTRL_VME;
4876 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4877 
4878 	/* Enable the Filter Table */
4879 	reg = E1000_READ_REG(hw, E1000_RCTL);
4880 	reg &= ~E1000_RCTL_CFIEN;
4881 	reg |= E1000_RCTL_VFE;
4882 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4883 }
4884 
4885 static void
4886 em_enable_intr(struct adapter *adapter)
4887 {
4888 	struct e1000_hw *hw = &adapter->hw;
4889 	u32 ims_mask = IMS_ENABLE_MASK;
4890 
4891 	if (hw->mac.type == e1000_82574) {
4892 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4893 		ims_mask |= EM_MSIX_MASK;
4894 	}
4895 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4896 }
4897 
4898 static void
4899 em_disable_intr(struct adapter *adapter)
4900 {
4901 	struct e1000_hw *hw = &adapter->hw;
4902 
4903 	if (hw->mac.type == e1000_82574)
4904 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4905 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4906 }
4907 
4908 /*
4909  * Bit of a misnomer: what this really means is
4910  * to enable OS management of the system, i.e.
4911  * to disable special hardware management features.
4912  */
4913 static void
4914 em_init_manageability(struct adapter *adapter)
4915 {
4916 	/* A shared code workaround */
4917 #define E1000_82542_MANC2H E1000_MANC2H
4918 	if (adapter->has_manage) {
4919 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4920 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4921 
4922 		/* disable hardware interception of ARP */
4923 		manc &= ~(E1000_MANC_ARP_EN);
4924 
4925 		/* enable receiving management packets to the host */
4926 		manc |= E1000_MANC_EN_MNG2HOST;
4927 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4928 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4929 		manc2h |= E1000_MNG2HOST_PORT_623;
4930 		manc2h |= E1000_MNG2HOST_PORT_664;
4931 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4932 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4933 	}
4934 }
4935 
4936 /*
4937  * Give control back to hardware management
4938  * controller if there is one.
4939  */
4940 static void
4941 em_release_manageability(struct adapter *adapter)
4942 {
4943 	if (adapter->has_manage) {
4944 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4945 
4946 		/* re-enable hardware interception of ARP */
4947 		manc |= E1000_MANC_ARP_EN;
4948 		manc &= ~E1000_MANC_EN_MNG2HOST;
4949 
4950 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4951 	}
4952 }
4953 
4954 /*
4955  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4956  * For ASF and Pass Through versions of f/w this means
4957  * that the driver is loaded. For AMT version type f/w
4958  * this means that the network i/f is open.
4959  */
4960 static void
4961 em_get_hw_control(struct adapter *adapter)
4962 {
4963 	u32 ctrl_ext, swsm;
4964 
4965 	if (adapter->hw.mac.type == e1000_82573) {
4966 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4967 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4968 		    swsm | E1000_SWSM_DRV_LOAD);
4969 		return;
4970 	}
4971 	/* else */
4972 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4973 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4974 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4975 	return;
4976 }
4977 
4978 /*
4979  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4980  * For ASF and Pass Through versions of f/w this means that
4981  * the driver is no longer loaded. For AMT versions of the
4982  * f/w this means that the network i/f is closed.
4983  */
4984 static void
4985 em_release_hw_control(struct adapter *adapter)
4986 {
4987 	u32 ctrl_ext, swsm;
4988 
4989 	if (!adapter->has_manage)
4990 		return;
4991 
4992 	if (adapter->hw.mac.type == e1000_82573) {
4993 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4994 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4995 		    swsm & ~E1000_SWSM_DRV_LOAD);
4996 		return;
4997 	}
4998 	/* else */
4999 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5000 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5001 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5002 	return;
5003 }
5004 
5005 static int
5006 em_is_valid_ether_addr(u8 *addr)
5007 {
5008 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5009 
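	/*
	 * Reject multicast/broadcast addresses (low bit of the
	 * first octet set) as well as the all-zeroes address.
	 */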
5010 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5011 		return (FALSE);
5012 	}
5013 
5014 	return (TRUE);
5015 }
5016 
5017 /*
5018 ** Parse the interface capabilities with regard
5019 ** to both system management and wake-on-lan for
5020 ** later use.
5021 */
5022 static void
5023 em_get_wakeup(device_t dev)
5024 {
5025 	struct adapter	*adapter = device_get_softc(dev);
5026 	u16		eeprom_data = 0, device_id, apme_mask;
5027 
5028 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5029 	apme_mask = EM_EEPROM_APME;
5030 
5031 	switch (adapter->hw.mac.type) {
5032 	case e1000_82573:
5033 	case e1000_82583:
5034 		adapter->has_amt = TRUE;
5035 		/* FALLTHROUGH */
5036 	case e1000_82571:
5037 	case e1000_82572:
5038 	case e1000_80003es2lan:
5039 		if (adapter->hw.bus.func == 1) {
5040 			e1000_read_nvm(&adapter->hw,
5041 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5042 			break;
5043 		} else
5044 			e1000_read_nvm(&adapter->hw,
5045 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5046 		break;
5047 	case e1000_ich8lan:
5048 	case e1000_ich9lan:
5049 	case e1000_ich10lan:
5050 	case e1000_pchlan:
5051 	case e1000_pch2lan:
5052 		apme_mask = E1000_WUC_APME;
5053 		adapter->has_amt = TRUE;
5054 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5055 		break;
5056 	default:
5057 		e1000_read_nvm(&adapter->hw,
5058 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5059 		break;
5060 	}
5061 	if (eeprom_data & apme_mask)
5062 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5063 	/*
5064 	 * We have the EEPROM settings; now apply the special cases
5065 	 * where the EEPROM may be wrong or the board doesn't support
5066 	 * wake-on-LAN on a particular port.
5067 	 */
5068 	device_id = pci_get_device(dev);
5069 	switch (device_id) {
5070 	case E1000_DEV_ID_82571EB_FIBER:
5071 		/* Wake events only supported on port A for dual fiber
5072 		 * regardless of eeprom setting */
5073 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5074 		    E1000_STATUS_FUNC_1)
5075 			adapter->wol = 0;
5076 		break;
5077 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5078 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5079 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5080 		/* if quad port adapter, disable WoL on all but port A */
5081 		if (global_quad_port_a != 0)
5082 			adapter->wol = 0;
5083 		/* Reset for multiple quad port adapters */
5084 		if (++global_quad_port_a == 4)
5085 			global_quad_port_a = 0;
5086 		break;
5087 	}
5088 	return;
5089 }
5090 
5091 
5092 /*
5093  * Enable PCI Wake On Lan capability
5094  */
5095 static void
5096 em_enable_wakeup(device_t dev)
5097 {
5098 	struct adapter	*adapter = device_get_softc(dev);
5099 	if_t ifp = adapter->ifp;
5100 	u32		pmc, ctrl, ctrl_ext, rctl;
5101 	u16     	status;
5102 
5103 	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5104 		return;
5105 
5106 	/* Advertise the wakeup capability */
5107 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5108 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5109 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5110 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5111 
5112 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5113 	    (adapter->hw.mac.type == e1000_pchlan) ||
5114 	    (adapter->hw.mac.type == e1000_ich9lan) ||
5115 	    (adapter->hw.mac.type == e1000_ich10lan))
5116 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5117 
5118 	/* Keep the laser running on Fiber adapters */
5119 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5120 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5121 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5122 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5123 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5124 	}
5125 
5126 	/*
5127 	** Determine type of Wakeup: note that wol
5128 	** defaults to MAG and MC set (from em_get_wakeup).
5129 	*/
5130 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5131 		adapter->wol &= ~E1000_WUFC_MAG;
5132 
5133 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5134 		adapter->wol &= ~E1000_WUFC_MC;
5135 	else {
5136 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5137 		rctl |= E1000_RCTL_MPE;
5138 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5139 	}
5140 
5141 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5142 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5143 		if (em_enable_phy_wakeup(adapter))
5144 			return;
5145 	} else {
5146 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5147 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5148 	}
5149 
5150 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5151 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5152 
5153 	/* Request PME */
5154 	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5155 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5156 	if (if_getcapenable(ifp) & IFCAP_WOL)
5157 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5158 	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5159 
5160 	return;
5161 }
5162 
5163 /*
5164 ** WOL in the newer chipset interfaces (pchlan)
5165 ** requires the settings to be copied into the PHY
5166 */
5167 static int
5168 em_enable_phy_wakeup(struct adapter *adapter)
5169 {
5170 	struct e1000_hw *hw = &adapter->hw;
5171 	u32 mreg, ret = 0;
5172 	u16 preg;
5173 
5174 	/* copy MAC RARs to PHY RARs */
5175 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5176 
5177 	/* copy MAC MTA to PHY MTA */
5178 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5179 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5180 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5181 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5182 		    (u16)((mreg >> 16) & 0xFFFF));
5183 	}
5184 
5185 	/* configure PHY Rx Control register */
5186 	e1000_read_phy_reg(hw, BM_RCTL, &preg);
5187 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5188 	if (mreg & E1000_RCTL_UPE)
5189 		preg |= BM_RCTL_UPE;
5190 	if (mreg & E1000_RCTL_MPE)
5191 		preg |= BM_RCTL_MPE;
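	/* Re-encode the multicast offset (MO) field at its PHY bit position */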
5192 	preg &= ~(BM_RCTL_MO_MASK);
5193 	if (mreg & E1000_RCTL_MO_3)
5194 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5195 				<< BM_RCTL_MO_SHIFT);
5196 	if (mreg & E1000_RCTL_BAM)
5197 		preg |= BM_RCTL_BAM;
5198 	if (mreg & E1000_RCTL_PMCF)
5199 		preg |= BM_RCTL_PMCF;
5200 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5201 	if (mreg & E1000_CTRL_RFCE)
5202 		preg |= BM_RCTL_RFCE;
5203 	e1000_write_phy_reg(hw, BM_RCTL, preg);
5204 
5205 	/* enable PHY wakeup in MAC register */
5206 	E1000_WRITE_REG(hw, E1000_WUC,
5207 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5208 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5209 
5210 	/* configure and enable PHY wakeup in PHY registers */
5211 	e1000_write_phy_reg(hw, BM_WUFC, adapter->wol);
5212 	e1000_write_phy_reg(hw, BM_WUC, E1000_WUC_PME_EN);
5213 
5214 	/* activate PHY wakeup */
5215 	ret = hw->phy.ops.acquire(hw);
5216 	if (ret) {
5217 		printf("Could not acquire PHY\n");
5218 		return ret;
5219 	}
5220 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5221 	    (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5222 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5223 	if (ret) {
5224 		printf("Could not read PHY page 769\n");
5225 		goto out;
5226 	}
5227 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5228 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5229 	if (ret)
5230 		printf("Could not set PHY Host Wakeup bit\n");
5231 out:
5232 	hw->phy.ops.release(hw);
5233 
5234 	return ret;
5235 }
5236 
5237 static void
5238 em_led_func(void *arg, int onoff)
5239 {
5240 	struct adapter	*adapter = arg;
5241 
5242 	EM_CORE_LOCK(adapter);
5243 	if (onoff) {
5244 		e1000_setup_led(&adapter->hw);
5245 		e1000_led_on(&adapter->hw);
5246 	} else {
5247 		e1000_led_off(&adapter->hw);
5248 		e1000_cleanup_led(&adapter->hw);
5249 	}
5250 	EM_CORE_UNLOCK(adapter);
5251 }
5252 
5253 /*
5254 ** Disable the L0s and L1 link states
5255 */
5256 static void
5257 em_disable_aspm(struct adapter *adapter)
5258 {
5259 	int		base, reg;
5260 	u16		link_cap, link_ctrl;
5261 	device_t	dev = adapter->dev;
5262 
5263 	switch (adapter->hw.mac.type) {
5264 	case e1000_82573:
5265 	case e1000_82574:
5266 	case e1000_82583:
5267 		break;
5268 	default:
5269 		return;
5270 	}
5271 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5272 		return;
5273 	reg = base + PCIER_LINK_CAP;
5274 	link_cap = pci_read_config(dev, reg, 2);
5275 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5276 		return;
5277 	reg = base + PCIER_LINK_CTL;
5278 	link_ctrl = pci_read_config(dev, reg, 2);
5279 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5280 	pci_write_config(dev, reg, link_ctrl, 2);
5281 	return;
5282 }
5283 
5284 /**********************************************************************
5285  *
5286  *  Update the board statistics counters.
5287  *
5288  **********************************************************************/
5289 static void
5290 em_update_stats_counters(struct adapter *adapter)
5291 {
5292 
5293 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5294 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5295 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5296 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5297 	}
5298 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5299 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5300 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5301 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5302 
5303 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5304 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5305 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5306 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5307 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5308 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5309 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5310 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5311 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5312 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5313 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5314 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5315 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5316 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5317 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5318 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5319 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5320 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5321 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5322 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5323 
5324 	/* For the 64-bit byte counters the low dword must be read first. */
5325 	/* Both registers clear on the read of the high dword */
5326 
5327 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5328 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5329 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5330 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5331 
5332 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5333 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5334 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5335 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5336 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5337 
5338 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5339 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5340 
5341 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5342 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5343 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5344 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5345 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5346 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5347 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5348 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5349 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5350 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5351 
5352 	/* Interrupt Counts */
5353 
5354 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5355 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5356 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5357 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5358 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5359 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5360 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5361 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5362 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5363 
5364 	if (adapter->hw.mac.type >= e1000_82543) {
5365 		adapter->stats.algnerrc +=
5366 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5367 		adapter->stats.rxerrc +=
5368 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5369 		adapter->stats.tncrs +=
5370 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5371 		adapter->stats.cexterr +=
5372 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5373 		adapter->stats.tsctc +=
5374 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5375 		adapter->stats.tsctfc +=
5376 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5377 	}
5378 }
5379 
5380 static uint64_t
5381 em_get_counter(if_t ifp, ift_counter cnt)
5382 {
5383 	struct adapter *adapter;
5384 
5385 	adapter = if_getsoftc(ifp);
5386 
5387 	switch (cnt) {
5388 	case IFCOUNTER_COLLISIONS:
5389 		return (adapter->stats.colc);
5390 	case IFCOUNTER_IERRORS:
5391 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5392 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5393 		    adapter->stats.ruc + adapter->stats.roc +
5394 		    adapter->stats.mpc + adapter->stats.cexterr);
5395 	case IFCOUNTER_OERRORS:
5396 		return (adapter->stats.ecol + adapter->stats.latecol +
5397 		    adapter->watchdog_events);
5398 	default:
5399 		return (if_get_counter_default(ifp, cnt));
5400 	}
5401 }
5402 
5403 /* Export a single 32-bit register via a read-only sysctl. */
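/*
 * A sketch of how this handler is wired up (see em_add_hw_stats() below):
 * arg1 carries the adapter pointer and arg2 the register offset, e.g.
 *
 *	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
 *	    CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
 *	    em_sysctl_reg_handler, "IU", "Device Control Register");
 */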
5404 static int
5405 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5406 {
5407 	struct adapter *adapter;
5408 	u_int val;
5409 
5410 	adapter = oidp->oid_arg1;
5411 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5412 	return (sysctl_handle_int(oidp, &val, 0, req));
5413 }
5414 
5415 /*
5416  * Add sysctl variables, one per statistic, to the system.
5417  */
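/*
 * Once attached, the statistics tree can be read in bulk, e.g. (unit
 * number assumed):
 *
 *	sysctl dev.em.0.mac_stats
 */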
5418 static void
5419 em_add_hw_stats(struct adapter *adapter)
5420 {
5421 	device_t dev = adapter->dev;
5422 
5423 	struct tx_ring *txr = adapter->tx_rings;
5424 	struct rx_ring *rxr = adapter->rx_rings;
5425 
5426 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5427 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5428 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5429 	struct e1000_hw_stats *stats = &adapter->stats;
5430 
5431 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5432 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5433 
5434 #define QUEUE_NAME_LEN 32
5435 	char namebuf[QUEUE_NAME_LEN];
5436 
5437 	/* Driver Statistics */
5438 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5439 			CTLFLAG_RD, &adapter->link_irq,
5440 			"Link MSIX IRQ Handled");
5441 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5442 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5443 			 "Std mbuf failed");
5444 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5445 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5446 			 "Std mbuf cluster failed");
5447 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5448 			CTLFLAG_RD, &adapter->dropped_pkts,
5449 			"Driver dropped packets");
5450 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5451 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5452 			"Driver tx dma failure in xmit");
5453 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5454 			CTLFLAG_RD, &adapter->rx_overruns,
5455 			"RX overruns");
5456 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5457 			CTLFLAG_RD, &adapter->watchdog_events,
5458 			"Watchdog timeouts");
5459 
5460 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5461 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5462 			em_sysctl_reg_handler, "IU",
5463 			"Device Control Register");
5464 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5465 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5466 			em_sysctl_reg_handler, "IU",
5467 			"Receiver Control Register");
5468 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5469 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5470 			"Flow Control High Watermark");
5471 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5472 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5473 			"Flow Control Low Watermark");
5474 
5475 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5476 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5477 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5478 					    CTLFLAG_RD, NULL, "TX Queue Name");
5479 		queue_list = SYSCTL_CHILDREN(queue_node);
5480 
5481 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5482 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5483 				E1000_TDH(txr->me),
5484 				em_sysctl_reg_handler, "IU",
5485 				"Transmit Descriptor Head");
5486 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5487 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5488 				E1000_TDT(txr->me),
5489 				em_sysctl_reg_handler, "IU",
5490 				"Transmit Descriptor Tail");
5491 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5492 				CTLFLAG_RD, &txr->tx_irq,
5493 				"Queue MSI-X Transmit Interrupts");
5494 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5495 				CTLFLAG_RD, &txr->no_desc_avail,
5496 				"Queue No Descriptor Available");
5497 
5498 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5499 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5500 					    CTLFLAG_RD, NULL, "RX Queue Name");
5501 		queue_list = SYSCTL_CHILDREN(queue_node);
5502 
5503 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5504 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5505 				E1000_RDH(rxr->me),
5506 				em_sysctl_reg_handler, "IU",
5507 				"Receive Descriptor Head");
5508 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5509 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5510 				E1000_RDT(rxr->me),
5511 				em_sysctl_reg_handler, "IU",
5512 				"Receive Descriptor Tail");
5513 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5514 				CTLFLAG_RD, &rxr->rx_irq,
5515 				"Queue MSI-X Receive Interrupts");
5516 	}
5517 
5518 	/* MAC stats get their own sub node */
5519 
5520 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5521 				    CTLFLAG_RD, NULL, "Statistics");
5522 	stat_list = SYSCTL_CHILDREN(stat_node);
5523 
5524 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5525 			CTLFLAG_RD, &stats->ecol,
5526 			"Excessive collisions");
5527 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5528 			CTLFLAG_RD, &stats->scc,
5529 			"Single collisions");
5530 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5531 			CTLFLAG_RD, &stats->mcc,
5532 			"Multiple collisions");
5533 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5534 			CTLFLAG_RD, &stats->latecol,
5535 			"Late collisions");
5536 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5537 			CTLFLAG_RD, &stats->colc,
5538 			"Collision Count");
5539 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5540 			CTLFLAG_RD, &adapter->stats.symerrs,
5541 			"Symbol Errors");
5542 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5543 			CTLFLAG_RD, &adapter->stats.sec,
5544 			"Sequence Errors");
5545 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5546 			CTLFLAG_RD, &adapter->stats.dc,
5547 			"Defer Count");
5548 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5549 			CTLFLAG_RD, &adapter->stats.mpc,
5550 			"Missed Packets");
5551 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5552 			CTLFLAG_RD, &adapter->stats.rnbc,
5553 			"Receive No Buffers");
5554 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5555 			CTLFLAG_RD, &adapter->stats.ruc,
5556 			"Receive Undersize");
5557 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5558 			CTLFLAG_RD, &adapter->stats.rfc,
5559 			"Fragmented Packets Received");
5560 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5561 			CTLFLAG_RD, &adapter->stats.roc,
5562 			"Oversized Packets Received");
5563 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5564 			CTLFLAG_RD, &adapter->stats.rjc,
5565 			"Received Jabber");
5566 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5567 			CTLFLAG_RD, &adapter->stats.rxerrc,
5568 			"Receive Errors");
5569 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5570 			CTLFLAG_RD, &adapter->stats.crcerrs,
5571 			"CRC errors");
5572 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5573 			CTLFLAG_RD, &adapter->stats.algnerrc,
5574 			"Alignment Errors");
5575 	/* On 82575 these are collision counts */
5576 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5577 			CTLFLAG_RD, &adapter->stats.cexterr,
5578 			"Collision/Carrier extension errors");
5579 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5580 			CTLFLAG_RD, &adapter->stats.xonrxc,
5581 			"XON Received");
5582 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5583 			CTLFLAG_RD, &adapter->stats.xontxc,
5584 			"XON Transmitted");
5585 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5586 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5587 			"XOFF Received");
5588 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5589 			CTLFLAG_RD, &adapter->stats.xofftxc,
5590 			"XOFF Transmitted");
5591 
5592 	/* Packet Reception Stats */
5593 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5594 			CTLFLAG_RD, &adapter->stats.tpr,
5595 			"Total Packets Received");
5596 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5597 			CTLFLAG_RD, &adapter->stats.gprc,
5598 			"Good Packets Received");
5599 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5600 			CTLFLAG_RD, &adapter->stats.bprc,
5601 			"Broadcast Packets Received");
5602 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5603 			CTLFLAG_RD, &adapter->stats.mprc,
5604 			"Multicast Packets Received");
5605 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5606 			CTLFLAG_RD, &adapter->stats.prc64,
5607 			"64 byte frames received");
5608 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5609 			CTLFLAG_RD, &adapter->stats.prc127,
5610 			"65-127 byte frames received");
5611 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5612 			CTLFLAG_RD, &adapter->stats.prc255,
5613 			"128-255 byte frames received");
5614 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5615 			CTLFLAG_RD, &adapter->stats.prc511,
5616 			"256-511 byte frames received");
5617 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5618 			CTLFLAG_RD, &adapter->stats.prc1023,
5619 			"512-1023 byte frames received");
5620 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5621 			CTLFLAG_RD, &adapter->stats.prc1522,
5622 			"1024-1522 byte frames received");
5623 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5624 			CTLFLAG_RD, &adapter->stats.gorc,
5625 			"Good Octets Received");
5626 
5627 	/* Packet Transmission Stats */
5628 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5629 			CTLFLAG_RD, &adapter->stats.gotc,
5630 			"Good Octets Transmitted");
5631 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5632 			CTLFLAG_RD, &adapter->stats.tpt,
5633 			"Total Packets Transmitted");
5634 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5635 			CTLFLAG_RD, &adapter->stats.gptc,
5636 			"Good Packets Transmitted");
5637 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5638 			CTLFLAG_RD, &adapter->stats.bptc,
5639 			"Broadcast Packets Transmitted");
5640 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5641 			CTLFLAG_RD, &adapter->stats.mptc,
5642 			"Multicast Packets Transmitted");
5643 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5644 			CTLFLAG_RD, &adapter->stats.ptc64,
5645 			"64 byte frames transmitted");
5646 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5647 			CTLFLAG_RD, &adapter->stats.ptc127,
5648 			"65-127 byte frames transmitted");
5649 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5650 			CTLFLAG_RD, &adapter->stats.ptc255,
5651 			"128-255 byte frames transmitted");
5652 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5653 			CTLFLAG_RD, &adapter->stats.ptc511,
5654 			"256-511 byte frames transmitted");
5655 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5656 			CTLFLAG_RD, &adapter->stats.ptc1023,
5657 			"512-1023 byte frames transmitted");
5658 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5659 			CTLFLAG_RD, &adapter->stats.ptc1522,
5660 			"1024-1522 byte frames transmitted");
5661 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5662 			CTLFLAG_RD, &adapter->stats.tsctc,
5663 			"TSO Contexts Transmitted");
5664 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5665 			CTLFLAG_RD, &adapter->stats.tsctfc,
5666 			"TSO Contexts Failed");
5667 
5668 
5669 	/* Interrupt Stats */
5670 
5671 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5672 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5673 	int_list = SYSCTL_CHILDREN(int_node);
5674 
5675 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5676 			CTLFLAG_RD, &adapter->stats.iac,
5677 			"Interrupt Assertion Count");
5678 
5679 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5680 			CTLFLAG_RD, &adapter->stats.icrxptc,
5681 			"Interrupt Cause Rx Pkt Timer Expire Count");
5682 
5683 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5684 			CTLFLAG_RD, &adapter->stats.icrxatc,
5685 			"Interrupt Cause Rx Abs Timer Expire Count");
5686 
5687 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5688 			CTLFLAG_RD, &adapter->stats.ictxptc,
5689 			"Interrupt Cause Tx Pkt Timer Expire Count");
5690 
5691 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5692 			CTLFLAG_RD, &adapter->stats.ictxatc,
5693 			"Interrupt Cause Tx Abs Timer Expire Count");
5694 
5695 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5696 			CTLFLAG_RD, &adapter->stats.ictxqec,
5697 			"Interrupt Cause Tx Queue Empty Count");
5698 
5699 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5700 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5701 			"Interrupt Cause Tx Queue Min Thresh Count");
5702 
5703 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5704 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5705 			"Interrupt Cause Rx Desc Min Thresh Count");
5706 
5707 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5708 			CTLFLAG_RD, &adapter->stats.icrxoc,
5709 			"Interrupt Cause Receiver Overrun Count");
5710 }
5711 
5712 /**********************************************************************
5713  *
5714  *  This routine provides a way to dump out the adapter eeprom,
5715  *  often a useful debug/service tool. This only dumps the first
5716  *  32 words; the settings that matter live in that range.
5717  *
5718  **********************************************************************/
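/*
** Usage sketch (oid name assumed; check how this handler is registered
** at attach time):
**
**	sysctl dev.em.0.nvm=1	# hex-dump the first 32 EEPROM words
*/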
5719 static int
5720 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5721 {
5722 	struct adapter *adapter = (struct adapter *)arg1;
5723 	int error;
5724 	int result;
5725 
5726 	result = -1;
5727 	error = sysctl_handle_int(oidp, &result, 0, req);
5728 
5729 	if (error || !req->newptr)
5730 		return (error);
5731 
5732 	/*
5733 	 * This value will cause a hex dump of the
5734 	 * first 32 16-bit words of the EEPROM to
5735 	 * the screen.
5736 	 */
5737 	if (result == 1)
5738 		em_print_nvm_info(adapter);
5739 
5740 	return (error);
5741 }
5742 
5743 static void
5744 em_print_nvm_info(struct adapter *adapter)
5745 {
5746 	u16	eeprom_data;
5747 	int	i, j, row = 0;
5748 
5749 	/* It's a bit crude, but it gets the job done */
5750 	printf("\nInterface EEPROM Dump:\n");
5751 	printf("Offset\n0x0000  ");
5752 	for (i = 0, j = 0; i < 32; i++, j++) {
5753 		if (j == 8) { /* Make the offset block */
5754 			j = 0; ++row;
5755 			printf("\n0x00%x0  ", row);
5756 		}
5757 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5758 		printf("%04x ", eeprom_data);
5759 	}
5760 	printf("\n");
5761 }
5762 
5763 static int
5764 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5765 {
5766 	struct em_int_delay_info *info;
5767 	struct adapter *adapter;
5768 	u32 regval;
5769 	int error, usecs, ticks;
5770 
5771 	info = (struct em_int_delay_info *)arg1;
5772 	usecs = info->value;
5773 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5774 	if (error != 0 || req->newptr == NULL)
5775 		return (error);
5776 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5777 		return (EINVAL);
5778 	info->value = usecs;
5779 	ticks = EM_USECS_TO_TICKS(usecs);
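	/*
	 * The base delay registers count 1.024us ticks, while the ITR
	 * register counts 256ns units, hence the *4 below (e.g.
	 * 125us -> ~122 ticks -> 488 ITR units).
	 */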
5780 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5781 		ticks *= 4;
5782 
5783 	adapter = info->adapter;
5784 
5785 	EM_CORE_LOCK(adapter);
5786 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5787 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5788 	/* Handle a few special cases. */
5789 	switch (info->offset) {
5790 	case E1000_RDTR:
5791 		break;
5792 	case E1000_TIDV:
5793 		if (ticks == 0) {
5794 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5795 			/* Don't write 0 into the TIDV register. */
5796 			regval++;
5797 		} else
5798 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5799 		break;
5800 	}
5801 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5802 	EM_CORE_UNLOCK(adapter);
5803 	return (0);
5804 }
5805 
5806 static void
5807 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5808 	const char *description, struct em_int_delay_info *info,
5809 	int offset, int value)
5810 {
5811 	info->adapter = adapter;
5812 	info->offset = offset;
5813 	info->value = value;
5814 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5815 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5816 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5817 	    info, 0, em_sysctl_int_delay, "I", description);
5818 }
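
/*
** Usage sketch for the delay oids this helper creates (oid names such
** as "rx_int_delay" are assumptions; see the attach-time callers):
**
**	sysctl dev.em.0.rx_int_delay=32		# 32us receive delay
*/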
5819 
5820 static void
5821 em_set_sysctl_value(struct adapter *adapter, const char *name,
5822 	const char *description, int *limit, int value)
5823 {
5824 	*limit = value;
5825 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5826 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5827 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5828 }
5829 
5830 
5831 /*
5832 ** Set flow control using sysctl:
5833 ** Flow control values:
5834 **      0 - off
5835 **      1 - rx pause
5836 **      2 - tx pause
5837 **      3 - full
5838 */
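/*
** For example (oid name assumed from the attach-time registration):
**
**	sysctl dev.em.0.fc=3	# request full (rx and tx pause) flow control
*/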
5839 static int
5840 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5841 {
5842 	int		error;
5843 	static int	input = 3; /* default is full */
5844 	struct adapter	*adapter = (struct adapter *) arg1;
5845 
5846 	error = sysctl_handle_int(oidp, &input, 0, req);
5847 
5848 	if (error || req->newptr == NULL)
5849 		return (error);
5850 
5851 	if (input == adapter->fc) /* no change? */
5852 		return (error);
5853 
5854 	switch (input) {
5855 	case e1000_fc_rx_pause:
5856 	case e1000_fc_tx_pause:
5857 	case e1000_fc_full:
5858 	case e1000_fc_none:
5859 		adapter->hw.fc.requested_mode = input;
5860 		adapter->fc = input;
5861 		break;
5862 	default:
5863 		/* Do nothing */
5864 		return (error);
5865 	}
5866 
5867 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5868 	e1000_force_mac_fc(&adapter->hw);
5869 	return (error);
5870 }
5871 
5872 /*
5873 ** Manage Energy Efficient Ethernet:
5874 ** Control values:
5875 **     0/1 - enabled/disabled
5876 */
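/*
** For example (oid name assumed):
**
**	sysctl dev.em.0.eee_control=1	# a value of 1 disables EEE
*/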
5877 static int
5878 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5879 {
5880 	struct adapter *adapter = (struct adapter *) arg1;
5881 	int		error, value;
5882 
5883 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5884 	error = sysctl_handle_int(oidp, &value, 0, req);
5885 	if (error || req->newptr == NULL)
5886 		return (error);
5887 	EM_CORE_LOCK(adapter);
5888 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5889 	em_init_locked(adapter);
5890 	EM_CORE_UNLOCK(adapter);
5891 	return (0);
5892 }
5893 
5894 static int
5895 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5896 {
5897 	struct adapter *adapter;
5898 	int error;
5899 	int result;
5900 
5901 	result = -1;
5902 	error = sysctl_handle_int(oidp, &result, 0, req);
5903 
5904 	if (error || !req->newptr)
5905 		return (error);
5906 
5907 	if (result == 1) {
5908 		adapter = (struct adapter *)arg1;
5909 		em_print_debug_info(adapter);
5910 	}
5911 
5912 	return (error);
5913 }
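
/*
** For example (oid name assumed):
**
**	sysctl dev.em.0.debug=1		# print queue state to the console
*/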
5914 
5915 /*
5916 ** This routine is meant to be fluid, add whatever is
5917 ** needed for debugging a problem.  -jfv
5918 */
5919 static void
5920 em_print_debug_info(struct adapter *adapter)
5921 {
5922 	device_t dev = adapter->dev;
5923 	struct tx_ring *txr = adapter->tx_rings;
5924 	struct rx_ring *rxr = adapter->rx_rings;
5925 
5926 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
5927 		printf("Interface is RUNNING ");
5928 	else
5929 		printf("Interface is NOT RUNNING ");
5930 
5931 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
5932 		printf("and INACTIVE\n");
5933 	else
5934 		printf("and ACTIVE\n");
5935 
5936 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5937 		device_printf(dev, "TX Queue %d ------\n", i);
5938 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5939 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
5940 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
5941 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
5942 		device_printf(dev, "TX descriptors avail = %d\n",
5943 	    		txr->tx_avail);
5944 		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5945 	    		txr->no_desc_avail);
5946 		device_printf(dev, "RX Queue %d ------\n", i);
5947 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5948 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
5949 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
5950 		device_printf(dev, "RX discarded packets = %ld\n",
5951 	    		rxr->rx_discarded);
5952 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5953 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5954 	}
5955 }
5956 
5957 #ifdef EM_MULTIQUEUE
5958 /*
5959  * 82574 only:
5960  * Write a new value to the EEPROM increasing the number of MSIX
5961  * vectors from 3 to 5, for proper multiqueue support.
5962  */
5963 static void
5964 em_enable_vectors_82574(struct adapter *adapter)
5965 {
5966 	struct e1000_hw *hw = &adapter->hw;
5967 	device_t dev = adapter->dev;
5968 	u16 edata;
5969 
5970 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
5971 	device_printf(dev, "Current cap: %#06x\n", edata);
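	/*
	 * The MSIX_N field appears to encode (vectors - 1): the value 4
	 * written below is what the message reports as 5 vectors.
	 */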
5972 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
5973 		device_printf(dev, "Writing to eeprom: increasing "
5974 		    "reported MSIX vectors from 3 to 5...\n");
5975 		edata &= ~(EM_NVM_MSIX_N_MASK);
5976 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
5977 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
5978 		e1000_update_nvm_checksum(hw);
5979 		device_printf(dev, "Writing to eeprom: done\n");
5980 	}
5981 }
5982 #endif
5983 
5984 #ifdef DDB
5985 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
5986 {
5987 	devclass_t	dc;
5988 	int max_em;
5989 
5990 	dc = devclass_find("em");
5991 	max_em = devclass_get_maxunit(dc);
5992 
5993 	for (int index = 0; index < max_em; index++) {
5994 		device_t dev;
5995 		dev = devclass_get_device(dc, index);
5996 		if (dev != NULL && device_get_driver(dev) == &em_driver) {
5997 			struct adapter *adapter = device_get_softc(dev);
5998 			em_init_locked(adapter);
5999 		}
6000 	}
6001 }
6002 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6003 {
6004 	devclass_t	dc;
6005 	int max_em;
6006 
6007 	dc = devclass_find("em");
6008 	max_em = devclass_get_maxunit(dc);
6009 
6010 	for (int index = 0; index < max_em; index++) {
6011 		device_t dev;
6012 		dev = devclass_get_device(dc, index);
6013 		if (dev != NULL && device_get_driver(dev) == &em_driver)
6014 			em_print_debug_info(device_get_softc(dev));
6015 	}
6017 }
6018 #endif
6019