xref: /freebsd/sys/dev/e1000/if_em.c (revision 1f4bcc459a76b7aa664f3fd557684cd0ba6da352)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69 
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77 
78 #include <net/if_types.h>
79 #include <net/if_vlan_var.h>
80 
81 #include <netinet/in_systm.h>
82 #include <netinet/in.h>
83 #include <netinet/if_ether.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip6.h>
86 #include <netinet/tcp.h>
87 #include <netinet/udp.h>
88 
89 #include <machine/in_cksum.h>
90 #include <dev/led/led.h>
91 #include <dev/pci/pcivar.h>
92 #include <dev/pci/pcireg.h>
93 
94 #include "e1000_api.h"
95 #include "e1000_82571.h"
96 #include "if_em.h"
97 
98 /*********************************************************************
99  *  Set this to one to display debug statistics
100  *********************************************************************/
101 int	em_display_debug_stats = 0;
102 
103 /*********************************************************************
104  *  Driver version:
105  *********************************************************************/
106 char em_driver_version[] = "7.4.2";
107 
108 /*********************************************************************
109  *  PCI Device ID Table
110  *
111  *  Used by probe to select devices to load on
112  *  Last field stores an index into e1000_strings
113  *  Last entry must be all 0s
114  *
115  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
116  *********************************************************************/
117 
118 static em_vendor_info_t em_vendor_info_array[] =
119 {
120 	/* Intel(R) PRO/1000 Network Connection */
121 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
122 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
123 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
124 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
125 						PCI_ANY_ID, PCI_ANY_ID, 0},
126 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
127 						PCI_ANY_ID, PCI_ANY_ID, 0},
128 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
129 						PCI_ANY_ID, PCI_ANY_ID, 0},
130 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
131 						PCI_ANY_ID, PCI_ANY_ID, 0},
132 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
133 						PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
135 						PCI_ANY_ID, PCI_ANY_ID, 0},
136 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
138 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
139 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
140 
141 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
142 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
143 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
145 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
146 						PCI_ANY_ID, PCI_ANY_ID, 0},
147 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
148 						PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
150 						PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
152 						PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
176 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
177 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
178 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
179 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
180 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
181 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
182 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
183 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
184 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
185 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
186 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
187 						PCI_ANY_ID, PCI_ANY_ID, 0},
188 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
189 						PCI_ANY_ID, PCI_ANY_ID, 0},
190 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
191 	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
192 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
193 	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
194 	/* required last entry */
195 	{ 0, 0, 0, 0, 0}
196 };
197 
198 /*********************************************************************
199  *  Table of branding strings for all supported NICs.
200  *********************************************************************/
201 
202 static char *em_strings[] = {
203 	"Intel(R) PRO/1000 Network Connection"
204 };
205 
206 /*********************************************************************
207  *  Function prototypes
208  *********************************************************************/
209 static int	em_probe(device_t);
210 static int	em_attach(device_t);
211 static int	em_detach(device_t);
212 static int	em_shutdown(device_t);
213 static int	em_suspend(device_t);
214 static int	em_resume(device_t);
215 #ifdef EM_MULTIQUEUE
216 static int	em_mq_start(if_t, struct mbuf *);
217 static int	em_mq_start_locked(if_t,
218 		    struct tx_ring *);
219 static void	em_qflush(if_t);
220 #else
221 static void	em_start(if_t);
222 static void	em_start_locked(if_t, struct tx_ring *);
223 #endif
224 static int	em_ioctl(if_t, u_long, caddr_t);
225 static uint64_t	em_get_counter(if_t, ift_counter);
226 static void	em_init(void *);
227 static void	em_init_locked(struct adapter *);
228 static void	em_stop(void *);
229 static void	em_media_status(if_t, struct ifmediareq *);
230 static int	em_media_change(if_t);
231 static void	em_identify_hardware(struct adapter *);
232 static int	em_allocate_pci_resources(struct adapter *);
233 static int	em_allocate_legacy(struct adapter *);
234 static int	em_allocate_msix(struct adapter *);
235 static int	em_allocate_queues(struct adapter *);
236 static int	em_setup_msix(struct adapter *);
237 static void	em_free_pci_resources(struct adapter *);
238 static void	em_local_timer(void *);
239 static void	em_reset(struct adapter *);
240 static int	em_setup_interface(device_t, struct adapter *);
241 
242 static void	em_setup_transmit_structures(struct adapter *);
243 static void	em_initialize_transmit_unit(struct adapter *);
244 static int	em_allocate_transmit_buffers(struct tx_ring *);
245 static void	em_free_transmit_structures(struct adapter *);
246 static void	em_free_transmit_buffers(struct tx_ring *);
247 
248 static int	em_setup_receive_structures(struct adapter *);
249 static int	em_allocate_receive_buffers(struct rx_ring *);
250 static void	em_initialize_receive_unit(struct adapter *);
251 static void	em_free_receive_structures(struct adapter *);
252 static void	em_free_receive_buffers(struct rx_ring *);
253 
254 static void	em_enable_intr(struct adapter *);
255 static void	em_disable_intr(struct adapter *);
256 static void	em_update_stats_counters(struct adapter *);
257 static void	em_add_hw_stats(struct adapter *adapter);
258 static void	em_txeof(struct tx_ring *);
259 static bool	em_rxeof(struct rx_ring *, int, int *);
260 #ifndef __NO_STRICT_ALIGNMENT
261 static int	em_fixup_rx(struct rx_ring *);
262 #endif
263 static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
264 		    const struct em_rxbuffer *rxbuf);
265 static void	em_receive_checksum(uint32_t status, struct mbuf *);
266 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
267 		    struct ip *, u32 *, u32 *);
268 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
269 		    struct tcphdr *, u32 *, u32 *);
270 static void	em_set_promisc(struct adapter *);
271 static void	em_disable_promisc(struct adapter *);
272 static void	em_set_multi(struct adapter *);
273 static void	em_update_link_status(struct adapter *);
274 static void	em_refresh_mbufs(struct rx_ring *, int);
275 static void	em_register_vlan(void *, if_t, u16);
276 static void	em_unregister_vlan(void *, if_t, u16);
277 static void	em_setup_vlan_hw_support(struct adapter *);
278 static int	em_xmit(struct tx_ring *, struct mbuf **);
279 static int	em_dma_malloc(struct adapter *, bus_size_t,
280 		    struct em_dma_alloc *, int);
281 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
282 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
283 static void	em_print_nvm_info(struct adapter *);
284 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
285 static void	em_print_debug_info(struct adapter *);
286 static int 	em_is_valid_ether_addr(u8 *);
287 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
288 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
289 		    const char *, struct em_int_delay_info *, int, int);
290 /* Management and WOL Support */
291 static void	em_init_manageability(struct adapter *);
292 static void	em_release_manageability(struct adapter *);
293 static void     em_get_hw_control(struct adapter *);
294 static void     em_release_hw_control(struct adapter *);
295 static void	em_get_wakeup(device_t);
296 static void     em_enable_wakeup(device_t);
297 static int	em_enable_phy_wakeup(struct adapter *);
298 static void	em_led_func(void *, int);
299 static void	em_disable_aspm(struct adapter *);
300 
301 static int	em_irq_fast(void *);
302 
303 /* MSIX handlers */
304 static void	em_msix_tx(void *);
305 static void	em_msix_rx(void *);
306 static void	em_msix_link(void *);
307 static void	em_handle_tx(void *context, int pending);
308 static void	em_handle_rx(void *context, int pending);
309 static void	em_handle_link(void *context, int pending);
310 
311 #ifdef EM_MULTIQUEUE
312 static void	em_enable_vectors_82574(struct adapter *);
313 #endif
314 
315 static void	em_set_sysctl_value(struct adapter *, const char *,
316 		    const char *, int *, int);
317 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
318 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
319 
320 static __inline void em_rx_discard(struct rx_ring *, int);
321 
322 #ifdef DEVICE_POLLING
323 static poll_handler_t em_poll;
324 #endif /* POLLING */
325 
326 /*********************************************************************
327  *  FreeBSD Device Interface Entry Points
328  *********************************************************************/
329 
330 static device_method_t em_methods[] = {
331 	/* Device interface */
332 	DEVMETHOD(device_probe, em_probe),
333 	DEVMETHOD(device_attach, em_attach),
334 	DEVMETHOD(device_detach, em_detach),
335 	DEVMETHOD(device_shutdown, em_shutdown),
336 	DEVMETHOD(device_suspend, em_suspend),
337 	DEVMETHOD(device_resume, em_resume),
338 	DEVMETHOD_END
339 };
340 
341 static driver_t em_driver = {
342 	"em", em_methods, sizeof(struct adapter),
343 };
344 
345 devclass_t em_devclass;
346 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
347 MODULE_DEPEND(em, pci, 1, 1, 1);
348 MODULE_DEPEND(em, ether, 1, 1, 1);
349 #ifdef DEV_NETMAP
350 MODULE_DEPEND(em, netmap, 1, 1, 1);
351 #endif /* DEV_NETMAP */
352 
353 /*********************************************************************
354  *  Tunable default values.
355  *********************************************************************/
356 
357 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
358 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
359 #define M_TSO_LEN			66
360 
361 #define MAX_INTS_PER_SEC	8000
362 #define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
363 
364 /* Allow common code without TSO */
365 #ifndef CSUM_TSO
366 #define CSUM_TSO	0
367 #endif
368 
369 #define TSO_WORKAROUND	4
370 
371 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
372 
373 static int em_disable_crc_stripping = 0;
374 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
375     &em_disable_crc_stripping, 0, "Disable CRC Stripping");
376 
377 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
378 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
379 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
380     0, "Default transmit interrupt delay in usecs");
381 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
382     0, "Default receive interrupt delay in usecs");
383 
384 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
385 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
386 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
387     &em_tx_abs_int_delay_dflt, 0,
388     "Default transmit interrupt delay limit in usecs");
389 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
390     &em_rx_abs_int_delay_dflt, 0,
391     "Default receive interrupt delay limit in usecs");
392 
393 static int em_rxd = EM_DEFAULT_RXD;
394 static int em_txd = EM_DEFAULT_TXD;
395 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
396     "Number of receive descriptors per queue");
397 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
398     "Number of transmit descriptors per queue");
399 
400 static int em_smart_pwr_down = FALSE;
401 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
402     0, "Set to true to leave smart power down enabled on newer adapters");
403 
404 /* Controls whether promiscuous also shows bad packets */
405 static int em_debug_sbp = FALSE;
406 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
407     "Show bad packets in promiscuous mode");
408 
409 static int em_enable_msix = TRUE;
410 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
411     "Enable MSI-X interrupts");
412 
413 #ifdef EM_MULTIQUEUE
414 static int em_num_queues = 1;
415 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
416     "82574 only: Number of queues to configure, 0 indicates autoconfigure");
417 #endif
418 
419 /*
420 ** Global variable to store last used CPU when binding queues
421 ** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
422 ** queue is bound to a cpu.
423 */
424 static int em_last_bind_cpu = -1;
425 
426 /* How many packets rxeof tries to clean at a time */
427 static int em_rx_process_limit = 100;
428 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
429     &em_rx_process_limit, 0,
430     "Maximum number of received packets to process "
431     "at a time, -1 means unlimited");
432 
433 /* Energy efficient ethernet - default to OFF */
434 static int eee_setting = 1;
435 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
436     "Enable Energy Efficient Ethernet");
437 
438 /* Global used in WOL setup with multiport cards */
439 static int global_quad_port_a = 0;
440 
441 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
442 #include <dev/netmap/if_em_netmap.h>
443 #endif /* DEV_NETMAP */
444 
445 /*********************************************************************
446  *  Device identification routine
447  *
448  *  em_probe determines if the driver should be loaded on
449  *  adapter based on PCI vendor/device id of the adapter.
450  *
451  *  return BUS_PROBE_DEFAULT on success, positive on failure
452  *********************************************************************/
453 
454 static int
455 em_probe(device_t dev)
456 {
457 	char		adapter_name[60];
458 	uint16_t	pci_vendor_id = 0;
459 	uint16_t	pci_device_id = 0;
460 	uint16_t	pci_subvendor_id = 0;
461 	uint16_t	pci_subdevice_id = 0;
462 	em_vendor_info_t *ent;
463 
464 	INIT_DEBUGOUT("em_probe: begin");
465 
466 	pci_vendor_id = pci_get_vendor(dev);
467 	if (pci_vendor_id != EM_VENDOR_ID)
468 		return (ENXIO);
469 
470 	pci_device_id = pci_get_device(dev);
471 	pci_subvendor_id = pci_get_subvendor(dev);
472 	pci_subdevice_id = pci_get_subdevice(dev);
473 
474 	ent = em_vendor_info_array;
475 	while (ent->vendor_id != 0) {
476 		if ((pci_vendor_id == ent->vendor_id) &&
477 		    (pci_device_id == ent->device_id) &&
478 
479 		    ((pci_subvendor_id == ent->subvendor_id) ||
480 		    (ent->subvendor_id == PCI_ANY_ID)) &&
481 
482 		    ((pci_subdevice_id == ent->subdevice_id) ||
483 		    (ent->subdevice_id == PCI_ANY_ID))) {
484 			sprintf(adapter_name, "%s %s",
485 				em_strings[ent->index],
486 				em_driver_version);
487 			device_set_desc_copy(dev, adapter_name);
488 			return (BUS_PROBE_DEFAULT);
489 		}
490 		ent++;
491 	}
492 
493 	return (ENXIO);
494 }
495 
496 /*********************************************************************
497  *  Device initialization routine
498  *
499  *  The attach entry point is called when the driver is being loaded.
500  *  This routine identifies the type of hardware, allocates all resources
501  *  and initializes the hardware.
502  *
503  *  return 0 on success, positive on failure
504  *********************************************************************/
505 
506 static int
507 em_attach(device_t dev)
508 {
509 	struct adapter	*adapter;
510 	struct e1000_hw	*hw;
511 	int		error = 0;
512 
513 	INIT_DEBUGOUT("em_attach: begin");
514 
515 	if (resource_disabled("em", device_get_unit(dev))) {
516 		device_printf(dev, "Disabled by device hint\n");
517 		return (ENXIO);
518 	}
519 
520 	adapter = device_get_softc(dev);
521 	adapter->dev = adapter->osdep.dev = dev;
522 	hw = &adapter->hw;
523 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
524 
525 	/* SYSCTL stuff */
526 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
527 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
528 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
529 	    em_sysctl_nvm_info, "I", "NVM Information");
530 
531 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
532 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
533 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
534 	    em_sysctl_debug_info, "I", "Debug Information");
535 
536 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
537 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
538 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
539 	    em_set_flowcntl, "I", "Flow Control");
540 
541 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
542 
543 	/* Determine hardware and mac info */
544 	em_identify_hardware(adapter);
545 
546 	/* Setup PCI resources */
547 	if (em_allocate_pci_resources(adapter)) {
548 		device_printf(dev, "Allocation of PCI resources failed\n");
549 		error = ENXIO;
550 		goto err_pci;
551 	}
552 
553 	/*
554 	** For ICH8 and family we need to
555 	** map the flash memory, and this
556 	** must happen after the MAC is
557 	** identified
558 	*/
559 	if ((hw->mac.type == e1000_ich8lan) ||
560 	    (hw->mac.type == e1000_ich9lan) ||
561 	    (hw->mac.type == e1000_ich10lan) ||
562 	    (hw->mac.type == e1000_pchlan) ||
563 	    (hw->mac.type == e1000_pch2lan) ||
564 	    (hw->mac.type == e1000_pch_lpt)) {
565 		int rid = EM_BAR_TYPE_FLASH;
566 		adapter->flash = bus_alloc_resource_any(dev,
567 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
568 		if (adapter->flash == NULL) {
569 			device_printf(dev, "Mapping of Flash failed\n");
570 			error = ENXIO;
571 			goto err_pci;
572 		}
573 		/* This is used in the shared code */
574 		hw->flash_address = (u8 *)adapter->flash;
575 		adapter->osdep.flash_bus_space_tag =
576 		    rman_get_bustag(adapter->flash);
577 		adapter->osdep.flash_bus_space_handle =
578 		    rman_get_bushandle(adapter->flash);
579 	}
580 
581 	/* Do Shared Code initialization */
582 	if (e1000_setup_init_funcs(hw, TRUE)) {
583 		device_printf(dev, "Setup of Shared code failed\n");
584 		error = ENXIO;
585 		goto err_pci;
586 	}
587 
588 	/*
589 	 * Setup MSI/X or MSI if PCI Express
590 	 */
591 	adapter->msix = em_setup_msix(adapter);
592 
593 	e1000_get_bus_info(hw);
594 
595 	/* Set up some sysctls for the tunable interrupt delays */
596 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
597 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
598 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
599 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
600 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
601 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
602 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
603 	    "receive interrupt delay limit in usecs",
604 	    &adapter->rx_abs_int_delay,
605 	    E1000_REGISTER(hw, E1000_RADV),
606 	    em_rx_abs_int_delay_dflt);
607 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
608 	    "transmit interrupt delay limit in usecs",
609 	    &adapter->tx_abs_int_delay,
610 	    E1000_REGISTER(hw, E1000_TADV),
611 	    em_tx_abs_int_delay_dflt);
612 	em_add_int_delay_sysctl(adapter, "itr",
613 	    "interrupt delay limit in usecs/4",
614 	    &adapter->tx_itr,
615 	    E1000_REGISTER(hw, E1000_ITR),
616 	    DEFAULT_ITR);
617 
618 	/* Sysctl for limiting the amount of work done in the taskqueue */
619 	em_set_sysctl_value(adapter, "rx_processing_limit",
620 	    "max number of rx packets to process", &adapter->rx_process_limit,
621 	    em_rx_process_limit);
622 
623 	/*
624 	 * Validate number of transmit and receive descriptors. It
625 	 * must not exceed hardware maximum, and must be multiple
626 	 * of E1000_DBA_ALIGN.
627 	 */
628 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
629 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
630 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
631 		    EM_DEFAULT_TXD, em_txd);
632 		adapter->num_tx_desc = EM_DEFAULT_TXD;
633 	} else
634 		adapter->num_tx_desc = em_txd;
635 
636 	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
637 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
638 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
639 		    EM_DEFAULT_RXD, em_rxd);
640 		adapter->num_rx_desc = EM_DEFAULT_RXD;
641 	} else
642 		adapter->num_rx_desc = em_rxd;
643 
644 	hw->mac.autoneg = DO_AUTO_NEG;
645 	hw->phy.autoneg_wait_to_complete = FALSE;
646 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
647 
648 	/* Copper options */
649 	if (hw->phy.media_type == e1000_media_type_copper) {
650 		hw->phy.mdix = AUTO_ALL_MODES;
651 		hw->phy.disable_polarity_correction = FALSE;
652 		hw->phy.ms_type = EM_MASTER_SLAVE;
653 	}
654 
655 	/*
656 	 * Set the frame limits assuming
657 	 * standard ethernet sized frames.
658 	 */
659 	adapter->hw.mac.max_frame_size =
660 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
661 
662 	/*
663 	 * This controls when hardware reports transmit completion
664 	 * status.
665 	 */
666 	hw->mac.report_tx_early = 1;
667 
668 	/*
669 	** Get queue/ring memory
670 	*/
671 	if (em_allocate_queues(adapter)) {
672 		error = ENOMEM;
673 		goto err_pci;
674 	}
675 
676 	/* Allocate multicast array memory. */
677 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
678 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
679 	if (adapter->mta == NULL) {
680 		device_printf(dev, "Can not allocate multicast setup array\n");
681 		error = ENOMEM;
682 		goto err_late;
683 	}
684 
685 	/* Check SOL/IDER usage */
686 	if (e1000_check_reset_block(hw))
687 		device_printf(dev, "PHY reset is blocked"
688 		    " due to SOL/IDER session.\n");
689 
690 	/* Sysctl for setting Energy Efficient Ethernet */
691 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
692 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
693 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
694 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
695 	    adapter, 0, em_sysctl_eee, "I",
696 	    "Disable Energy Efficient Ethernet");
697 
698 	/*
699 	** Start from a known state, this is
700 	** important in reading the nvm and
701 	** mac from that.
702 	*/
703 	e1000_reset_hw(hw);
704 
705 
706 	/* Make sure we have a good EEPROM before we read from it */
707 	if (e1000_validate_nvm_checksum(hw) < 0) {
708 		/*
709 		** Some PCI-E parts fail the first check due to
710 		** the link being in sleep state, call it again,
711 		** if it fails a second time its a real issue.
712 		*/
713 		if (e1000_validate_nvm_checksum(hw) < 0) {
714 			device_printf(dev,
715 			    "The EEPROM Checksum Is Not Valid\n");
716 			error = EIO;
717 			goto err_late;
718 		}
719 	}
720 
721 	/* Copy the permanent MAC address out of the EEPROM */
722 	if (e1000_read_mac_addr(hw) < 0) {
723 		device_printf(dev, "EEPROM read error while reading MAC"
724 		    " address\n");
725 		error = EIO;
726 		goto err_late;
727 	}
728 
729 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
730 		device_printf(dev, "Invalid MAC address\n");
731 		error = EIO;
732 		goto err_late;
733 	}
734 
735 	/* Disable ULP support */
736 	e1000_disable_ulp_lpt_lp(hw, TRUE);
737 
738 	/*
739 	**  Do interrupt configuration
740 	*/
741 	if (adapter->msix > 1) /* Do MSIX */
742 		error = em_allocate_msix(adapter);
743 	else  /* MSI or Legacy */
744 		error = em_allocate_legacy(adapter);
745 	if (error)
746 		goto err_late;
747 
748 	/*
749 	 * Get Wake-on-Lan and Management info for later use
750 	 */
751 	em_get_wakeup(dev);
752 
753 	/* Setup OS specific network interface */
754 	if (em_setup_interface(dev, adapter) != 0)
755 		goto err_late;
756 
757 	em_reset(adapter);
758 
759 	/* Initialize statistics */
760 	em_update_stats_counters(adapter);
761 
762 	hw->mac.get_link_status = 1;
763 	em_update_link_status(adapter);
764 
765 	/* Register for VLAN events */
766 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
767 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
768 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
769 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
770 
771 	em_add_hw_stats(adapter);
772 
773 	/* Non-AMT based hardware can now take control from firmware */
774 	if (adapter->has_manage && !adapter->has_amt)
775 		em_get_hw_control(adapter);
776 
777 	/* Tell the stack that the interface is not active */
778 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
779 
780 	adapter->led_dev = led_create(em_led_func, adapter,
781 	    device_get_nameunit(dev));
782 #ifdef DEV_NETMAP
783 	em_netmap_attach(adapter);
784 #endif /* DEV_NETMAP */
785 
786 	INIT_DEBUGOUT("em_attach: end");
787 
788 	return (0);
789 
790 err_late:
791 	em_free_transmit_structures(adapter);
792 	em_free_receive_structures(adapter);
793 	em_release_hw_control(adapter);
794 	if (adapter->ifp != (void *)NULL)
795 		if_free(adapter->ifp);
796 err_pci:
797 	em_free_pci_resources(adapter);
798 	free(adapter->mta, M_DEVBUF);
799 	EM_CORE_LOCK_DESTROY(adapter);
800 
801 	return (error);
802 }
803 
804 /*********************************************************************
805  *  Device removal routine
806  *
807  *  The detach entry point is called when the driver is being removed.
808  *  This routine stops the adapter and deallocates all the resources
809  *  that were allocated for driver operation.
810  *
811  *  return 0 on success, positive on failure
812  *********************************************************************/
813 
814 static int
815 em_detach(device_t dev)
816 {
817 	struct adapter	*adapter = device_get_softc(dev);
818 	if_t ifp = adapter->ifp;
819 
820 	INIT_DEBUGOUT("em_detach: begin");
821 
822 	/* Make sure VLANS are not using driver */
823 	if (if_vlantrunkinuse(ifp)) {
824 		device_printf(dev,"Vlan in use, detach first\n");
825 		return (EBUSY);
826 	}
827 
828 #ifdef DEVICE_POLLING
829 	if (if_getcapenable(ifp) & IFCAP_POLLING)
830 		ether_poll_deregister(ifp);
831 #endif
832 
833 	if (adapter->led_dev != NULL)
834 		led_destroy(adapter->led_dev);
835 
836 	EM_CORE_LOCK(adapter);
837 	adapter->in_detach = 1;
838 	em_stop(adapter);
839 	EM_CORE_UNLOCK(adapter);
840 	EM_CORE_LOCK_DESTROY(adapter);
841 
842 	e1000_phy_hw_reset(&adapter->hw);
843 
844 	em_release_manageability(adapter);
845 	em_release_hw_control(adapter);
846 
847 	/* Unregister VLAN events */
848 	if (adapter->vlan_attach != NULL)
849 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
850 	if (adapter->vlan_detach != NULL)
851 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
852 
853 	ether_ifdetach(adapter->ifp);
854 	callout_drain(&adapter->timer);
855 
856 #ifdef DEV_NETMAP
857 	netmap_detach(ifp);
858 #endif /* DEV_NETMAP */
859 
860 	em_free_pci_resources(adapter);
861 	bus_generic_detach(dev);
862 	if_free(ifp);
863 
864 	em_free_transmit_structures(adapter);
865 	em_free_receive_structures(adapter);
866 
867 	em_release_hw_control(adapter);
868 	free(adapter->mta, M_DEVBUF);
869 
870 	return (0);
871 }
872 
873 /*********************************************************************
874  *
875  *  Shutdown entry point
876  *
877  **********************************************************************/
878 
879 static int
880 em_shutdown(device_t dev)
881 {
882 	return em_suspend(dev);
883 }
884 
885 /*
886  * Suspend/resume device methods.
887  */
888 static int
889 em_suspend(device_t dev)
890 {
891 	struct adapter *adapter = device_get_softc(dev);
892 
893 	EM_CORE_LOCK(adapter);
894 
895         em_release_manageability(adapter);
896 	em_release_hw_control(adapter);
897 	em_enable_wakeup(dev);
898 
899 	EM_CORE_UNLOCK(adapter);
900 
901 	return bus_generic_suspend(dev);
902 }
903 
904 static int
905 em_resume(device_t dev)
906 {
907 	struct adapter *adapter = device_get_softc(dev);
908 	struct tx_ring	*txr = adapter->tx_rings;
909 	if_t ifp = adapter->ifp;
910 
911 	EM_CORE_LOCK(adapter);
912 	if (adapter->hw.mac.type == e1000_pch2lan)
913 		e1000_resume_workarounds_pchlan(&adapter->hw);
914 	em_init_locked(adapter);
915 	em_init_manageability(adapter);
916 
917 	if ((if_getflags(ifp) & IFF_UP) &&
918 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
919 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
920 			EM_TX_LOCK(txr);
921 #ifdef EM_MULTIQUEUE
922 			if (!drbr_empty(ifp, txr->br))
923 				em_mq_start_locked(ifp, txr);
924 #else
925 			if (!if_sendq_empty(ifp))
926 				em_start_locked(ifp, txr);
927 #endif
928 			EM_TX_UNLOCK(txr);
929 		}
930 	}
931 	EM_CORE_UNLOCK(adapter);
932 
933 	return bus_generic_resume(dev);
934 }
935 
936 
937 #ifndef EM_MULTIQUEUE
938 static void
939 em_start_locked(if_t ifp, struct tx_ring *txr)
940 {
941 	struct adapter	*adapter = if_getsoftc(ifp);
942 	struct mbuf	*m_head;
943 
944 	EM_TX_LOCK_ASSERT(txr);
945 
946 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
947 	    IFF_DRV_RUNNING)
948 		return;
949 
950 	if (!adapter->link_active)
951 		return;
952 
953 	while (!if_sendq_empty(ifp)) {
954         	/* Call cleanup if number of TX descriptors low */
955 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
956 			em_txeof(txr);
957 		if (txr->tx_avail < EM_MAX_SCATTER) {
958 			if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
959 			break;
960 		}
961 		m_head = if_dequeue(ifp);
962 		if (m_head == NULL)
963 			break;
964 		/*
965 		 *  Encapsulation can modify our pointer, and or make it
966 		 *  NULL on failure.  In that event, we can't requeue.
967 		 */
968 		if (em_xmit(txr, &m_head)) {
969 			if (m_head == NULL)
970 				break;
971 			if_sendq_prepend(ifp, m_head);
972 			break;
973 		}
974 
975 		/* Mark the queue as having work */
976 		if (txr->busy == EM_TX_IDLE)
977 			txr->busy = EM_TX_BUSY;
978 
979 		/* Send a copy of the frame to the BPF listener */
980 		ETHER_BPF_MTAP(ifp, m_head);
981 
982 	}
983 
984 	return;
985 }
986 
987 static void
988 em_start(if_t ifp)
989 {
990 	struct adapter	*adapter = if_getsoftc(ifp);
991 	struct tx_ring	*txr = adapter->tx_rings;
992 
993 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
994 		EM_TX_LOCK(txr);
995 		em_start_locked(ifp, txr);
996 		EM_TX_UNLOCK(txr);
997 	}
998 	return;
999 }
1000 #else /* EM_MULTIQUEUE */
1001 /*********************************************************************
1002  *  Multiqueue Transmit routines
1003  *
1004  *  em_mq_start is called by the stack to initiate a transmit.
1005  *  however, if busy the driver can queue the request rather
1006  *  than do an immediate send. It is this that is an advantage
1007  *  in this driver, rather than also having multiple tx queues.
1008  **********************************************************************/
1009 /*
1010 ** Multiqueue capable stack interface
1011 */
1012 static int
1013 em_mq_start(if_t ifp, struct mbuf *m)
1014 {
1015 	struct adapter	*adapter = if_getsoftc(ifp);
1016 	struct tx_ring	*txr = adapter->tx_rings;
1017 	unsigned int	i, error;
1018 
1019 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1020 		i = m->m_pkthdr.flowid % adapter->num_queues;
1021 	else
1022 		i = curcpu % adapter->num_queues;
1023 
1024 	txr = &adapter->tx_rings[i];
1025 
1026 	error = drbr_enqueue(ifp, txr->br, m);
1027 	if (error)
1028 		return (error);
1029 
1030 	if (EM_TX_TRYLOCK(txr)) {
1031 		em_mq_start_locked(ifp, txr);
1032 		EM_TX_UNLOCK(txr);
1033 	} else
1034 		taskqueue_enqueue(txr->tq, &txr->tx_task);
1035 
1036 	return (0);
1037 }
1038 
1039 static int
1040 em_mq_start_locked(if_t ifp, struct tx_ring *txr)
1041 {
1042 	struct adapter  *adapter = txr->adapter;
1043         struct mbuf     *next;
1044         int             err = 0, enq = 0;
1045 
1046 	EM_TX_LOCK_ASSERT(txr);
1047 
1048 	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
1049 	    adapter->link_active == 0) {
1050 		return (ENETDOWN);
1051 	}
1052 
1053 	/* Process the queue */
1054 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1055 		if ((err = em_xmit(txr, &next)) != 0) {
1056 			if (next == NULL) {
1057 				/* It was freed, move forward */
1058 				drbr_advance(ifp, txr->br);
1059 			} else {
1060 				/*
1061 				 * Still have one left, it may not be
1062 				 * the same since the transmit function
1063 				 * may have changed it.
1064 				 */
1065 				drbr_putback(ifp, txr->br, next);
1066 			}
1067 			break;
1068 		}
1069 		drbr_advance(ifp, txr->br);
1070 		enq++;
1071 		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
1072 		if (next->m_flags & M_MCAST)
1073 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1074 		ETHER_BPF_MTAP(ifp, next);
1075 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
1076                         break;
1077 	}
1078 
1079 	/* Mark the queue as having work */
1080 	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1081 		txr->busy = EM_TX_BUSY;
1082 
1083 	if (txr->tx_avail < EM_MAX_SCATTER)
1084 		em_txeof(txr);
1085 	if (txr->tx_avail < EM_MAX_SCATTER) {
1086 		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
1087 	}
1088 	return (err);
1089 }
1090 
1091 /*
1092 ** Flush all ring buffers
1093 */
1094 static void
1095 em_qflush(if_t ifp)
1096 {
1097 	struct adapter  *adapter = if_getsoftc(ifp);
1098 	struct tx_ring  *txr = adapter->tx_rings;
1099 	struct mbuf     *m;
1100 
1101 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1102 		EM_TX_LOCK(txr);
1103 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1104 			m_freem(m);
1105 		EM_TX_UNLOCK(txr);
1106 	}
1107 	if_qflush(ifp);
1108 }
1109 #endif /* EM_MULTIQUEUE */
1110 
1111 /*********************************************************************
1112  *  Ioctl entry point
1113  *
1114  *  em_ioctl is called when the user wants to configure the
1115  *  interface.
1116  *
1117  *  return 0 on success, positive on failure
1118  **********************************************************************/
1119 
1120 static int
1121 em_ioctl(if_t ifp, u_long command, caddr_t data)
1122 {
1123 	struct adapter	*adapter = if_getsoftc(ifp);
1124 	struct ifreq	*ifr = (struct ifreq *)data;
1125 #if defined(INET) || defined(INET6)
1126 	struct ifaddr	*ifa = (struct ifaddr *)data;
1127 #endif
1128 	bool		avoid_reset = FALSE;
1129 	int		error = 0;
1130 
1131 	if (adapter->in_detach)
1132 		return (error);
1133 
1134 	switch (command) {
1135 	case SIOCSIFADDR:
1136 #ifdef INET
1137 		if (ifa->ifa_addr->sa_family == AF_INET)
1138 			avoid_reset = TRUE;
1139 #endif
1140 #ifdef INET6
1141 		if (ifa->ifa_addr->sa_family == AF_INET6)
1142 			avoid_reset = TRUE;
1143 #endif
1144 		/*
1145 		** Calling init results in link renegotiation,
1146 		** so we avoid doing it when possible.
1147 		*/
1148 		if (avoid_reset) {
1149 			if_setflagbits(ifp,IFF_UP,0);
1150 			if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
1151 				em_init(adapter);
1152 #ifdef INET
1153 			if (!(if_getflags(ifp) & IFF_NOARP))
1154 				arp_ifinit(ifp, ifa);
1155 #endif
1156 		} else
1157 			error = ether_ioctl(ifp, command, data);
1158 		break;
1159 	case SIOCSIFMTU:
1160 	    {
1161 		int max_frame_size;
1162 
1163 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1164 
1165 		EM_CORE_LOCK(adapter);
1166 		switch (adapter->hw.mac.type) {
1167 		case e1000_82571:
1168 		case e1000_82572:
1169 		case e1000_ich9lan:
1170 		case e1000_ich10lan:
1171 		case e1000_pch2lan:
1172 		case e1000_pch_lpt:
1173 		case e1000_82574:
1174 		case e1000_82583:
1175 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1176 			max_frame_size = 9234;
1177 			break;
1178 		case e1000_pchlan:
1179 			max_frame_size = 4096;
1180 			break;
1181 			/* Adapters that do not support jumbo frames */
1182 		case e1000_ich8lan:
1183 			max_frame_size = ETHER_MAX_LEN;
1184 			break;
1185 		default:
1186 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1187 		}
1188 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1189 		    ETHER_CRC_LEN) {
1190 			EM_CORE_UNLOCK(adapter);
1191 			error = EINVAL;
1192 			break;
1193 		}
1194 
1195 		if_setmtu(ifp, ifr->ifr_mtu);
1196 		adapter->hw.mac.max_frame_size =
1197 		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
1198 		em_init_locked(adapter);
1199 		EM_CORE_UNLOCK(adapter);
1200 		break;
1201 	    }
1202 	case SIOCSIFFLAGS:
1203 		IOCTL_DEBUGOUT("ioctl rcv'd:\
1204 		    SIOCSIFFLAGS (Set Interface Flags)");
1205 		EM_CORE_LOCK(adapter);
1206 		if (if_getflags(ifp) & IFF_UP) {
1207 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1208 				if ((if_getflags(ifp) ^ adapter->if_flags) &
1209 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1210 					em_disable_promisc(adapter);
1211 					em_set_promisc(adapter);
1212 				}
1213 			} else
1214 				em_init_locked(adapter);
1215 		} else
1216 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1217 				em_stop(adapter);
1218 		adapter->if_flags = if_getflags(ifp);
1219 		EM_CORE_UNLOCK(adapter);
1220 		break;
1221 	case SIOCADDMULTI:
1222 	case SIOCDELMULTI:
1223 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1224 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1225 			EM_CORE_LOCK(adapter);
1226 			em_disable_intr(adapter);
1227 			em_set_multi(adapter);
1228 #ifdef DEVICE_POLLING
1229 			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
1230 #endif
1231 				em_enable_intr(adapter);
1232 			EM_CORE_UNLOCK(adapter);
1233 		}
1234 		break;
1235 	case SIOCSIFMEDIA:
1236 		/* Check SOL/IDER usage */
1237 		EM_CORE_LOCK(adapter);
1238 		if (e1000_check_reset_block(&adapter->hw)) {
1239 			EM_CORE_UNLOCK(adapter);
1240 			device_printf(adapter->dev, "Media change is"
1241 			    " blocked due to SOL/IDER session.\n");
1242 			break;
1243 		}
1244 		EM_CORE_UNLOCK(adapter);
1245 		/* falls thru */
1246 	case SIOCGIFMEDIA:
1247 		IOCTL_DEBUGOUT("ioctl rcv'd: \
1248 		    SIOCxIFMEDIA (Get/Set Interface Media)");
1249 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1250 		break;
1251 	case SIOCSIFCAP:
1252 	    {
1253 		int mask, reinit;
1254 
1255 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1256 		reinit = 0;
1257 		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1258 #ifdef DEVICE_POLLING
1259 		if (mask & IFCAP_POLLING) {
1260 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1261 				error = ether_poll_register(em_poll, ifp);
1262 				if (error)
1263 					return (error);
1264 				EM_CORE_LOCK(adapter);
1265 				em_disable_intr(adapter);
1266 				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
1267 				EM_CORE_UNLOCK(adapter);
1268 			} else {
1269 				error = ether_poll_deregister(ifp);
1270 				/* Enable interrupt even in error case */
1271 				EM_CORE_LOCK(adapter);
1272 				em_enable_intr(adapter);
1273 				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
1274 				EM_CORE_UNLOCK(adapter);
1275 			}
1276 		}
1277 #endif
1278 		if (mask & IFCAP_HWCSUM) {
1279 			if_togglecapenable(ifp,IFCAP_HWCSUM);
1280 			reinit = 1;
1281 		}
1282 		if (mask & IFCAP_TSO4) {
1283 			if_togglecapenable(ifp,IFCAP_TSO4);
1284 			reinit = 1;
1285 		}
1286 		if (mask & IFCAP_VLAN_HWTAGGING) {
1287 			if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
1288 			reinit = 1;
1289 		}
1290 		if (mask & IFCAP_VLAN_HWFILTER) {
1291 			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1292 			reinit = 1;
1293 		}
1294 		if (mask & IFCAP_VLAN_HWTSO) {
1295 			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1296 			reinit = 1;
1297 		}
1298 		if ((mask & IFCAP_WOL) &&
1299 		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
1300 			if (mask & IFCAP_WOL_MCAST)
1301 				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
1302 			if (mask & IFCAP_WOL_MAGIC)
1303 				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
1304 		}
1305 		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1306 			em_init(adapter);
1307 		if_vlancap(ifp);
1308 		break;
1309 	    }
1310 
1311 	default:
1312 		error = ether_ioctl(ifp, command, data);
1313 		break;
1314 	}
1315 
1316 	return (error);
1317 }
1318 
1319 
1320 /*********************************************************************
1321  *  Init entry point
1322  *
1323  *  This routine is used in two ways. It is used by the stack as
1324  *  init entry point in network interface structure. It is also used
1325  *  by the driver as a hw/sw initialization routine to get to a
1326  *  consistent state.
1327  *
1328  *  return 0 on success, positive on failure
1329  **********************************************************************/
1330 
1331 static void
1332 em_init_locked(struct adapter *adapter)
1333 {
1334 	if_t ifp = adapter->ifp;
1335 	device_t	dev = adapter->dev;
1336 
1337 	INIT_DEBUGOUT("em_init: begin");
1338 
1339 	EM_CORE_LOCK_ASSERT(adapter);
1340 
1341 	em_disable_intr(adapter);
1342 	callout_stop(&adapter->timer);
1343 
1344 	/* Get the latest mac address, User can use a LAA */
1345         bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1346               ETHER_ADDR_LEN);
1347 
1348 	/* Put the address into the Receive Address Array */
1349 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1350 
1351 	/*
1352 	 * With the 82571 adapter, RAR[0] may be overwritten
1353 	 * when the other port is reset, we make a duplicate
1354 	 * in RAR[14] for that eventuality, this assures
1355 	 * the interface continues to function.
1356 	 */
1357 	if (adapter->hw.mac.type == e1000_82571) {
1358 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1359 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1360 		    E1000_RAR_ENTRIES - 1);
1361 	}
1362 
1363 	/* Initialize the hardware */
1364 	em_reset(adapter);
1365 	em_update_link_status(adapter);
1366 
1367 	/* Setup VLAN support, basic and offload if available */
1368 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1369 
1370 	/* Set hardware offload abilities */
1371 	if_clearhwassist(ifp);
1372 	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1373 		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1374 	/*
1375 	** There have proven to be problems with TSO when not
1376 	** at full gigabit speed, so disable the assist automatically
1377 	** when at lower speeds.  -jfv
1378 	*/
1379 	if (if_getcapenable(ifp) & IFCAP_TSO4) {
1380 		if (adapter->link_speed == SPEED_1000)
1381 			if_sethwassistbits(ifp, CSUM_TSO, 0);
1382 	}
1383 
1384 	/* Configure for OS presence */
1385 	em_init_manageability(adapter);
1386 
1387 	/* Prepare transmit descriptors and buffers */
1388 	em_setup_transmit_structures(adapter);
1389 	em_initialize_transmit_unit(adapter);
1390 
1391 	/* Setup Multicast table */
1392 	em_set_multi(adapter);
1393 
1394 	/*
1395 	** Figure out the desired mbuf
1396 	** pool for doing jumbos
1397 	*/
1398 	if (adapter->hw.mac.max_frame_size <= 2048)
1399 		adapter->rx_mbuf_sz = MCLBYTES;
1400 	else if (adapter->hw.mac.max_frame_size <= 4096)
1401 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1402 	else
1403 		adapter->rx_mbuf_sz = MJUM9BYTES;
1404 
1405 	/* Prepare receive descriptors and buffers */
1406 	if (em_setup_receive_structures(adapter)) {
1407 		device_printf(dev, "Could not setup receive structures\n");
1408 		em_stop(adapter);
1409 		return;
1410 	}
1411 	em_initialize_receive_unit(adapter);
1412 
1413 	/* Use real VLAN Filter support? */
1414 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1415 		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1416 			/* Use real VLAN Filter support */
1417 			em_setup_vlan_hw_support(adapter);
1418 		else {
1419 			u32 ctrl;
1420 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1421 			ctrl |= E1000_CTRL_VME;
1422 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1423 		}
1424 	}
1425 
1426 	/* Don't lose promiscuous settings */
1427 	em_set_promisc(adapter);
1428 
1429 	/* Set the interface as ACTIVE */
1430 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1431 
1432 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1433 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1434 
1435 	/* MSI/X configuration for 82574 */
1436 	if (adapter->hw.mac.type == e1000_82574) {
1437 		int tmp;
1438 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1439 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1440 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1441 		/* Set the IVAR - interrupt vector routing. */
1442 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1443 	}
1444 
1445 #ifdef DEVICE_POLLING
1446 	/*
1447 	 * Only enable interrupts if we are not polling, make sure
1448 	 * they are off otherwise.
1449 	 */
1450 	if (if_getcapenable(ifp) & IFCAP_POLLING)
1451 		em_disable_intr(adapter);
1452 	else
1453 #endif /* DEVICE_POLLING */
1454 		em_enable_intr(adapter);
1455 
1456 	/* AMT based hardware can now take control from firmware */
1457 	if (adapter->has_manage && adapter->has_amt)
1458 		em_get_hw_control(adapter);
1459 }
1460 
1461 static void
1462 em_init(void *arg)
1463 {
1464 	struct adapter *adapter = arg;
1465 
1466 	EM_CORE_LOCK(adapter);
1467 	em_init_locked(adapter);
1468 	EM_CORE_UNLOCK(adapter);
1469 }
1470 
1471 
1472 #ifdef DEVICE_POLLING
1473 /*********************************************************************
1474  *
1475  *  Legacy polling routine: note this only works with single queue
1476  *
1477  *********************************************************************/
1478 static int
1479 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1480 {
1481 	struct adapter *adapter = if_getsoftc(ifp);
1482 	struct tx_ring	*txr = adapter->tx_rings;
1483 	struct rx_ring	*rxr = adapter->rx_rings;
1484 	u32		reg_icr;
1485 	int		rx_done;
1486 
1487 	EM_CORE_LOCK(adapter);
1488 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1489 		EM_CORE_UNLOCK(adapter);
1490 		return (0);
1491 	}
1492 
1493 	if (cmd == POLL_AND_CHECK_STATUS) {
1494 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1495 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1496 			callout_stop(&adapter->timer);
1497 			adapter->hw.mac.get_link_status = 1;
1498 			em_update_link_status(adapter);
1499 			callout_reset(&adapter->timer, hz,
1500 			    em_local_timer, adapter);
1501 		}
1502 	}
1503 	EM_CORE_UNLOCK(adapter);
1504 
1505 	em_rxeof(rxr, count, &rx_done);
1506 
1507 	EM_TX_LOCK(txr);
1508 	em_txeof(txr);
1509 #ifdef EM_MULTIQUEUE
1510 	if (!drbr_empty(ifp, txr->br))
1511 		em_mq_start_locked(ifp, txr);
1512 #else
1513 	if (!if_sendq_empty(ifp))
1514 		em_start_locked(ifp, txr);
1515 #endif
1516 	EM_TX_UNLOCK(txr);
1517 
1518 	return (rx_done);
1519 }
1520 #endif /* DEVICE_POLLING */
1521 
1522 
1523 /*********************************************************************
1524  *
1525  *  Fast Legacy/MSI Combined Interrupt Service routine
1526  *
1527  *********************************************************************/
1528 static int
1529 em_irq_fast(void *arg)
1530 {
1531 	struct adapter	*adapter = arg;
1532 	if_t ifp;
1533 	u32		reg_icr;
1534 
1535 	ifp = adapter->ifp;
1536 
1537 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1538 
1539 	/* Hot eject?  */
1540 	if (reg_icr == 0xffffffff)
1541 		return FILTER_STRAY;
1542 
1543 	/* Definitely not our interrupt.  */
1544 	if (reg_icr == 0x0)
1545 		return FILTER_STRAY;
1546 
1547 	/*
1548 	 * Starting with the 82571 chip, bit 31 should be used to
1549 	 * determine whether the interrupt belongs to us.
1550 	 */
1551 	if (adapter->hw.mac.type >= e1000_82571 &&
1552 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1553 		return FILTER_STRAY;
1554 
1555 	em_disable_intr(adapter);
1556 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1557 
1558 	/* Link status change */
1559 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1560 		adapter->hw.mac.get_link_status = 1;
1561 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1562 	}
1563 
1564 	if (reg_icr & E1000_ICR_RXO)
1565 		adapter->rx_overruns++;
1566 	return FILTER_HANDLED;
1567 }
1568 
1569 /* Combined RX/TX handler, used by Legacy and MSI */
1570 static void
1571 em_handle_que(void *context, int pending)
1572 {
1573 	struct adapter	*adapter = context;
1574 	if_t ifp = adapter->ifp;
1575 	struct tx_ring	*txr = adapter->tx_rings;
1576 	struct rx_ring	*rxr = adapter->rx_rings;
1577 
1578 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1579 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1580 
1581 		EM_TX_LOCK(txr);
1582 		em_txeof(txr);
1583 #ifdef EM_MULTIQUEUE
1584 		if (!drbr_empty(ifp, txr->br))
1585 			em_mq_start_locked(ifp, txr);
1586 #else
1587 		if (!if_sendq_empty(ifp))
1588 			em_start_locked(ifp, txr);
1589 #endif
1590 		EM_TX_UNLOCK(txr);
1591 		if (more) {
1592 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1593 			return;
1594 		}
1595 	}
1596 
1597 	em_enable_intr(adapter);
1598 	return;
1599 }
1600 
1601 
1602 /*********************************************************************
1603  *
1604  *  MSIX Interrupt Service Routines
1605  *
1606  **********************************************************************/
1607 static void
1608 em_msix_tx(void *arg)
1609 {
1610 	struct tx_ring *txr = arg;
1611 	struct adapter *adapter = txr->adapter;
1612 	if_t ifp = adapter->ifp;
1613 
1614 	++txr->tx_irq;
1615 	EM_TX_LOCK(txr);
1616 	em_txeof(txr);
1617 #ifdef EM_MULTIQUEUE
1618 	if (!drbr_empty(ifp, txr->br))
1619 		em_mq_start_locked(ifp, txr);
1620 #else
1621 	if (!if_sendq_empty(ifp))
1622 		em_start_locked(ifp, txr);
1623 #endif
1624 
1625 	/* Reenable this interrupt */
1626 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1627 	EM_TX_UNLOCK(txr);
1628 	return;
1629 }
1630 
1631 /*********************************************************************
1632  *
1633  *  MSIX RX Interrupt Service routine
1634  *
1635  **********************************************************************/
1636 
1637 static void
1638 em_msix_rx(void *arg)
1639 {
1640 	struct rx_ring	*rxr = arg;
1641 	struct adapter	*adapter = rxr->adapter;
1642 	bool		more;
1643 
1644 	++rxr->rx_irq;
1645 	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1646 		return;
1647 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1648 	if (more)
1649 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1650 	else {
1651 		/* Reenable this interrupt */
1652 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1653 	}
1654 	return;
1655 }
1656 
1657 /*********************************************************************
1658  *
1659  *  MSIX Link Fast Interrupt Service routine
1660  *
1661  **********************************************************************/
1662 static void
1663 em_msix_link(void *arg)
1664 {
1665 	struct adapter	*adapter = arg;
1666 	u32		reg_icr;
1667 
1668 	++adapter->link_irq;
1669 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1670 
1671 	if (reg_icr & E1000_ICR_RXO)
1672 		adapter->rx_overruns++;
1673 
1674 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1675 		adapter->hw.mac.get_link_status = 1;
1676 		em_handle_link(adapter, 0);
1677 	} else
1678 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1679 		    EM_MSIX_LINK | E1000_IMS_LSC);
1680 	/*
1681  	** Because we must read the ICR for this interrupt
1682  	** it may clear other causes using autoclear, for
1683  	** this reason we simply create a soft interrupt
1684  	** for all these vectors.
1685  	*/
1686 	if (reg_icr) {
1687 		E1000_WRITE_REG(&adapter->hw,
1688 			E1000_ICS, adapter->ims);
1689 	}
1690 	return;
1691 }
1692 
1693 static void
1694 em_handle_rx(void *context, int pending)
1695 {
1696 	struct rx_ring	*rxr = context;
1697 	struct adapter	*adapter = rxr->adapter;
1698         bool            more;
1699 
1700 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1701 	if (more)
1702 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1703 	else {
1704 		/* Reenable this interrupt */
1705 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1706 	}
1707 }
1708 
1709 static void
1710 em_handle_tx(void *context, int pending)
1711 {
1712 	struct tx_ring	*txr = context;
1713 	struct adapter	*adapter = txr->adapter;
1714 	if_t ifp = adapter->ifp;
1715 
1716 	EM_TX_LOCK(txr);
1717 	em_txeof(txr);
1718 #ifdef EM_MULTIQUEUE
1719 	if (!drbr_empty(ifp, txr->br))
1720 		em_mq_start_locked(ifp, txr);
1721 #else
1722 	if (!if_sendq_empty(ifp))
1723 		em_start_locked(ifp, txr);
1724 #endif
1725 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1726 	EM_TX_UNLOCK(txr);
1727 }
1728 
1729 static void
1730 em_handle_link(void *context, int pending)
1731 {
1732 	struct adapter	*adapter = context;
1733 	struct tx_ring	*txr = adapter->tx_rings;
1734 	if_t ifp = adapter->ifp;
1735 
1736 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1737 		return;
1738 
1739 	EM_CORE_LOCK(adapter);
1740 	callout_stop(&adapter->timer);
1741 	em_update_link_status(adapter);
1742 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1743 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1744 	    EM_MSIX_LINK | E1000_IMS_LSC);
1745 	if (adapter->link_active) {
1746 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1747 			EM_TX_LOCK(txr);
1748 #ifdef EM_MULTIQUEUE
1749 			if (!drbr_empty(ifp, txr->br))
1750 				em_mq_start_locked(ifp, txr);
1751 #else
1752 			if (if_sendq_empty(ifp))
1753 				em_start_locked(ifp, txr);
1754 #endif
1755 			EM_TX_UNLOCK(txr);
1756 		}
1757 	}
1758 	EM_CORE_UNLOCK(adapter);
1759 }
1760 
1761 
1762 /*********************************************************************
1763  *
1764  *  Media Ioctl callback
1765  *
1766  *  This routine is called whenever the user queries the status of
1767  *  the interface using ifconfig.
1768  *
1769  **********************************************************************/
1770 static void
1771 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1772 {
1773 	struct adapter *adapter = if_getsoftc(ifp);
1774 	u_char fiber_type = IFM_1000_SX;
1775 
1776 	INIT_DEBUGOUT("em_media_status: begin");
1777 
1778 	EM_CORE_LOCK(adapter);
1779 	em_update_link_status(adapter);
1780 
1781 	ifmr->ifm_status = IFM_AVALID;
1782 	ifmr->ifm_active = IFM_ETHER;
1783 
1784 	if (!adapter->link_active) {
1785 		EM_CORE_UNLOCK(adapter);
1786 		return;
1787 	}
1788 
1789 	ifmr->ifm_status |= IFM_ACTIVE;
1790 
1791 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1792 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1793 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1794 	} else {
1795 		switch (adapter->link_speed) {
1796 		case 10:
1797 			ifmr->ifm_active |= IFM_10_T;
1798 			break;
1799 		case 100:
1800 			ifmr->ifm_active |= IFM_100_TX;
1801 			break;
1802 		case 1000:
1803 			ifmr->ifm_active |= IFM_1000_T;
1804 			break;
1805 		}
1806 		if (adapter->link_duplex == FULL_DUPLEX)
1807 			ifmr->ifm_active |= IFM_FDX;
1808 		else
1809 			ifmr->ifm_active |= IFM_HDX;
1810 	}
1811 	EM_CORE_UNLOCK(adapter);
1812 }
1813 
1814 /*********************************************************************
1815  *
1816  *  Media Ioctl callback
1817  *
1818  *  This routine is called when the user changes speed/duplex using
1819  *  media/mediopt option with ifconfig.
1820  *
1821  **********************************************************************/
1822 static int
1823 em_media_change(if_t ifp)
1824 {
1825 	struct adapter *adapter = if_getsoftc(ifp);
1826 	struct ifmedia  *ifm = &adapter->media;
1827 
1828 	INIT_DEBUGOUT("em_media_change: begin");
1829 
1830 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1831 		return (EINVAL);
1832 
1833 	EM_CORE_LOCK(adapter);
1834 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1835 	case IFM_AUTO:
1836 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1837 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1838 		break;
1839 	case IFM_1000_LX:
1840 	case IFM_1000_SX:
1841 	case IFM_1000_T:
1842 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1843 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1844 		break;
1845 	case IFM_100_TX:
1846 		adapter->hw.mac.autoneg = FALSE;
1847 		adapter->hw.phy.autoneg_advertised = 0;
1848 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1849 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1850 		else
1851 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1852 		break;
1853 	case IFM_10_T:
1854 		adapter->hw.mac.autoneg = FALSE;
1855 		adapter->hw.phy.autoneg_advertised = 0;
1856 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1857 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1858 		else
1859 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1860 		break;
1861 	default:
1862 		device_printf(adapter->dev, "Unsupported media type\n");
1863 	}
1864 
1865 	em_init_locked(adapter);
1866 	EM_CORE_UNLOCK(adapter);
1867 
1868 	return (0);
1869 }
1870 
1871 /*********************************************************************
1872  *
1873  *  This routine maps the mbufs to tx descriptors.
1874  *
1875  *  return 0 on success, positive on failure
1876  **********************************************************************/
1877 
1878 static int
1879 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1880 {
1881 	struct adapter		*adapter = txr->adapter;
1882 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1883 	bus_dmamap_t		map;
1884 	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
1885 	struct e1000_tx_desc	*ctxd = NULL;
1886 	struct mbuf		*m_head;
1887 	struct ether_header	*eh;
1888 	struct ip		*ip = NULL;
1889 	struct tcphdr		*tp = NULL;
1890 	u32			txd_upper = 0, txd_lower = 0;
1891 	int			ip_off, poff;
1892 	int			nsegs, i, j, first, last = 0;
1893 	int			error;
1894 	bool			do_tso, tso_desc, remap = TRUE;
1895 
1896 	m_head = *m_headp;
1897 	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1898 	tso_desc = FALSE;
1899 	ip_off = poff = 0;
1900 
1901 	/*
1902 	 * Intel recommends entire IP/TCP header length reside in a single
1903 	 * buffer. If multiple descriptors are used to describe the IP and
1904 	 * TCP header, each descriptor should describe one or more
1905 	 * complete headers; descriptors referencing only parts of headers
1906 	 * are not supported. If all layer headers are not coalesced into
1907 	 * a single buffer, each buffer should not cross a 4KB boundary,
1908 	 * or be larger than the maximum read request size.
1909 	 * Controller also requires modifing IP/TCP header to make TSO work
1910 	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1911 	 * IP/TCP header into a single buffer to meet the requirement of
1912 	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1913 	 * which also has similiar restrictions.
1914 	 */
1915 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1916 		if (do_tso || (m_head->m_next != NULL &&
1917 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1918 			if (M_WRITABLE(*m_headp) == 0) {
1919 				m_head = m_dup(*m_headp, M_NOWAIT);
1920 				m_freem(*m_headp);
1921 				if (m_head == NULL) {
1922 					*m_headp = NULL;
1923 					return (ENOBUFS);
1924 				}
1925 				*m_headp = m_head;
1926 			}
1927 		}
1928 		/*
1929 		 * XXX
1930 		 * Assume IPv4, we don't have TSO/checksum offload support
1931 		 * for IPv6 yet.
1932 		 */
1933 		ip_off = sizeof(struct ether_header);
1934 		if (m_head->m_len < ip_off) {
1935 			m_head = m_pullup(m_head, ip_off);
1936 			if (m_head == NULL) {
1937 				*m_headp = NULL;
1938 				return (ENOBUFS);
1939 			}
1940 		}
1941 		eh = mtod(m_head, struct ether_header *);
1942 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1943 			ip_off = sizeof(struct ether_vlan_header);
1944 			if (m_head->m_len < ip_off) {
1945 				m_head = m_pullup(m_head, ip_off);
1946 				if (m_head == NULL) {
1947 					*m_headp = NULL;
1948 					return (ENOBUFS);
1949 				}
1950 			}
1951 		}
1952 		if (m_head->m_len < ip_off + sizeof(struct ip)) {
1953 			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1954 			if (m_head == NULL) {
1955 				*m_headp = NULL;
1956 				return (ENOBUFS);
1957 			}
1958 		}
1959 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1960 		poff = ip_off + (ip->ip_hl << 2);
1961 
1962 		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1963 			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1964 				m_head = m_pullup(m_head, poff +
1965 				    sizeof(struct tcphdr));
1966 				if (m_head == NULL) {
1967 					*m_headp = NULL;
1968 					return (ENOBUFS);
1969 				}
1970 			}
1971 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1972 			/*
1973 			 * TSO workaround:
1974 			 *   pull 4 more bytes of data into it.
1975 			 */
1976 			if (m_head->m_len < poff + (tp->th_off << 2)) {
1977 				m_head = m_pullup(m_head, poff +
1978 				                 (tp->th_off << 2) +
1979 				                 TSO_WORKAROUND);
1980 				if (m_head == NULL) {
1981 					*m_headp = NULL;
1982 					return (ENOBUFS);
1983 				}
1984 			}
1985 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1986 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1987 			if (do_tso) {
1988 				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
1989 				                  (ip->ip_hl << 2) +
1990 				                  (tp->th_off << 2));
1991 				ip->ip_sum = 0;
1992 				/*
1993 				 * The pseudo TCP checksum does not include TCP
1994 				 * payload length so driver should recompute
1995 				 * the checksum here what hardware expect to
1996 				 * see. This is adherence of Microsoft's Large
1997 				 * Send specification.
1998 			 	*/
1999 				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2000 				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2001 			}
2002 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2003 			if (m_head->m_len < poff + sizeof(struct udphdr)) {
2004 				m_head = m_pullup(m_head, poff +
2005 				    sizeof(struct udphdr));
2006 				if (m_head == NULL) {
2007 					*m_headp = NULL;
2008 					return (ENOBUFS);
2009 				}
2010 			}
2011 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2012 		}
2013 		*m_headp = m_head;
2014 	}
2015 
2016 	/*
2017 	 * Map the packet for DMA
2018 	 *
2019 	 * Capture the first descriptor index,
2020 	 * this descriptor will have the index
2021 	 * of the EOP which is the only one that
2022 	 * now gets a DONE bit writeback.
2023 	 */
2024 	first = txr->next_avail_desc;
2025 	tx_buffer = &txr->tx_buffers[first];
2026 	tx_buffer_mapped = tx_buffer;
2027 	map = tx_buffer->map;
2028 
2029 retry:
2030 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2031 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2032 
2033 	/*
2034 	 * There are two types of errors we can (try) to handle:
2035 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2036 	 *   out of segments.  Defragment the mbuf chain and try again.
2037 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2038 	 *   at this point in time.  Defer sending and try again later.
2039 	 * All other errors, in particular EINVAL, are fatal and prevent the
2040 	 * mbuf chain from ever going through.  Drop it and report error.
2041 	 */
2042 	if (error == EFBIG && remap) {
2043 		struct mbuf *m;
2044 
2045 		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2046 		if (m == NULL) {
2047 			adapter->mbuf_defrag_failed++;
2048 			m_freem(*m_headp);
2049 			*m_headp = NULL;
2050 			return (ENOBUFS);
2051 		}
2052 		*m_headp = m;
2053 
2054 		/* Try it again, but only once */
2055 		remap = FALSE;
2056 		goto retry;
2057 	} else if (error != 0) {
2058 		adapter->no_tx_dma_setup++;
2059 		m_freem(*m_headp);
2060 		*m_headp = NULL;
2061 		return (error);
2062 	}
2063 
2064 	/*
2065 	 * TSO Hardware workaround, if this packet is not
2066 	 * TSO, and is only a single descriptor long, and
2067 	 * it follows a TSO burst, then we need to add a
2068 	 * sentinel descriptor to prevent premature writeback.
2069 	 */
2070 	if ((!do_tso) && (txr->tx_tso == TRUE)) {
2071 		if (nsegs == 1)
2072 			tso_desc = TRUE;
2073 		txr->tx_tso = FALSE;
2074 	}
2075 
2076         if (nsegs > (txr->tx_avail - EM_MAX_SCATTER)) {
2077                 txr->no_desc_avail++;
2078 		bus_dmamap_unload(txr->txtag, map);
2079 		return (ENOBUFS);
2080         }
2081 	m_head = *m_headp;
2082 
2083 	/* Do hardware assists */
2084 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2085 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2086 		    &txd_upper, &txd_lower);
2087 		/* we need to make a final sentinel transmit desc */
2088 		tso_desc = TRUE;
2089 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2090 		em_transmit_checksum_setup(txr, m_head,
2091 		    ip_off, ip, &txd_upper, &txd_lower);
2092 
2093 	if (m_head->m_flags & M_VLANTAG) {
2094 		/* Set the vlan id. */
2095 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2096                 /* Tell hardware to add tag */
2097                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2098         }
2099 
2100 	i = txr->next_avail_desc;
2101 
2102 	/* Set up our transmit descriptors */
2103 	for (j = 0; j < nsegs; j++) {
2104 		bus_size_t seg_len;
2105 		bus_addr_t seg_addr;
2106 
2107 		tx_buffer = &txr->tx_buffers[i];
2108 		ctxd = &txr->tx_base[i];
2109 		seg_addr = segs[j].ds_addr;
2110 		seg_len  = segs[j].ds_len;
2111 		/*
2112 		** TSO Workaround:
2113 		** If this is the last descriptor, we want to
2114 		** split it so we have a small final sentinel
2115 		*/
2116 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2117 			seg_len -= TSO_WORKAROUND;
2118 			ctxd->buffer_addr = htole64(seg_addr);
2119 			ctxd->lower.data = htole32(
2120 				adapter->txd_cmd | txd_lower | seg_len);
2121 			ctxd->upper.data = htole32(txd_upper);
2122 			if (++i == adapter->num_tx_desc)
2123 				i = 0;
2124 
2125 			/* Now make the sentinel */
2126 			txr->tx_avail--;
2127 			ctxd = &txr->tx_base[i];
2128 			tx_buffer = &txr->tx_buffers[i];
2129 			ctxd->buffer_addr =
2130 			    htole64(seg_addr + seg_len);
2131 			ctxd->lower.data = htole32(
2132 			adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2133 			ctxd->upper.data =
2134 			    htole32(txd_upper);
2135 			last = i;
2136 			if (++i == adapter->num_tx_desc)
2137 				i = 0;
2138 		} else {
2139 			ctxd->buffer_addr = htole64(seg_addr);
2140 			ctxd->lower.data = htole32(
2141 			adapter->txd_cmd | txd_lower | seg_len);
2142 			ctxd->upper.data = htole32(txd_upper);
2143 			last = i;
2144 			if (++i == adapter->num_tx_desc)
2145 				i = 0;
2146 		}
2147 		tx_buffer->m_head = NULL;
2148 		tx_buffer->next_eop = -1;
2149 	}
2150 
2151 	txr->next_avail_desc = i;
2152 	txr->tx_avail -= nsegs;
2153 
2154         tx_buffer->m_head = m_head;
2155 	/*
2156 	** Here we swap the map so the last descriptor,
2157 	** which gets the completion interrupt has the
2158 	** real map, and the first descriptor gets the
2159 	** unused map from this descriptor.
2160 	*/
2161 	tx_buffer_mapped->map = tx_buffer->map;
2162 	tx_buffer->map = map;
2163         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2164 
2165         /*
2166          * Last Descriptor of Packet
2167 	 * needs End Of Packet (EOP)
2168 	 * and Report Status (RS)
2169          */
2170         ctxd->lower.data |=
2171 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2172 	/*
2173 	 * Keep track in the first buffer which
2174 	 * descriptor will be written back
2175 	 */
2176 	tx_buffer = &txr->tx_buffers[first];
2177 	tx_buffer->next_eop = last;
2178 
2179 	/*
2180 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2181 	 * that this frame is available to transmit.
2182 	 */
2183 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2184 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2185 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2186 
2187 	return (0);
2188 }
2189 
2190 static void
2191 em_set_promisc(struct adapter *adapter)
2192 {
2193 	if_t ifp = adapter->ifp;
2194 	u32		reg_rctl;
2195 
2196 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2197 
2198 	if (if_getflags(ifp) & IFF_PROMISC) {
2199 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2200 		/* Turn this on if you want to see bad packets */
2201 		if (em_debug_sbp)
2202 			reg_rctl |= E1000_RCTL_SBP;
2203 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2204 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2205 		reg_rctl |= E1000_RCTL_MPE;
2206 		reg_rctl &= ~E1000_RCTL_UPE;
2207 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2208 	}
2209 }
2210 
2211 static void
2212 em_disable_promisc(struct adapter *adapter)
2213 {
2214 	if_t		ifp = adapter->ifp;
2215 	u32		reg_rctl;
2216 	int		mcnt = 0;
2217 
2218 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2219 	reg_rctl &=  (~E1000_RCTL_UPE);
2220 	if (if_getflags(ifp) & IFF_ALLMULTI)
2221 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2222 	else
2223 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2224 	/* Don't disable if in MAX groups */
2225 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2226 		reg_rctl &=  (~E1000_RCTL_MPE);
2227 	reg_rctl &=  (~E1000_RCTL_SBP);
2228 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2229 }
2230 
2231 
2232 /*********************************************************************
2233  *  Multicast Update
2234  *
2235  *  This routine is called whenever multicast address list is updated.
2236  *
2237  **********************************************************************/
2238 
2239 static void
2240 em_set_multi(struct adapter *adapter)
2241 {
2242 	if_t ifp = adapter->ifp;
2243 	u32 reg_rctl = 0;
2244 	u8  *mta; /* Multicast array memory */
2245 	int mcnt = 0;
2246 
2247 	IOCTL_DEBUGOUT("em_set_multi: begin");
2248 
2249 	mta = adapter->mta;
2250 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2251 
2252 	if (adapter->hw.mac.type == e1000_82542 &&
2253 	    adapter->hw.revision_id == E1000_REVISION_2) {
2254 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2255 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2256 			e1000_pci_clear_mwi(&adapter->hw);
2257 		reg_rctl |= E1000_RCTL_RST;
2258 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2259 		msec_delay(5);
2260 	}
2261 
2262 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2263 
2264 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2265 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2266 		reg_rctl |= E1000_RCTL_MPE;
2267 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2268 	} else
2269 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2270 
2271 	if (adapter->hw.mac.type == e1000_82542 &&
2272 	    adapter->hw.revision_id == E1000_REVISION_2) {
2273 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2274 		reg_rctl &= ~E1000_RCTL_RST;
2275 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2276 		msec_delay(5);
2277 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2278 			e1000_pci_set_mwi(&adapter->hw);
2279 	}
2280 }
2281 
2282 
2283 /*********************************************************************
2284  *  Timer routine
2285  *
2286  *  This routine checks for link status and updates statistics.
2287  *
2288  **********************************************************************/
2289 
2290 static void
2291 em_local_timer(void *arg)
2292 {
2293 	struct adapter	*adapter = arg;
2294 	if_t ifp = adapter->ifp;
2295 	struct tx_ring	*txr = adapter->tx_rings;
2296 	struct rx_ring	*rxr = adapter->rx_rings;
2297 	u32		trigger = 0;
2298 
2299 	EM_CORE_LOCK_ASSERT(adapter);
2300 
2301 	em_update_link_status(adapter);
2302 	em_update_stats_counters(adapter);
2303 
2304 	/* Reset LAA into RAR[0] on 82571 */
2305 	if ((adapter->hw.mac.type == e1000_82571) &&
2306 	    e1000_get_laa_state_82571(&adapter->hw))
2307 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2308 
2309 	/* Mask to use in the irq trigger */
2310 	if (adapter->msix_mem) {
2311 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2312 			trigger |= rxr->ims;
2313 		rxr = adapter->rx_rings;
2314 	} else
2315 		trigger = E1000_ICS_RXDMT0;
2316 
2317 	/*
2318 	** Check on the state of the TX queue(s), this
2319 	** can be done without the lock because its RO
2320 	** and the HUNG state will be static if set.
2321 	*/
2322 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2323 		if (txr->busy == EM_TX_HUNG)
2324 			goto hung;
2325 		if (txr->busy >= EM_TX_MAXTRIES)
2326 			txr->busy = EM_TX_HUNG;
2327 		/* Schedule a TX tasklet if needed */
2328 		if (txr->tx_avail <= EM_MAX_SCATTER)
2329 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2330 	}
2331 
2332 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2333 #ifndef DEVICE_POLLING
2334 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2335 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2336 #endif
2337 	return;
2338 hung:
2339 	/* Looks like we're hung */
2340 	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2341 			txr->me);
2342 	em_print_debug_info(adapter);
2343 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2344 	adapter->watchdog_events++;
2345 	em_init_locked(adapter);
2346 }
2347 
2348 
2349 static void
2350 em_update_link_status(struct adapter *adapter)
2351 {
2352 	struct e1000_hw *hw = &adapter->hw;
2353 	if_t ifp = adapter->ifp;
2354 	device_t dev = adapter->dev;
2355 	struct tx_ring *txr = adapter->tx_rings;
2356 	u32 link_check = 0;
2357 
2358 	/* Get the cached link value or read phy for real */
2359 	switch (hw->phy.media_type) {
2360 	case e1000_media_type_copper:
2361 		if (hw->mac.get_link_status) {
2362 			/* Do the work to read phy */
2363 			e1000_check_for_link(hw);
2364 			link_check = !hw->mac.get_link_status;
2365 			if (link_check) /* ESB2 fix */
2366 				e1000_cfg_on_link_up(hw);
2367 		} else
2368 			link_check = TRUE;
2369 		break;
2370 	case e1000_media_type_fiber:
2371 		e1000_check_for_link(hw);
2372 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2373                                  E1000_STATUS_LU);
2374 		break;
2375 	case e1000_media_type_internal_serdes:
2376 		e1000_check_for_link(hw);
2377 		link_check = adapter->hw.mac.serdes_has_link;
2378 		break;
2379 	default:
2380 	case e1000_media_type_unknown:
2381 		break;
2382 	}
2383 
2384 	/* Now check for a transition */
2385 	if (link_check && (adapter->link_active == 0)) {
2386 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2387 		    &adapter->link_duplex);
2388 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2389 		if ((adapter->link_speed != SPEED_1000) &&
2390 		    ((hw->mac.type == e1000_82571) ||
2391 		    (hw->mac.type == e1000_82572))) {
2392 			int tarc0;
2393 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2394 			tarc0 &= ~TARC_SPEED_MODE_BIT;
2395 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2396 		}
2397 		if (bootverbose)
2398 			device_printf(dev, "Link is up %d Mbps %s\n",
2399 			    adapter->link_speed,
2400 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2401 			    "Full Duplex" : "Half Duplex"));
2402 		adapter->link_active = 1;
2403 		adapter->smartspeed = 0;
2404 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2405 		if_link_state_change(ifp, LINK_STATE_UP);
2406 	} else if (!link_check && (adapter->link_active == 1)) {
2407 		if_setbaudrate(ifp, 0);
2408 		adapter->link_speed = 0;
2409 		adapter->link_duplex = 0;
2410 		if (bootverbose)
2411 			device_printf(dev, "Link is Down\n");
2412 		adapter->link_active = 0;
2413 		/* Link down, disable hang detection */
2414 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2415 			txr->busy = EM_TX_IDLE;
2416 		if_link_state_change(ifp, LINK_STATE_DOWN);
2417 	}
2418 }
2419 
2420 /*********************************************************************
2421  *
2422  *  This routine disables all traffic on the adapter by issuing a
2423  *  global reset on the MAC and deallocates TX/RX buffers.
2424  *
2425  *  This routine should always be called with BOTH the CORE
2426  *  and TX locks.
2427  **********************************************************************/
2428 
2429 static void
2430 em_stop(void *arg)
2431 {
2432 	struct adapter	*adapter = arg;
2433 	if_t ifp = adapter->ifp;
2434 	struct tx_ring	*txr = adapter->tx_rings;
2435 
2436 	EM_CORE_LOCK_ASSERT(adapter);
2437 
2438 	INIT_DEBUGOUT("em_stop: begin");
2439 
2440 	em_disable_intr(adapter);
2441 	callout_stop(&adapter->timer);
2442 
2443 	/* Tell the stack that the interface is no longer active */
2444 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2445 
2446         /* Disarm Hang Detection. */
2447 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2448 		EM_TX_LOCK(txr);
2449 		txr->busy = EM_TX_IDLE;
2450 		EM_TX_UNLOCK(txr);
2451 	}
2452 
2453 	e1000_reset_hw(&adapter->hw);
2454 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2455 
2456 	e1000_led_off(&adapter->hw);
2457 	e1000_cleanup_led(&adapter->hw);
2458 }
2459 
2460 
2461 /*********************************************************************
2462  *
2463  *  Determine hardware revision.
2464  *
2465  **********************************************************************/
2466 static void
2467 em_identify_hardware(struct adapter *adapter)
2468 {
2469 	device_t dev = adapter->dev;
2470 
2471 	/* Make sure our PCI config space has the necessary stuff set */
2472 	pci_enable_busmaster(dev);
2473 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2474 
2475 	/* Save off the information about this board */
2476 	adapter->hw.vendor_id = pci_get_vendor(dev);
2477 	adapter->hw.device_id = pci_get_device(dev);
2478 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2479 	adapter->hw.subsystem_vendor_id =
2480 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2481 	adapter->hw.subsystem_device_id =
2482 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2483 
2484 	/* Do Shared Code Init and Setup */
2485 	if (e1000_set_mac_type(&adapter->hw)) {
2486 		device_printf(dev, "Setup init failure\n");
2487 		return;
2488 	}
2489 }
2490 
2491 static int
2492 em_allocate_pci_resources(struct adapter *adapter)
2493 {
2494 	device_t	dev = adapter->dev;
2495 	int		rid;
2496 
2497 	rid = PCIR_BAR(0);
2498 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2499 	    &rid, RF_ACTIVE);
2500 	if (adapter->memory == NULL) {
2501 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2502 		return (ENXIO);
2503 	}
2504 	adapter->osdep.mem_bus_space_tag =
2505 	    rman_get_bustag(adapter->memory);
2506 	adapter->osdep.mem_bus_space_handle =
2507 	    rman_get_bushandle(adapter->memory);
2508 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2509 
2510 	adapter->hw.back = &adapter->osdep;
2511 
2512 	return (0);
2513 }
2514 
2515 /*********************************************************************
2516  *
2517  *  Setup the Legacy or MSI Interrupt handler
2518  *
2519  **********************************************************************/
2520 int
2521 em_allocate_legacy(struct adapter *adapter)
2522 {
2523 	device_t dev = adapter->dev;
2524 	struct tx_ring	*txr = adapter->tx_rings;
2525 	int error, rid = 0;
2526 
2527 	/* Manually turn off all interrupts */
2528 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2529 
2530 	if (adapter->msix == 1) /* using MSI */
2531 		rid = 1;
2532 	/* We allocate a single interrupt resource */
2533 	adapter->res = bus_alloc_resource_any(dev,
2534 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2535 	if (adapter->res == NULL) {
2536 		device_printf(dev, "Unable to allocate bus resource: "
2537 		    "interrupt\n");
2538 		return (ENXIO);
2539 	}
2540 
2541 	/*
2542 	 * Allocate a fast interrupt and the associated
2543 	 * deferred processing contexts.
2544 	 */
2545 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2546 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2547 	    taskqueue_thread_enqueue, &adapter->tq);
2548 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2549 	    device_get_nameunit(adapter->dev));
2550 	/* Use a TX only tasklet for local timer */
2551 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2552 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2553 	    taskqueue_thread_enqueue, &txr->tq);
2554 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2555 	    device_get_nameunit(adapter->dev));
2556 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2557 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2558 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2559 		device_printf(dev, "Failed to register fast interrupt "
2560 			    "handler: %d\n", error);
2561 		taskqueue_free(adapter->tq);
2562 		adapter->tq = NULL;
2563 		return (error);
2564 	}
2565 
2566 	return (0);
2567 }
2568 
2569 /*********************************************************************
2570  *
2571  *  Setup the MSIX Interrupt handlers
2572  *   This is not really Multiqueue, rather
2573  *   its just seperate interrupt vectors
2574  *   for TX, RX, and Link.
2575  *
2576  **********************************************************************/
2577 int
2578 em_allocate_msix(struct adapter *adapter)
2579 {
2580 	device_t	dev = adapter->dev;
2581 	struct		tx_ring *txr = adapter->tx_rings;
2582 	struct		rx_ring *rxr = adapter->rx_rings;
2583 	int		error, rid, vector = 0;
2584 	int		cpu_id = 0;
2585 
2586 
2587 	/* Make sure all interrupts are disabled */
2588 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2589 
2590 	/* First set up ring resources */
2591 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2592 
2593 		/* RX ring */
2594 		rid = vector + 1;
2595 
2596 		rxr->res = bus_alloc_resource_any(dev,
2597 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2598 		if (rxr->res == NULL) {
2599 			device_printf(dev,
2600 			    "Unable to allocate bus resource: "
2601 			    "RX MSIX Interrupt %d\n", i);
2602 			return (ENXIO);
2603 		}
2604 		if ((error = bus_setup_intr(dev, rxr->res,
2605 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2606 		    rxr, &rxr->tag)) != 0) {
2607 			device_printf(dev, "Failed to register RX handler");
2608 			return (error);
2609 		}
2610 #if __FreeBSD_version >= 800504
2611 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2612 #endif
2613 		rxr->msix = vector;
2614 
2615 		if (em_last_bind_cpu < 0)
2616 			em_last_bind_cpu = CPU_FIRST();
2617 		cpu_id = em_last_bind_cpu;
2618 		bus_bind_intr(dev, rxr->res, cpu_id);
2619 
2620 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2621 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2622 		    taskqueue_thread_enqueue, &rxr->tq);
2623 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2624 		    device_get_nameunit(adapter->dev), cpu_id);
2625 		/*
2626 		** Set the bit to enable interrupt
2627 		** in E1000_IMS -- bits 20 and 21
2628 		** are for RX0 and RX1, note this has
2629 		** NOTHING to do with the MSIX vector
2630 		*/
2631 		rxr->ims = 1 << (20 + i);
2632 		adapter->ims |= rxr->ims;
2633 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2634 
2635 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2636 	}
2637 
2638 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2639 		/* TX ring */
2640 		rid = vector + 1;
2641 		txr->res = bus_alloc_resource_any(dev,
2642 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2643 		if (txr->res == NULL) {
2644 			device_printf(dev,
2645 			    "Unable to allocate bus resource: "
2646 			    "TX MSIX Interrupt %d\n", i);
2647 			return (ENXIO);
2648 		}
2649 		if ((error = bus_setup_intr(dev, txr->res,
2650 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2651 		    txr, &txr->tag)) != 0) {
2652 			device_printf(dev, "Failed to register TX handler");
2653 			return (error);
2654 		}
2655 #if __FreeBSD_version >= 800504
2656 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2657 #endif
2658 		txr->msix = vector;
2659 
2660                 if (em_last_bind_cpu < 0)
2661                         em_last_bind_cpu = CPU_FIRST();
2662                 cpu_id = em_last_bind_cpu;
2663                 bus_bind_intr(dev, txr->res, cpu_id);
2664 
2665 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2666 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2667 		    taskqueue_thread_enqueue, &txr->tq);
2668 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2669 		    device_get_nameunit(adapter->dev), cpu_id);
2670 		/*
2671 		** Set the bit to enable interrupt
2672 		** in E1000_IMS -- bits 22 and 23
2673 		** are for TX0 and TX1, note this has
2674 		** NOTHING to do with the MSIX vector
2675 		*/
2676 		txr->ims = 1 << (22 + i);
2677 		adapter->ims |= txr->ims;
2678 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2679 
2680 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2681 	}
2682 
2683 	/* Link interrupt */
2684 	rid = vector + 1;
2685 	adapter->res = bus_alloc_resource_any(dev,
2686 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2687 	if (!adapter->res) {
2688 		device_printf(dev,"Unable to allocate "
2689 		    "bus resource: Link interrupt [%d]\n", rid);
2690 		return (ENXIO);
2691         }
2692 	/* Set the link handler function */
2693 	error = bus_setup_intr(dev, adapter->res,
2694 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2695 	    em_msix_link, adapter, &adapter->tag);
2696 	if (error) {
2697 		adapter->res = NULL;
2698 		device_printf(dev, "Failed to register LINK handler");
2699 		return (error);
2700 	}
2701 #if __FreeBSD_version >= 800504
2702 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2703 #endif
2704 	adapter->linkvec = vector;
2705 	adapter->ivars |=  (8 | vector) << 16;
2706 	adapter->ivars |= 0x80000000;
2707 
2708 	return (0);
2709 }
2710 
2711 
2712 static void
2713 em_free_pci_resources(struct adapter *adapter)
2714 {
2715 	device_t	dev = adapter->dev;
2716 	struct tx_ring	*txr;
2717 	struct rx_ring	*rxr;
2718 	int		rid;
2719 
2720 
2721 	/*
2722 	** Release all the queue interrupt resources:
2723 	*/
2724 	for (int i = 0; i < adapter->num_queues; i++) {
2725 		txr = &adapter->tx_rings[i];
2726 		/* an early abort? */
2727 		if (txr == NULL)
2728 			break;
2729 		rid = txr->msix +1;
2730 		if (txr->tag != NULL) {
2731 			bus_teardown_intr(dev, txr->res, txr->tag);
2732 			txr->tag = NULL;
2733 		}
2734 		if (txr->res != NULL)
2735 			bus_release_resource(dev, SYS_RES_IRQ,
2736 			    rid, txr->res);
2737 
2738 		rxr = &adapter->rx_rings[i];
2739 		/* an early abort? */
2740 		if (rxr == NULL)
2741 			break;
2742 		rid = rxr->msix +1;
2743 		if (rxr->tag != NULL) {
2744 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2745 			rxr->tag = NULL;
2746 		}
2747 		if (rxr->res != NULL)
2748 			bus_release_resource(dev, SYS_RES_IRQ,
2749 			    rid, rxr->res);
2750 	}
2751 
2752         if (adapter->linkvec) /* we are doing MSIX */
2753                 rid = adapter->linkvec + 1;
2754         else
2755                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2756 
2757 	if (adapter->tag != NULL) {
2758 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2759 		adapter->tag = NULL;
2760 	}
2761 
2762 	if (adapter->res != NULL)
2763 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2764 
2765 
2766 	if (adapter->msix)
2767 		pci_release_msi(dev);
2768 
2769 	if (adapter->msix_mem != NULL)
2770 		bus_release_resource(dev, SYS_RES_MEMORY,
2771 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2772 
2773 	if (adapter->memory != NULL)
2774 		bus_release_resource(dev, SYS_RES_MEMORY,
2775 		    PCIR_BAR(0), adapter->memory);
2776 
2777 	if (adapter->flash != NULL)
2778 		bus_release_resource(dev, SYS_RES_MEMORY,
2779 		    EM_FLASH, adapter->flash);
2780 }
2781 
2782 /*
2783  * Setup MSI or MSI/X
2784  */
2785 static int
2786 em_setup_msix(struct adapter *adapter)
2787 {
2788 	device_t dev = adapter->dev;
2789 	int val;
2790 
2791 	/* Nearly always going to use one queue */
2792 	adapter->num_queues = 1;
2793 
2794 	/*
2795 	** Try using MSI-X for Hartwell adapters
2796 	*/
2797 	if ((adapter->hw.mac.type == e1000_82574) &&
2798 	    (em_enable_msix == TRUE)) {
2799 #ifdef EM_MULTIQUEUE
2800 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2801 		if (adapter->num_queues > 1)
2802 			em_enable_vectors_82574(adapter);
2803 #endif
2804 		/* Map the MSIX BAR */
2805 		int rid = PCIR_BAR(EM_MSIX_BAR);
2806 		adapter->msix_mem = bus_alloc_resource_any(dev,
2807 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2808        		if (adapter->msix_mem == NULL) {
2809 			/* May not be enabled */
2810                		device_printf(adapter->dev,
2811 			    "Unable to map MSIX table \n");
2812 			goto msi;
2813        		}
2814 		val = pci_msix_count(dev);
2815 
2816 #ifdef EM_MULTIQUEUE
2817 		/* We need 5 vectors in the multiqueue case */
2818 		if (adapter->num_queues > 1 ) {
2819 			if (val >= 5)
2820 				val = 5;
2821 			else {
2822 				adapter->num_queues = 1;
2823 				device_printf(adapter->dev,
2824 				    "Insufficient MSIX vectors for >1 queue, "
2825 				    "using single queue...\n");
2826 				goto msix_one;
2827 			}
2828 		} else {
2829 msix_one:
2830 #endif
2831 			if (val >= 3)
2832 				val = 3;
2833 			else {
2834 				device_printf(adapter->dev,
2835 			    	"Insufficient MSIX vectors, using MSI\n");
2836 				goto msi;
2837 			}
2838 #ifdef EM_MULTIQUEUE
2839 		}
2840 #endif
2841 
2842 		if ((pci_alloc_msix(dev, &val) == 0)) {
2843 			device_printf(adapter->dev,
2844 			    "Using MSIX interrupts "
2845 			    "with %d vectors\n", val);
2846 			return (val);
2847 		}
2848 
2849 		/*
2850 		** If MSIX alloc failed or provided us with
2851 		** less than needed, free and fall through to MSI
2852 		*/
2853 		pci_release_msi(dev);
2854 	}
2855 msi:
2856 	if (adapter->msix_mem != NULL) {
2857 		bus_release_resource(dev, SYS_RES_MEMORY,
2858 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2859 		adapter->msix_mem = NULL;
2860 	}
2861        	val = 1;
2862        	if (pci_alloc_msi(dev, &val) == 0) {
2863                	device_printf(adapter->dev, "Using an MSI interrupt\n");
2864 		return (val);
2865 	}
2866 	/* Should only happen due to manual configuration */
2867 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2868 	return (0);
2869 }
2870 
2871 
2872 /*********************************************************************
2873  *
2874  *  Initialize the hardware to a configuration
2875  *  as specified by the adapter structure.
2876  *
2877  **********************************************************************/
2878 static void
2879 em_reset(struct adapter *adapter)
2880 {
2881 	device_t	dev = adapter->dev;
2882 	if_t ifp = adapter->ifp;
2883 	struct e1000_hw	*hw = &adapter->hw;
2884 	u16		rx_buffer_size;
2885 	u32		pba;
2886 
2887 	INIT_DEBUGOUT("em_reset: begin");
2888 
2889 	/* Set up smart power down as default off on newer adapters. */
2890 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2891 	    hw->mac.type == e1000_82572)) {
2892 		u16 phy_tmp = 0;
2893 
2894 		/* Speed up time to link by disabling smart power down. */
2895 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2896 		phy_tmp &= ~IGP02E1000_PM_SPD;
2897 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2898 	}
2899 
2900 	/*
2901 	 * Packet Buffer Allocation (PBA)
2902 	 * Writing PBA sets the receive portion of the buffer
2903 	 * the remainder is used for the transmit buffer.
2904 	 */
2905 	switch (hw->mac.type) {
2906 	/* Total Packet Buffer on these is 48K */
2907 	case e1000_82571:
2908 	case e1000_82572:
2909 	case e1000_80003es2lan:
2910 			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2911 		break;
2912 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2913 			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2914 		break;
2915 	case e1000_82574:
2916 	case e1000_82583:
2917 			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2918 		break;
2919 	case e1000_ich8lan:
2920 		pba = E1000_PBA_8K;
2921 		break;
2922 	case e1000_ich9lan:
2923 	case e1000_ich10lan:
2924 		/* Boost Receive side for jumbo frames */
2925 		if (adapter->hw.mac.max_frame_size > 4096)
2926 			pba = E1000_PBA_14K;
2927 		else
2928 			pba = E1000_PBA_10K;
2929 		break;
2930 	case e1000_pchlan:
2931 	case e1000_pch2lan:
2932 	case e1000_pch_lpt:
2933 		pba = E1000_PBA_26K;
2934 		break;
2935 	default:
2936 		if (adapter->hw.mac.max_frame_size > 8192)
2937 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2938 		else
2939 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2940 	}
2941 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2942 
2943 	/*
2944 	 * These parameters control the automatic generation (Tx) and
2945 	 * response (Rx) to Ethernet PAUSE frames.
2946 	 * - High water mark should allow for at least two frames to be
2947 	 *   received after sending an XOFF.
2948 	 * - Low water mark works best when it is very near the high water mark.
2949 	 *   This allows the receiver to restart by sending XON when it has
2950 	 *   drained a bit. Here we use an arbitary value of 1500 which will
2951 	 *   restart after one full frame is pulled from the buffer. There
2952 	 *   could be several smaller frames in the buffer and if so they will
2953 	 *   not trigger the XON until their total number reduces the buffer
2954 	 *   by 1500.
2955 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2956 	 */
2957 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2958 	hw->fc.high_water = rx_buffer_size -
2959 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2960 	hw->fc.low_water = hw->fc.high_water - 1500;
2961 
2962 	if (adapter->fc) /* locally set flow control value? */
2963 		hw->fc.requested_mode = adapter->fc;
2964 	else
2965 		hw->fc.requested_mode = e1000_fc_full;
2966 
2967 	if (hw->mac.type == e1000_80003es2lan)
2968 		hw->fc.pause_time = 0xFFFF;
2969 	else
2970 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2971 
2972 	hw->fc.send_xon = TRUE;
2973 
2974 	/* Device specific overrides/settings */
2975 	switch (hw->mac.type) {
2976 	case e1000_pchlan:
2977 		/* Workaround: no TX flow ctrl for PCH */
2978                 hw->fc.requested_mode = e1000_fc_rx_pause;
2979 		hw->fc.pause_time = 0xFFFF; /* override */
2980 		if (if_getmtu(ifp) > ETHERMTU) {
2981 			hw->fc.high_water = 0x3500;
2982 			hw->fc.low_water = 0x1500;
2983 		} else {
2984 			hw->fc.high_water = 0x5000;
2985 			hw->fc.low_water = 0x3000;
2986 		}
2987 		hw->fc.refresh_time = 0x1000;
2988 		break;
2989 	case e1000_pch2lan:
2990 	case e1000_pch_lpt:
2991 		hw->fc.high_water = 0x5C20;
2992 		hw->fc.low_water = 0x5048;
2993 		hw->fc.pause_time = 0x0650;
2994 		hw->fc.refresh_time = 0x0400;
2995 		/* Jumbos need adjusted PBA */
2996 		if (if_getmtu(ifp) > ETHERMTU)
2997 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2998 		else
2999 			E1000_WRITE_REG(hw, E1000_PBA, 26);
3000 		break;
3001         case e1000_ich9lan:
3002         case e1000_ich10lan:
3003 		if (if_getmtu(ifp) > ETHERMTU) {
3004 			hw->fc.high_water = 0x2800;
3005 			hw->fc.low_water = hw->fc.high_water - 8;
3006 			break;
3007 		}
3008 		/* else fall thru */
3009 	default:
3010 		if (hw->mac.type == e1000_80003es2lan)
3011 			hw->fc.pause_time = 0xFFFF;
3012 		break;
3013 	}
3014 
3015 	/* Issue a global reset */
3016 	e1000_reset_hw(hw);
3017 	E1000_WRITE_REG(hw, E1000_WUC, 0);
3018 	em_disable_aspm(adapter);
3019 	/* and a re-init */
3020 	if (e1000_init_hw(hw) < 0) {
3021 		device_printf(dev, "Hardware Initialization Failed\n");
3022 		return;
3023 	}
3024 
3025 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3026 	e1000_get_phy_info(hw);
3027 	e1000_check_for_link(hw);
3028 	return;
3029 }
3030 
3031 /*********************************************************************
3032  *
3033  *  Setup networking device structure and register an interface.
3034  *
3035  **********************************************************************/
3036 static int
3037 em_setup_interface(device_t dev, struct adapter *adapter)
3038 {
3039 	if_t ifp;
3040 
3041 	INIT_DEBUGOUT("em_setup_interface: begin");
3042 
3043 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3044 	if (ifp == 0) {
3045 		device_printf(dev, "can not allocate ifnet structure\n");
3046 		return (-1);
3047 	}
3048 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3049 	if_setdev(ifp, dev);
3050 	if_setinitfn(ifp, em_init);
3051 	if_setsoftc(ifp, adapter);
3052 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3053 	if_setioctlfn(ifp, em_ioctl);
3054 	if_setgetcounterfn(ifp, em_get_counter);
3055 	/* TSO parameters */
3056 	ifp->if_hw_tsomax = IP_MAXPACKET;
3057 	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER;
3058 	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3059 
3060 #ifdef EM_MULTIQUEUE
3061 	/* Multiqueue stack interface */
3062 	if_settransmitfn(ifp, em_mq_start);
3063 	if_setqflushfn(ifp, em_qflush);
3064 #else
3065 	if_setstartfn(ifp, em_start);
3066 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3067 	if_setsendqready(ifp);
3068 #endif
3069 
3070 	ether_ifattach(ifp, adapter->hw.mac.addr);
3071 
3072 	if_setcapabilities(ifp, 0);
3073 	if_setcapenable(ifp, 0);
3074 
3075 
3076 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3077 	    IFCAP_TSO4, 0);
3078 	/*
3079 	 * Tell the upper layer(s) we
3080 	 * support full VLAN capability
3081 	 */
3082 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3083 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3084 	    IFCAP_VLAN_MTU, 0);
3085 	if_setcapenable(ifp, if_getcapabilities(ifp));
3086 
3087 	/*
3088 	** Don't turn this on by default, if vlans are
3089 	** created on another pseudo device (eg. lagg)
3090 	** then vlan events are not passed thru, breaking
3091 	** operation, but with HW FILTER off it works. If
3092 	** using vlans directly on the em driver you can
3093 	** enable this and get full hardware tag filtering.
3094 	*/
3095 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3096 
3097 #ifdef DEVICE_POLLING
3098 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3099 #endif
3100 
3101 	/* Enable only WOL MAGIC by default */
3102 	if (adapter->wol) {
3103 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3104 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3105 	}
3106 
3107 	/*
3108 	 * Specify the media types supported by this adapter and register
3109 	 * callbacks to update media and link information
3110 	 */
3111 	ifmedia_init(&adapter->media, IFM_IMASK,
3112 	    em_media_change, em_media_status);
3113 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3114 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3115 		u_char fiber_type = IFM_1000_SX;	/* default type */
3116 
3117 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3118 			    0, NULL);
3119 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3120 	} else {
3121 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3122 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3123 			    0, NULL);
3124 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3125 			    0, NULL);
3126 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3127 			    0, NULL);
3128 		if (adapter->hw.phy.type != e1000_phy_ife) {
3129 			ifmedia_add(&adapter->media,
3130 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3131 			ifmedia_add(&adapter->media,
3132 				IFM_ETHER | IFM_1000_T, 0, NULL);
3133 		}
3134 	}
3135 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3136 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3137 	return (0);
3138 }
3139 
3140 
3141 /*
3142  * Manage DMA'able memory.
3143  */
3144 static void
3145 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3146 {
3147 	if (error)
3148 		return;
3149 	*(bus_addr_t *) arg = segs[0].ds_addr;
3150 }
3151 
3152 static int
3153 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3154         struct em_dma_alloc *dma, int mapflags)
3155 {
3156 	int error;
3157 
3158 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3159 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3160 				BUS_SPACE_MAXADDR,	/* lowaddr */
3161 				BUS_SPACE_MAXADDR,	/* highaddr */
3162 				NULL, NULL,		/* filter, filterarg */
3163 				size,			/* maxsize */
3164 				1,			/* nsegments */
3165 				size,			/* maxsegsize */
3166 				0,			/* flags */
3167 				NULL,			/* lockfunc */
3168 				NULL,			/* lockarg */
3169 				&dma->dma_tag);
3170 	if (error) {
3171 		device_printf(adapter->dev,
3172 		    "%s: bus_dma_tag_create failed: %d\n",
3173 		    __func__, error);
3174 		goto fail_0;
3175 	}
3176 
3177 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3178 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3179 	if (error) {
3180 		device_printf(adapter->dev,
3181 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3182 		    __func__, (uintmax_t)size, error);
3183 		goto fail_2;
3184 	}
3185 
3186 	dma->dma_paddr = 0;
3187 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3188 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3189 	if (error || dma->dma_paddr == 0) {
3190 		device_printf(adapter->dev,
3191 		    "%s: bus_dmamap_load failed: %d\n",
3192 		    __func__, error);
3193 		goto fail_3;
3194 	}
3195 
3196 	return (0);
3197 
3198 fail_3:
3199 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3200 fail_2:
3201 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3202 	bus_dma_tag_destroy(dma->dma_tag);
3203 fail_0:
3204 	dma->dma_tag = NULL;
3205 
3206 	return (error);
3207 }
3208 
3209 static void
3210 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3211 {
3212 	if (dma->dma_tag == NULL)
3213 		return;
3214 	if (dma->dma_paddr != 0) {
3215 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3216 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3217 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3218 		dma->dma_paddr = 0;
3219 	}
3220 	if (dma->dma_vaddr != NULL) {
3221 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3222 		dma->dma_vaddr = NULL;
3223 	}
3224 	bus_dma_tag_destroy(dma->dma_tag);
3225 	dma->dma_tag = NULL;
3226 }
3227 
3228 
3229 /*********************************************************************
3230  *
3231  *  Allocate memory for the transmit and receive rings, and then
3232  *  the descriptors associated with each, called only once at attach.
3233  *
3234  **********************************************************************/
3235 static int
3236 em_allocate_queues(struct adapter *adapter)
3237 {
3238 	device_t		dev = adapter->dev;
3239 	struct tx_ring		*txr = NULL;
3240 	struct rx_ring		*rxr = NULL;
3241 	int rsize, tsize, error = E1000_SUCCESS;
3242 	int txconf = 0, rxconf = 0;
3243 
3244 
3245 	/* Allocate the TX ring struct memory */
3246 	if (!(adapter->tx_rings =
3247 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3248 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3249 		device_printf(dev, "Unable to allocate TX ring memory\n");
3250 		error = ENOMEM;
3251 		goto fail;
3252 	}
3253 
3254 	/* Now allocate the RX */
3255 	if (!(adapter->rx_rings =
3256 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3257 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3258 		device_printf(dev, "Unable to allocate RX ring memory\n");
3259 		error = ENOMEM;
3260 		goto rx_fail;
3261 	}
3262 
3263 	tsize = roundup2(adapter->num_tx_desc *
3264 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3265 	/*
3266 	 * Now set up the TX queues, txconf is needed to handle the
3267 	 * possibility that things fail midcourse and we need to
3268 	 * undo memory gracefully
3269 	 */
3270 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3271 		/* Set up some basics */
3272 		txr = &adapter->tx_rings[i];
3273 		txr->adapter = adapter;
3274 		txr->me = i;
3275 
3276 		/* Initialize the TX lock */
3277 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3278 		    device_get_nameunit(dev), txr->me);
3279 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3280 
3281 		if (em_dma_malloc(adapter, tsize,
3282 			&txr->txdma, BUS_DMA_NOWAIT)) {
3283 			device_printf(dev,
3284 			    "Unable to allocate TX Descriptor memory\n");
3285 			error = ENOMEM;
3286 			goto err_tx_desc;
3287 		}
3288 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3289 		bzero((void *)txr->tx_base, tsize);
3290 
3291         	if (em_allocate_transmit_buffers(txr)) {
3292 			device_printf(dev,
3293 			    "Critical Failure setting up transmit buffers\n");
3294 			error = ENOMEM;
3295 			goto err_tx_desc;
3296         	}
3297 #if __FreeBSD_version >= 800000
3298 		/* Allocate a buf ring */
3299 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3300 		    M_WAITOK, &txr->tx_mtx);
3301 #endif
3302 	}
3303 
3304 	/*
3305 	 * Next the RX queues...
3306 	 */
3307 	rsize = roundup2(adapter->num_rx_desc *
3308 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3309 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3310 		rxr = &adapter->rx_rings[i];
3311 		rxr->adapter = adapter;
3312 		rxr->me = i;
3313 
3314 		/* Initialize the RX lock */
3315 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3316 		    device_get_nameunit(dev), txr->me);
3317 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3318 
3319 		if (em_dma_malloc(adapter, rsize,
3320 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3321 			device_printf(dev,
3322 			    "Unable to allocate RxDescriptor memory\n");
3323 			error = ENOMEM;
3324 			goto err_rx_desc;
3325 		}
3326 		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3327 		bzero((void *)rxr->rx_base, rsize);
3328 
3329         	/* Allocate receive buffers for the ring*/
3330 		if (em_allocate_receive_buffers(rxr)) {
3331 			device_printf(dev,
3332 			    "Critical Failure setting up receive buffers\n");
3333 			error = ENOMEM;
3334 			goto err_rx_desc;
3335 		}
3336 	}
3337 
3338 	return (0);
3339 
3340 err_rx_desc:
3341 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3342 		em_dma_free(adapter, &rxr->rxdma);
3343 err_tx_desc:
3344 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3345 		em_dma_free(adapter, &txr->txdma);
3346 	free(adapter->rx_rings, M_DEVBUF);
3347 rx_fail:
3348 #if __FreeBSD_version >= 800000
3349 	buf_ring_free(txr->br, M_DEVBUF);
3350 #endif
3351 	free(adapter->tx_rings, M_DEVBUF);
3352 fail:
3353 	return (error);
3354 }
3355 
3356 
3357 /*********************************************************************
3358  *
3359  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3360  *  the information needed to transmit a packet on the wire. This is
3361  *  called only once at attach, setup is done every reset.
3362  *
3363  **********************************************************************/
3364 static int
3365 em_allocate_transmit_buffers(struct tx_ring *txr)
3366 {
3367 	struct adapter *adapter = txr->adapter;
3368 	device_t dev = adapter->dev;
3369 	struct em_txbuffer *txbuf;
3370 	int error, i;
3371 
3372 	/*
3373 	 * Setup DMA descriptor areas.
3374 	 */
3375 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3376 			       1, 0,			/* alignment, bounds */
3377 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3378 			       BUS_SPACE_MAXADDR,	/* highaddr */
3379 			       NULL, NULL,		/* filter, filterarg */
3380 			       EM_TSO_SIZE,		/* maxsize */
3381 			       EM_MAX_SCATTER,		/* nsegments */
3382 			       PAGE_SIZE,		/* maxsegsize */
3383 			       0,			/* flags */
3384 			       NULL,			/* lockfunc */
3385 			       NULL,			/* lockfuncarg */
3386 			       &txr->txtag))) {
3387 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3388 		goto fail;
3389 	}
3390 
3391 	if (!(txr->tx_buffers =
3392 	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3393 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3394 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3395 		error = ENOMEM;
3396 		goto fail;
3397 	}
3398 
3399         /* Create the descriptor buffer dma maps */
3400 	txbuf = txr->tx_buffers;
3401 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3402 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3403 		if (error != 0) {
3404 			device_printf(dev, "Unable to create TX DMA map\n");
3405 			goto fail;
3406 		}
3407 	}
3408 
3409 	return 0;
3410 fail:
3411 	/* We free all, it handles case where we are in the middle */
3412 	em_free_transmit_structures(adapter);
3413 	return (error);
3414 }
3415 
3416 /*********************************************************************
3417  *
3418  *  Initialize a transmit ring.
3419  *
3420  **********************************************************************/
3421 static void
3422 em_setup_transmit_ring(struct tx_ring *txr)
3423 {
3424 	struct adapter *adapter = txr->adapter;
3425 	struct em_txbuffer *txbuf;
3426 	int i;
3427 #ifdef DEV_NETMAP
3428 	struct netmap_slot *slot;
3429 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3430 #endif /* DEV_NETMAP */
3431 
3432 	/* Clear the old descriptor contents */
3433 	EM_TX_LOCK(txr);
3434 #ifdef DEV_NETMAP
3435 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3436 #endif /* DEV_NETMAP */
3437 
3438 	bzero((void *)txr->tx_base,
3439 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3440 	/* Reset indices */
3441 	txr->next_avail_desc = 0;
3442 	txr->next_to_clean = 0;
3443 
3444 	/* Free any existing tx buffers. */
3445         txbuf = txr->tx_buffers;
3446 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3447 		if (txbuf->m_head != NULL) {
3448 			bus_dmamap_sync(txr->txtag, txbuf->map,
3449 			    BUS_DMASYNC_POSTWRITE);
3450 			bus_dmamap_unload(txr->txtag, txbuf->map);
3451 			m_freem(txbuf->m_head);
3452 			txbuf->m_head = NULL;
3453 		}
3454 #ifdef DEV_NETMAP
3455 		if (slot) {
3456 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3457 			uint64_t paddr;
3458 			void *addr;
3459 
3460 			addr = PNMB(na, slot + si, &paddr);
3461 			txr->tx_base[i].buffer_addr = htole64(paddr);
3462 			/* reload the map for netmap mode */
3463 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3464 		}
3465 #endif /* DEV_NETMAP */
3466 
3467 		/* clear the watch index */
3468 		txbuf->next_eop = -1;
3469         }
3470 
3471 	/* Set number of descriptors available */
3472 	txr->tx_avail = adapter->num_tx_desc;
3473 	txr->busy = EM_TX_IDLE;
3474 
3475 	/* Clear checksum offload context. */
3476 	txr->last_hw_offload = 0;
3477 	txr->last_hw_ipcss = 0;
3478 	txr->last_hw_ipcso = 0;
3479 	txr->last_hw_tucss = 0;
3480 	txr->last_hw_tucso = 0;
3481 
3482 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3483 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3484 	EM_TX_UNLOCK(txr);
3485 }
3486 
3487 /*********************************************************************
3488  *
3489  *  Initialize all transmit rings.
3490  *
3491  **********************************************************************/
3492 static void
3493 em_setup_transmit_structures(struct adapter *adapter)
3494 {
3495 	struct tx_ring *txr = adapter->tx_rings;
3496 
3497 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3498 		em_setup_transmit_ring(txr);
3499 
3500 	return;
3501 }
3502 
3503 /*********************************************************************
3504  *
3505  *  Enable transmit unit.
3506  *
3507  **********************************************************************/
3508 static void
3509 em_initialize_transmit_unit(struct adapter *adapter)
3510 {
3511 	struct tx_ring	*txr = adapter->tx_rings;
3512 	struct e1000_hw	*hw = &adapter->hw;
3513 	u32	tctl, txdctl = 0, tarc, tipg = 0;
3514 
3515 	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3516 
3517 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3518 		u64 bus_addr = txr->txdma.dma_paddr;
3519 		/* Base and Len of TX Ring */
3520 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3521 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3522 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3523 	    	    (u32)(bus_addr >> 32));
3524 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3525 	    	    (u32)bus_addr);
3526 		/* Init the HEAD/TAIL indices */
3527 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3528 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3529 
3530 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3531 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3532 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3533 
3534 		txr->busy = EM_TX_IDLE;
3535 		txdctl = 0; /* clear txdctl */
3536                 txdctl |= 0x1f; /* PTHRESH */
3537                 txdctl |= 1 << 8; /* HTHRESH */
3538                 txdctl |= 1 << 16;/* WTHRESH */
3539 		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3540 		txdctl |= E1000_TXDCTL_GRAN;
3541                 txdctl |= 1 << 25; /* LWTHRESH */
3542 
3543                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3544 	}
3545 
3546 	/* Set the default values for the Tx Inter Packet Gap timer */
3547 	switch (adapter->hw.mac.type) {
3548 	case e1000_80003es2lan:
3549 		tipg = DEFAULT_82543_TIPG_IPGR1;
3550 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3551 		    E1000_TIPG_IPGR2_SHIFT;
3552 		break;
3553 	default:
3554 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3555 		    (adapter->hw.phy.media_type ==
3556 		    e1000_media_type_internal_serdes))
3557 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3558 		else
3559 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3560 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3561 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3562 	}
3563 
3564 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3565 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3566 
3567 	if(adapter->hw.mac.type >= e1000_82540)
3568 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3569 		    adapter->tx_abs_int_delay.value);
3570 
3571 	if ((adapter->hw.mac.type == e1000_82571) ||
3572 	    (adapter->hw.mac.type == e1000_82572)) {
3573 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3574 		tarc |= TARC_SPEED_MODE_BIT;
3575 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3576 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3577 		/* errata: program both queues to unweighted RR */
3578 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3579 		tarc |= 1;
3580 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3581 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3582 		tarc |= 1;
3583 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3584 	} else if (adapter->hw.mac.type == e1000_82574) {
3585 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3586 		tarc |= TARC_ERRATA_BIT;
3587 		if ( adapter->num_queues > 1) {
3588 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3589 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3590 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3591 		} else
3592 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3593 	}
3594 
3595 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3596 	if (adapter->tx_int_delay.value > 0)
3597 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3598 
3599 	/* Program the Transmit Control Register */
3600 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3601 	tctl &= ~E1000_TCTL_CT;
3602 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3603 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3604 
3605 	if (adapter->hw.mac.type >= e1000_82571)
3606 		tctl |= E1000_TCTL_MULR;
3607 
3608 	/* This write will effectively turn on the transmit unit. */
3609 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3610 
3611 }
3612 
3613 
3614 /*********************************************************************
3615  *
3616  *  Free all transmit rings.
3617  *
3618  **********************************************************************/
3619 static void
3620 em_free_transmit_structures(struct adapter *adapter)
3621 {
3622 	struct tx_ring *txr = adapter->tx_rings;
3623 
3624 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3625 		EM_TX_LOCK(txr);
3626 		em_free_transmit_buffers(txr);
3627 		em_dma_free(adapter, &txr->txdma);
3628 		EM_TX_UNLOCK(txr);
3629 		EM_TX_LOCK_DESTROY(txr);
3630 	}
3631 
3632 	free(adapter->tx_rings, M_DEVBUF);
3633 }
3634 
3635 /*********************************************************************
3636  *
3637  *  Free transmit ring related data structures.
3638  *
3639  **********************************************************************/
3640 static void
3641 em_free_transmit_buffers(struct tx_ring *txr)
3642 {
3643 	struct adapter		*adapter = txr->adapter;
3644 	struct em_txbuffer	*txbuf;
3645 
3646 	INIT_DEBUGOUT("free_transmit_ring: begin");
3647 
3648 	if (txr->tx_buffers == NULL)
3649 		return;
3650 
3651 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3652 		txbuf = &txr->tx_buffers[i];
3653 		if (txbuf->m_head != NULL) {
3654 			bus_dmamap_sync(txr->txtag, txbuf->map,
3655 			    BUS_DMASYNC_POSTWRITE);
3656 			bus_dmamap_unload(txr->txtag,
3657 			    txbuf->map);
3658 			m_freem(txbuf->m_head);
3659 			txbuf->m_head = NULL;
3660 			if (txbuf->map != NULL) {
3661 				bus_dmamap_destroy(txr->txtag,
3662 				    txbuf->map);
3663 				txbuf->map = NULL;
3664 			}
3665 		} else if (txbuf->map != NULL) {
3666 			bus_dmamap_unload(txr->txtag,
3667 			    txbuf->map);
3668 			bus_dmamap_destroy(txr->txtag,
3669 			    txbuf->map);
3670 			txbuf->map = NULL;
3671 		}
3672 	}
3673 #if __FreeBSD_version >= 800000
3674 	if (txr->br != NULL)
3675 		buf_ring_free(txr->br, M_DEVBUF);
3676 #endif
3677 	if (txr->tx_buffers != NULL) {
3678 		free(txr->tx_buffers, M_DEVBUF);
3679 		txr->tx_buffers = NULL;
3680 	}
3681 	if (txr->txtag != NULL) {
3682 		bus_dma_tag_destroy(txr->txtag);
3683 		txr->txtag = NULL;
3684 	}
3685 	return;
3686 }
3687 
3688 
3689 /*********************************************************************
3690  *  The offload context is protocol specific (TCP/UDP) and thus
3691  *  only needs to be set when the protocol changes. The occasion
3692  *  of a context change can be a performance detriment, and
3693  *  might be better just disabled. The reason arises in the way
3694  *  in which the controller supports pipelined requests from the
3695  *  Tx data DMA. Up to four requests can be pipelined, and they may
3696  *  belong to the same packet or to multiple packets. However all
3697  *  requests for one packet are issued before a request is issued
3698  *  for a subsequent packet and if a request for the next packet
3699  *  requires a context change, that request will be stalled
3700  *  until the previous request completes. This means setting up
3701  *  a new context effectively disables pipelined Tx data DMA which
3702  *  in turn greatly slow down performance to send small sized
3703  *  frames.
3704  **********************************************************************/
3705 static void
3706 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3707     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3708 {
3709 	struct adapter			*adapter = txr->adapter;
3710 	struct e1000_context_desc	*TXD = NULL;
3711 	struct em_txbuffer		*tx_buffer;
3712 	int				cur, hdr_len;
3713 	u32				cmd = 0;
3714 	u16				offload = 0;
3715 	u8				ipcso, ipcss, tucso, tucss;
3716 
3717 	ipcss = ipcso = tucss = tucso = 0;
3718 	hdr_len = ip_off + (ip->ip_hl << 2);
3719 	cur = txr->next_avail_desc;
3720 
3721 	/* Setup of IP header checksum. */
3722 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3723 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3724 		offload |= CSUM_IP;
3725 		ipcss = ip_off;
3726 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3727 		/*
3728 		 * Start offset for header checksum calculation.
3729 		 * End offset for header checksum calculation.
3730 		 * Offset of place to put the checksum.
3731 		 */
3732 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3733 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3734 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3735 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3736 		cmd |= E1000_TXD_CMD_IP;
3737 	}
3738 
3739 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3740  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3741  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3742  		offload |= CSUM_TCP;
3743  		tucss = hdr_len;
3744  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3745 		/*
3746 		 * The 82574L can only remember the *last* context used
3747 		 * regardless of queue that it was use for.  We cannot reuse
3748 		 * contexts on this hardware platform and must generate a new
3749 		 * context every time.  82574L hardware spec, section 7.2.6,
3750 		 * second note.
3751 		 */
3752 		if (adapter->num_queues < 2) {
3753  			/*
3754  		 	* Setting up new checksum offload context for every
3755 			* frames takes a lot of processing time for hardware.
3756 			* This also reduces performance a lot for small sized
3757 			* frames so avoid it if driver can use previously
3758 			* configured checksum offload context.
3759  		 	*/
3760  			if (txr->last_hw_offload == offload) {
3761  				if (offload & CSUM_IP) {
3762  					if (txr->last_hw_ipcss == ipcss &&
3763  				    	txr->last_hw_ipcso == ipcso &&
3764  				    	txr->last_hw_tucss == tucss &&
3765  				    	txr->last_hw_tucso == tucso)
3766  						return;
3767  				} else {
3768  					if (txr->last_hw_tucss == tucss &&
3769  				    	txr->last_hw_tucso == tucso)
3770  						return;
3771  				}
3772   			}
3773  			txr->last_hw_offload = offload;
3774  			txr->last_hw_tucss = tucss;
3775  			txr->last_hw_tucso = tucso;
3776 		}
3777  		/*
3778  		 * Start offset for payload checksum calculation.
3779  		 * End offset for payload checksum calculation.
3780  		 * Offset of place to put the checksum.
3781  		 */
3782 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3783  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3784  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3785  		TXD->upper_setup.tcp_fields.tucso = tucso;
3786  		cmd |= E1000_TXD_CMD_TCP;
3787  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3788  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3789  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3790  		tucss = hdr_len;
3791  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3792 		/*
3793 		 * The 82574L can only remember the *last* context used
3794 		 * regardless of queue that it was use for.  We cannot reuse
3795 		 * contexts on this hardware platform and must generate a new
3796 		 * context every time.  82574L hardware spec, section 7.2.6,
3797 		 * second note.
3798 		 */
3799 		if (adapter->num_queues < 2) {
3800  			/*
3801  		 	* Setting up new checksum offload context for every
3802 			* frames takes a lot of processing time for hardware.
3803 			* This also reduces performance a lot for small sized
3804 			* frames so avoid it if driver can use previously
3805 			* configured checksum offload context.
3806  		 	*/
3807  			if (txr->last_hw_offload == offload) {
3808  				if (offload & CSUM_IP) {
3809  					if (txr->last_hw_ipcss == ipcss &&
3810  				    	txr->last_hw_ipcso == ipcso &&
3811  				    	txr->last_hw_tucss == tucss &&
3812  				    	txr->last_hw_tucso == tucso)
3813  						return;
3814  				} else {
3815  					if (txr->last_hw_tucss == tucss &&
3816  				    	txr->last_hw_tucso == tucso)
3817  						return;
3818  				}
3819  			}
3820  			txr->last_hw_offload = offload;
3821  			txr->last_hw_tucss = tucss;
3822  			txr->last_hw_tucso = tucso;
3823 		}
3824  		/*
3825  		 * Start offset for header checksum calculation.
3826  		 * End offset for header checksum calculation.
3827  		 * Offset of place to put the checksum.
3828  		 */
3829 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3830  		TXD->upper_setup.tcp_fields.tucss = tucss;
3831  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3832  		TXD->upper_setup.tcp_fields.tucso = tucso;
3833   	}
3834 
3835  	if (offload & CSUM_IP) {
3836  		txr->last_hw_ipcss = ipcss;
3837  		txr->last_hw_ipcso = ipcso;
3838   	}
3839 
3840 	TXD->tcp_seg_setup.data = htole32(0);
3841 	TXD->cmd_and_length =
3842 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3843 	tx_buffer = &txr->tx_buffers[cur];
3844 	tx_buffer->m_head = NULL;
3845 	tx_buffer->next_eop = -1;
3846 
3847 	if (++cur == adapter->num_tx_desc)
3848 		cur = 0;
3849 
3850 	txr->tx_avail--;
3851 	txr->next_avail_desc = cur;
3852 }
3853 
3854 
3855 /**********************************************************************
3856  *
3857  *  Setup work for hardware segmentation offload (TSO)
3858  *
3859  **********************************************************************/
3860 static void
3861 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3862     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3863 {
3864 	struct adapter			*adapter = txr->adapter;
3865 	struct e1000_context_desc	*TXD;
3866 	struct em_txbuffer		*tx_buffer;
3867 	int cur, hdr_len;
3868 
3869 	/*
3870 	 * In theory we can use the same TSO context if and only if
3871 	 * frame is the same type(IP/TCP) and the same MSS. However
3872 	 * checking whether a frame has the same IP/TCP structure is
3873 	 * hard thing so just ignore that and always restablish a
3874 	 * new TSO context.
3875 	 */
3876 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3877 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3878 		      E1000_TXD_DTYP_D |	/* Data descr type */
3879 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3880 
3881 	/* IP and/or TCP header checksum calculation and insertion. */
3882 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3883 
3884 	cur = txr->next_avail_desc;
3885 	tx_buffer = &txr->tx_buffers[cur];
3886 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3887 
3888 	/*
3889 	 * Start offset for header checksum calculation.
3890 	 * End offset for header checksum calculation.
3891 	 * Offset of place put the checksum.
3892 	 */
3893 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3894 	TXD->lower_setup.ip_fields.ipcse =
3895 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3896 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3897 	/*
3898 	 * Start offset for payload checksum calculation.
3899 	 * End offset for payload checksum calculation.
3900 	 * Offset of place to put the checksum.
3901 	 */
3902 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3903 	TXD->upper_setup.tcp_fields.tucse = 0;
3904 	TXD->upper_setup.tcp_fields.tucso =
3905 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3906 	/*
3907 	 * Payload size per packet w/o any headers.
3908 	 * Length of all headers up to payload.
3909 	 */
3910 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3911 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3912 
3913 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3914 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3915 				E1000_TXD_CMD_TSE |	/* TSE context */
3916 				E1000_TXD_CMD_IP |	/* Do IP csum */
3917 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3918 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3919 
3920 	tx_buffer->m_head = NULL;
3921 	tx_buffer->next_eop = -1;
3922 
3923 	if (++cur == adapter->num_tx_desc)
3924 		cur = 0;
3925 
3926 	txr->tx_avail--;
3927 	txr->next_avail_desc = cur;
3928 	txr->tx_tso = TRUE;
3929 }
3930 
3931 
3932 /**********************************************************************
3933  *
3934  *  Examine each tx_buffer in the used queue. If the hardware is done
3935  *  processing the packet then free associated resources. The
3936  *  tx_buffer is put back on the free queue.
3937  *
3938  **********************************************************************/
3939 static void
3940 em_txeof(struct tx_ring *txr)
3941 {
3942 	struct adapter	*adapter = txr->adapter;
3943         int first, last, done, processed;
3944         struct em_txbuffer *tx_buffer;
3945         struct e1000_tx_desc   *tx_desc, *eop_desc;
3946 	if_t ifp = adapter->ifp;
3947 
3948 	EM_TX_LOCK_ASSERT(txr);
3949 #ifdef DEV_NETMAP
3950 	if (netmap_tx_irq(ifp, txr->me))
3951 		return;
3952 #endif /* DEV_NETMAP */
3953 
3954 	/* No work, make sure hang detection is disabled */
3955         if (txr->tx_avail == adapter->num_tx_desc) {
3956 		txr->busy = EM_TX_IDLE;
3957                 return;
3958 	}
3959 
3960 	processed = 0;
3961         first = txr->next_to_clean;
3962         tx_desc = &txr->tx_base[first];
3963         tx_buffer = &txr->tx_buffers[first];
3964 	last = tx_buffer->next_eop;
3965         eop_desc = &txr->tx_base[last];
3966 
3967 	/*
3968 	 * What this does is get the index of the
3969 	 * first descriptor AFTER the EOP of the
3970 	 * first packet, that way we can do the
3971 	 * simple comparison on the inner while loop.
3972 	 */
3973 	if (++last == adapter->num_tx_desc)
3974  		last = 0;
3975 	done = last;
3976 
3977         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3978             BUS_DMASYNC_POSTREAD);
3979 
3980         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3981 		/* We clean the range of the packet */
3982 		while (first != done) {
3983                 	tx_desc->upper.data = 0;
3984                 	tx_desc->lower.data = 0;
3985                 	tx_desc->buffer_addr = 0;
3986                 	++txr->tx_avail;
3987 			++processed;
3988 
3989 			if (tx_buffer->m_head) {
3990 				bus_dmamap_sync(txr->txtag,
3991 				    tx_buffer->map,
3992 				    BUS_DMASYNC_POSTWRITE);
3993 				bus_dmamap_unload(txr->txtag,
3994 				    tx_buffer->map);
3995                         	m_freem(tx_buffer->m_head);
3996                         	tx_buffer->m_head = NULL;
3997                 	}
3998 			tx_buffer->next_eop = -1;
3999 
4000 	                if (++first == adapter->num_tx_desc)
4001 				first = 0;
4002 
4003 	                tx_buffer = &txr->tx_buffers[first];
4004 			tx_desc = &txr->tx_base[first];
4005 		}
4006 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4007 		/* See if we can continue to the next packet */
4008 		last = tx_buffer->next_eop;
4009 		if (last != -1) {
4010         		eop_desc = &txr->tx_base[last];
4011 			/* Get new done point */
4012 			if (++last == adapter->num_tx_desc) last = 0;
4013 			done = last;
4014 		} else
4015 			break;
4016         }
4017         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4018             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4019 
4020         txr->next_to_clean = first;
4021 
4022 	/*
4023 	** Hang detection: we know there's work outstanding
4024 	** or the entry return would have been taken, so no
4025 	** descriptor processed here indicates a potential hang.
4026 	** The local timer will examine this and do a reset if needed.
4027 	*/
4028 	if (processed == 0) {
4029 		if (txr->busy != EM_TX_HUNG)
4030 			++txr->busy;
4031 	} else /* At least one descriptor was cleaned */
4032 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4033 
4034         /*
4035          * If we have a minimum free, clear IFF_DRV_OACTIVE
4036          * to tell the stack that it is OK to send packets.
4037 	 * Notice that all writes of OACTIVE happen under the
4038 	 * TX lock which, with a single queue, guarantees
4039 	 * sanity.
4040          */
4041         if (txr->tx_avail >= EM_MAX_SCATTER) {
4042 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4043 	}
4044 
4045 	/* Disable hang detection if all clean */
4046 	if (txr->tx_avail == adapter->num_tx_desc)
4047 		txr->busy = EM_TX_IDLE;
4048 }
4049 
4050 /*********************************************************************
4051  *
4052  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4053  *
4054  **********************************************************************/
4055 static void
4056 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4057 {
4058 	struct adapter		*adapter = rxr->adapter;
4059 	struct mbuf		*m;
4060 	bus_dma_segment_t	segs;
4061 	struct em_rxbuffer	*rxbuf;
4062 	int			i, j, error, nsegs;
4063 	bool			cleaned = FALSE;
4064 
4065 	i = j = rxr->next_to_refresh;
4066 	/*
4067 	** Get one descriptor beyond
4068 	** our work mark to control
4069 	** the loop.
4070 	*/
4071 	if (++j == adapter->num_rx_desc)
4072 		j = 0;
4073 
4074 	while (j != limit) {
4075 		rxbuf = &rxr->rx_buffers[i];
4076 		if (rxbuf->m_head == NULL) {
4077 			m = m_getjcl(M_NOWAIT, MT_DATA,
4078 			    M_PKTHDR, adapter->rx_mbuf_sz);
4079 			/*
4080 			** If we have a temporary resource shortage
4081 			** that causes a failure, just abort refresh
4082 			** for now, we will return to this point when
4083 			** reinvoked from em_rxeof.
4084 			*/
4085 			if (m == NULL)
4086 				goto update;
4087 		} else
4088 			m = rxbuf->m_head;
4089 
4090 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4091 		m->m_flags |= M_PKTHDR;
4092 		m->m_data = m->m_ext.ext_buf;
4093 
4094 		/* Use bus_dma machinery to setup the memory mapping  */
4095 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4096 		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
4097 		if (error != 0) {
4098 			printf("Refresh mbufs: hdr dmamap load"
4099 			    " failure - %d\n", error);
4100 			m_free(m);
4101 			rxbuf->m_head = NULL;
4102 			goto update;
4103 		}
4104 		rxbuf->m_head = m;
4105 		rxbuf->paddr = segs.ds_addr;
4106 		bus_dmamap_sync(rxr->rxtag,
4107 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4108 		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4109 		cleaned = TRUE;
4110 
4111 		i = j; /* Next is precalulated for us */
4112 		rxr->next_to_refresh = i;
4113 		/* Calculate next controlling index */
4114 		if (++j == adapter->num_rx_desc)
4115 			j = 0;
4116 	}
4117 update:
4118 	/*
4119 	** Update the tail pointer only if,
4120 	** and as far as we have refreshed.
4121 	*/
4122 	if (cleaned)
4123 		E1000_WRITE_REG(&adapter->hw,
4124 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4125 
4126 	return;
4127 }
4128 
4129 
4130 /*********************************************************************
4131  *
4132  *  Allocate memory for rx_buffer structures. Since we use one
4133  *  rx_buffer per received packet, the maximum number of rx_buffer's
4134  *  that we'll need is equal to the number of receive descriptors
4135  *  that we've allocated.
4136  *
4137  **********************************************************************/
4138 static int
4139 em_allocate_receive_buffers(struct rx_ring *rxr)
4140 {
4141 	struct adapter		*adapter = rxr->adapter;
4142 	device_t		dev = adapter->dev;
4143 	struct em_rxbuffer	*rxbuf;
4144 	int			error;
4145 
4146 	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4147 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4148 	if (rxr->rx_buffers == NULL) {
4149 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4150 		return (ENOMEM);
4151 	}
4152 
4153 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4154 				1, 0,			/* alignment, bounds */
4155 				BUS_SPACE_MAXADDR,	/* lowaddr */
4156 				BUS_SPACE_MAXADDR,	/* highaddr */
4157 				NULL, NULL,		/* filter, filterarg */
4158 				MJUM9BYTES,		/* maxsize */
4159 				1,			/* nsegments */
4160 				MJUM9BYTES,		/* maxsegsize */
4161 				0,			/* flags */
4162 				NULL,			/* lockfunc */
4163 				NULL,			/* lockarg */
4164 				&rxr->rxtag);
4165 	if (error) {
4166 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4167 		    __func__, error);
4168 		goto fail;
4169 	}
4170 
4171 	rxbuf = rxr->rx_buffers;
4172 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4173 		rxbuf = &rxr->rx_buffers[i];
4174 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4175 		if (error) {
4176 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4177 			    __func__, error);
4178 			goto fail;
4179 		}
4180 	}
4181 
4182 	return (0);
4183 
4184 fail:
4185 	em_free_receive_structures(adapter);
4186 	return (error);
4187 }
4188 
4189 
4190 /*********************************************************************
4191  *
4192  *  Initialize a receive ring and its buffers.
4193  *
4194  **********************************************************************/
4195 static int
4196 em_setup_receive_ring(struct rx_ring *rxr)
4197 {
4198 	struct	adapter 	*adapter = rxr->adapter;
4199 	struct em_rxbuffer	*rxbuf;
4200 	bus_dma_segment_t	seg[1];
4201 	int			rsize, nsegs, error = 0;
4202 #ifdef DEV_NETMAP
4203 	struct netmap_slot *slot;
4204 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4205 #endif
4206 
4207 
4208 	/* Clear the ring contents */
4209 	EM_RX_LOCK(rxr);
4210 	rsize = roundup2(adapter->num_rx_desc *
4211 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4212 	bzero((void *)rxr->rx_base, rsize);
4213 #ifdef DEV_NETMAP
4214 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4215 #endif
4216 
4217 	/*
4218 	** Free current RX buffer structs and their mbufs
4219 	*/
4220 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4221 		rxbuf = &rxr->rx_buffers[i];
4222 		if (rxbuf->m_head != NULL) {
4223 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4224 			    BUS_DMASYNC_POSTREAD);
4225 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4226 			m_freem(rxbuf->m_head);
4227 			rxbuf->m_head = NULL; /* mark as freed */
4228 		}
4229 	}
4230 
4231 	/* Now replenish the mbufs */
4232         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4233 		rxbuf = &rxr->rx_buffers[j];
4234 #ifdef DEV_NETMAP
4235 		if (slot) {
4236 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4237 			uint64_t paddr;
4238 			void *addr;
4239 
4240 			addr = PNMB(na, slot + si, &paddr);
4241 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4242 			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4243 			continue;
4244 		}
4245 #endif /* DEV_NETMAP */
4246 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4247 		    M_PKTHDR, adapter->rx_mbuf_sz);
4248 		if (rxbuf->m_head == NULL) {
4249 			error = ENOBUFS;
4250 			goto fail;
4251 		}
4252 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4253 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4254 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4255 
4256 		/* Get the memory mapping */
4257 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4258 		    rxbuf->map, rxbuf->m_head, seg,
4259 		    &nsegs, BUS_DMA_NOWAIT);
4260 		if (error != 0) {
4261 			m_freem(rxbuf->m_head);
4262 			rxbuf->m_head = NULL;
4263 			goto fail;
4264 		}
4265 		bus_dmamap_sync(rxr->rxtag,
4266 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4267 
4268 		rxbuf->paddr = seg[0].ds_addr;
4269 		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4270 	}
4271 	rxr->next_to_check = 0;
4272 	rxr->next_to_refresh = 0;
4273 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4274 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4275 
4276 fail:
4277 	EM_RX_UNLOCK(rxr);
4278 	return (error);
4279 }
4280 
4281 /*********************************************************************
4282  *
4283  *  Initialize all receive rings.
4284  *
4285  **********************************************************************/
4286 static int
4287 em_setup_receive_structures(struct adapter *adapter)
4288 {
4289 	struct rx_ring *rxr = adapter->rx_rings;
4290 	int q;
4291 
4292 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4293 		if (em_setup_receive_ring(rxr))
4294 			goto fail;
4295 
4296 	return (0);
4297 fail:
4298 	/*
4299 	 * Free RX buffers allocated so far, we will only handle
4300 	 * the rings that completed, the failing case will have
4301 	 * cleaned up for itself. 'q' failed, so its the terminus.
4302 	 */
4303 	for (int i = 0; i < q; ++i) {
4304 		rxr = &adapter->rx_rings[i];
4305 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4306 			struct em_rxbuffer *rxbuf;
4307 			rxbuf = &rxr->rx_buffers[n];
4308 			if (rxbuf->m_head != NULL) {
4309 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4310 			  	  BUS_DMASYNC_POSTREAD);
4311 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4312 				m_freem(rxbuf->m_head);
4313 				rxbuf->m_head = NULL;
4314 			}
4315 		}
4316 		rxr->next_to_check = 0;
4317 		rxr->next_to_refresh = 0;
4318 	}
4319 
4320 	return (ENOBUFS);
4321 }
4322 
4323 /*********************************************************************
4324  *
4325  *  Free all receive rings.
4326  *
4327  **********************************************************************/
4328 static void
4329 em_free_receive_structures(struct adapter *adapter)
4330 {
4331 	struct rx_ring *rxr = adapter->rx_rings;
4332 
4333 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4334 		em_free_receive_buffers(rxr);
4335 		/* Free the ring memory as well */
4336 		em_dma_free(adapter, &rxr->rxdma);
4337 		EM_RX_LOCK_DESTROY(rxr);
4338 	}
4339 
4340 	free(adapter->rx_rings, M_DEVBUF);
4341 }
4342 
4343 
4344 /*********************************************************************
4345  *
4346  *  Free receive ring data structures
4347  *
4348  **********************************************************************/
4349 static void
4350 em_free_receive_buffers(struct rx_ring *rxr)
4351 {
4352 	struct adapter		*adapter = rxr->adapter;
4353 	struct em_rxbuffer	*rxbuf = NULL;
4354 
4355 	INIT_DEBUGOUT("free_receive_buffers: begin");
4356 
4357 	if (rxr->rx_buffers != NULL) {
4358 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4359 			rxbuf = &rxr->rx_buffers[i];
4360 			if (rxbuf->map != NULL) {
4361 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4362 				    BUS_DMASYNC_POSTREAD);
4363 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4364 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4365 			}
4366 			if (rxbuf->m_head != NULL) {
4367 				m_freem(rxbuf->m_head);
4368 				rxbuf->m_head = NULL;
4369 			}
4370 		}
4371 		free(rxr->rx_buffers, M_DEVBUF);
4372 		rxr->rx_buffers = NULL;
4373 		rxr->next_to_check = 0;
4374 		rxr->next_to_refresh = 0;
4375 	}
4376 
4377 	if (rxr->rxtag != NULL) {
4378 		bus_dma_tag_destroy(rxr->rxtag);
4379 		rxr->rxtag = NULL;
4380 	}
4381 
4382 	return;
4383 }
4384 
4385 
4386 /*********************************************************************
4387  *
4388  *  Enable receive unit.
4389  *
4390  **********************************************************************/
4391 
4392 static void
4393 em_initialize_receive_unit(struct adapter *adapter)
4394 {
4395 	struct rx_ring *rxr = adapter->rx_rings;
4396 	if_t ifp = adapter->ifp;
4397 	struct e1000_hw	*hw = &adapter->hw;
4398 	u32	rctl, rxcsum, rfctl;
4399 
4400 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4401 
4402 	/*
4403 	 * Make sure receives are disabled while setting
4404 	 * up the descriptor ring
4405 	 */
4406 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4407 	/* Do not disable if ever enabled on this hardware */
4408 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4409 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4410 
4411 	/* Setup the Receive Control Register */
4412 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4413 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4414 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4415 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4416 
4417 	/* Do not store bad packets */
4418 	rctl &= ~E1000_RCTL_SBP;
4419 
4420 	/* Enable Long Packet receive */
4421 	if (if_getmtu(ifp) > ETHERMTU)
4422 		rctl |= E1000_RCTL_LPE;
4423 	else
4424 		rctl &= ~E1000_RCTL_LPE;
4425 
4426         /* Strip the CRC */
4427         if (!em_disable_crc_stripping)
4428 		rctl |= E1000_RCTL_SECRC;
4429 
4430 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4431 	    adapter->rx_abs_int_delay.value);
4432 
4433 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4434 	    adapter->rx_int_delay.value);
4435 	/*
4436 	 * Set the interrupt throttling rate. Value is calculated
4437 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4438 	 */
4439 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4440 
4441 	/* Use extended rx descriptor formats */
4442 	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4443 	rfctl |= E1000_RFCTL_EXTEN;
4444 	/*
4445 	** When using MSIX interrupts we need to throttle
4446 	** using the EITR register (82574 only)
4447 	*/
4448 	if (hw->mac.type == e1000_82574) {
4449 		for (int i = 0; i < 4; i++)
4450 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4451 			    DEFAULT_ITR);
4452 		/* Disable accelerated acknowledge */
4453 		rfctl |= E1000_RFCTL_ACK_DIS;
4454 	}
4455 	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4456 
4457 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4458 	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4459 #ifdef EM_MULTIQUEUE
4460 		rxcsum |= E1000_RXCSUM_TUOFL |
4461 			  E1000_RXCSUM_IPOFL |
4462 			  E1000_RXCSUM_PCSD;
4463 #else
4464 		rxcsum |= E1000_RXCSUM_TUOFL;
4465 #endif
4466 	} else
4467 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4468 
4469 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4470 
4471 #ifdef EM_MULTIQUEUE
4472 #define RSSKEYLEN 10
4473 	if (adapter->num_queues > 1) {
4474 		uint8_t  rss_key[4 * RSSKEYLEN];
4475 		uint32_t reta = 0;
4476 		int i;
4477 
4478 		/*
4479 		* Configure RSS key
4480 		*/
4481 		arc4rand(rss_key, sizeof(rss_key), 0);
4482 		for (i = 0; i < RSSKEYLEN; ++i) {
4483 			uint32_t rssrk = 0;
4484 
4485 			rssrk = EM_RSSRK_VAL(rss_key, i);
4486 			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4487 		}
4488 
4489 		/*
4490 		* Configure RSS redirect table in following fashion:
4491 		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4492 		*/
4493 		for (i = 0; i < sizeof(reta); ++i) {
4494 			uint32_t q;
4495 
4496 			q = (i % adapter->num_queues) << 7;
4497 			reta |= q << (8 * i);
4498 		}
4499 
4500 		for (i = 0; i < 32; ++i) {
4501 			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4502 		}
4503 
4504 		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4505 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4506 				E1000_MRQC_RSS_FIELD_IPV4 |
4507 				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4508 				E1000_MRQC_RSS_FIELD_IPV6_EX |
4509 				E1000_MRQC_RSS_FIELD_IPV6);
4510 	}
4511 #endif
4512 	/*
4513 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4514 	** long latencies are observed, like Lenovo X60. This
4515 	** change eliminates the problem, but since having positive
4516 	** values in RDTR is a known source of problems on other
4517 	** platforms another solution is being sought.
4518 	*/
4519 	if (hw->mac.type == e1000_82573)
4520 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4521 
4522 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4523 		/* Setup the Base and Length of the Rx Descriptor Ring */
4524 		u64 bus_addr = rxr->rxdma.dma_paddr;
4525 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4526 
4527 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4528 		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4529 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4530 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4531 		/* Setup the Head and Tail Descriptor Pointers */
4532 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4533 #ifdef DEV_NETMAP
4534 		/*
4535 		 * an init() while a netmap client is active must
4536 		 * preserve the rx buffers passed to userspace.
4537 		 */
4538 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4539 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4540 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4541 		}
4542 #endif /* DEV_NETMAP */
4543 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4544 	}
4545 
4546 	/*
4547 	 * Set PTHRESH for improved jumbo performance
4548 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4549 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4550 	 * Only write to RXDCTL(1) if there is a need for different
4551 	 * settings.
4552 	 */
4553 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4554 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4555 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4556 	    (if_getmtu(ifp) > ETHERMTU)) {
4557 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4558 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4559 	} else if (adapter->hw.mac.type == e1000_82574) {
4560 		for (int i = 0; i < adapter->num_queues; i++) {
4561 			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4562 
4563 			rxdctl |= 0x20; /* PTHRESH */
4564 			rxdctl |= 4 << 8; /* HTHRESH */
4565 			rxdctl |= 4 << 16;/* WTHRESH */
4566 			rxdctl |= 1 << 24; /* Switch to granularity */
4567 			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4568 		}
4569 	}
4570 
4571 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4572 		if (if_getmtu(ifp) > ETHERMTU)
4573 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4574 		else
4575 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4576 	}
4577 
4578         /* Make sure VLAN Filters are off */
4579         rctl &= ~E1000_RCTL_VFE;
4580 
4581 	if (adapter->rx_mbuf_sz == MCLBYTES)
4582 		rctl |= E1000_RCTL_SZ_2048;
4583 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4584 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4585 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4586 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4587 
4588 	/* ensure we clear use DTYPE of 00 here */
4589 	rctl &= ~0x00000C00;
4590 	/* Write out the settings */
4591 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4592 
4593 	return;
4594 }
4595 
4596 
4597 /*********************************************************************
4598  *
4599  *  This routine executes in interrupt context. It replenishes
4600  *  the mbufs in the descriptor and sends data which has been
4601  *  dma'ed into host memory to upper layer.
4602  *
4603  *  We loop at most count times if count is > 0, or until done if
4604  *  count < 0.
4605  *
4606  *  For polling we also now return the number of cleaned packets
4607  *********************************************************************/
4608 static bool
4609 em_rxeof(struct rx_ring *rxr, int count, int *done)
4610 {
4611 	struct adapter		*adapter = rxr->adapter;
4612 	if_t ifp = adapter->ifp;
4613 	struct mbuf		*mp, *sendmp;
4614 	u32			status = 0;
4615 	u16 			len;
4616 	int			i, processed, rxdone = 0;
4617 	bool			eop;
4618 	union e1000_rx_desc_extended	*cur;
4619 
4620 	EM_RX_LOCK(rxr);
4621 
4622 	/* Sync the ring */
4623 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4624 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4625 
4626 
4627 #ifdef DEV_NETMAP
4628 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4629 		EM_RX_UNLOCK(rxr);
4630 		return (FALSE);
4631 	}
4632 #endif /* DEV_NETMAP */
4633 
4634 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4635 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4636 			break;
4637 
4638 		cur = &rxr->rx_base[i];
4639 		status = le32toh(cur->wb.upper.status_error);
4640 		mp = sendmp = NULL;
4641 
4642 		if ((status & E1000_RXD_STAT_DD) == 0)
4643 			break;
4644 
4645 		len = le16toh(cur->wb.upper.length);
4646 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4647 
4648 		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4649 		    (rxr->discard == TRUE)) {
4650 			adapter->dropped_pkts++;
4651 			++rxr->rx_discarded;
4652 			if (!eop) /* Catch subsequent segs */
4653 				rxr->discard = TRUE;
4654 			else
4655 				rxr->discard = FALSE;
4656 			em_rx_discard(rxr, i);
4657 			goto next_desc;
4658 		}
4659 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4660 
4661 		/* Assign correct length to the current fragment */
4662 		mp = rxr->rx_buffers[i].m_head;
4663 		mp->m_len = len;
4664 
4665 		/* Trigger for refresh */
4666 		rxr->rx_buffers[i].m_head = NULL;
4667 
4668 		/* First segment? */
4669 		if (rxr->fmp == NULL) {
4670 			mp->m_pkthdr.len = len;
4671 			rxr->fmp = rxr->lmp = mp;
4672 		} else {
4673 			/* Chain mbuf's together */
4674 			mp->m_flags &= ~M_PKTHDR;
4675 			rxr->lmp->m_next = mp;
4676 			rxr->lmp = mp;
4677 			rxr->fmp->m_pkthdr.len += len;
4678 		}
4679 
4680 		if (eop) {
4681 			--count;
4682 			sendmp = rxr->fmp;
4683 			if_setrcvif(sendmp, ifp);
4684 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4685 			em_receive_checksum(status, sendmp);
4686 #ifndef __NO_STRICT_ALIGNMENT
4687 			if (adapter->hw.mac.max_frame_size >
4688 			    (MCLBYTES - ETHER_ALIGN) &&
4689 			    em_fixup_rx(rxr) != 0)
4690 				goto skip;
4691 #endif
4692 			if (status & E1000_RXD_STAT_VP) {
4693 				if_setvtag(sendmp,
4694 				    le16toh(cur->wb.upper.vlan));
4695 				sendmp->m_flags |= M_VLANTAG;
4696 			}
4697 #ifndef __NO_STRICT_ALIGNMENT
4698 skip:
4699 #endif
4700 			rxr->fmp = rxr->lmp = NULL;
4701 		}
4702 next_desc:
4703 		/* Sync the ring */
4704 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4705 	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4706 
4707 		/* Zero out the receive descriptors status. */
4708 		cur->wb.upper.status_error &= htole32(~0xFF);
4709 		++rxdone;	/* cumulative for POLL */
4710 		++processed;
4711 
4712 		/* Advance our pointers to the next descriptor. */
4713 		if (++i == adapter->num_rx_desc)
4714 			i = 0;
4715 
4716 		/* Send to the stack */
4717 		if (sendmp != NULL) {
4718 			rxr->next_to_check = i;
4719 			EM_RX_UNLOCK(rxr);
4720 			if_input(ifp, sendmp);
4721 			EM_RX_LOCK(rxr);
4722 			i = rxr->next_to_check;
4723 		}
4724 
4725 		/* Only refresh mbufs every 8 descriptors */
4726 		if (processed == 8) {
4727 			em_refresh_mbufs(rxr, i);
4728 			processed = 0;
4729 		}
4730 	}
4731 
4732 	/* Catch any remaining refresh work */
4733 	if (e1000_rx_unrefreshed(rxr))
4734 		em_refresh_mbufs(rxr, i);
4735 
4736 	rxr->next_to_check = i;
4737 	if (done != NULL)
4738 		*done = rxdone;
4739 	EM_RX_UNLOCK(rxr);
4740 
4741 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4742 }
4743 
4744 static __inline void
4745 em_rx_discard(struct rx_ring *rxr, int i)
4746 {
4747 	struct em_rxbuffer	*rbuf;
4748 
4749 	rbuf = &rxr->rx_buffers[i];
4750 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4751 
4752 	/* Free any previous pieces */
4753 	if (rxr->fmp != NULL) {
4754 		rxr->fmp->m_flags |= M_PKTHDR;
4755 		m_freem(rxr->fmp);
4756 		rxr->fmp = NULL;
4757 		rxr->lmp = NULL;
4758 	}
4759 	/*
4760 	** Free buffer and allow em_refresh_mbufs()
4761 	** to clean up and recharge buffer.
4762 	*/
4763 	if (rbuf->m_head) {
4764 		m_free(rbuf->m_head);
4765 		rbuf->m_head = NULL;
4766 	}
4767 	return;
4768 }
4769 
4770 #ifndef __NO_STRICT_ALIGNMENT
4771 /*
4772  * When jumbo frames are enabled we should realign entire payload on
4773  * architecures with strict alignment. This is serious design mistake of 8254x
4774  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4775  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4776  * payload. On architecures without strict alignment restrictions 8254x still
4777  * performs unaligned memory access which would reduce the performance too.
4778  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4779  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4780  * existing mbuf chain.
4781  *
4782  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4783  * not used at all on architectures with strict alignment.
4784  */
4785 static int
4786 em_fixup_rx(struct rx_ring *rxr)
4787 {
4788 	struct adapter *adapter = rxr->adapter;
4789 	struct mbuf *m, *n;
4790 	int error;
4791 
4792 	error = 0;
4793 	m = rxr->fmp;
4794 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4795 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4796 		m->m_data += ETHER_HDR_LEN;
4797 	} else {
4798 		MGETHDR(n, M_NOWAIT, MT_DATA);
4799 		if (n != NULL) {
4800 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4801 			m->m_data += ETHER_HDR_LEN;
4802 			m->m_len -= ETHER_HDR_LEN;
4803 			n->m_len = ETHER_HDR_LEN;
4804 			M_MOVE_PKTHDR(n, m);
4805 			n->m_next = m;
4806 			rxr->fmp = n;
4807 		} else {
4808 			adapter->dropped_pkts++;
4809 			m_freem(rxr->fmp);
4810 			rxr->fmp = NULL;
4811 			error = ENOMEM;
4812 		}
4813 	}
4814 
4815 	return (error);
4816 }
4817 #endif
4818 
4819 static void
4820 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4821 {
4822 	rxd->read.buffer_addr = htole64(rxbuf->paddr);
4823 	/* DD bits must be cleared */
4824 	rxd->wb.upper.status_error= 0;
4825 }
4826 
4827 /*********************************************************************
4828  *
4829  *  Verify that the hardware indicated that the checksum is valid.
4830  *  Inform the stack about the status of checksum so that stack
4831  *  doesn't spend time verifying the checksum.
4832  *
4833  *********************************************************************/
4834 static void
4835 em_receive_checksum(uint32_t status, struct mbuf *mp)
4836 {
4837 	mp->m_pkthdr.csum_flags = 0;
4838 
4839 	/* Ignore Checksum bit is set */
4840 	if (status & E1000_RXD_STAT_IXSM)
4841 		return;
4842 
4843 	/* If the IP checksum exists and there is no IP Checksum error */
4844 	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
4845 		E1000_RXD_STAT_IPCS) {
4846 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4847 	}
4848 
4849 	/* TCP or UDP checksum */
4850 	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
4851 	    E1000_RXD_STAT_TCPCS) {
4852 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4853 		mp->m_pkthdr.csum_data = htons(0xffff);
4854 	}
4855 	if (status & E1000_RXD_STAT_UDPCS) {
4856 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4857 		mp->m_pkthdr.csum_data = htons(0xffff);
4858 	}
4859 }
4860 
4861 /*
4862  * This routine is run via an vlan
4863  * config EVENT
4864  */
4865 static void
4866 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4867 {
4868 	struct adapter	*adapter = if_getsoftc(ifp);
4869 	u32		index, bit;
4870 
4871 	if ((void*)adapter !=  arg)   /* Not our event */
4872 		return;
4873 
4874 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4875                 return;
4876 
4877 	EM_CORE_LOCK(adapter);
4878 	index = (vtag >> 5) & 0x7F;
4879 	bit = vtag & 0x1F;
4880 	adapter->shadow_vfta[index] |= (1 << bit);
4881 	++adapter->num_vlans;
4882 	/* Re-init to load the changes */
4883 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4884 		em_init_locked(adapter);
4885 	EM_CORE_UNLOCK(adapter);
4886 }
4887 
4888 /*
4889  * This routine is run via an vlan
4890  * unconfig EVENT
4891  */
4892 static void
4893 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4894 {
4895 	struct adapter	*adapter = if_getsoftc(ifp);
4896 	u32		index, bit;
4897 
4898 	if (adapter != arg)
4899 		return;
4900 
4901 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4902                 return;
4903 
4904 	EM_CORE_LOCK(adapter);
4905 	index = (vtag >> 5) & 0x7F;
4906 	bit = vtag & 0x1F;
4907 	adapter->shadow_vfta[index] &= ~(1 << bit);
4908 	--adapter->num_vlans;
4909 	/* Re-init to load the changes */
4910 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4911 		em_init_locked(adapter);
4912 	EM_CORE_UNLOCK(adapter);
4913 }
4914 
4915 static void
4916 em_setup_vlan_hw_support(struct adapter *adapter)
4917 {
4918 	struct e1000_hw *hw = &adapter->hw;
4919 	u32             reg;
4920 
4921 	/*
4922 	** We get here thru init_locked, meaning
4923 	** a soft reset, this has already cleared
4924 	** the VFTA and other state, so if there
4925 	** have been no vlan's registered do nothing.
4926 	*/
4927 	if (adapter->num_vlans == 0)
4928                 return;
4929 
4930 	/*
4931 	** A soft reset zero's out the VFTA, so
4932 	** we need to repopulate it now.
4933 	*/
4934 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4935                 if (adapter->shadow_vfta[i] != 0)
4936 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4937                             i, adapter->shadow_vfta[i]);
4938 
4939 	reg = E1000_READ_REG(hw, E1000_CTRL);
4940 	reg |= E1000_CTRL_VME;
4941 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4942 
4943 	/* Enable the Filter Table */
4944 	reg = E1000_READ_REG(hw, E1000_RCTL);
4945 	reg &= ~E1000_RCTL_CFIEN;
4946 	reg |= E1000_RCTL_VFE;
4947 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4948 }
4949 
4950 static void
4951 em_enable_intr(struct adapter *adapter)
4952 {
4953 	struct e1000_hw *hw = &adapter->hw;
4954 	u32 ims_mask = IMS_ENABLE_MASK;
4955 
4956 	if (hw->mac.type == e1000_82574) {
4957 		E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
4958 		ims_mask |= adapter->ims;
4959 	}
4960 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4961 }
4962 
4963 static void
4964 em_disable_intr(struct adapter *adapter)
4965 {
4966 	struct e1000_hw *hw = &adapter->hw;
4967 
4968 	if (hw->mac.type == e1000_82574)
4969 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4970 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4971 }
4972 
4973 /*
4974  * Bit of a misnomer, what this really means is
4975  * to enable OS management of the system... aka
4976  * to disable special hardware management features
4977  */
4978 static void
4979 em_init_manageability(struct adapter *adapter)
4980 {
4981 	/* A shared code workaround */
4982 #define E1000_82542_MANC2H E1000_MANC2H
4983 	if (adapter->has_manage) {
4984 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4985 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4986 
4987 		/* disable hardware interception of ARP */
4988 		manc &= ~(E1000_MANC_ARP_EN);
4989 
4990                 /* enable receiving management packets to the host */
4991 		manc |= E1000_MANC_EN_MNG2HOST;
4992 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4993 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4994 		manc2h |= E1000_MNG2HOST_PORT_623;
4995 		manc2h |= E1000_MNG2HOST_PORT_664;
4996 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4997 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4998 	}
4999 }
5000 
5001 /*
5002  * Give control back to hardware management
5003  * controller if there is one.
5004  */
5005 static void
5006 em_release_manageability(struct adapter *adapter)
5007 {
5008 	if (adapter->has_manage) {
5009 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5010 
5011 		/* re-enable hardware interception of ARP */
5012 		manc |= E1000_MANC_ARP_EN;
5013 		manc &= ~E1000_MANC_EN_MNG2HOST;
5014 
5015 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5016 	}
5017 }
5018 
5019 /*
5020  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5021  * For ASF and Pass Through versions of f/w this means
5022  * that the driver is loaded. For AMT version type f/w
5023  * this means that the network i/f is open.
5024  */
5025 static void
5026 em_get_hw_control(struct adapter *adapter)
5027 {
5028 	u32 ctrl_ext, swsm;
5029 
5030 	if (adapter->hw.mac.type == e1000_82573) {
5031 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5032 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5033 		    swsm | E1000_SWSM_DRV_LOAD);
5034 		return;
5035 	}
5036 	/* else */
5037 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5038 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5039 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5040 	return;
5041 }
5042 
5043 /*
5044  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5045  * For ASF and Pass Through versions of f/w this means that
5046  * the driver is no longer loaded. For AMT versions of the
5047  * f/w this means that the network i/f is closed.
5048  */
5049 static void
5050 em_release_hw_control(struct adapter *adapter)
5051 {
5052 	u32 ctrl_ext, swsm;
5053 
5054 	if (!adapter->has_manage)
5055 		return;
5056 
5057 	if (adapter->hw.mac.type == e1000_82573) {
5058 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5059 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5060 		    swsm & ~E1000_SWSM_DRV_LOAD);
5061 		return;
5062 	}
5063 	/* else */
5064 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5065 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5066 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5067 	return;
5068 }
5069 
5070 static int
5071 em_is_valid_ether_addr(u8 *addr)
5072 {
5073 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5074 
5075 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5076 		return (FALSE);
5077 	}
5078 
5079 	return (TRUE);
5080 }
5081 
5082 /*
5083 ** Parse the interface capabilities with regard
5084 ** to both system management and wake-on-lan for
5085 ** later use.
5086 */
5087 static void
5088 em_get_wakeup(device_t dev)
5089 {
5090 	struct adapter	*adapter = device_get_softc(dev);
5091 	u16		eeprom_data = 0, device_id, apme_mask;
5092 
5093 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5094 	apme_mask = EM_EEPROM_APME;
5095 
5096 	switch (adapter->hw.mac.type) {
5097 	case e1000_82573:
5098 	case e1000_82583:
5099 		adapter->has_amt = TRUE;
5100 		/* Falls thru */
5101 	case e1000_82571:
5102 	case e1000_82572:
5103 	case e1000_80003es2lan:
5104 		if (adapter->hw.bus.func == 1) {
5105 			e1000_read_nvm(&adapter->hw,
5106 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5107 			break;
5108 		} else
5109 			e1000_read_nvm(&adapter->hw,
5110 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5111 		break;
5112 	case e1000_ich8lan:
5113 	case e1000_ich9lan:
5114 	case e1000_ich10lan:
5115 	case e1000_pchlan:
5116 	case e1000_pch2lan:
5117 		apme_mask = E1000_WUC_APME;
5118 		adapter->has_amt = TRUE;
5119 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5120 		break;
5121 	default:
5122 		e1000_read_nvm(&adapter->hw,
5123 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5124 		break;
5125 	}
5126 	if (eeprom_data & apme_mask)
5127 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5128 	/*
5129          * We have the eeprom settings, now apply the special cases
5130          * where the eeprom may be wrong or the board won't support
5131          * wake on lan on a particular port
5132 	 */
5133 	device_id = pci_get_device(dev);
5134         switch (device_id) {
5135 	case E1000_DEV_ID_82571EB_FIBER:
5136 		/* Wake events only supported on port A for dual fiber
5137 		 * regardless of eeprom setting */
5138 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5139 		    E1000_STATUS_FUNC_1)
5140 			adapter->wol = 0;
5141 		break;
5142 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5143 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5144 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5145                 /* if quad port adapter, disable WoL on all but port A */
5146 		if (global_quad_port_a != 0)
5147 			adapter->wol = 0;
5148 		/* Reset for multiple quad port adapters */
5149 		if (++global_quad_port_a == 4)
5150 			global_quad_port_a = 0;
5151                 break;
5152 	}
5153 	return;
5154 }
5155 
5156 
5157 /*
5158  * Enable PCI Wake On Lan capability
5159  */
5160 static void
5161 em_enable_wakeup(device_t dev)
5162 {
5163 	struct adapter	*adapter = device_get_softc(dev);
5164 	if_t ifp = adapter->ifp;
5165 	u32		pmc, ctrl, ctrl_ext, rctl;
5166 	u16     	status;
5167 
5168 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5169 		return;
5170 
5171 	/* Advertise the wakeup capability */
5172 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5173 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5174 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5175 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5176 
5177 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5178 	    (adapter->hw.mac.type == e1000_pchlan) ||
5179 	    (adapter->hw.mac.type == e1000_ich9lan) ||
5180 	    (adapter->hw.mac.type == e1000_ich10lan))
5181 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5182 
5183 	/* Keep the laser running on Fiber adapters */
5184 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5185 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5186 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5187 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5188 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5189 	}
5190 
5191 	/*
5192 	** Determine type of Wakeup: note that wol
5193 	** is set with all bits on by default.
5194 	*/
5195 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5196 		adapter->wol &= ~E1000_WUFC_MAG;
5197 
5198 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5199 		adapter->wol &= ~E1000_WUFC_MC;
5200 	else {
5201 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5202 		rctl |= E1000_RCTL_MPE;
5203 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5204 	}
5205 
5206 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5207 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5208 		if (em_enable_phy_wakeup(adapter))
5209 			return;
5210 	} else {
5211 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5212 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5213 	}
5214 
5215 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5216 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5217 
5218         /* Request PME */
5219         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5220 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5221 	if (if_getcapenable(ifp) & IFCAP_WOL)
5222 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5223         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5224 
5225 	return;
5226 }
5227 
5228 /*
5229 ** WOL in the newer chipset interfaces (pchlan)
5230 ** require thing to be copied into the phy
5231 */
5232 static int
5233 em_enable_phy_wakeup(struct adapter *adapter)
5234 {
5235 	struct e1000_hw *hw = &adapter->hw;
5236 	u32 mreg, ret = 0;
5237 	u16 preg;
5238 
5239 	/* copy MAC RARs to PHY RARs */
5240 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5241 
5242 	/* copy MAC MTA to PHY MTA */
5243 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5244 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5245 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5246 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5247 		    (u16)((mreg >> 16) & 0xFFFF));
5248 	}
5249 
5250 	/* configure PHY Rx Control register */
5251 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5252 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5253 	if (mreg & E1000_RCTL_UPE)
5254 		preg |= BM_RCTL_UPE;
5255 	if (mreg & E1000_RCTL_MPE)
5256 		preg |= BM_RCTL_MPE;
5257 	preg &= ~(BM_RCTL_MO_MASK);
5258 	if (mreg & E1000_RCTL_MO_3)
5259 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5260 				<< BM_RCTL_MO_SHIFT);
5261 	if (mreg & E1000_RCTL_BAM)
5262 		preg |= BM_RCTL_BAM;
5263 	if (mreg & E1000_RCTL_PMCF)
5264 		preg |= BM_RCTL_PMCF;
5265 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5266 	if (mreg & E1000_CTRL_RFCE)
5267 		preg |= BM_RCTL_RFCE;
5268 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5269 
5270 	/* enable PHY wakeup in MAC register */
5271 	E1000_WRITE_REG(hw, E1000_WUC,
5272 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5273 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5274 
5275 	/* configure and enable PHY wakeup in PHY registers */
5276 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5277 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5278 
5279 	/* activate PHY wakeup */
5280 	ret = hw->phy.ops.acquire(hw);
5281 	if (ret) {
5282 		printf("Could not acquire PHY\n");
5283 		return ret;
5284 	}
5285 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5286 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5287 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5288 	if (ret) {
5289 		printf("Could not read PHY page 769\n");
5290 		goto out;
5291 	}
5292 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5293 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5294 	if (ret)
5295 		printf("Could not set PHY Host Wakeup bit\n");
5296 out:
5297 	hw->phy.ops.release(hw);
5298 
5299 	return ret;
5300 }
5301 
5302 static void
5303 em_led_func(void *arg, int onoff)
5304 {
5305 	struct adapter	*adapter = arg;
5306 
5307 	EM_CORE_LOCK(adapter);
5308 	if (onoff) {
5309 		e1000_setup_led(&adapter->hw);
5310 		e1000_led_on(&adapter->hw);
5311 	} else {
5312 		e1000_led_off(&adapter->hw);
5313 		e1000_cleanup_led(&adapter->hw);
5314 	}
5315 	EM_CORE_UNLOCK(adapter);
5316 }
5317 
5318 /*
5319 ** Disable the L0S and L1 LINK states
5320 */
5321 static void
5322 em_disable_aspm(struct adapter *adapter)
5323 {
5324 	int		base, reg;
5325 	u16		link_cap,link_ctrl;
5326 	device_t	dev = adapter->dev;
5327 
5328 	switch (adapter->hw.mac.type) {
5329 		case e1000_82573:
5330 		case e1000_82574:
5331 		case e1000_82583:
5332 			break;
5333 		default:
5334 			return;
5335 	}
5336 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5337 		return;
5338 	reg = base + PCIER_LINK_CAP;
5339 	link_cap = pci_read_config(dev, reg, 2);
5340 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5341 		return;
5342 	reg = base + PCIER_LINK_CTL;
5343 	link_ctrl = pci_read_config(dev, reg, 2);
5344 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5345 	pci_write_config(dev, reg, link_ctrl, 2);
5346 	return;
5347 }
5348 
5349 /**********************************************************************
5350  *
5351  *  Update the board statistics counters.
5352  *
5353  **********************************************************************/
5354 static void
5355 em_update_stats_counters(struct adapter *adapter)
5356 {
5357 
5358 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5359 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5360 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5361 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5362 	}
5363 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5364 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5365 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5366 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5367 
5368 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5369 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5370 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5371 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5372 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5373 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5374 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5375 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5376 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5377 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5378 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5379 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5380 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5381 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5382 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5383 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5384 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5385 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5386 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5387 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5388 
5389 	/* For the 64-bit byte counters the low dword must be read first. */
5390 	/* Both registers clear on the read of the high dword */
5391 
5392 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5393 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5394 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5395 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5396 
5397 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5398 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5399 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5400 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5401 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5402 
5403 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5404 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5405 
5406 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5407 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5408 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5409 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5410 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5411 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5412 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5413 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5414 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5415 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5416 
5417 	/* Interrupt Counts */
5418 
5419 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5420 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5421 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5422 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5423 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5424 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5425 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5426 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5427 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5428 
5429 	if (adapter->hw.mac.type >= e1000_82543) {
5430 		adapter->stats.algnerrc +=
5431 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5432 		adapter->stats.rxerrc +=
5433 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5434 		adapter->stats.tncrs +=
5435 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5436 		adapter->stats.cexterr +=
5437 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5438 		adapter->stats.tsctc +=
5439 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5440 		adapter->stats.tsctfc +=
5441 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5442 	}
5443 }
5444 
5445 static uint64_t
5446 em_get_counter(if_t ifp, ift_counter cnt)
5447 {
5448 	struct adapter *adapter;
5449 
5450 	adapter = if_getsoftc(ifp);
5451 
5452 	switch (cnt) {
5453 	case IFCOUNTER_COLLISIONS:
5454 		return (adapter->stats.colc);
5455 	case IFCOUNTER_IERRORS:
5456 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5457 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5458 		    adapter->stats.ruc + adapter->stats.roc +
5459 		    adapter->stats.mpc + adapter->stats.cexterr);
5460 	case IFCOUNTER_OERRORS:
5461 		return (adapter->stats.ecol + adapter->stats.latecol +
5462 		    adapter->watchdog_events);
5463 	default:
5464 		return (if_get_counter_default(ifp, cnt));
5465 	}
5466 }
5467 
5468 /* Export a single 32-bit register via a read-only sysctl. */
5469 static int
5470 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5471 {
5472 	struct adapter *adapter;
5473 	u_int val;
5474 
5475 	adapter = oidp->oid_arg1;
5476 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5477 	return (sysctl_handle_int(oidp, &val, 0, req));
5478 }
5479 
5480 /*
5481  * Add sysctl variables, one per statistic, to the system.
5482  */
5483 static void
5484 em_add_hw_stats(struct adapter *adapter)
5485 {
5486 	device_t dev = adapter->dev;
5487 
5488 	struct tx_ring *txr = adapter->tx_rings;
5489 	struct rx_ring *rxr = adapter->rx_rings;
5490 
5491 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5492 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5493 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5494 	struct e1000_hw_stats *stats = &adapter->stats;
5495 
5496 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5497 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5498 
5499 #define QUEUE_NAME_LEN 32
5500 	char namebuf[QUEUE_NAME_LEN];
5501 
5502 	/* Driver Statistics */
5503 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5504 			CTLFLAG_RD, &adapter->dropped_pkts,
5505 			"Driver dropped packets");
5506 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5507 			CTLFLAG_RD, &adapter->link_irq,
5508 			"Link MSIX IRQ Handled");
5509 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5510 			 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5511 			 "Defragmenting mbuf chain failed");
5512 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5513 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5514 			"Driver tx dma failure in xmit");
5515 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5516 			CTLFLAG_RD, &adapter->rx_overruns,
5517 			"RX overruns");
5518 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5519 			CTLFLAG_RD, &adapter->watchdog_events,
5520 			"Watchdog timeouts");
5521 
5522 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5523 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5524 			em_sysctl_reg_handler, "IU",
5525 			"Device Control Register");
5526 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5527 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5528 			em_sysctl_reg_handler, "IU",
5529 			"Receiver Control Register");
5530 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5531 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5532 			"Flow Control High Watermark");
5533 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5534 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5535 			"Flow Control Low Watermark");
5536 
5537 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5538 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5539 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5540 					    CTLFLAG_RD, NULL, "TX Queue Name");
5541 		queue_list = SYSCTL_CHILDREN(queue_node);
5542 
5543 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5544 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5545 				E1000_TDH(txr->me),
5546 				em_sysctl_reg_handler, "IU",
5547  				"Transmit Descriptor Head");
5548 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5549 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5550 				E1000_TDT(txr->me),
5551 				em_sysctl_reg_handler, "IU",
5552  				"Transmit Descriptor Tail");
5553 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5554 				CTLFLAG_RD, &txr->tx_irq,
5555 				"Queue MSI-X Transmit Interrupts");
5556 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5557 				CTLFLAG_RD, &txr->no_desc_avail,
5558 				"Queue No Descriptor Available");
5559 
5560 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5561 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5562 					    CTLFLAG_RD, NULL, "RX Queue Name");
5563 		queue_list = SYSCTL_CHILDREN(queue_node);
5564 
5565 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5566 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5567 				E1000_RDH(rxr->me),
5568 				em_sysctl_reg_handler, "IU",
5569 				"Receive Descriptor Head");
5570 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5571 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5572 				E1000_RDT(rxr->me),
5573 				em_sysctl_reg_handler, "IU",
5574 				"Receive Descriptor Tail");
5575 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5576 				CTLFLAG_RD, &rxr->rx_irq,
5577 				"Queue MSI-X Receive Interrupts");
5578 	}
5579 
5580 	/* MAC stats get their own sub node */
5581 
5582 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5583 				    CTLFLAG_RD, NULL, "Statistics");
5584 	stat_list = SYSCTL_CHILDREN(stat_node);
5585 
5586 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5587 			CTLFLAG_RD, &stats->ecol,
5588 			"Excessive collisions");
5589 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5590 			CTLFLAG_RD, &stats->scc,
5591 			"Single collisions");
5592 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5593 			CTLFLAG_RD, &stats->mcc,
5594 			"Multiple collisions");
5595 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5596 			CTLFLAG_RD, &stats->latecol,
5597 			"Late collisions");
5598 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5599 			CTLFLAG_RD, &stats->colc,
5600 			"Collision Count");
5601 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5602 			CTLFLAG_RD, &adapter->stats.symerrs,
5603 			"Symbol Errors");
5604 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5605 			CTLFLAG_RD, &adapter->stats.sec,
5606 			"Sequence Errors");
5607 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5608 			CTLFLAG_RD, &adapter->stats.dc,
5609 			"Defer Count");
5610 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5611 			CTLFLAG_RD, &adapter->stats.mpc,
5612 			"Missed Packets");
5613 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5614 			CTLFLAG_RD, &adapter->stats.rnbc,
5615 			"Receive No Buffers");
5616 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5617 			CTLFLAG_RD, &adapter->stats.ruc,
5618 			"Receive Undersize");
5619 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5620 			CTLFLAG_RD, &adapter->stats.rfc,
5621 			"Fragmented Packets Received ");
5622 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5623 			CTLFLAG_RD, &adapter->stats.roc,
5624 			"Oversized Packets Received");
5625 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5626 			CTLFLAG_RD, &adapter->stats.rjc,
5627 			"Recevied Jabber");
5628 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5629 			CTLFLAG_RD, &adapter->stats.rxerrc,
5630 			"Receive Errors");
5631 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5632 			CTLFLAG_RD, &adapter->stats.crcerrs,
5633 			"CRC errors");
5634 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5635 			CTLFLAG_RD, &adapter->stats.algnerrc,
5636 			"Alignment Errors");
5637 	/* On 82575 these are collision counts */
5638 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5639 			CTLFLAG_RD, &adapter->stats.cexterr,
5640 			"Collision/Carrier extension errors");
5641 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5642 			CTLFLAG_RD, &adapter->stats.xonrxc,
5643 			"XON Received");
5644 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5645 			CTLFLAG_RD, &adapter->stats.xontxc,
5646 			"XON Transmitted");
5647 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5648 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5649 			"XOFF Received");
5650 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5651 			CTLFLAG_RD, &adapter->stats.xofftxc,
5652 			"XOFF Transmitted");
5653 
5654 	/* Packet Reception Stats */
5655 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5656 			CTLFLAG_RD, &adapter->stats.tpr,
5657 			"Total Packets Received ");
5658 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5659 			CTLFLAG_RD, &adapter->stats.gprc,
5660 			"Good Packets Received");
5661 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5662 			CTLFLAG_RD, &adapter->stats.bprc,
5663 			"Broadcast Packets Received");
5664 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5665 			CTLFLAG_RD, &adapter->stats.mprc,
5666 			"Multicast Packets Received");
5667 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5668 			CTLFLAG_RD, &adapter->stats.prc64,
5669 			"64 byte frames received ");
5670 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5671 			CTLFLAG_RD, &adapter->stats.prc127,
5672 			"65-127 byte frames received");
5673 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5674 			CTLFLAG_RD, &adapter->stats.prc255,
5675 			"128-255 byte frames received");
5676 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5677 			CTLFLAG_RD, &adapter->stats.prc511,
5678 			"256-511 byte frames received");
5679 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5680 			CTLFLAG_RD, &adapter->stats.prc1023,
5681 			"512-1023 byte frames received");
5682 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5683 			CTLFLAG_RD, &adapter->stats.prc1522,
5684 			"1023-1522 byte frames received");
5685  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5686  			CTLFLAG_RD, &adapter->stats.gorc,
5687  			"Good Octets Received");
5688 
5689 	/* Packet Transmission Stats */
5690  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5691  			CTLFLAG_RD, &adapter->stats.gotc,
5692  			"Good Octets Transmitted");
5693 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5694 			CTLFLAG_RD, &adapter->stats.tpt,
5695 			"Total Packets Transmitted");
5696 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5697 			CTLFLAG_RD, &adapter->stats.gptc,
5698 			"Good Packets Transmitted");
5699 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5700 			CTLFLAG_RD, &adapter->stats.bptc,
5701 			"Broadcast Packets Transmitted");
5702 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5703 			CTLFLAG_RD, &adapter->stats.mptc,
5704 			"Multicast Packets Transmitted");
5705 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5706 			CTLFLAG_RD, &adapter->stats.ptc64,
5707 			"64 byte frames transmitted ");
5708 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5709 			CTLFLAG_RD, &adapter->stats.ptc127,
5710 			"65-127 byte frames transmitted");
5711 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5712 			CTLFLAG_RD, &adapter->stats.ptc255,
5713 			"128-255 byte frames transmitted");
5714 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5715 			CTLFLAG_RD, &adapter->stats.ptc511,
5716 			"256-511 byte frames transmitted");
5717 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5718 			CTLFLAG_RD, &adapter->stats.ptc1023,
5719 			"512-1023 byte frames transmitted");
5720 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5721 			CTLFLAG_RD, &adapter->stats.ptc1522,
5722 			"1024-1522 byte frames transmitted");
5723 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5724 			CTLFLAG_RD, &adapter->stats.tsctc,
5725 			"TSO Contexts Transmitted");
5726 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5727 			CTLFLAG_RD, &adapter->stats.tsctfc,
5728 			"TSO Contexts Failed");
5729 
5730 
5731 	/* Interrupt Stats */
5732 
5733 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5734 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5735 	int_list = SYSCTL_CHILDREN(int_node);
5736 
5737 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5738 			CTLFLAG_RD, &adapter->stats.iac,
5739 			"Interrupt Assertion Count");
5740 
5741 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5742 			CTLFLAG_RD, &adapter->stats.icrxptc,
5743 			"Interrupt Cause Rx Pkt Timer Expire Count");
5744 
5745 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5746 			CTLFLAG_RD, &adapter->stats.icrxatc,
5747 			"Interrupt Cause Rx Abs Timer Expire Count");
5748 
5749 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5750 			CTLFLAG_RD, &adapter->stats.ictxptc,
5751 			"Interrupt Cause Tx Pkt Timer Expire Count");
5752 
5753 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5754 			CTLFLAG_RD, &adapter->stats.ictxatc,
5755 			"Interrupt Cause Tx Abs Timer Expire Count");
5756 
5757 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5758 			CTLFLAG_RD, &adapter->stats.ictxqec,
5759 			"Interrupt Cause Tx Queue Empty Count");
5760 
5761 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5762 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5763 			"Interrupt Cause Tx Queue Min Thresh Count");
5764 
5765 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5766 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5767 			"Interrupt Cause Rx Desc Min Thresh Count");
5768 
5769 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5770 			CTLFLAG_RD, &adapter->stats.icrxoc,
5771 			"Interrupt Cause Receiver Overrun Count");
5772 }
5773 
5774 /**********************************************************************
5775  *
5776  *  This routine provides a way to dump out the adapter eeprom,
5777  *  often a useful debug/service tool. This only dumps the first
5778  *  32 words, stuff that matters is in that extent.
5779  *
5780  **********************************************************************/
5781 static int
5782 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5783 {
5784 	struct adapter *adapter = (struct adapter *)arg1;
5785 	int error;
5786 	int result;
5787 
5788 	result = -1;
5789 	error = sysctl_handle_int(oidp, &result, 0, req);
5790 
5791 	if (error || !req->newptr)
5792 		return (error);
5793 
5794 	/*
5795 	 * This value will cause a hex dump of the
5796 	 * first 32 16-bit words of the EEPROM to
5797 	 * the screen.
5798 	 */
5799 	if (result == 1)
5800 		em_print_nvm_info(adapter);
5801 
5802 	return (error);
5803 }
5804 
5805 static void
5806 em_print_nvm_info(struct adapter *adapter)
5807 {
5808 	u16	eeprom_data;
5809 	int	i, j, row = 0;
5810 
5811 	/* Its a bit crude, but it gets the job done */
5812 	printf("\nInterface EEPROM Dump:\n");
5813 	printf("Offset\n0x0000  ");
5814 	for (i = 0, j = 0; i < 32; i++, j++) {
5815 		if (j == 8) { /* Make the offset block */
5816 			j = 0; ++row;
5817 			printf("\n0x00%x0  ",row);
5818 		}
5819 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5820 		printf("%04x ", eeprom_data);
5821 	}
5822 	printf("\n");
5823 }
5824 
5825 static int
5826 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5827 {
5828 	struct em_int_delay_info *info;
5829 	struct adapter *adapter;
5830 	u32 regval;
5831 	int error, usecs, ticks;
5832 
5833 	info = (struct em_int_delay_info *)arg1;
5834 	usecs = info->value;
5835 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5836 	if (error != 0 || req->newptr == NULL)
5837 		return (error);
5838 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5839 		return (EINVAL);
5840 	info->value = usecs;
5841 	ticks = EM_USECS_TO_TICKS(usecs);
5842 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5843 		ticks *= 4;
5844 
5845 	adapter = info->adapter;
5846 
5847 	EM_CORE_LOCK(adapter);
5848 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5849 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5850 	/* Handle a few special cases. */
5851 	switch (info->offset) {
5852 	case E1000_RDTR:
5853 		break;
5854 	case E1000_TIDV:
5855 		if (ticks == 0) {
5856 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5857 			/* Don't write 0 into the TIDV register. */
5858 			regval++;
5859 		} else
5860 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5861 		break;
5862 	}
5863 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5864 	EM_CORE_UNLOCK(adapter);
5865 	return (0);
5866 }
5867 
5868 static void
5869 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5870 	const char *description, struct em_int_delay_info *info,
5871 	int offset, int value)
5872 {
5873 	info->adapter = adapter;
5874 	info->offset = offset;
5875 	info->value = value;
5876 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5877 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5878 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5879 	    info, 0, em_sysctl_int_delay, "I", description);
5880 }
5881 
5882 static void
5883 em_set_sysctl_value(struct adapter *adapter, const char *name,
5884 	const char *description, int *limit, int value)
5885 {
5886 	*limit = value;
5887 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5888 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5889 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5890 }
5891 
5892 
5893 /*
5894 ** Set flow control using sysctl:
5895 ** Flow control values:
5896 **      0 - off
5897 **      1 - rx pause
5898 **      2 - tx pause
5899 **      3 - full
5900 */
5901 static int
5902 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5903 {
5904         int		error;
5905 	static int	input = 3; /* default is full */
5906         struct adapter	*adapter = (struct adapter *) arg1;
5907 
5908         error = sysctl_handle_int(oidp, &input, 0, req);
5909 
5910         if ((error) || (req->newptr == NULL))
5911                 return (error);
5912 
5913 	if (input == adapter->fc) /* no change? */
5914 		return (error);
5915 
5916         switch (input) {
5917                 case e1000_fc_rx_pause:
5918                 case e1000_fc_tx_pause:
5919                 case e1000_fc_full:
5920                 case e1000_fc_none:
5921                         adapter->hw.fc.requested_mode = input;
5922 			adapter->fc = input;
5923                         break;
5924                 default:
5925 			/* Do nothing */
5926 			return (error);
5927         }
5928 
5929         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5930         e1000_force_mac_fc(&adapter->hw);
5931         return (error);
5932 }
5933 
5934 /*
5935 ** Manage Energy Efficient Ethernet:
5936 ** Control values:
5937 **     0/1 - enabled/disabled
5938 */
5939 static int
5940 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5941 {
5942        struct adapter *adapter = (struct adapter *) arg1;
5943        int             error, value;
5944 
5945        value = adapter->hw.dev_spec.ich8lan.eee_disable;
5946        error = sysctl_handle_int(oidp, &value, 0, req);
5947        if (error || req->newptr == NULL)
5948                return (error);
5949        EM_CORE_LOCK(adapter);
5950        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5951        em_init_locked(adapter);
5952        EM_CORE_UNLOCK(adapter);
5953        return (0);
5954 }
5955 
5956 static int
5957 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5958 {
5959 	struct adapter *adapter;
5960 	int error;
5961 	int result;
5962 
5963 	result = -1;
5964 	error = sysctl_handle_int(oidp, &result, 0, req);
5965 
5966 	if (error || !req->newptr)
5967 		return (error);
5968 
5969 	if (result == 1) {
5970 		adapter = (struct adapter *)arg1;
5971 		em_print_debug_info(adapter);
5972         }
5973 
5974 	return (error);
5975 }
5976 
5977 /*
5978 ** This routine is meant to be fluid, add whatever is
5979 ** needed for debugging a problem.  -jfv
5980 */
5981 static void
5982 em_print_debug_info(struct adapter *adapter)
5983 {
5984 	device_t dev = adapter->dev;
5985 	struct tx_ring *txr = adapter->tx_rings;
5986 	struct rx_ring *rxr = adapter->rx_rings;
5987 
5988 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
5989 		printf("Interface is RUNNING ");
5990 	else
5991 		printf("Interface is NOT RUNNING\n");
5992 
5993 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
5994 		printf("and INACTIVE\n");
5995 	else
5996 		printf("and ACTIVE\n");
5997 
5998 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5999 		device_printf(dev, "TX Queue %d ------\n", i);
6000 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6001 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6002 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6003 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6004 		device_printf(dev, "TX descriptors avail = %d\n",
6005 	    		txr->tx_avail);
6006 		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6007 	    		txr->no_desc_avail);
6008 		device_printf(dev, "RX Queue %d ------\n", i);
6009 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6010 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6011 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6012 		device_printf(dev, "RX discarded packets = %ld\n",
6013 	    		rxr->rx_discarded);
6014 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6015 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6016 	}
6017 }
6018 
6019 #ifdef EM_MULTIQUEUE
6020 /*
6021  * 82574 only:
6022  * Write a new value to the EEPROM increasing the number of MSIX
6023  * vectors from 3 to 5, for proper multiqueue support.
6024  */
6025 static void
6026 em_enable_vectors_82574(struct adapter *adapter)
6027 {
6028 	struct e1000_hw *hw = &adapter->hw;
6029 	device_t dev = adapter->dev;
6030 	u16 edata;
6031 
6032 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6033 	printf("Current cap: %#06x\n", edata);
6034 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6035 		device_printf(dev, "Writing to eeprom: increasing "
6036 		    "reported MSIX vectors from 3 to 5...\n");
6037 		edata &= ~(EM_NVM_MSIX_N_MASK);
6038 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6039 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6040 		e1000_update_nvm_checksum(hw);
6041 		device_printf(dev, "Writing to eeprom: done\n");
6042 	}
6043 }
6044 #endif
6045 
6046 #ifdef DDB
6047 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6048 {
6049 	devclass_t	dc;
6050 	int max_em;
6051 
6052 	dc = devclass_find("em");
6053 	max_em = devclass_get_maxunit(dc);
6054 
6055 	for (int index = 0; index < (max_em - 1); index++) {
6056 		device_t dev;
6057 		dev = devclass_get_device(dc, index);
6058 		if (device_get_driver(dev) == &em_driver) {
6059 			struct adapter *adapter = device_get_softc(dev);
6060 			EM_CORE_LOCK(adapter);
6061 			em_init_locked(adapter);
6062 			EM_CORE_UNLOCK(adapter);
6063 		}
6064 	}
6065 }
6066 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6067 {
6068 	devclass_t	dc;
6069 	int max_em;
6070 
6071 	dc = devclass_find("em");
6072 	max_em = devclass_get_maxunit(dc);
6073 
6074 	for (int index = 0; index < (max_em - 1); index++) {
6075 		device_t dev;
6076 		dev = devclass_get_device(dc, index);
6077 		if (device_get_driver(dev) == &em_driver)
6078 			em_print_debug_info(device_get_softc(dev));
6079 	}
6080 
6081 }
6082 #endif
6083