xref: /freebsd/sys/dev/e1000/if_em.c (revision 4f0a4502a1f33fef287ac558c98e5ef99a32216f)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69 
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77 
78 #include <net/if_types.h>
79 #include <net/if_vlan_var.h>
80 
81 #include <netinet/in_systm.h>
82 #include <netinet/in.h>
83 #include <netinet/if_ether.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip6.h>
86 #include <netinet/tcp.h>
87 #include <netinet/udp.h>
88 
89 #include <machine/in_cksum.h>
90 #include <dev/led/led.h>
91 #include <dev/pci/pcivar.h>
92 #include <dev/pci/pcireg.h>
93 
94 #include "e1000_api.h"
95 #include "e1000_82571.h"
96 #include "if_em.h"
97 
/*********************************************************************
 *  Debug statistics switch.  Initialised to 0 (off); set this to one
 *  to display debug statistics.
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version string.  em_probe() appends it to the branding
 *  string when it builds the device description.
 *********************************************************************/
char em_driver_version[] = "7.4.2";
107 
108 /*********************************************************************
109  *  PCI Device ID Table
110  *
111  *  Used by probe to select devices to load on
112  *  Last field stores an index into e1000_strings
113  *  Last entry must be all 0s
114  *
115  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
116  *********************************************************************/
117 
118 static em_vendor_info_t em_vendor_info_array[] =
119 {
120 	/* Intel(R) PRO/1000 Network Connection */
121 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
122 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
123 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
124 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
125 						PCI_ANY_ID, PCI_ANY_ID, 0},
126 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
127 						PCI_ANY_ID, PCI_ANY_ID, 0},
128 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
129 						PCI_ANY_ID, PCI_ANY_ID, 0},
130 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
131 						PCI_ANY_ID, PCI_ANY_ID, 0},
132 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
133 						PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
135 						PCI_ANY_ID, PCI_ANY_ID, 0},
136 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
138 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
139 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
140 
141 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
142 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
143 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
145 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
146 						PCI_ANY_ID, PCI_ANY_ID, 0},
147 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
148 						PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
150 						PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
152 						PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
176 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
177 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
178 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
179 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
180 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
181 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
182 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
183 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
184 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
185 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
186 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
187 						PCI_ANY_ID, PCI_ANY_ID, 0},
188 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
189 						PCI_ANY_ID, PCI_ANY_ID, 0},
190 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
191 	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
192 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
193 	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
194 	/* required last entry */
195 	{ 0, 0, 0, 0, 0}
196 };
197 
/*********************************************************************
 *  Branding strings for the supported NICs, selected by the String
 *  Index column of em_vendor_info_array.  Every table row currently
 *  uses index 0.
 *********************************************************************/

static char *em_strings[] = {
	[0] = "Intel(R) PRO/1000 Network Connection",
};
205 
/*********************************************************************
 *  Function prototypes
 *
 *  Forward declarations for the file-local (static) functions of this
 *  driver, so they can be referenced before their definitions.
 *********************************************************************/
/* Device interface entry points (wired into em_methods[]) */
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
/* Transmit entry points: multiqueue or single-queue flavour */
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t,
		    struct tx_ring *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
/* Interface control, initialisation and resource setup */
static int	em_ioctl(if_t, u_long, caddr_t);
static uint64_t	em_get_counter(if_t, ift_counter);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

/* Transmit ring setup and teardown */
static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

/* Receive ring setup and teardown */
static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

/* Interrupt control, statistics, data path and miscellaneous helpers */
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
		    const struct em_rxbuffer *rxbuf);
static void	em_receive_checksum(uint32_t status, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int 	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

/* Interrupt filter; presumably for the legacy/MSI path -- confirm */
static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void	em_enable_vectors_82574(struct adapter *);
#endif

/* Per-device sysctl helpers and handlers */
static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */
325 
326 /*********************************************************************
327  *  FreeBSD Device Interface Entry Points
328  *********************************************************************/
329 
330 static device_method_t em_methods[] = {
331 	/* Device interface */
332 	DEVMETHOD(device_probe, em_probe),
333 	DEVMETHOD(device_attach, em_attach),
334 	DEVMETHOD(device_detach, em_detach),
335 	DEVMETHOD(device_shutdown, em_shutdown),
336 	DEVMETHOD(device_suspend, em_suspend),
337 	DEVMETHOD(device_resume, em_resume),
338 	DEVMETHOD_END
339 };
340 
341 static driver_t em_driver = {
342 	"em", em_methods, sizeof(struct adapter),
343 };
344 
345 devclass_t em_devclass;
346 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
347 MODULE_DEPEND(em, pci, 1, 1, 1);
348 MODULE_DEPEND(em, ether, 1, 1, 1);
349 #ifdef DEV_NETMAP
350 MODULE_DEPEND(em, netmap, 1, 1, 1);
351 #endif /* DEV_NETMAP */
352 
/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/*
 * Convert between interrupt-delay register ticks and microseconds.
 * The factors 1024/1000 and 1000/1024 imply one tick is 1.024 usecs;
 * the added 500 / 512 rounds the integer division to nearest.
 */
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
/* NOTE(review): not used in this part of the file; meaning of 66 unconfirmed */
#define M_TSO_LEN			66

/*
 * Default for the "itr" sysctl registered in em_attach():
 * 10^9 / (MAX_INTS_PER_SEC * 256), which is 488 with integer division.
 * Presumably the ITR register counts in 256 ns units -- confirm
 * against the hardware documentation.
 */
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

/* NOTE(review): used by TSO code outside this view; meaning of 4 unconfirmed */
#define TSO_WORKAROUND	4
370 
/*
 * Driver-wide tunables under the hw.em sysctl node.  All are declared
 * CTLFLAG_RDTUN; em_attach() reads several of them when setting up
 * each adapter.
 */
static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

/* Defaults for the per-device rx/tx interrupt delay sysctls, in usecs */
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

/* Defaults for the per-device absolute interrupt delay limits, in usecs */
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

/*
 * Requested descriptor ring sizes.  em_attach() validates them and
 * falls back to EM_DEFAULT_RXD / EM_DEFAULT_TXD when they are out of
 * range or misaligned.
 */
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store the last used CPU when binding queues
** to CPUs.  Initialised to -1.
** NOTE(review): the previous comment named igb_allocate_msix and a
** CPU_FIRST starting value; presumably em_allocate_msix advances this
** as queues are bound -- confirm against that function.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/*
 * Energy efficient ethernet - default to OFF.
 * em_attach() copies this value into hw->dev_spec.ich8lan.eee_disable,
 * so the default of 1 leaves EEE disabled.
 * NOTE(review): the sysctl description below reads "Enable ...", which
 * looks inverted relative to how the value is consumed -- confirm.
 */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;
440 
441 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
442 #include <dev/netmap/if_em_netmap.h>
443 #endif /* DEV_NETMAP */
444 
445 /*********************************************************************
446  *  Device identification routine
447  *
448  *  em_probe determines if the driver should be loaded on
449  *  adapter based on PCI vendor/device id of the adapter.
450  *
451  *  return BUS_PROBE_DEFAULT on success, positive on failure
452  *********************************************************************/
453 
454 static int
455 em_probe(device_t dev)
456 {
457 	char		adapter_name[60];
458 	uint16_t	pci_vendor_id = 0;
459 	uint16_t	pci_device_id = 0;
460 	uint16_t	pci_subvendor_id = 0;
461 	uint16_t	pci_subdevice_id = 0;
462 	em_vendor_info_t *ent;
463 
464 	INIT_DEBUGOUT("em_probe: begin");
465 
466 	pci_vendor_id = pci_get_vendor(dev);
467 	if (pci_vendor_id != EM_VENDOR_ID)
468 		return (ENXIO);
469 
470 	pci_device_id = pci_get_device(dev);
471 	pci_subvendor_id = pci_get_subvendor(dev);
472 	pci_subdevice_id = pci_get_subdevice(dev);
473 
474 	ent = em_vendor_info_array;
475 	while (ent->vendor_id != 0) {
476 		if ((pci_vendor_id == ent->vendor_id) &&
477 		    (pci_device_id == ent->device_id) &&
478 
479 		    ((pci_subvendor_id == ent->subvendor_id) ||
480 		    (ent->subvendor_id == PCI_ANY_ID)) &&
481 
482 		    ((pci_subdevice_id == ent->subdevice_id) ||
483 		    (ent->subdevice_id == PCI_ANY_ID))) {
484 			sprintf(adapter_name, "%s %s",
485 				em_strings[ent->index],
486 				em_driver_version);
487 			device_set_desc_copy(dev, adapter_name);
488 			return (BUS_PROBE_DEFAULT);
489 		}
490 		ent++;
491 	}
492 
493 	return (ENXIO);
494 }
495 
496 /*********************************************************************
497  *  Device initialization routine
498  *
499  *  The attach entry point is called when the driver is being loaded.
500  *  This routine identifies the type of hardware, allocates all resources
501  *  and initializes the hardware.
502  *
503  *  return 0 on success, positive on failure
504  *********************************************************************/
505 
506 static int
507 em_attach(device_t dev)
508 {
509 	struct adapter	*adapter;
510 	struct e1000_hw	*hw;
511 	int		error = 0;
512 
513 	INIT_DEBUGOUT("em_attach: begin");
514 
515 	if (resource_disabled("em", device_get_unit(dev))) {
516 		device_printf(dev, "Disabled by device hint\n");
517 		return (ENXIO);
518 	}
519 
520 	adapter = device_get_softc(dev);
521 	adapter->dev = adapter->osdep.dev = dev;
522 	hw = &adapter->hw;
523 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
524 
525 	/* SYSCTL stuff */
526 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
527 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
528 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
529 	    em_sysctl_nvm_info, "I", "NVM Information");
530 
531 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
532 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
533 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
534 	    em_sysctl_debug_info, "I", "Debug Information");
535 
536 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
537 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
538 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
539 	    em_set_flowcntl, "I", "Flow Control");
540 
541 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
542 
543 	/* Determine hardware and mac info */
544 	em_identify_hardware(adapter);
545 
546 	/* Setup PCI resources */
547 	if (em_allocate_pci_resources(adapter)) {
548 		device_printf(dev, "Allocation of PCI resources failed\n");
549 		error = ENXIO;
550 		goto err_pci;
551 	}
552 
553 	/*
554 	** For ICH8 and family we need to
555 	** map the flash memory, and this
556 	** must happen after the MAC is
557 	** identified
558 	*/
559 	if ((hw->mac.type == e1000_ich8lan) ||
560 	    (hw->mac.type == e1000_ich9lan) ||
561 	    (hw->mac.type == e1000_ich10lan) ||
562 	    (hw->mac.type == e1000_pchlan) ||
563 	    (hw->mac.type == e1000_pch2lan) ||
564 	    (hw->mac.type == e1000_pch_lpt)) {
565 		int rid = EM_BAR_TYPE_FLASH;
566 		adapter->flash = bus_alloc_resource_any(dev,
567 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
568 		if (adapter->flash == NULL) {
569 			device_printf(dev, "Mapping of Flash failed\n");
570 			error = ENXIO;
571 			goto err_pci;
572 		}
573 		/* This is used in the shared code */
574 		hw->flash_address = (u8 *)adapter->flash;
575 		adapter->osdep.flash_bus_space_tag =
576 		    rman_get_bustag(adapter->flash);
577 		adapter->osdep.flash_bus_space_handle =
578 		    rman_get_bushandle(adapter->flash);
579 	}
580 
581 	/* Do Shared Code initialization */
582 	if (e1000_setup_init_funcs(hw, TRUE)) {
583 		device_printf(dev, "Setup of Shared code failed\n");
584 		error = ENXIO;
585 		goto err_pci;
586 	}
587 
588 	/*
589 	 * Setup MSI/X or MSI if PCI Express
590 	 */
591 	adapter->msix = em_setup_msix(adapter);
592 
593 	e1000_get_bus_info(hw);
594 
595 	/* Set up some sysctls for the tunable interrupt delays */
596 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
597 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
598 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
599 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
600 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
601 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
602 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
603 	    "receive interrupt delay limit in usecs",
604 	    &adapter->rx_abs_int_delay,
605 	    E1000_REGISTER(hw, E1000_RADV),
606 	    em_rx_abs_int_delay_dflt);
607 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
608 	    "transmit interrupt delay limit in usecs",
609 	    &adapter->tx_abs_int_delay,
610 	    E1000_REGISTER(hw, E1000_TADV),
611 	    em_tx_abs_int_delay_dflt);
612 	em_add_int_delay_sysctl(adapter, "itr",
613 	    "interrupt delay limit in usecs/4",
614 	    &adapter->tx_itr,
615 	    E1000_REGISTER(hw, E1000_ITR),
616 	    DEFAULT_ITR);
617 
618 	/* Sysctl for limiting the amount of work done in the taskqueue */
619 	em_set_sysctl_value(adapter, "rx_processing_limit",
620 	    "max number of rx packets to process", &adapter->rx_process_limit,
621 	    em_rx_process_limit);
622 
623 	/*
624 	 * Validate number of transmit and receive descriptors. It
625 	 * must not exceed hardware maximum, and must be multiple
626 	 * of E1000_DBA_ALIGN.
627 	 */
628 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
629 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
630 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
631 		    EM_DEFAULT_TXD, em_txd);
632 		adapter->num_tx_desc = EM_DEFAULT_TXD;
633 	} else
634 		adapter->num_tx_desc = em_txd;
635 
636 	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
637 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
638 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
639 		    EM_DEFAULT_RXD, em_rxd);
640 		adapter->num_rx_desc = EM_DEFAULT_RXD;
641 	} else
642 		adapter->num_rx_desc = em_rxd;
643 
644 	hw->mac.autoneg = DO_AUTO_NEG;
645 	hw->phy.autoneg_wait_to_complete = FALSE;
646 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
647 
648 	/* Copper options */
649 	if (hw->phy.media_type == e1000_media_type_copper) {
650 		hw->phy.mdix = AUTO_ALL_MODES;
651 		hw->phy.disable_polarity_correction = FALSE;
652 		hw->phy.ms_type = EM_MASTER_SLAVE;
653 	}
654 
655 	/*
656 	 * Set the frame limits assuming
657 	 * standard ethernet sized frames.
658 	 */
659 	adapter->hw.mac.max_frame_size =
660 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
661 
662 	/*
663 	 * This controls when hardware reports transmit completion
664 	 * status.
665 	 */
666 	hw->mac.report_tx_early = 1;
667 
668 	/*
669 	** Get queue/ring memory
670 	*/
671 	if (em_allocate_queues(adapter)) {
672 		error = ENOMEM;
673 		goto err_pci;
674 	}
675 
676 	/* Allocate multicast array memory. */
677 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
678 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
679 	if (adapter->mta == NULL) {
680 		device_printf(dev, "Can not allocate multicast setup array\n");
681 		error = ENOMEM;
682 		goto err_late;
683 	}
684 
685 	/* Check SOL/IDER usage */
686 	if (e1000_check_reset_block(hw))
687 		device_printf(dev, "PHY reset is blocked"
688 		    " due to SOL/IDER session.\n");
689 
690 	/* Sysctl for setting Energy Efficient Ethernet */
691 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
692 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
693 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
694 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
695 	    adapter, 0, em_sysctl_eee, "I",
696 	    "Disable Energy Efficient Ethernet");
697 
698 	/*
699 	** Start from a known state, this is
700 	** important in reading the nvm and
701 	** mac from that.
702 	*/
703 	e1000_reset_hw(hw);
704 
705 
706 	/* Make sure we have a good EEPROM before we read from it */
707 	if (e1000_validate_nvm_checksum(hw) < 0) {
708 		/*
709 		** Some PCI-E parts fail the first check due to
710 		** the link being in sleep state, call it again,
711 		** if it fails a second time its a real issue.
712 		*/
713 		if (e1000_validate_nvm_checksum(hw) < 0) {
714 			device_printf(dev,
715 			    "The EEPROM Checksum Is Not Valid\n");
716 			error = EIO;
717 			goto err_late;
718 		}
719 	}
720 
721 	/* Copy the permanent MAC address out of the EEPROM */
722 	if (e1000_read_mac_addr(hw) < 0) {
723 		device_printf(dev, "EEPROM read error while reading MAC"
724 		    " address\n");
725 		error = EIO;
726 		goto err_late;
727 	}
728 
729 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
730 		device_printf(dev, "Invalid MAC address\n");
731 		error = EIO;
732 		goto err_late;
733 	}
734 
735 	/* Disable ULP support */
736 	e1000_disable_ulp_lpt_lp(hw, TRUE);
737 
738 	/*
739 	**  Do interrupt configuration
740 	*/
741 	if (adapter->msix > 1) /* Do MSIX */
742 		error = em_allocate_msix(adapter);
743 	else  /* MSI or Legacy */
744 		error = em_allocate_legacy(adapter);
745 	if (error)
746 		goto err_late;
747 
748 	/*
749 	 * Get Wake-on-Lan and Management info for later use
750 	 */
751 	em_get_wakeup(dev);
752 
753 	/* Setup OS specific network interface */
754 	if (em_setup_interface(dev, adapter) != 0)
755 		goto err_late;
756 
757 	em_reset(adapter);
758 
759 	/* Initialize statistics */
760 	em_update_stats_counters(adapter);
761 
762 	hw->mac.get_link_status = 1;
763 	em_update_link_status(adapter);
764 
765 	/* Register for VLAN events */
766 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
767 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
768 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
769 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
770 
771 	em_add_hw_stats(adapter);
772 
773 	/* Non-AMT based hardware can now take control from firmware */
774 	if (adapter->has_manage && !adapter->has_amt)
775 		em_get_hw_control(adapter);
776 
777 	/* Tell the stack that the interface is not active */
778 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
779 
780 	adapter->led_dev = led_create(em_led_func, adapter,
781 	    device_get_nameunit(dev));
782 #ifdef DEV_NETMAP
783 	em_netmap_attach(adapter);
784 #endif /* DEV_NETMAP */
785 
786 	INIT_DEBUGOUT("em_attach: end");
787 
788 	return (0);
789 
790 err_late:
791 	em_free_transmit_structures(adapter);
792 	em_free_receive_structures(adapter);
793 	em_release_hw_control(adapter);
794 	if (adapter->ifp != (void *)NULL)
795 		if_free(adapter->ifp);
796 err_pci:
797 	em_free_pci_resources(adapter);
798 	free(adapter->mta, M_DEVBUF);
799 	EM_CORE_LOCK_DESTROY(adapter);
800 
801 	return (error);
802 }
803 
804 /*********************************************************************
805  *  Device removal routine
806  *
807  *  The detach entry point is called when the driver is being removed.
808  *  This routine stops the adapter and deallocates all the resources
809  *  that were allocated for driver operation.
810  *
811  *  return 0 on success, positive on failure
812  *********************************************************************/
813 
814 static int
815 em_detach(device_t dev)
816 {
817 	struct adapter	*adapter = device_get_softc(dev);
818 	if_t ifp = adapter->ifp;
819 
820 	INIT_DEBUGOUT("em_detach: begin");
821 
822 	/* Make sure VLANS are not using driver */
823 	if (if_vlantrunkinuse(ifp)) {
824 		device_printf(dev,"Vlan in use, detach first\n");
825 		return (EBUSY);
826 	}
827 
828 #ifdef DEVICE_POLLING
829 	if (if_getcapenable(ifp) & IFCAP_POLLING)
830 		ether_poll_deregister(ifp);
831 #endif
832 
833 	if (adapter->led_dev != NULL)
834 		led_destroy(adapter->led_dev);
835 
836 	EM_CORE_LOCK(adapter);
837 	adapter->in_detach = 1;
838 	em_stop(adapter);
839 	EM_CORE_UNLOCK(adapter);
840 	EM_CORE_LOCK_DESTROY(adapter);
841 
842 	e1000_phy_hw_reset(&adapter->hw);
843 
844 	em_release_manageability(adapter);
845 	em_release_hw_control(adapter);
846 
847 	/* Unregister VLAN events */
848 	if (adapter->vlan_attach != NULL)
849 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
850 	if (adapter->vlan_detach != NULL)
851 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
852 
853 	ether_ifdetach(adapter->ifp);
854 	callout_drain(&adapter->timer);
855 
856 #ifdef DEV_NETMAP
857 	netmap_detach(ifp);
858 #endif /* DEV_NETMAP */
859 
860 	em_free_pci_resources(adapter);
861 	bus_generic_detach(dev);
862 	if_free(ifp);
863 
864 	em_free_transmit_structures(adapter);
865 	em_free_receive_structures(adapter);
866 
867 	em_release_hw_control(adapter);
868 	free(adapter->mta, M_DEVBUF);
869 
870 	return (0);
871 }
872 
873 /*********************************************************************
874  *
875  *  Shutdown entry point
876  *
877  **********************************************************************/
878 
879 static int
880 em_shutdown(device_t dev)
881 {
882 	return em_suspend(dev);
883 }
884 
885 /*
886  * Suspend/resume device methods.
887  */
888 static int
889 em_suspend(device_t dev)
890 {
891 	struct adapter *adapter = device_get_softc(dev);
892 
893 	EM_CORE_LOCK(adapter);
894 
895         em_release_manageability(adapter);
896 	em_release_hw_control(adapter);
897 	em_enable_wakeup(dev);
898 
899 	EM_CORE_UNLOCK(adapter);
900 
901 	return bus_generic_suspend(dev);
902 }
903 
904 static int
905 em_resume(device_t dev)
906 {
907 	struct adapter *adapter = device_get_softc(dev);
908 	struct tx_ring	*txr = adapter->tx_rings;
909 	if_t ifp = adapter->ifp;
910 
911 	EM_CORE_LOCK(adapter);
912 	if (adapter->hw.mac.type == e1000_pch2lan)
913 		e1000_resume_workarounds_pchlan(&adapter->hw);
914 	em_init_locked(adapter);
915 	em_init_manageability(adapter);
916 
917 	if ((if_getflags(ifp) & IFF_UP) &&
918 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
919 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
920 			EM_TX_LOCK(txr);
921 #ifdef EM_MULTIQUEUE
922 			if (!drbr_empty(ifp, txr->br))
923 				em_mq_start_locked(ifp, txr);
924 #else
925 			if (!if_sendq_empty(ifp))
926 				em_start_locked(ifp, txr);
927 #endif
928 			EM_TX_UNLOCK(txr);
929 		}
930 	}
931 	EM_CORE_UNLOCK(adapter);
932 
933 	return bus_generic_resume(dev);
934 }
935 
936 
937 #ifndef EM_MULTIQUEUE
938 static void
939 em_start_locked(if_t ifp, struct tx_ring *txr)
940 {
941 	struct adapter	*adapter = if_getsoftc(ifp);
942 	struct mbuf	*m_head;
943 
944 	EM_TX_LOCK_ASSERT(txr);
945 
946 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
947 	    IFF_DRV_RUNNING)
948 		return;
949 
950 	if (!adapter->link_active)
951 		return;
952 
953 	while (!if_sendq_empty(ifp)) {
954         	/* Call cleanup if number of TX descriptors low */
955 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
956 			em_txeof(txr);
957 		if (txr->tx_avail < EM_MAX_SCATTER) {
958 			if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
959 			break;
960 		}
961 		m_head = if_dequeue(ifp);
962 		if (m_head == NULL)
963 			break;
964 		/*
965 		 *  Encapsulation can modify our pointer, and or make it
966 		 *  NULL on failure.  In that event, we can't requeue.
967 		 */
968 		if (em_xmit(txr, &m_head)) {
969 			if (m_head == NULL)
970 				break;
971 			if_sendq_prepend(ifp, m_head);
972 			break;
973 		}
974 
975 		/* Mark the queue as having work */
976 		if (txr->busy == EM_TX_IDLE)
977 			txr->busy = EM_TX_BUSY;
978 
979 		/* Send a copy of the frame to the BPF listener */
980 		ETHER_BPF_MTAP(ifp, m_head);
981 
982 	}
983 
984 	return;
985 }
986 
987 static void
988 em_start(if_t ifp)
989 {
990 	struct adapter	*adapter = if_getsoftc(ifp);
991 	struct tx_ring	*txr = adapter->tx_rings;
992 
993 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
994 		EM_TX_LOCK(txr);
995 		em_start_locked(ifp, txr);
996 		EM_TX_UNLOCK(txr);
997 	}
998 	return;
999 }
1000 #else /* EM_MULTIQUEUE */
1001 /*********************************************************************
1002  *  Multiqueue Transmit routines
1003  *
1004  *  em_mq_start is called by the stack to initiate a transmit.
1005  *  however, if busy the driver can queue the request rather
1006  *  than do an immediate send. It is this that is an advantage
1007  *  in this driver, rather than also having multiple tx queues.
1008  **********************************************************************/
1009 /*
1010 ** Multiqueue capable stack interface
1011 */
1012 static int
1013 em_mq_start(if_t ifp, struct mbuf *m)
1014 {
1015 	struct adapter	*adapter = if_getsoftc(ifp);
1016 	struct tx_ring	*txr = adapter->tx_rings;
1017 	unsigned int	i, error;
1018 
1019 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1020 		i = m->m_pkthdr.flowid % adapter->num_queues;
1021 	else
1022 		i = curcpu % adapter->num_queues;
1023 
1024 	txr = &adapter->tx_rings[i];
1025 
1026 	error = drbr_enqueue(ifp, txr->br, m);
1027 	if (error)
1028 		return (error);
1029 
1030 	if (EM_TX_TRYLOCK(txr)) {
1031 		em_mq_start_locked(ifp, txr);
1032 		EM_TX_UNLOCK(txr);
1033 	} else
1034 		taskqueue_enqueue(txr->tq, &txr->tx_task);
1035 
1036 	return (0);
1037 }
1038 
1039 static int
1040 em_mq_start_locked(if_t ifp, struct tx_ring *txr)
1041 {
1042 	struct adapter  *adapter = txr->adapter;
1043         struct mbuf     *next;
1044         int             err = 0, enq = 0;
1045 
1046 	EM_TX_LOCK_ASSERT(txr);
1047 
1048 	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
1049 	    adapter->link_active == 0) {
1050 		return (ENETDOWN);
1051 	}
1052 
1053 	/* Process the queue */
1054 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1055 		if ((err = em_xmit(txr, &next)) != 0) {
1056 			if (next == NULL) {
1057 				/* It was freed, move forward */
1058 				drbr_advance(ifp, txr->br);
1059 			} else {
1060 				/*
1061 				 * Still have one left, it may not be
1062 				 * the same since the transmit function
1063 				 * may have changed it.
1064 				 */
1065 				drbr_putback(ifp, txr->br, next);
1066 			}
1067 			break;
1068 		}
1069 		drbr_advance(ifp, txr->br);
1070 		enq++;
1071 		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
1072 		if (next->m_flags & M_MCAST)
1073 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1074 		ETHER_BPF_MTAP(ifp, next);
1075 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
1076                         break;
1077 	}
1078 
1079 	/* Mark the queue as having work */
1080 	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1081 		txr->busy = EM_TX_BUSY;
1082 
1083 	if (txr->tx_avail < EM_MAX_SCATTER)
1084 		em_txeof(txr);
1085 	if (txr->tx_avail < EM_MAX_SCATTER) {
1086 		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
1087 	}
1088 	return (err);
1089 }
1090 
1091 /*
1092 ** Flush all ring buffers
1093 */
1094 static void
1095 em_qflush(if_t ifp)
1096 {
1097 	struct adapter  *adapter = if_getsoftc(ifp);
1098 	struct tx_ring  *txr = adapter->tx_rings;
1099 	struct mbuf     *m;
1100 
1101 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1102 		EM_TX_LOCK(txr);
1103 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1104 			m_freem(m);
1105 		EM_TX_UNLOCK(txr);
1106 	}
1107 	if_qflush(ifp);
1108 }
1109 #endif /* EM_MULTIQUEUE */
1110 
1111 /*********************************************************************
1112  *  Ioctl entry point
1113  *
1114  *  em_ioctl is called when the user wants to configure the
1115  *  interface.
1116  *
1117  *  return 0 on success, positive on failure
1118  **********************************************************************/
1119 
1120 static int
1121 em_ioctl(if_t ifp, u_long command, caddr_t data)
1122 {
1123 	struct adapter	*adapter = if_getsoftc(ifp);
1124 	struct ifreq	*ifr = (struct ifreq *)data;
1125 #if defined(INET) || defined(INET6)
1126 	struct ifaddr	*ifa = (struct ifaddr *)data;
1127 #endif
1128 	bool		avoid_reset = FALSE;
1129 	int		error = 0;
1130 
1131 	if (adapter->in_detach)
1132 		return (error);
1133 
1134 	switch (command) {
1135 	case SIOCSIFADDR:
1136 #ifdef INET
1137 		if (ifa->ifa_addr->sa_family == AF_INET)
1138 			avoid_reset = TRUE;
1139 #endif
1140 #ifdef INET6
1141 		if (ifa->ifa_addr->sa_family == AF_INET6)
1142 			avoid_reset = TRUE;
1143 #endif
1144 		/*
1145 		** Calling init results in link renegotiation,
1146 		** so we avoid doing it when possible.
1147 		*/
1148 		if (avoid_reset) {
1149 			if_setflagbits(ifp,IFF_UP,0);
1150 			if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
1151 				em_init(adapter);
1152 #ifdef INET
1153 			if (!(if_getflags(ifp) & IFF_NOARP))
1154 				arp_ifinit(ifp, ifa);
1155 #endif
1156 		} else
1157 			error = ether_ioctl(ifp, command, data);
1158 		break;
1159 	case SIOCSIFMTU:
1160 	    {
1161 		int max_frame_size;
1162 
1163 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1164 
1165 		EM_CORE_LOCK(adapter);
1166 		switch (adapter->hw.mac.type) {
1167 		case e1000_82571:
1168 		case e1000_82572:
1169 		case e1000_ich9lan:
1170 		case e1000_ich10lan:
1171 		case e1000_pch2lan:
1172 		case e1000_pch_lpt:
1173 		case e1000_82574:
1174 		case e1000_82583:
1175 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1176 			max_frame_size = 9234;
1177 			break;
1178 		case e1000_pchlan:
1179 			max_frame_size = 4096;
1180 			break;
1181 			/* Adapters that do not support jumbo frames */
1182 		case e1000_ich8lan:
1183 			max_frame_size = ETHER_MAX_LEN;
1184 			break;
1185 		default:
1186 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1187 		}
1188 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1189 		    ETHER_CRC_LEN) {
1190 			EM_CORE_UNLOCK(adapter);
1191 			error = EINVAL;
1192 			break;
1193 		}
1194 
1195 		if_setmtu(ifp, ifr->ifr_mtu);
1196 		adapter->hw.mac.max_frame_size =
1197 		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
1198 		em_init_locked(adapter);
1199 		EM_CORE_UNLOCK(adapter);
1200 		break;
1201 	    }
1202 	case SIOCSIFFLAGS:
1203 		IOCTL_DEBUGOUT("ioctl rcv'd:\
1204 		    SIOCSIFFLAGS (Set Interface Flags)");
1205 		EM_CORE_LOCK(adapter);
1206 		if (if_getflags(ifp) & IFF_UP) {
1207 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1208 				if ((if_getflags(ifp) ^ adapter->if_flags) &
1209 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1210 					em_disable_promisc(adapter);
1211 					em_set_promisc(adapter);
1212 				}
1213 			} else
1214 				em_init_locked(adapter);
1215 		} else
1216 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1217 				em_stop(adapter);
1218 		adapter->if_flags = if_getflags(ifp);
1219 		EM_CORE_UNLOCK(adapter);
1220 		break;
1221 	case SIOCADDMULTI:
1222 	case SIOCDELMULTI:
1223 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1224 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1225 			EM_CORE_LOCK(adapter);
1226 			em_disable_intr(adapter);
1227 			em_set_multi(adapter);
1228 #ifdef DEVICE_POLLING
1229 			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
1230 #endif
1231 				em_enable_intr(adapter);
1232 			EM_CORE_UNLOCK(adapter);
1233 		}
1234 		break;
1235 	case SIOCSIFMEDIA:
1236 		/* Check SOL/IDER usage */
1237 		EM_CORE_LOCK(adapter);
1238 		if (e1000_check_reset_block(&adapter->hw)) {
1239 			EM_CORE_UNLOCK(adapter);
1240 			device_printf(adapter->dev, "Media change is"
1241 			    " blocked due to SOL/IDER session.\n");
1242 			break;
1243 		}
1244 		EM_CORE_UNLOCK(adapter);
1245 		/* falls thru */
1246 	case SIOCGIFMEDIA:
1247 		IOCTL_DEBUGOUT("ioctl rcv'd: \
1248 		    SIOCxIFMEDIA (Get/Set Interface Media)");
1249 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1250 		break;
1251 	case SIOCSIFCAP:
1252 	    {
1253 		int mask, reinit;
1254 
1255 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1256 		reinit = 0;
1257 		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1258 #ifdef DEVICE_POLLING
1259 		if (mask & IFCAP_POLLING) {
1260 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1261 				error = ether_poll_register(em_poll, ifp);
1262 				if (error)
1263 					return (error);
1264 				EM_CORE_LOCK(adapter);
1265 				em_disable_intr(adapter);
1266 				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
1267 				EM_CORE_UNLOCK(adapter);
1268 			} else {
1269 				error = ether_poll_deregister(ifp);
1270 				/* Enable interrupt even in error case */
1271 				EM_CORE_LOCK(adapter);
1272 				em_enable_intr(adapter);
1273 				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
1274 				EM_CORE_UNLOCK(adapter);
1275 			}
1276 		}
1277 #endif
1278 		if (mask & IFCAP_HWCSUM) {
1279 			if_togglecapenable(ifp,IFCAP_HWCSUM);
1280 			reinit = 1;
1281 		}
1282 		if (mask & IFCAP_TSO4) {
1283 			if_togglecapenable(ifp,IFCAP_TSO4);
1284 			reinit = 1;
1285 		}
1286 		if (mask & IFCAP_VLAN_HWTAGGING) {
1287 			if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
1288 			reinit = 1;
1289 		}
1290 		if (mask & IFCAP_VLAN_HWFILTER) {
1291 			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1292 			reinit = 1;
1293 		}
1294 		if (mask & IFCAP_VLAN_HWTSO) {
1295 			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1296 			reinit = 1;
1297 		}
1298 		if ((mask & IFCAP_WOL) &&
1299 		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
1300 			if (mask & IFCAP_WOL_MCAST)
1301 				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
1302 			if (mask & IFCAP_WOL_MAGIC)
1303 				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
1304 		}
1305 		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1306 			em_init(adapter);
1307 		if_vlancap(ifp);
1308 		break;
1309 	    }
1310 
1311 	default:
1312 		error = ether_ioctl(ifp, command, data);
1313 		break;
1314 	}
1315 
1316 	return (error);
1317 }
1318 
1319 
1320 /*********************************************************************
1321  *  Init entry point
1322  *
1323  *  This routine is used in two ways. It is used by the stack as
1324  *  init entry point in network interface structure. It is also used
1325  *  by the driver as a hw/sw initialization routine to get to a
1326  *  consistent state.
1327  *
1328  *  return 0 on success, positive on failure
1329  **********************************************************************/
1330 
1331 static void
1332 em_init_locked(struct adapter *adapter)
1333 {
1334 	if_t ifp = adapter->ifp;
1335 	device_t	dev = adapter->dev;
1336 
1337 	INIT_DEBUGOUT("em_init: begin");
1338 
1339 	EM_CORE_LOCK_ASSERT(adapter);
1340 
1341 	em_disable_intr(adapter);
1342 	callout_stop(&adapter->timer);
1343 
1344 	/* Get the latest mac address, User can use a LAA */
1345         bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1346               ETHER_ADDR_LEN);
1347 
1348 	/* Put the address into the Receive Address Array */
1349 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1350 
1351 	/*
1352 	 * With the 82571 adapter, RAR[0] may be overwritten
1353 	 * when the other port is reset, we make a duplicate
1354 	 * in RAR[14] for that eventuality, this assures
1355 	 * the interface continues to function.
1356 	 */
1357 	if (adapter->hw.mac.type == e1000_82571) {
1358 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1359 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1360 		    E1000_RAR_ENTRIES - 1);
1361 	}
1362 
1363 	/* Initialize the hardware */
1364 	em_reset(adapter);
1365 	em_update_link_status(adapter);
1366 
1367 	/* Setup VLAN support, basic and offload if available */
1368 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1369 
1370 	/* Set hardware offload abilities */
1371 	if_clearhwassist(ifp);
1372 	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1373 		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1374 	if (if_getcapenable(ifp) & IFCAP_TSO4)
1375 		if_sethwassistbits(ifp, CSUM_TSO, 0);
1376 
1377 	/* Configure for OS presence */
1378 	em_init_manageability(adapter);
1379 
1380 	/* Prepare transmit descriptors and buffers */
1381 	em_setup_transmit_structures(adapter);
1382 	em_initialize_transmit_unit(adapter);
1383 
1384 	/* Setup Multicast table */
1385 	em_set_multi(adapter);
1386 
1387 	/*
1388 	** Figure out the desired mbuf
1389 	** pool for doing jumbos
1390 	*/
1391 	if (adapter->hw.mac.max_frame_size <= 2048)
1392 		adapter->rx_mbuf_sz = MCLBYTES;
1393 	else if (adapter->hw.mac.max_frame_size <= 4096)
1394 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1395 	else
1396 		adapter->rx_mbuf_sz = MJUM9BYTES;
1397 
1398 	/* Prepare receive descriptors and buffers */
1399 	if (em_setup_receive_structures(adapter)) {
1400 		device_printf(dev, "Could not setup receive structures\n");
1401 		em_stop(adapter);
1402 		return;
1403 	}
1404 	em_initialize_receive_unit(adapter);
1405 
1406 	/* Use real VLAN Filter support? */
1407 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1408 		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1409 			/* Use real VLAN Filter support */
1410 			em_setup_vlan_hw_support(adapter);
1411 		else {
1412 			u32 ctrl;
1413 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1414 			ctrl |= E1000_CTRL_VME;
1415 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1416 		}
1417 	}
1418 
1419 	/* Don't lose promiscuous settings */
1420 	em_set_promisc(adapter);
1421 
1422 	/* Set the interface as ACTIVE */
1423 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1424 
1425 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1426 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1427 
1428 	/* MSI/X configuration for 82574 */
1429 	if (adapter->hw.mac.type == e1000_82574) {
1430 		int tmp;
1431 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1432 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1433 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1434 		/* Set the IVAR - interrupt vector routing. */
1435 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1436 	}
1437 
1438 #ifdef DEVICE_POLLING
1439 	/*
1440 	 * Only enable interrupts if we are not polling, make sure
1441 	 * they are off otherwise.
1442 	 */
1443 	if (if_getcapenable(ifp) & IFCAP_POLLING)
1444 		em_disable_intr(adapter);
1445 	else
1446 #endif /* DEVICE_POLLING */
1447 		em_enable_intr(adapter);
1448 
1449 	/* AMT based hardware can now take control from firmware */
1450 	if (adapter->has_manage && adapter->has_amt)
1451 		em_get_hw_control(adapter);
1452 }
1453 
/* Locking wrapper: run em_init_locked() with the core lock held. */
static void
em_init(void *arg)
{
	struct adapter *sc = (struct adapter *)arg;

	EM_CORE_LOCK(sc);
	em_init_locked(sc);
	EM_CORE_UNLOCK(sc);
}
1463 
1464 
1465 #ifdef DEVICE_POLLING
1466 /*********************************************************************
1467  *
1468  *  Legacy polling routine: note this only works with single queue
1469  *
1470  *********************************************************************/
1471 static int
1472 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1473 {
1474 	struct adapter *adapter = if_getsoftc(ifp);
1475 	struct tx_ring	*txr = adapter->tx_rings;
1476 	struct rx_ring	*rxr = adapter->rx_rings;
1477 	u32		reg_icr;
1478 	int		rx_done;
1479 
1480 	EM_CORE_LOCK(adapter);
1481 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1482 		EM_CORE_UNLOCK(adapter);
1483 		return (0);
1484 	}
1485 
1486 	if (cmd == POLL_AND_CHECK_STATUS) {
1487 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1488 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1489 			callout_stop(&adapter->timer);
1490 			adapter->hw.mac.get_link_status = 1;
1491 			em_update_link_status(adapter);
1492 			callout_reset(&adapter->timer, hz,
1493 			    em_local_timer, adapter);
1494 		}
1495 	}
1496 	EM_CORE_UNLOCK(adapter);
1497 
1498 	em_rxeof(rxr, count, &rx_done);
1499 
1500 	EM_TX_LOCK(txr);
1501 	em_txeof(txr);
1502 #ifdef EM_MULTIQUEUE
1503 	if (!drbr_empty(ifp, txr->br))
1504 		em_mq_start_locked(ifp, txr);
1505 #else
1506 	if (!if_sendq_empty(ifp))
1507 		em_start_locked(ifp, txr);
1508 #endif
1509 	EM_TX_UNLOCK(txr);
1510 
1511 	return (rx_done);
1512 }
1513 #endif /* DEVICE_POLLING */
1514 
1515 
1516 /*********************************************************************
1517  *
1518  *  Fast Legacy/MSI Combined Interrupt Service routine
1519  *
1520  *********************************************************************/
1521 static int
1522 em_irq_fast(void *arg)
1523 {
1524 	struct adapter	*adapter = arg;
1525 	if_t ifp;
1526 	u32		reg_icr;
1527 
1528 	ifp = adapter->ifp;
1529 
1530 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1531 
1532 	/* Hot eject?  */
1533 	if (reg_icr == 0xffffffff)
1534 		return FILTER_STRAY;
1535 
1536 	/* Definitely not our interrupt.  */
1537 	if (reg_icr == 0x0)
1538 		return FILTER_STRAY;
1539 
1540 	/*
1541 	 * Starting with the 82571 chip, bit 31 should be used to
1542 	 * determine whether the interrupt belongs to us.
1543 	 */
1544 	if (adapter->hw.mac.type >= e1000_82571 &&
1545 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1546 		return FILTER_STRAY;
1547 
1548 	em_disable_intr(adapter);
1549 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1550 
1551 	/* Link status change */
1552 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1553 		adapter->hw.mac.get_link_status = 1;
1554 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1555 	}
1556 
1557 	if (reg_icr & E1000_ICR_RXO)
1558 		adapter->rx_overruns++;
1559 	return FILTER_HANDLED;
1560 }
1561 
1562 /* Combined RX/TX handler, used by Legacy and MSI */
1563 static void
1564 em_handle_que(void *context, int pending)
1565 {
1566 	struct adapter	*adapter = context;
1567 	if_t ifp = adapter->ifp;
1568 	struct tx_ring	*txr = adapter->tx_rings;
1569 	struct rx_ring	*rxr = adapter->rx_rings;
1570 
1571 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1572 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1573 
1574 		EM_TX_LOCK(txr);
1575 		em_txeof(txr);
1576 #ifdef EM_MULTIQUEUE
1577 		if (!drbr_empty(ifp, txr->br))
1578 			em_mq_start_locked(ifp, txr);
1579 #else
1580 		if (!if_sendq_empty(ifp))
1581 			em_start_locked(ifp, txr);
1582 #endif
1583 		EM_TX_UNLOCK(txr);
1584 		if (more) {
1585 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1586 			return;
1587 		}
1588 	}
1589 
1590 	em_enable_intr(adapter);
1591 	return;
1592 }
1593 
1594 
1595 /*********************************************************************
1596  *
1597  *  MSIX Interrupt Service Routines
1598  *
1599  **********************************************************************/
1600 static void
1601 em_msix_tx(void *arg)
1602 {
1603 	struct tx_ring *txr = arg;
1604 	struct adapter *adapter = txr->adapter;
1605 	if_t ifp = adapter->ifp;
1606 
1607 	++txr->tx_irq;
1608 	EM_TX_LOCK(txr);
1609 	em_txeof(txr);
1610 #ifdef EM_MULTIQUEUE
1611 	if (!drbr_empty(ifp, txr->br))
1612 		em_mq_start_locked(ifp, txr);
1613 #else
1614 	if (!if_sendq_empty(ifp))
1615 		em_start_locked(ifp, txr);
1616 #endif
1617 
1618 	/* Reenable this interrupt */
1619 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1620 	EM_TX_UNLOCK(txr);
1621 	return;
1622 }
1623 
1624 /*********************************************************************
1625  *
1626  *  MSIX RX Interrupt Service routine
1627  *
1628  **********************************************************************/
1629 
1630 static void
1631 em_msix_rx(void *arg)
1632 {
1633 	struct rx_ring	*rxr = arg;
1634 	struct adapter	*adapter = rxr->adapter;
1635 	bool		more;
1636 
1637 	++rxr->rx_irq;
1638 	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1639 		return;
1640 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1641 	if (more)
1642 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1643 	else {
1644 		/* Reenable this interrupt */
1645 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1646 	}
1647 	return;
1648 }
1649 
1650 /*********************************************************************
1651  *
1652  *  MSIX Link Fast Interrupt Service routine
1653  *
1654  **********************************************************************/
1655 static void
1656 em_msix_link(void *arg)
1657 {
1658 	struct adapter	*adapter = arg;
1659 	u32		reg_icr;
1660 
1661 	++adapter->link_irq;
1662 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1663 
1664 	if (reg_icr & E1000_ICR_RXO)
1665 		adapter->rx_overruns++;
1666 
1667 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1668 		adapter->hw.mac.get_link_status = 1;
1669 		em_handle_link(adapter, 0);
1670 	} else
1671 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1672 		    EM_MSIX_LINK | E1000_IMS_LSC);
1673 	/*
1674  	** Because we must read the ICR for this interrupt
1675  	** it may clear other causes using autoclear, for
1676  	** this reason we simply create a soft interrupt
1677  	** for all these vectors.
1678  	*/
1679 	if (reg_icr) {
1680 		E1000_WRITE_REG(&adapter->hw,
1681 			E1000_ICS, adapter->ims);
1682 	}
1683 	return;
1684 }
1685 
1686 static void
1687 em_handle_rx(void *context, int pending)
1688 {
1689 	struct rx_ring	*rxr = context;
1690 	struct adapter	*adapter = rxr->adapter;
1691         bool            more;
1692 
1693 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1694 	if (more)
1695 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1696 	else {
1697 		/* Reenable this interrupt */
1698 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1699 	}
1700 }
1701 
1702 static void
1703 em_handle_tx(void *context, int pending)
1704 {
1705 	struct tx_ring	*txr = context;
1706 	struct adapter	*adapter = txr->adapter;
1707 	if_t ifp = adapter->ifp;
1708 
1709 	EM_TX_LOCK(txr);
1710 	em_txeof(txr);
1711 #ifdef EM_MULTIQUEUE
1712 	if (!drbr_empty(ifp, txr->br))
1713 		em_mq_start_locked(ifp, txr);
1714 #else
1715 	if (!if_sendq_empty(ifp))
1716 		em_start_locked(ifp, txr);
1717 #endif
1718 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1719 	EM_TX_UNLOCK(txr);
1720 }
1721 
1722 static void
1723 em_handle_link(void *context, int pending)
1724 {
1725 	struct adapter	*adapter = context;
1726 	struct tx_ring	*txr = adapter->tx_rings;
1727 	if_t ifp = adapter->ifp;
1728 
1729 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1730 		return;
1731 
1732 	EM_CORE_LOCK(adapter);
1733 	callout_stop(&adapter->timer);
1734 	em_update_link_status(adapter);
1735 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1736 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1737 	    EM_MSIX_LINK | E1000_IMS_LSC);
1738 	if (adapter->link_active) {
1739 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1740 			EM_TX_LOCK(txr);
1741 #ifdef EM_MULTIQUEUE
1742 			if (!drbr_empty(ifp, txr->br))
1743 				em_mq_start_locked(ifp, txr);
1744 #else
1745 			if (if_sendq_empty(ifp))
1746 				em_start_locked(ifp, txr);
1747 #endif
1748 			EM_TX_UNLOCK(txr);
1749 		}
1750 	}
1751 	EM_CORE_UNLOCK(adapter);
1752 }
1753 
1754 
1755 /*********************************************************************
1756  *
1757  *  Media Ioctl callback
1758  *
1759  *  This routine is called whenever the user queries the status of
1760  *  the interface using ifconfig.
1761  *
1762  **********************************************************************/
1763 static void
1764 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1765 {
1766 	struct adapter *adapter = if_getsoftc(ifp);
1767 	u_char fiber_type = IFM_1000_SX;
1768 
1769 	INIT_DEBUGOUT("em_media_status: begin");
1770 
1771 	EM_CORE_LOCK(adapter);
1772 	em_update_link_status(adapter);
1773 
1774 	ifmr->ifm_status = IFM_AVALID;
1775 	ifmr->ifm_active = IFM_ETHER;
1776 
1777 	if (!adapter->link_active) {
1778 		EM_CORE_UNLOCK(adapter);
1779 		return;
1780 	}
1781 
1782 	ifmr->ifm_status |= IFM_ACTIVE;
1783 
1784 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1785 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1786 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1787 	} else {
1788 		switch (adapter->link_speed) {
1789 		case 10:
1790 			ifmr->ifm_active |= IFM_10_T;
1791 			break;
1792 		case 100:
1793 			ifmr->ifm_active |= IFM_100_TX;
1794 			break;
1795 		case 1000:
1796 			ifmr->ifm_active |= IFM_1000_T;
1797 			break;
1798 		}
1799 		if (adapter->link_duplex == FULL_DUPLEX)
1800 			ifmr->ifm_active |= IFM_FDX;
1801 		else
1802 			ifmr->ifm_active |= IFM_HDX;
1803 	}
1804 	EM_CORE_UNLOCK(adapter);
1805 }
1806 
1807 /*********************************************************************
1808  *
1809  *  Media Ioctl callback
1810  *
1811  *  This routine is called when the user changes speed/duplex using
1812  *  media/mediaopt options with ifconfig.
1813  *
1814  **********************************************************************/
1815 static int
1816 em_media_change(if_t ifp)
1817 {
1818 	struct adapter *adapter = if_getsoftc(ifp);
1819 	struct ifmedia  *ifm = &adapter->media;
1820 
1821 	INIT_DEBUGOUT("em_media_change: begin");
1822 
1823 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1824 		return (EINVAL);
1825 
1826 	EM_CORE_LOCK(adapter);
1827 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1828 	case IFM_AUTO:
1829 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1830 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1831 		break;
1832 	case IFM_1000_LX:
1833 	case IFM_1000_SX:
1834 	case IFM_1000_T:
1835 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1836 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1837 		break;
1838 	case IFM_100_TX:
1839 		adapter->hw.mac.autoneg = FALSE;
1840 		adapter->hw.phy.autoneg_advertised = 0;
1841 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1842 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1843 		else
1844 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1845 		break;
1846 	case IFM_10_T:
1847 		adapter->hw.mac.autoneg = FALSE;
1848 		adapter->hw.phy.autoneg_advertised = 0;
1849 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1850 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1851 		else
1852 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1853 		break;
1854 	default:
1855 		device_printf(adapter->dev, "Unsupported media type\n");
1856 	}
1857 
1858 	em_init_locked(adapter);
1859 	EM_CORE_UNLOCK(adapter);
1860 
1861 	return (0);
1862 }
1863 
1864 /*********************************************************************
1865  *
1866  *  This routine maps the mbufs to tx descriptors.
1867  *
1868  *  return 0 on success, positive on failure
1869  **********************************************************************/
1870 
1871 static int
1872 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1873 {
1874 	struct adapter		*adapter = txr->adapter;
1875 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1876 	bus_dmamap_t		map;
1877 	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
1878 	struct e1000_tx_desc	*ctxd = NULL;
1879 	struct mbuf		*m_head;
1880 	struct ether_header	*eh;
1881 	struct ip		*ip = NULL;
1882 	struct tcphdr		*tp = NULL;
1883 	u32			txd_upper = 0, txd_lower = 0;
1884 	int			ip_off, poff;
1885 	int			nsegs, i, j, first, last = 0;
1886 	int			error;
1887 	bool			do_tso, tso_desc, remap = TRUE;
1888 
1889 	m_head = *m_headp;
1890 	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1891 	tso_desc = FALSE;
1892 	ip_off = poff = 0;
1893 
1894 	/*
1895 	 * Intel recommends entire IP/TCP header length reside in a single
1896 	 * buffer. If multiple descriptors are used to describe the IP and
1897 	 * TCP header, each descriptor should describe one or more
1898 	 * complete headers; descriptors referencing only parts of headers
1899 	 * are not supported. If all layer headers are not coalesced into
1900 	 * a single buffer, each buffer should not cross a 4KB boundary,
1901 	 * or be larger than the maximum read request size.
1902 	 * Controller also requires modifing IP/TCP header to make TSO work
1903 	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1904 	 * IP/TCP header into a single buffer to meet the requirement of
1905 	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1906 	 * which also has similiar restrictions.
1907 	 */
1908 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1909 		if (do_tso || (m_head->m_next != NULL &&
1910 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1911 			if (M_WRITABLE(*m_headp) == 0) {
1912 				m_head = m_dup(*m_headp, M_NOWAIT);
1913 				m_freem(*m_headp);
1914 				if (m_head == NULL) {
1915 					*m_headp = NULL;
1916 					return (ENOBUFS);
1917 				}
1918 				*m_headp = m_head;
1919 			}
1920 		}
1921 		/*
1922 		 * XXX
1923 		 * Assume IPv4, we don't have TSO/checksum offload support
1924 		 * for IPv6 yet.
1925 		 */
1926 		ip_off = sizeof(struct ether_header);
1927 		if (m_head->m_len < ip_off) {
1928 			m_head = m_pullup(m_head, ip_off);
1929 			if (m_head == NULL) {
1930 				*m_headp = NULL;
1931 				return (ENOBUFS);
1932 			}
1933 		}
1934 		eh = mtod(m_head, struct ether_header *);
1935 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1936 			ip_off = sizeof(struct ether_vlan_header);
1937 			if (m_head->m_len < ip_off) {
1938 				m_head = m_pullup(m_head, ip_off);
1939 				if (m_head == NULL) {
1940 					*m_headp = NULL;
1941 					return (ENOBUFS);
1942 				}
1943 			}
1944 		}
1945 		if (m_head->m_len < ip_off + sizeof(struct ip)) {
1946 			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1947 			if (m_head == NULL) {
1948 				*m_headp = NULL;
1949 				return (ENOBUFS);
1950 			}
1951 		}
1952 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1953 		poff = ip_off + (ip->ip_hl << 2);
1954 
1955 		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1956 			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1957 				m_head = m_pullup(m_head, poff +
1958 				    sizeof(struct tcphdr));
1959 				if (m_head == NULL) {
1960 					*m_headp = NULL;
1961 					return (ENOBUFS);
1962 				}
1963 			}
1964 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1965 			/*
1966 			 * TSO workaround:
1967 			 *   pull 4 more bytes of data into it.
1968 			 */
1969 			if (m_head->m_len < poff + (tp->th_off << 2)) {
1970 				m_head = m_pullup(m_head, poff +
1971 				                 (tp->th_off << 2) +
1972 				                 TSO_WORKAROUND);
1973 				if (m_head == NULL) {
1974 					*m_headp = NULL;
1975 					return (ENOBUFS);
1976 				}
1977 			}
1978 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1979 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1980 			if (do_tso) {
1981 				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
1982 				                  (ip->ip_hl << 2) +
1983 				                  (tp->th_off << 2));
1984 				ip->ip_sum = 0;
1985 				/*
1986 				 * The pseudo TCP checksum does not include TCP
1987 				 * payload length so driver should recompute
1988 				 * the checksum here what hardware expect to
1989 				 * see. This is adherence of Microsoft's Large
1990 				 * Send specification.
1991 			 	*/
1992 				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1993 				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1994 			}
1995 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1996 			if (m_head->m_len < poff + sizeof(struct udphdr)) {
1997 				m_head = m_pullup(m_head, poff +
1998 				    sizeof(struct udphdr));
1999 				if (m_head == NULL) {
2000 					*m_headp = NULL;
2001 					return (ENOBUFS);
2002 				}
2003 			}
2004 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2005 		}
2006 		*m_headp = m_head;
2007 	}
2008 
2009 	/*
2010 	 * Map the packet for DMA
2011 	 *
2012 	 * Capture the first descriptor index,
2013 	 * this descriptor will have the index
2014 	 * of the EOP which is the only one that
2015 	 * now gets a DONE bit writeback.
2016 	 */
2017 	first = txr->next_avail_desc;
2018 	tx_buffer = &txr->tx_buffers[first];
2019 	tx_buffer_mapped = tx_buffer;
2020 	map = tx_buffer->map;
2021 
2022 retry:
2023 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2024 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2025 
2026 	/*
2027 	 * There are two types of errors we can (try) to handle:
2028 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2029 	 *   out of segments.  Defragment the mbuf chain and try again.
2030 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2031 	 *   at this point in time.  Defer sending and try again later.
2032 	 * All other errors, in particular EINVAL, are fatal and prevent the
2033 	 * mbuf chain from ever going through.  Drop it and report error.
2034 	 */
2035 	if (error == EFBIG && remap) {
2036 		struct mbuf *m;
2037 
2038 		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2039 		if (m == NULL) {
2040 			adapter->mbuf_defrag_failed++;
2041 			m_freem(*m_headp);
2042 			*m_headp = NULL;
2043 			return (ENOBUFS);
2044 		}
2045 		*m_headp = m;
2046 
2047 		/* Try it again, but only once */
2048 		remap = FALSE;
2049 		goto retry;
2050 	} else if (error != 0) {
2051 		adapter->no_tx_dma_setup++;
2052 		m_freem(*m_headp);
2053 		*m_headp = NULL;
2054 		return (error);
2055 	}
2056 
2057 	/*
2058 	 * TSO Hardware workaround, if this packet is not
2059 	 * TSO, and is only a single descriptor long, and
2060 	 * it follows a TSO burst, then we need to add a
2061 	 * sentinel descriptor to prevent premature writeback.
2062 	 */
2063 	if ((!do_tso) && (txr->tx_tso == TRUE)) {
2064 		if (nsegs == 1)
2065 			tso_desc = TRUE;
2066 		txr->tx_tso = FALSE;
2067 	}
2068 
2069         if (nsegs > (txr->tx_avail - EM_MAX_SCATTER)) {
2070                 txr->no_desc_avail++;
2071 		bus_dmamap_unload(txr->txtag, map);
2072 		return (ENOBUFS);
2073         }
2074 	m_head = *m_headp;
2075 
2076 	/* Do hardware assists */
2077 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2078 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2079 		    &txd_upper, &txd_lower);
2080 		/* we need to make a final sentinel transmit desc */
2081 		tso_desc = TRUE;
2082 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2083 		em_transmit_checksum_setup(txr, m_head,
2084 		    ip_off, ip, &txd_upper, &txd_lower);
2085 
2086 	if (m_head->m_flags & M_VLANTAG) {
2087 		/* Set the vlan id. */
2088 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2089                 /* Tell hardware to add tag */
2090                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2091         }
2092 
2093 	i = txr->next_avail_desc;
2094 
2095 	/* Set up our transmit descriptors */
2096 	for (j = 0; j < nsegs; j++) {
2097 		bus_size_t seg_len;
2098 		bus_addr_t seg_addr;
2099 
2100 		tx_buffer = &txr->tx_buffers[i];
2101 		ctxd = &txr->tx_base[i];
2102 		seg_addr = segs[j].ds_addr;
2103 		seg_len  = segs[j].ds_len;
2104 		/*
2105 		** TSO Workaround:
2106 		** If this is the last descriptor, we want to
2107 		** split it so we have a small final sentinel
2108 		*/
2109 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2110 			seg_len -= TSO_WORKAROUND;
2111 			ctxd->buffer_addr = htole64(seg_addr);
2112 			ctxd->lower.data = htole32(
2113 				adapter->txd_cmd | txd_lower | seg_len);
2114 			ctxd->upper.data = htole32(txd_upper);
2115 			if (++i == adapter->num_tx_desc)
2116 				i = 0;
2117 
2118 			/* Now make the sentinel */
2119 			txr->tx_avail--;
2120 			ctxd = &txr->tx_base[i];
2121 			tx_buffer = &txr->tx_buffers[i];
2122 			ctxd->buffer_addr =
2123 			    htole64(seg_addr + seg_len);
2124 			ctxd->lower.data = htole32(
2125 			adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2126 			ctxd->upper.data =
2127 			    htole32(txd_upper);
2128 			last = i;
2129 			if (++i == adapter->num_tx_desc)
2130 				i = 0;
2131 		} else {
2132 			ctxd->buffer_addr = htole64(seg_addr);
2133 			ctxd->lower.data = htole32(
2134 			adapter->txd_cmd | txd_lower | seg_len);
2135 			ctxd->upper.data = htole32(txd_upper);
2136 			last = i;
2137 			if (++i == adapter->num_tx_desc)
2138 				i = 0;
2139 		}
2140 		tx_buffer->m_head = NULL;
2141 		tx_buffer->next_eop = -1;
2142 	}
2143 
2144 	txr->next_avail_desc = i;
2145 	txr->tx_avail -= nsegs;
2146 
2147         tx_buffer->m_head = m_head;
2148 	/*
2149 	** Here we swap the map so the last descriptor,
2150 	** which gets the completion interrupt has the
2151 	** real map, and the first descriptor gets the
2152 	** unused map from this descriptor.
2153 	*/
2154 	tx_buffer_mapped->map = tx_buffer->map;
2155 	tx_buffer->map = map;
2156         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2157 
2158         /*
2159          * Last Descriptor of Packet
2160 	 * needs End Of Packet (EOP)
2161 	 * and Report Status (RS)
2162          */
2163         ctxd->lower.data |=
2164 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2165 	/*
2166 	 * Keep track in the first buffer which
2167 	 * descriptor will be written back
2168 	 */
2169 	tx_buffer = &txr->tx_buffers[first];
2170 	tx_buffer->next_eop = last;
2171 
2172 	/*
2173 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2174 	 * that this frame is available to transmit.
2175 	 */
2176 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2177 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2178 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2179 
2180 	return (0);
2181 }
2182 
2183 static void
2184 em_set_promisc(struct adapter *adapter)
2185 {
2186 	if_t ifp = adapter->ifp;
2187 	u32		reg_rctl;
2188 
2189 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2190 
2191 	if (if_getflags(ifp) & IFF_PROMISC) {
2192 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2193 		/* Turn this on if you want to see bad packets */
2194 		if (em_debug_sbp)
2195 			reg_rctl |= E1000_RCTL_SBP;
2196 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2197 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2198 		reg_rctl |= E1000_RCTL_MPE;
2199 		reg_rctl &= ~E1000_RCTL_UPE;
2200 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2201 	}
2202 }
2203 
2204 static void
2205 em_disable_promisc(struct adapter *adapter)
2206 {
2207 	if_t		ifp = adapter->ifp;
2208 	u32		reg_rctl;
2209 	int		mcnt = 0;
2210 
2211 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2212 	reg_rctl &=  (~E1000_RCTL_UPE);
2213 	if (if_getflags(ifp) & IFF_ALLMULTI)
2214 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2215 	else
2216 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2217 	/* Don't disable if in MAX groups */
2218 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2219 		reg_rctl &=  (~E1000_RCTL_MPE);
2220 	reg_rctl &=  (~E1000_RCTL_SBP);
2221 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2222 }
2223 
2224 
2225 /*********************************************************************
2226  *  Multicast Update
2227  *
2228  *  This routine is called whenever multicast address list is updated.
2229  *
2230  **********************************************************************/
2231 
2232 static void
2233 em_set_multi(struct adapter *adapter)
2234 {
2235 	if_t ifp = adapter->ifp;
2236 	u32 reg_rctl = 0;
2237 	u8  *mta; /* Multicast array memory */
2238 	int mcnt = 0;
2239 
2240 	IOCTL_DEBUGOUT("em_set_multi: begin");
2241 
2242 	mta = adapter->mta;
2243 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2244 
2245 	if (adapter->hw.mac.type == e1000_82542 &&
2246 	    adapter->hw.revision_id == E1000_REVISION_2) {
2247 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2248 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2249 			e1000_pci_clear_mwi(&adapter->hw);
2250 		reg_rctl |= E1000_RCTL_RST;
2251 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2252 		msec_delay(5);
2253 	}
2254 
2255 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2256 
2257 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2258 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2259 		reg_rctl |= E1000_RCTL_MPE;
2260 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2261 	} else
2262 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2263 
2264 	if (adapter->hw.mac.type == e1000_82542 &&
2265 	    adapter->hw.revision_id == E1000_REVISION_2) {
2266 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2267 		reg_rctl &= ~E1000_RCTL_RST;
2268 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2269 		msec_delay(5);
2270 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2271 			e1000_pci_set_mwi(&adapter->hw);
2272 	}
2273 }
2274 
2275 
2276 /*********************************************************************
2277  *  Timer routine
2278  *
2279  *  This routine checks for link status and updates statistics.
2280  *
2281  **********************************************************************/
2282 
2283 static void
2284 em_local_timer(void *arg)
2285 {
2286 	struct adapter	*adapter = arg;
2287 	if_t ifp = adapter->ifp;
2288 	struct tx_ring	*txr = adapter->tx_rings;
2289 	struct rx_ring	*rxr = adapter->rx_rings;
2290 	u32		trigger = 0;
2291 
2292 	EM_CORE_LOCK_ASSERT(adapter);
2293 
2294 	em_update_link_status(adapter);
2295 	em_update_stats_counters(adapter);
2296 
2297 	/* Reset LAA into RAR[0] on 82571 */
2298 	if ((adapter->hw.mac.type == e1000_82571) &&
2299 	    e1000_get_laa_state_82571(&adapter->hw))
2300 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2301 
2302 	/* Mask to use in the irq trigger */
2303 	if (adapter->msix_mem) {
2304 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2305 			trigger |= rxr->ims;
2306 		rxr = adapter->rx_rings;
2307 	} else
2308 		trigger = E1000_ICS_RXDMT0;
2309 
2310 	/*
2311 	** Check on the state of the TX queue(s), this
2312 	** can be done without the lock because its RO
2313 	** and the HUNG state will be static if set.
2314 	*/
2315 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2316 		if (txr->busy == EM_TX_HUNG)
2317 			goto hung;
2318 		if (txr->busy >= EM_TX_MAXTRIES)
2319 			txr->busy = EM_TX_HUNG;
2320 		/* Schedule a TX tasklet if needed */
2321 		if (txr->tx_avail <= EM_MAX_SCATTER)
2322 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2323 	}
2324 
2325 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2326 #ifndef DEVICE_POLLING
2327 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2328 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2329 #endif
2330 	return;
2331 hung:
2332 	/* Looks like we're hung */
2333 	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2334 			txr->me);
2335 	em_print_debug_info(adapter);
2336 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2337 	adapter->watchdog_events++;
2338 	em_init_locked(adapter);
2339 }
2340 
2341 
2342 static void
2343 em_update_link_status(struct adapter *adapter)
2344 {
2345 	struct e1000_hw *hw = &adapter->hw;
2346 	if_t ifp = adapter->ifp;
2347 	device_t dev = adapter->dev;
2348 	struct tx_ring *txr = adapter->tx_rings;
2349 	u32 link_check = 0;
2350 
2351 	/* Get the cached link value or read phy for real */
2352 	switch (hw->phy.media_type) {
2353 	case e1000_media_type_copper:
2354 		if (hw->mac.get_link_status) {
2355 			/* Do the work to read phy */
2356 			e1000_check_for_link(hw);
2357 			link_check = !hw->mac.get_link_status;
2358 			if (link_check) /* ESB2 fix */
2359 				e1000_cfg_on_link_up(hw);
2360 		} else
2361 			link_check = TRUE;
2362 		break;
2363 	case e1000_media_type_fiber:
2364 		e1000_check_for_link(hw);
2365 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2366                                  E1000_STATUS_LU);
2367 		break;
2368 	case e1000_media_type_internal_serdes:
2369 		e1000_check_for_link(hw);
2370 		link_check = adapter->hw.mac.serdes_has_link;
2371 		break;
2372 	default:
2373 	case e1000_media_type_unknown:
2374 		break;
2375 	}
2376 
2377 	/* Now check for a transition */
2378 	if (link_check && (adapter->link_active == 0)) {
2379 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2380 		    &adapter->link_duplex);
2381 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2382 		if ((adapter->link_speed != SPEED_1000) &&
2383 		    ((hw->mac.type == e1000_82571) ||
2384 		    (hw->mac.type == e1000_82572))) {
2385 			int tarc0;
2386 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2387 			tarc0 &= ~TARC_SPEED_MODE_BIT;
2388 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2389 		}
2390 		if (bootverbose)
2391 			device_printf(dev, "Link is up %d Mbps %s\n",
2392 			    adapter->link_speed,
2393 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2394 			    "Full Duplex" : "Half Duplex"));
2395 		adapter->link_active = 1;
2396 		adapter->smartspeed = 0;
2397 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2398 		if_link_state_change(ifp, LINK_STATE_UP);
2399 	} else if (!link_check && (adapter->link_active == 1)) {
2400 		if_setbaudrate(ifp, 0);
2401 		adapter->link_speed = 0;
2402 		adapter->link_duplex = 0;
2403 		if (bootverbose)
2404 			device_printf(dev, "Link is Down\n");
2405 		adapter->link_active = 0;
2406 		/* Link down, disable hang detection */
2407 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2408 			txr->busy = EM_TX_IDLE;
2409 		if_link_state_change(ifp, LINK_STATE_DOWN);
2410 	}
2411 }
2412 
2413 /*********************************************************************
2414  *
2415  *  This routine disables all traffic on the adapter by issuing a
2416  *  global reset on the MAC and deallocates TX/RX buffers.
2417  *
2418  *  This routine should always be called with BOTH the CORE
2419  *  and TX locks.
2420  **********************************************************************/
2421 
2422 static void
2423 em_stop(void *arg)
2424 {
2425 	struct adapter	*adapter = arg;
2426 	if_t ifp = adapter->ifp;
2427 	struct tx_ring	*txr = adapter->tx_rings;
2428 
2429 	EM_CORE_LOCK_ASSERT(adapter);
2430 
2431 	INIT_DEBUGOUT("em_stop: begin");
2432 
2433 	em_disable_intr(adapter);
2434 	callout_stop(&adapter->timer);
2435 
2436 	/* Tell the stack that the interface is no longer active */
2437 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2438 
2439         /* Disarm Hang Detection. */
2440 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2441 		EM_TX_LOCK(txr);
2442 		txr->busy = EM_TX_IDLE;
2443 		EM_TX_UNLOCK(txr);
2444 	}
2445 
2446 	e1000_reset_hw(&adapter->hw);
2447 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2448 
2449 	e1000_led_off(&adapter->hw);
2450 	e1000_cleanup_led(&adapter->hw);
2451 }
2452 
2453 
2454 /*********************************************************************
2455  *
2456  *  Determine hardware revision.
2457  *
2458  **********************************************************************/
2459 static void
2460 em_identify_hardware(struct adapter *adapter)
2461 {
2462 	device_t dev = adapter->dev;
2463 
2464 	/* Make sure our PCI config space has the necessary stuff set */
2465 	pci_enable_busmaster(dev);
2466 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2467 
2468 	/* Save off the information about this board */
2469 	adapter->hw.vendor_id = pci_get_vendor(dev);
2470 	adapter->hw.device_id = pci_get_device(dev);
2471 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2472 	adapter->hw.subsystem_vendor_id =
2473 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2474 	adapter->hw.subsystem_device_id =
2475 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2476 
2477 	/* Do Shared Code Init and Setup */
2478 	if (e1000_set_mac_type(&adapter->hw)) {
2479 		device_printf(dev, "Setup init failure\n");
2480 		return;
2481 	}
2482 }
2483 
2484 static int
2485 em_allocate_pci_resources(struct adapter *adapter)
2486 {
2487 	device_t	dev = adapter->dev;
2488 	int		rid;
2489 
2490 	rid = PCIR_BAR(0);
2491 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2492 	    &rid, RF_ACTIVE);
2493 	if (adapter->memory == NULL) {
2494 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2495 		return (ENXIO);
2496 	}
2497 	adapter->osdep.mem_bus_space_tag =
2498 	    rman_get_bustag(adapter->memory);
2499 	adapter->osdep.mem_bus_space_handle =
2500 	    rman_get_bushandle(adapter->memory);
2501 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2502 
2503 	adapter->hw.back = &adapter->osdep;
2504 
2505 	return (0);
2506 }
2507 
2508 /*********************************************************************
2509  *
2510  *  Setup the Legacy or MSI Interrupt handler
2511  *
2512  **********************************************************************/
2513 int
2514 em_allocate_legacy(struct adapter *adapter)
2515 {
2516 	device_t dev = adapter->dev;
2517 	struct tx_ring	*txr = adapter->tx_rings;
2518 	int error, rid = 0;
2519 
2520 	/* Manually turn off all interrupts */
2521 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2522 
2523 	if (adapter->msix == 1) /* using MSI */
2524 		rid = 1;
2525 	/* We allocate a single interrupt resource */
2526 	adapter->res = bus_alloc_resource_any(dev,
2527 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2528 	if (adapter->res == NULL) {
2529 		device_printf(dev, "Unable to allocate bus resource: "
2530 		    "interrupt\n");
2531 		return (ENXIO);
2532 	}
2533 
2534 	/*
2535 	 * Allocate a fast interrupt and the associated
2536 	 * deferred processing contexts.
2537 	 */
2538 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2539 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2540 	    taskqueue_thread_enqueue, &adapter->tq);
2541 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2542 	    device_get_nameunit(adapter->dev));
2543 	/* Use a TX only tasklet for local timer */
2544 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2545 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2546 	    taskqueue_thread_enqueue, &txr->tq);
2547 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2548 	    device_get_nameunit(adapter->dev));
2549 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2550 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2551 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2552 		device_printf(dev, "Failed to register fast interrupt "
2553 			    "handler: %d\n", error);
2554 		taskqueue_free(adapter->tq);
2555 		adapter->tq = NULL;
2556 		return (error);
2557 	}
2558 
2559 	return (0);
2560 }
2561 
2562 /*********************************************************************
2563  *
2564  *  Setup the MSIX Interrupt handlers
2565  *   This is not really Multiqueue, rather
2566  *   its just seperate interrupt vectors
2567  *   for TX, RX, and Link.
2568  *
2569  **********************************************************************/
2570 int
2571 em_allocate_msix(struct adapter *adapter)
2572 {
2573 	device_t	dev = adapter->dev;
2574 	struct		tx_ring *txr = adapter->tx_rings;
2575 	struct		rx_ring *rxr = adapter->rx_rings;
2576 	int		error, rid, vector = 0;
2577 	int		cpu_id = 0;
2578 
2579 
2580 	/* Make sure all interrupts are disabled */
2581 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2582 
2583 	/* First set up ring resources */
2584 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2585 
2586 		/* RX ring */
2587 		rid = vector + 1;
2588 
2589 		rxr->res = bus_alloc_resource_any(dev,
2590 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2591 		if (rxr->res == NULL) {
2592 			device_printf(dev,
2593 			    "Unable to allocate bus resource: "
2594 			    "RX MSIX Interrupt %d\n", i);
2595 			return (ENXIO);
2596 		}
2597 		if ((error = bus_setup_intr(dev, rxr->res,
2598 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2599 		    rxr, &rxr->tag)) != 0) {
2600 			device_printf(dev, "Failed to register RX handler");
2601 			return (error);
2602 		}
2603 #if __FreeBSD_version >= 800504
2604 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2605 #endif
2606 		rxr->msix = vector;
2607 
2608 		if (em_last_bind_cpu < 0)
2609 			em_last_bind_cpu = CPU_FIRST();
2610 		cpu_id = em_last_bind_cpu;
2611 		bus_bind_intr(dev, rxr->res, cpu_id);
2612 
2613 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2614 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2615 		    taskqueue_thread_enqueue, &rxr->tq);
2616 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2617 		    device_get_nameunit(adapter->dev), cpu_id);
2618 		/*
2619 		** Set the bit to enable interrupt
2620 		** in E1000_IMS -- bits 20 and 21
2621 		** are for RX0 and RX1, note this has
2622 		** NOTHING to do with the MSIX vector
2623 		*/
2624 		rxr->ims = 1 << (20 + i);
2625 		adapter->ims |= rxr->ims;
2626 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2627 
2628 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2629 	}
2630 
2631 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2632 		/* TX ring */
2633 		rid = vector + 1;
2634 		txr->res = bus_alloc_resource_any(dev,
2635 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2636 		if (txr->res == NULL) {
2637 			device_printf(dev,
2638 			    "Unable to allocate bus resource: "
2639 			    "TX MSIX Interrupt %d\n", i);
2640 			return (ENXIO);
2641 		}
2642 		if ((error = bus_setup_intr(dev, txr->res,
2643 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2644 		    txr, &txr->tag)) != 0) {
2645 			device_printf(dev, "Failed to register TX handler");
2646 			return (error);
2647 		}
2648 #if __FreeBSD_version >= 800504
2649 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2650 #endif
2651 		txr->msix = vector;
2652 
2653                 if (em_last_bind_cpu < 0)
2654                         em_last_bind_cpu = CPU_FIRST();
2655                 cpu_id = em_last_bind_cpu;
2656                 bus_bind_intr(dev, txr->res, cpu_id);
2657 
2658 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2659 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2660 		    taskqueue_thread_enqueue, &txr->tq);
2661 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2662 		    device_get_nameunit(adapter->dev), cpu_id);
2663 		/*
2664 		** Set the bit to enable interrupt
2665 		** in E1000_IMS -- bits 22 and 23
2666 		** are for TX0 and TX1, note this has
2667 		** NOTHING to do with the MSIX vector
2668 		*/
2669 		txr->ims = 1 << (22 + i);
2670 		adapter->ims |= txr->ims;
2671 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2672 
2673 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2674 	}
2675 
2676 	/* Link interrupt */
2677 	rid = vector + 1;
2678 	adapter->res = bus_alloc_resource_any(dev,
2679 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2680 	if (!adapter->res) {
2681 		device_printf(dev,"Unable to allocate "
2682 		    "bus resource: Link interrupt [%d]\n", rid);
2683 		return (ENXIO);
2684         }
2685 	/* Set the link handler function */
2686 	error = bus_setup_intr(dev, adapter->res,
2687 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2688 	    em_msix_link, adapter, &adapter->tag);
2689 	if (error) {
2690 		adapter->res = NULL;
2691 		device_printf(dev, "Failed to register LINK handler");
2692 		return (error);
2693 	}
2694 #if __FreeBSD_version >= 800504
2695 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2696 #endif
2697 	adapter->linkvec = vector;
2698 	adapter->ivars |=  (8 | vector) << 16;
2699 	adapter->ivars |= 0x80000000;
2700 
2701 	return (0);
2702 }
2703 
2704 
2705 static void
2706 em_free_pci_resources(struct adapter *adapter)
2707 {
2708 	device_t	dev = adapter->dev;
2709 	struct tx_ring	*txr;
2710 	struct rx_ring	*rxr;
2711 	int		rid;
2712 
2713 
2714 	/*
2715 	** Release all the queue interrupt resources:
2716 	*/
2717 	for (int i = 0; i < adapter->num_queues; i++) {
2718 		txr = &adapter->tx_rings[i];
2719 		/* an early abort? */
2720 		if (txr == NULL)
2721 			break;
2722 		rid = txr->msix +1;
2723 		if (txr->tag != NULL) {
2724 			bus_teardown_intr(dev, txr->res, txr->tag);
2725 			txr->tag = NULL;
2726 		}
2727 		if (txr->res != NULL)
2728 			bus_release_resource(dev, SYS_RES_IRQ,
2729 			    rid, txr->res);
2730 
2731 		rxr = &adapter->rx_rings[i];
2732 		/* an early abort? */
2733 		if (rxr == NULL)
2734 			break;
2735 		rid = rxr->msix +1;
2736 		if (rxr->tag != NULL) {
2737 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2738 			rxr->tag = NULL;
2739 		}
2740 		if (rxr->res != NULL)
2741 			bus_release_resource(dev, SYS_RES_IRQ,
2742 			    rid, rxr->res);
2743 	}
2744 
2745         if (adapter->linkvec) /* we are doing MSIX */
2746                 rid = adapter->linkvec + 1;
2747         else
2748                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2749 
2750 	if (adapter->tag != NULL) {
2751 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2752 		adapter->tag = NULL;
2753 	}
2754 
2755 	if (adapter->res != NULL)
2756 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2757 
2758 
2759 	if (adapter->msix)
2760 		pci_release_msi(dev);
2761 
2762 	if (adapter->msix_mem != NULL)
2763 		bus_release_resource(dev, SYS_RES_MEMORY,
2764 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2765 
2766 	if (adapter->memory != NULL)
2767 		bus_release_resource(dev, SYS_RES_MEMORY,
2768 		    PCIR_BAR(0), adapter->memory);
2769 
2770 	if (adapter->flash != NULL)
2771 		bus_release_resource(dev, SYS_RES_MEMORY,
2772 		    EM_FLASH, adapter->flash);
2773 }
2774 
2775 /*
2776  * Setup MSI or MSI/X
2777  */
2778 static int
2779 em_setup_msix(struct adapter *adapter)
2780 {
2781 	device_t dev = adapter->dev;
2782 	int val;
2783 
2784 	/* Nearly always going to use one queue */
2785 	adapter->num_queues = 1;
2786 
2787 	/*
2788 	** Try using MSI-X for Hartwell adapters
2789 	*/
2790 	if ((adapter->hw.mac.type == e1000_82574) &&
2791 	    (em_enable_msix == TRUE)) {
2792 #ifdef EM_MULTIQUEUE
2793 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2794 		if (adapter->num_queues > 1)
2795 			em_enable_vectors_82574(adapter);
2796 #endif
2797 		/* Map the MSIX BAR */
2798 		int rid = PCIR_BAR(EM_MSIX_BAR);
2799 		adapter->msix_mem = bus_alloc_resource_any(dev,
2800 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2801        		if (adapter->msix_mem == NULL) {
2802 			/* May not be enabled */
2803                		device_printf(adapter->dev,
2804 			    "Unable to map MSIX table \n");
2805 			goto msi;
2806        		}
2807 		val = pci_msix_count(dev);
2808 
2809 #ifdef EM_MULTIQUEUE
2810 		/* We need 5 vectors in the multiqueue case */
2811 		if (adapter->num_queues > 1 ) {
2812 			if (val >= 5)
2813 				val = 5;
2814 			else {
2815 				adapter->num_queues = 1;
2816 				device_printf(adapter->dev,
2817 				    "Insufficient MSIX vectors for >1 queue, "
2818 				    "using single queue...\n");
2819 				goto msix_one;
2820 			}
2821 		} else {
2822 msix_one:
2823 #endif
2824 			if (val >= 3)
2825 				val = 3;
2826 			else {
2827 				device_printf(adapter->dev,
2828 			    	"Insufficient MSIX vectors, using MSI\n");
2829 				goto msi;
2830 			}
2831 #ifdef EM_MULTIQUEUE
2832 		}
2833 #endif
2834 
2835 		if ((pci_alloc_msix(dev, &val) == 0)) {
2836 			device_printf(adapter->dev,
2837 			    "Using MSIX interrupts "
2838 			    "with %d vectors\n", val);
2839 			return (val);
2840 		}
2841 
2842 		/*
2843 		** If MSIX alloc failed or provided us with
2844 		** less than needed, free and fall through to MSI
2845 		*/
2846 		pci_release_msi(dev);
2847 	}
2848 msi:
2849 	if (adapter->msix_mem != NULL) {
2850 		bus_release_resource(dev, SYS_RES_MEMORY,
2851 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2852 		adapter->msix_mem = NULL;
2853 	}
2854        	val = 1;
2855        	if (pci_alloc_msi(dev, &val) == 0) {
2856                	device_printf(adapter->dev, "Using an MSI interrupt\n");
2857 		return (val);
2858 	}
2859 	/* Should only happen due to manual configuration */
2860 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2861 	return (0);
2862 }
2863 
2864 
2865 /*********************************************************************
2866  *
2867  *  Initialize the hardware to a configuration
2868  *  as specified by the adapter structure.
2869  *
2870  **********************************************************************/
2871 static void
2872 em_reset(struct adapter *adapter)
2873 {
2874 	device_t	dev = adapter->dev;
2875 	if_t ifp = adapter->ifp;
2876 	struct e1000_hw	*hw = &adapter->hw;
2877 	u16		rx_buffer_size;
2878 	u32		pba;
2879 
2880 	INIT_DEBUGOUT("em_reset: begin");
2881 
2882 	/* Set up smart power down as default off on newer adapters. */
2883 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2884 	    hw->mac.type == e1000_82572)) {
2885 		u16 phy_tmp = 0;
2886 
2887 		/* Speed up time to link by disabling smart power down. */
2888 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2889 		phy_tmp &= ~IGP02E1000_PM_SPD;
2890 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2891 	}
2892 
2893 	/*
2894 	 * Packet Buffer Allocation (PBA)
2895 	 * Writing PBA sets the receive portion of the buffer
2896 	 * the remainder is used for the transmit buffer.
2897 	 */
2898 	switch (hw->mac.type) {
2899 	/* Total Packet Buffer on these is 48K */
2900 	case e1000_82571:
2901 	case e1000_82572:
2902 	case e1000_80003es2lan:
2903 			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2904 		break;
2905 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2906 			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2907 		break;
2908 	case e1000_82574:
2909 	case e1000_82583:
2910 			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2911 		break;
2912 	case e1000_ich8lan:
2913 		pba = E1000_PBA_8K;
2914 		break;
2915 	case e1000_ich9lan:
2916 	case e1000_ich10lan:
2917 		/* Boost Receive side for jumbo frames */
2918 		if (adapter->hw.mac.max_frame_size > 4096)
2919 			pba = E1000_PBA_14K;
2920 		else
2921 			pba = E1000_PBA_10K;
2922 		break;
2923 	case e1000_pchlan:
2924 	case e1000_pch2lan:
2925 	case e1000_pch_lpt:
2926 		pba = E1000_PBA_26K;
2927 		break;
2928 	default:
2929 		if (adapter->hw.mac.max_frame_size > 8192)
2930 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2931 		else
2932 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2933 	}
2934 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2935 
2936 	/*
2937 	 * These parameters control the automatic generation (Tx) and
2938 	 * response (Rx) to Ethernet PAUSE frames.
2939 	 * - High water mark should allow for at least two frames to be
2940 	 *   received after sending an XOFF.
2941 	 * - Low water mark works best when it is very near the high water mark.
2942 	 *   This allows the receiver to restart by sending XON when it has
2943 	 *   drained a bit. Here we use an arbitary value of 1500 which will
2944 	 *   restart after one full frame is pulled from the buffer. There
2945 	 *   could be several smaller frames in the buffer and if so they will
2946 	 *   not trigger the XON until their total number reduces the buffer
2947 	 *   by 1500.
2948 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2949 	 */
2950 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2951 	hw->fc.high_water = rx_buffer_size -
2952 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2953 	hw->fc.low_water = hw->fc.high_water - 1500;
2954 
2955 	if (adapter->fc) /* locally set flow control value? */
2956 		hw->fc.requested_mode = adapter->fc;
2957 	else
2958 		hw->fc.requested_mode = e1000_fc_full;
2959 
2960 	if (hw->mac.type == e1000_80003es2lan)
2961 		hw->fc.pause_time = 0xFFFF;
2962 	else
2963 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2964 
2965 	hw->fc.send_xon = TRUE;
2966 
2967 	/* Device specific overrides/settings */
2968 	switch (hw->mac.type) {
2969 	case e1000_pchlan:
2970 		/* Workaround: no TX flow ctrl for PCH */
2971                 hw->fc.requested_mode = e1000_fc_rx_pause;
2972 		hw->fc.pause_time = 0xFFFF; /* override */
2973 		if (if_getmtu(ifp) > ETHERMTU) {
2974 			hw->fc.high_water = 0x3500;
2975 			hw->fc.low_water = 0x1500;
2976 		} else {
2977 			hw->fc.high_water = 0x5000;
2978 			hw->fc.low_water = 0x3000;
2979 		}
2980 		hw->fc.refresh_time = 0x1000;
2981 		break;
2982 	case e1000_pch2lan:
2983 	case e1000_pch_lpt:
2984 		hw->fc.high_water = 0x5C20;
2985 		hw->fc.low_water = 0x5048;
2986 		hw->fc.pause_time = 0x0650;
2987 		hw->fc.refresh_time = 0x0400;
2988 		/* Jumbos need adjusted PBA */
2989 		if (if_getmtu(ifp) > ETHERMTU)
2990 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2991 		else
2992 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2993 		break;
2994         case e1000_ich9lan:
2995         case e1000_ich10lan:
2996 		if (if_getmtu(ifp) > ETHERMTU) {
2997 			hw->fc.high_water = 0x2800;
2998 			hw->fc.low_water = hw->fc.high_water - 8;
2999 			break;
3000 		}
3001 		/* else fall thru */
3002 	default:
3003 		if (hw->mac.type == e1000_80003es2lan)
3004 			hw->fc.pause_time = 0xFFFF;
3005 		break;
3006 	}
3007 
3008 	/* Issue a global reset */
3009 	e1000_reset_hw(hw);
3010 	E1000_WRITE_REG(hw, E1000_WUC, 0);
3011 	em_disable_aspm(adapter);
3012 	/* and a re-init */
3013 	if (e1000_init_hw(hw) < 0) {
3014 		device_printf(dev, "Hardware Initialization Failed\n");
3015 		return;
3016 	}
3017 
3018 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3019 	e1000_get_phy_info(hw);
3020 	e1000_check_for_link(hw);
3021 	return;
3022 }
3023 
3024 /*********************************************************************
3025  *
3026  *  Setup networking device structure and register an interface.
3027  *
3028  **********************************************************************/
3029 static int
3030 em_setup_interface(device_t dev, struct adapter *adapter)
3031 {
3032 	if_t ifp;
3033 
3034 	INIT_DEBUGOUT("em_setup_interface: begin");
3035 
3036 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3037 	if (ifp == 0) {
3038 		device_printf(dev, "can not allocate ifnet structure\n");
3039 		return (-1);
3040 	}
3041 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3042 	if_setdev(ifp, dev);
3043 	if_setinitfn(ifp, em_init);
3044 	if_setsoftc(ifp, adapter);
3045 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3046 	if_setioctlfn(ifp, em_ioctl);
3047 	if_setgetcounterfn(ifp, em_get_counter);
3048 	/* TSO parameters */
3049 	ifp->if_hw_tsomax = IP_MAXPACKET;
3050 	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER;
3051 	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3052 
3053 #ifdef EM_MULTIQUEUE
3054 	/* Multiqueue stack interface */
3055 	if_settransmitfn(ifp, em_mq_start);
3056 	if_setqflushfn(ifp, em_qflush);
3057 #else
3058 	if_setstartfn(ifp, em_start);
3059 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3060 	if_setsendqready(ifp);
3061 #endif
3062 
3063 	ether_ifattach(ifp, adapter->hw.mac.addr);
3064 
3065 	if_setcapabilities(ifp, 0);
3066 	if_setcapenable(ifp, 0);
3067 
3068 
3069 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3070 	    IFCAP_TSO4, 0);
3071 	/*
3072 	 * Tell the upper layer(s) we
3073 	 * support full VLAN capability
3074 	 */
3075 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3076 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3077 	    IFCAP_VLAN_MTU, 0);
3078 	if_setcapenable(ifp, if_getcapabilities(ifp));
3079 
3080 	/*
3081 	** Don't turn this on by default, if vlans are
3082 	** created on another pseudo device (eg. lagg)
3083 	** then vlan events are not passed thru, breaking
3084 	** operation, but with HW FILTER off it works. If
3085 	** using vlans directly on the em driver you can
3086 	** enable this and get full hardware tag filtering.
3087 	*/
3088 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3089 
3090 #ifdef DEVICE_POLLING
3091 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3092 #endif
3093 
3094 	/* Enable only WOL MAGIC by default */
3095 	if (adapter->wol) {
3096 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3097 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3098 	}
3099 
3100 	/*
3101 	 * Specify the media types supported by this adapter and register
3102 	 * callbacks to update media and link information
3103 	 */
3104 	ifmedia_init(&adapter->media, IFM_IMASK,
3105 	    em_media_change, em_media_status);
3106 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3107 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3108 		u_char fiber_type = IFM_1000_SX;	/* default type */
3109 
3110 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3111 			    0, NULL);
3112 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3113 	} else {
3114 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3115 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3116 			    0, NULL);
3117 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3118 			    0, NULL);
3119 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3120 			    0, NULL);
3121 		if (adapter->hw.phy.type != e1000_phy_ife) {
3122 			ifmedia_add(&adapter->media,
3123 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3124 			ifmedia_add(&adapter->media,
3125 				IFM_ETHER | IFM_1000_T, 0, NULL);
3126 		}
3127 	}
3128 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3129 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3130 	return (0);
3131 }
3132 
3133 
3134 /*
3135  * Manage DMA'able memory.
3136  */
3137 static void
3138 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3139 {
3140 	if (error)
3141 		return;
3142 	*(bus_addr_t *) arg = segs[0].ds_addr;
3143 }
3144 
3145 static int
3146 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3147         struct em_dma_alloc *dma, int mapflags)
3148 {
3149 	int error;
3150 
3151 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3152 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3153 				BUS_SPACE_MAXADDR,	/* lowaddr */
3154 				BUS_SPACE_MAXADDR,	/* highaddr */
3155 				NULL, NULL,		/* filter, filterarg */
3156 				size,			/* maxsize */
3157 				1,			/* nsegments */
3158 				size,			/* maxsegsize */
3159 				0,			/* flags */
3160 				NULL,			/* lockfunc */
3161 				NULL,			/* lockarg */
3162 				&dma->dma_tag);
3163 	if (error) {
3164 		device_printf(adapter->dev,
3165 		    "%s: bus_dma_tag_create failed: %d\n",
3166 		    __func__, error);
3167 		goto fail_0;
3168 	}
3169 
3170 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3171 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3172 	if (error) {
3173 		device_printf(adapter->dev,
3174 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3175 		    __func__, (uintmax_t)size, error);
3176 		goto fail_2;
3177 	}
3178 
3179 	dma->dma_paddr = 0;
3180 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3181 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3182 	if (error || dma->dma_paddr == 0) {
3183 		device_printf(adapter->dev,
3184 		    "%s: bus_dmamap_load failed: %d\n",
3185 		    __func__, error);
3186 		goto fail_3;
3187 	}
3188 
3189 	return (0);
3190 
3191 fail_3:
3192 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3193 fail_2:
3194 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3195 	bus_dma_tag_destroy(dma->dma_tag);
3196 fail_0:
3197 	dma->dma_tag = NULL;
3198 
3199 	return (error);
3200 }
3201 
3202 static void
3203 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3204 {
3205 	if (dma->dma_tag == NULL)
3206 		return;
3207 	if (dma->dma_paddr != 0) {
3208 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3209 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3210 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3211 		dma->dma_paddr = 0;
3212 	}
3213 	if (dma->dma_vaddr != NULL) {
3214 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3215 		dma->dma_vaddr = NULL;
3216 	}
3217 	bus_dma_tag_destroy(dma->dma_tag);
3218 	dma->dma_tag = NULL;
3219 }
3220 
3221 
3222 /*********************************************************************
3223  *
3224  *  Allocate memory for the transmit and receive rings, and then
3225  *  the descriptors associated with each, called only once at attach.
3226  *
3227  **********************************************************************/
3228 static int
3229 em_allocate_queues(struct adapter *adapter)
3230 {
3231 	device_t		dev = adapter->dev;
3232 	struct tx_ring		*txr = NULL;
3233 	struct rx_ring		*rxr = NULL;
3234 	int rsize, tsize, error = E1000_SUCCESS;
3235 	int txconf = 0, rxconf = 0;
3236 
3237 
3238 	/* Allocate the TX ring struct memory */
3239 	if (!(adapter->tx_rings =
3240 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3241 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3242 		device_printf(dev, "Unable to allocate TX ring memory\n");
3243 		error = ENOMEM;
3244 		goto fail;
3245 	}
3246 
3247 	/* Now allocate the RX */
3248 	if (!(adapter->rx_rings =
3249 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3250 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3251 		device_printf(dev, "Unable to allocate RX ring memory\n");
3252 		error = ENOMEM;
3253 		goto rx_fail;
3254 	}
3255 
3256 	tsize = roundup2(adapter->num_tx_desc *
3257 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3258 	/*
3259 	 * Now set up the TX queues, txconf is needed to handle the
3260 	 * possibility that things fail midcourse and we need to
3261 	 * undo memory gracefully
3262 	 */
3263 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3264 		/* Set up some basics */
3265 		txr = &adapter->tx_rings[i];
3266 		txr->adapter = adapter;
3267 		txr->me = i;
3268 
3269 		/* Initialize the TX lock */
3270 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3271 		    device_get_nameunit(dev), txr->me);
3272 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3273 
3274 		if (em_dma_malloc(adapter, tsize,
3275 			&txr->txdma, BUS_DMA_NOWAIT)) {
3276 			device_printf(dev,
3277 			    "Unable to allocate TX Descriptor memory\n");
3278 			error = ENOMEM;
3279 			goto err_tx_desc;
3280 		}
3281 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3282 		bzero((void *)txr->tx_base, tsize);
3283 
3284         	if (em_allocate_transmit_buffers(txr)) {
3285 			device_printf(dev,
3286 			    "Critical Failure setting up transmit buffers\n");
3287 			error = ENOMEM;
3288 			goto err_tx_desc;
3289         	}
3290 #if __FreeBSD_version >= 800000
3291 		/* Allocate a buf ring */
3292 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3293 		    M_WAITOK, &txr->tx_mtx);
3294 #endif
3295 	}
3296 
3297 	/*
3298 	 * Next the RX queues...
3299 	 */
3300 	rsize = roundup2(adapter->num_rx_desc *
3301 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3302 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3303 		rxr = &adapter->rx_rings[i];
3304 		rxr->adapter = adapter;
3305 		rxr->me = i;
3306 
3307 		/* Initialize the RX lock */
3308 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3309 		    device_get_nameunit(dev), txr->me);
3310 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3311 
3312 		if (em_dma_malloc(adapter, rsize,
3313 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3314 			device_printf(dev,
3315 			    "Unable to allocate RxDescriptor memory\n");
3316 			error = ENOMEM;
3317 			goto err_rx_desc;
3318 		}
3319 		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3320 		bzero((void *)rxr->rx_base, rsize);
3321 
3322         	/* Allocate receive buffers for the ring*/
3323 		if (em_allocate_receive_buffers(rxr)) {
3324 			device_printf(dev,
3325 			    "Critical Failure setting up receive buffers\n");
3326 			error = ENOMEM;
3327 			goto err_rx_desc;
3328 		}
3329 	}
3330 
3331 	return (0);
3332 
3333 err_rx_desc:
3334 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3335 		em_dma_free(adapter, &rxr->rxdma);
3336 err_tx_desc:
3337 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3338 		em_dma_free(adapter, &txr->txdma);
3339 	free(adapter->rx_rings, M_DEVBUF);
3340 rx_fail:
3341 #if __FreeBSD_version >= 800000
3342 	buf_ring_free(txr->br, M_DEVBUF);
3343 #endif
3344 	free(adapter->tx_rings, M_DEVBUF);
3345 fail:
3346 	return (error);
3347 }
3348 
3349 
3350 /*********************************************************************
3351  *
3352  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3353  *  the information needed to transmit a packet on the wire. This is
3354  *  called only once at attach, setup is done every reset.
3355  *
3356  **********************************************************************/
3357 static int
3358 em_allocate_transmit_buffers(struct tx_ring *txr)
3359 {
3360 	struct adapter *adapter = txr->adapter;
3361 	device_t dev = adapter->dev;
3362 	struct em_txbuffer *txbuf;
3363 	int error, i;
3364 
3365 	/*
3366 	 * Setup DMA descriptor areas.
3367 	 */
3368 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3369 			       1, 0,			/* alignment, bounds */
3370 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3371 			       BUS_SPACE_MAXADDR,	/* highaddr */
3372 			       NULL, NULL,		/* filter, filterarg */
3373 			       EM_TSO_SIZE,		/* maxsize */
3374 			       EM_MAX_SCATTER,		/* nsegments */
3375 			       PAGE_SIZE,		/* maxsegsize */
3376 			       0,			/* flags */
3377 			       NULL,			/* lockfunc */
3378 			       NULL,			/* lockfuncarg */
3379 			       &txr->txtag))) {
3380 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3381 		goto fail;
3382 	}
3383 
3384 	if (!(txr->tx_buffers =
3385 	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3386 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3387 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3388 		error = ENOMEM;
3389 		goto fail;
3390 	}
3391 
3392         /* Create the descriptor buffer dma maps */
3393 	txbuf = txr->tx_buffers;
3394 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3395 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3396 		if (error != 0) {
3397 			device_printf(dev, "Unable to create TX DMA map\n");
3398 			goto fail;
3399 		}
3400 	}
3401 
3402 	return 0;
3403 fail:
3404 	/* We free all, it handles case where we are in the middle */
3405 	em_free_transmit_structures(adapter);
3406 	return (error);
3407 }
3408 
3409 /*********************************************************************
3410  *
3411  *  Initialize a transmit ring.
3412  *
3413  **********************************************************************/
3414 static void
3415 em_setup_transmit_ring(struct tx_ring *txr)
3416 {
3417 	struct adapter *adapter = txr->adapter;
3418 	struct em_txbuffer *txbuf;
3419 	int i;
3420 #ifdef DEV_NETMAP
3421 	struct netmap_slot *slot;
3422 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3423 #endif /* DEV_NETMAP */
3424 
3425 	/* Clear the old descriptor contents */
3426 	EM_TX_LOCK(txr);
3427 #ifdef DEV_NETMAP
3428 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3429 #endif /* DEV_NETMAP */
3430 
3431 	bzero((void *)txr->tx_base,
3432 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3433 	/* Reset indices */
3434 	txr->next_avail_desc = 0;
3435 	txr->next_to_clean = 0;
3436 
3437 	/* Free any existing tx buffers. */
3438         txbuf = txr->tx_buffers;
3439 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3440 		if (txbuf->m_head != NULL) {
3441 			bus_dmamap_sync(txr->txtag, txbuf->map,
3442 			    BUS_DMASYNC_POSTWRITE);
3443 			bus_dmamap_unload(txr->txtag, txbuf->map);
3444 			m_freem(txbuf->m_head);
3445 			txbuf->m_head = NULL;
3446 		}
3447 #ifdef DEV_NETMAP
3448 		if (slot) {
3449 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3450 			uint64_t paddr;
3451 			void *addr;
3452 
3453 			addr = PNMB(na, slot + si, &paddr);
3454 			txr->tx_base[i].buffer_addr = htole64(paddr);
3455 			/* reload the map for netmap mode */
3456 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3457 		}
3458 #endif /* DEV_NETMAP */
3459 
3460 		/* clear the watch index */
3461 		txbuf->next_eop = -1;
3462         }
3463 
3464 	/* Set number of descriptors available */
3465 	txr->tx_avail = adapter->num_tx_desc;
3466 	txr->busy = EM_TX_IDLE;
3467 
3468 	/* Clear checksum offload context. */
3469 	txr->last_hw_offload = 0;
3470 	txr->last_hw_ipcss = 0;
3471 	txr->last_hw_ipcso = 0;
3472 	txr->last_hw_tucss = 0;
3473 	txr->last_hw_tucso = 0;
3474 
3475 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3476 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3477 	EM_TX_UNLOCK(txr);
3478 }
3479 
3480 /*********************************************************************
3481  *
3482  *  Initialize all transmit rings.
3483  *
3484  **********************************************************************/
3485 static void
3486 em_setup_transmit_structures(struct adapter *adapter)
3487 {
3488 	struct tx_ring *txr = adapter->tx_rings;
3489 
3490 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3491 		em_setup_transmit_ring(txr);
3492 
3493 	return;
3494 }
3495 
3496 /*********************************************************************
3497  *
3498  *  Enable transmit unit.
3499  *
3500  **********************************************************************/
3501 static void
3502 em_initialize_transmit_unit(struct adapter *adapter)
3503 {
3504 	struct tx_ring	*txr = adapter->tx_rings;
3505 	struct e1000_hw	*hw = &adapter->hw;
3506 	u32	tctl, txdctl = 0, tarc, tipg = 0;
3507 
3508 	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3509 
3510 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3511 		u64 bus_addr = txr->txdma.dma_paddr;
3512 		/* Base and Len of TX Ring */
3513 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3514 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3515 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3516 	    	    (u32)(bus_addr >> 32));
3517 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3518 	    	    (u32)bus_addr);
3519 		/* Init the HEAD/TAIL indices */
3520 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3521 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3522 
3523 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3524 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3525 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3526 
3527 		txr->busy = EM_TX_IDLE;
3528 		txdctl = 0; /* clear txdctl */
3529                 txdctl |= 0x1f; /* PTHRESH */
3530                 txdctl |= 1 << 8; /* HTHRESH */
3531                 txdctl |= 1 << 16;/* WTHRESH */
3532 		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3533 		txdctl |= E1000_TXDCTL_GRAN;
3534                 txdctl |= 1 << 25; /* LWTHRESH */
3535 
3536                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3537 	}
3538 
3539 	/* Set the default values for the Tx Inter Packet Gap timer */
3540 	switch (adapter->hw.mac.type) {
3541 	case e1000_80003es2lan:
3542 		tipg = DEFAULT_82543_TIPG_IPGR1;
3543 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3544 		    E1000_TIPG_IPGR2_SHIFT;
3545 		break;
3546 	default:
3547 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3548 		    (adapter->hw.phy.media_type ==
3549 		    e1000_media_type_internal_serdes))
3550 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3551 		else
3552 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3553 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3554 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3555 	}
3556 
3557 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3558 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3559 
3560 	if(adapter->hw.mac.type >= e1000_82540)
3561 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3562 		    adapter->tx_abs_int_delay.value);
3563 
3564 	if ((adapter->hw.mac.type == e1000_82571) ||
3565 	    (adapter->hw.mac.type == e1000_82572)) {
3566 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3567 		tarc |= TARC_SPEED_MODE_BIT;
3568 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3569 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3570 		/* errata: program both queues to unweighted RR */
3571 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3572 		tarc |= 1;
3573 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3574 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3575 		tarc |= 1;
3576 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3577 	} else if (adapter->hw.mac.type == e1000_82574) {
3578 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3579 		tarc |= TARC_ERRATA_BIT;
3580 		if ( adapter->num_queues > 1) {
3581 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3582 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3583 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3584 		} else
3585 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3586 	}
3587 
3588 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3589 	if (adapter->tx_int_delay.value > 0)
3590 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3591 
3592 	/* Program the Transmit Control Register */
3593 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3594 	tctl &= ~E1000_TCTL_CT;
3595 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3596 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3597 
3598 	if (adapter->hw.mac.type >= e1000_82571)
3599 		tctl |= E1000_TCTL_MULR;
3600 
3601 	/* This write will effectively turn on the transmit unit. */
3602 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3603 
3604 }
3605 
3606 
3607 /*********************************************************************
3608  *
3609  *  Free all transmit rings.
3610  *
3611  **********************************************************************/
3612 static void
3613 em_free_transmit_structures(struct adapter *adapter)
3614 {
3615 	struct tx_ring *txr = adapter->tx_rings;
3616 
3617 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3618 		EM_TX_LOCK(txr);
3619 		em_free_transmit_buffers(txr);
3620 		em_dma_free(adapter, &txr->txdma);
3621 		EM_TX_UNLOCK(txr);
3622 		EM_TX_LOCK_DESTROY(txr);
3623 	}
3624 
3625 	free(adapter->tx_rings, M_DEVBUF);
3626 }
3627 
3628 /*********************************************************************
3629  *
3630  *  Free transmit ring related data structures.
3631  *
3632  **********************************************************************/
3633 static void
3634 em_free_transmit_buffers(struct tx_ring *txr)
3635 {
3636 	struct adapter		*adapter = txr->adapter;
3637 	struct em_txbuffer	*txbuf;
3638 
3639 	INIT_DEBUGOUT("free_transmit_ring: begin");
3640 
3641 	if (txr->tx_buffers == NULL)
3642 		return;
3643 
3644 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3645 		txbuf = &txr->tx_buffers[i];
3646 		if (txbuf->m_head != NULL) {
3647 			bus_dmamap_sync(txr->txtag, txbuf->map,
3648 			    BUS_DMASYNC_POSTWRITE);
3649 			bus_dmamap_unload(txr->txtag,
3650 			    txbuf->map);
3651 			m_freem(txbuf->m_head);
3652 			txbuf->m_head = NULL;
3653 			if (txbuf->map != NULL) {
3654 				bus_dmamap_destroy(txr->txtag,
3655 				    txbuf->map);
3656 				txbuf->map = NULL;
3657 			}
3658 		} else if (txbuf->map != NULL) {
3659 			bus_dmamap_unload(txr->txtag,
3660 			    txbuf->map);
3661 			bus_dmamap_destroy(txr->txtag,
3662 			    txbuf->map);
3663 			txbuf->map = NULL;
3664 		}
3665 	}
3666 #if __FreeBSD_version >= 800000
3667 	if (txr->br != NULL)
3668 		buf_ring_free(txr->br, M_DEVBUF);
3669 #endif
3670 	if (txr->tx_buffers != NULL) {
3671 		free(txr->tx_buffers, M_DEVBUF);
3672 		txr->tx_buffers = NULL;
3673 	}
3674 	if (txr->txtag != NULL) {
3675 		bus_dma_tag_destroy(txr->txtag);
3676 		txr->txtag = NULL;
3677 	}
3678 	return;
3679 }
3680 
3681 
3682 /*********************************************************************
3683  *  The offload context is protocol specific (TCP/UDP) and thus
3684  *  only needs to be set when the protocol changes. The occasion
3685  *  of a context change can be a performance detriment, and
3686  *  might be better just disabled. The reason arises in the way
3687  *  in which the controller supports pipelined requests from the
3688  *  Tx data DMA. Up to four requests can be pipelined, and they may
3689  *  belong to the same packet or to multiple packets. However all
3690  *  requests for one packet are issued before a request is issued
3691  *  for a subsequent packet and if a request for the next packet
3692  *  requires a context change, that request will be stalled
3693  *  until the previous request completes. This means setting up
3694  *  a new context effectively disables pipelined Tx data DMA which
3695  *  in turn greatly slow down performance to send small sized
3696  *  frames.
3697  **********************************************************************/
3698 static void
3699 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3700     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3701 {
3702 	struct adapter			*adapter = txr->adapter;
3703 	struct e1000_context_desc	*TXD = NULL;
3704 	struct em_txbuffer		*tx_buffer;
3705 	int				cur, hdr_len;
3706 	u32				cmd = 0;
3707 	u16				offload = 0;
3708 	u8				ipcso, ipcss, tucso, tucss;
3709 
3710 	ipcss = ipcso = tucss = tucso = 0;
3711 	hdr_len = ip_off + (ip->ip_hl << 2);
3712 	cur = txr->next_avail_desc;
3713 
3714 	/* Setup of IP header checksum. */
3715 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3716 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3717 		offload |= CSUM_IP;
3718 		ipcss = ip_off;
3719 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3720 		/*
3721 		 * Start offset for header checksum calculation.
3722 		 * End offset for header checksum calculation.
3723 		 * Offset of place to put the checksum.
3724 		 */
3725 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3726 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3727 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3728 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3729 		cmd |= E1000_TXD_CMD_IP;
3730 	}
3731 
3732 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3733  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3734  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3735  		offload |= CSUM_TCP;
3736  		tucss = hdr_len;
3737  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3738 		/*
3739 		 * The 82574L can only remember the *last* context used
3740 		 * regardless of queue that it was use for.  We cannot reuse
3741 		 * contexts on this hardware platform and must generate a new
3742 		 * context every time.  82574L hardware spec, section 7.2.6,
3743 		 * second note.
3744 		 */
3745 		if (adapter->num_queues < 2) {
3746  			/*
3747  		 	* Setting up new checksum offload context for every
3748 			* frames takes a lot of processing time for hardware.
3749 			* This also reduces performance a lot for small sized
3750 			* frames so avoid it if driver can use previously
3751 			* configured checksum offload context.
3752  		 	*/
3753  			if (txr->last_hw_offload == offload) {
3754  				if (offload & CSUM_IP) {
3755  					if (txr->last_hw_ipcss == ipcss &&
3756  				    	txr->last_hw_ipcso == ipcso &&
3757  				    	txr->last_hw_tucss == tucss &&
3758  				    	txr->last_hw_tucso == tucso)
3759  						return;
3760  				} else {
3761  					if (txr->last_hw_tucss == tucss &&
3762  				    	txr->last_hw_tucso == tucso)
3763  						return;
3764  				}
3765   			}
3766  			txr->last_hw_offload = offload;
3767  			txr->last_hw_tucss = tucss;
3768  			txr->last_hw_tucso = tucso;
3769 		}
3770  		/*
3771  		 * Start offset for payload checksum calculation.
3772  		 * End offset for payload checksum calculation.
3773  		 * Offset of place to put the checksum.
3774  		 */
3775 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3776  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3777  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3778  		TXD->upper_setup.tcp_fields.tucso = tucso;
3779  		cmd |= E1000_TXD_CMD_TCP;
3780  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3781  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3782  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3783  		tucss = hdr_len;
3784  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3785 		/*
3786 		 * The 82574L can only remember the *last* context used
3787 		 * regardless of queue that it was use for.  We cannot reuse
3788 		 * contexts on this hardware platform and must generate a new
3789 		 * context every time.  82574L hardware spec, section 7.2.6,
3790 		 * second note.
3791 		 */
3792 		if (adapter->num_queues < 2) {
3793  			/*
3794  		 	* Setting up new checksum offload context for every
3795 			* frames takes a lot of processing time for hardware.
3796 			* This also reduces performance a lot for small sized
3797 			* frames so avoid it if driver can use previously
3798 			* configured checksum offload context.
3799  		 	*/
3800  			if (txr->last_hw_offload == offload) {
3801  				if (offload & CSUM_IP) {
3802  					if (txr->last_hw_ipcss == ipcss &&
3803  				    	txr->last_hw_ipcso == ipcso &&
3804  				    	txr->last_hw_tucss == tucss &&
3805  				    	txr->last_hw_tucso == tucso)
3806  						return;
3807  				} else {
3808  					if (txr->last_hw_tucss == tucss &&
3809  				    	txr->last_hw_tucso == tucso)
3810  						return;
3811  				}
3812  			}
3813  			txr->last_hw_offload = offload;
3814  			txr->last_hw_tucss = tucss;
3815  			txr->last_hw_tucso = tucso;
3816 		}
3817  		/*
3818  		 * Start offset for header checksum calculation.
3819  		 * End offset for header checksum calculation.
3820  		 * Offset of place to put the checksum.
3821  		 */
3822 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3823  		TXD->upper_setup.tcp_fields.tucss = tucss;
3824  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3825  		TXD->upper_setup.tcp_fields.tucso = tucso;
3826   	}
3827 
3828  	if (offload & CSUM_IP) {
3829  		txr->last_hw_ipcss = ipcss;
3830  		txr->last_hw_ipcso = ipcso;
3831   	}
3832 
3833 	TXD->tcp_seg_setup.data = htole32(0);
3834 	TXD->cmd_and_length =
3835 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3836 	tx_buffer = &txr->tx_buffers[cur];
3837 	tx_buffer->m_head = NULL;
3838 	tx_buffer->next_eop = -1;
3839 
3840 	if (++cur == adapter->num_tx_desc)
3841 		cur = 0;
3842 
3843 	txr->tx_avail--;
3844 	txr->next_avail_desc = cur;
3845 }
3846 
3847 
3848 /**********************************************************************
3849  *
3850  *  Setup work for hardware segmentation offload (TSO)
3851  *
3852  **********************************************************************/
3853 static void
3854 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3855     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3856 {
3857 	struct adapter			*adapter = txr->adapter;
3858 	struct e1000_context_desc	*TXD;
3859 	struct em_txbuffer		*tx_buffer;
3860 	int cur, hdr_len;
3861 
3862 	/*
3863 	 * In theory we can use the same TSO context if and only if
3864 	 * frame is the same type(IP/TCP) and the same MSS. However
3865 	 * checking whether a frame has the same IP/TCP structure is
3866 	 * hard thing so just ignore that and always restablish a
3867 	 * new TSO context.
3868 	 */
3869 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3870 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3871 		      E1000_TXD_DTYP_D |	/* Data descr type */
3872 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3873 
3874 	/* IP and/or TCP header checksum calculation and insertion. */
3875 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3876 
3877 	cur = txr->next_avail_desc;
3878 	tx_buffer = &txr->tx_buffers[cur];
3879 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3880 
3881 	/*
3882 	 * Start offset for header checksum calculation.
3883 	 * End offset for header checksum calculation.
3884 	 * Offset of place put the checksum.
3885 	 */
3886 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3887 	TXD->lower_setup.ip_fields.ipcse =
3888 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3889 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3890 	/*
3891 	 * Start offset for payload checksum calculation.
3892 	 * End offset for payload checksum calculation.
3893 	 * Offset of place to put the checksum.
3894 	 */
3895 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3896 	TXD->upper_setup.tcp_fields.tucse = 0;
3897 	TXD->upper_setup.tcp_fields.tucso =
3898 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3899 	/*
3900 	 * Payload size per packet w/o any headers.
3901 	 * Length of all headers up to payload.
3902 	 */
3903 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3904 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3905 
3906 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3907 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3908 				E1000_TXD_CMD_TSE |	/* TSE context */
3909 				E1000_TXD_CMD_IP |	/* Do IP csum */
3910 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3911 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3912 
3913 	tx_buffer->m_head = NULL;
3914 	tx_buffer->next_eop = -1;
3915 
3916 	if (++cur == adapter->num_tx_desc)
3917 		cur = 0;
3918 
3919 	txr->tx_avail--;
3920 	txr->next_avail_desc = cur;
3921 	txr->tx_tso = TRUE;
3922 }
3923 
3924 
3925 /**********************************************************************
3926  *
3927  *  Examine each tx_buffer in the used queue. If the hardware is done
3928  *  processing the packet then free associated resources. The
3929  *  tx_buffer is put back on the free queue.
3930  *
3931  **********************************************************************/
3932 static void
3933 em_txeof(struct tx_ring *txr)
3934 {
3935 	struct adapter	*adapter = txr->adapter;
3936         int first, last, done, processed;
3937         struct em_txbuffer *tx_buffer;
3938         struct e1000_tx_desc   *tx_desc, *eop_desc;
3939 	if_t ifp = adapter->ifp;
3940 
3941 	EM_TX_LOCK_ASSERT(txr);
3942 #ifdef DEV_NETMAP
3943 	if (netmap_tx_irq(ifp, txr->me))
3944 		return;
3945 #endif /* DEV_NETMAP */
3946 
3947 	/* No work, make sure hang detection is disabled */
3948         if (txr->tx_avail == adapter->num_tx_desc) {
3949 		txr->busy = EM_TX_IDLE;
3950                 return;
3951 	}
3952 
3953 	processed = 0;
3954         first = txr->next_to_clean;
3955         tx_desc = &txr->tx_base[first];
3956         tx_buffer = &txr->tx_buffers[first];
3957 	last = tx_buffer->next_eop;
3958         eop_desc = &txr->tx_base[last];
3959 
3960 	/*
3961 	 * What this does is get the index of the
3962 	 * first descriptor AFTER the EOP of the
3963 	 * first packet, that way we can do the
3964 	 * simple comparison on the inner while loop.
3965 	 */
3966 	if (++last == adapter->num_tx_desc)
3967  		last = 0;
3968 	done = last;
3969 
3970         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3971             BUS_DMASYNC_POSTREAD);
3972 
3973         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3974 		/* We clean the range of the packet */
3975 		while (first != done) {
3976                 	tx_desc->upper.data = 0;
3977                 	tx_desc->lower.data = 0;
3978                 	tx_desc->buffer_addr = 0;
3979                 	++txr->tx_avail;
3980 			++processed;
3981 
3982 			if (tx_buffer->m_head) {
3983 				bus_dmamap_sync(txr->txtag,
3984 				    tx_buffer->map,
3985 				    BUS_DMASYNC_POSTWRITE);
3986 				bus_dmamap_unload(txr->txtag,
3987 				    tx_buffer->map);
3988                         	m_freem(tx_buffer->m_head);
3989                         	tx_buffer->m_head = NULL;
3990                 	}
3991 			tx_buffer->next_eop = -1;
3992 
3993 	                if (++first == adapter->num_tx_desc)
3994 				first = 0;
3995 
3996 	                tx_buffer = &txr->tx_buffers[first];
3997 			tx_desc = &txr->tx_base[first];
3998 		}
3999 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4000 		/* See if we can continue to the next packet */
4001 		last = tx_buffer->next_eop;
4002 		if (last != -1) {
4003         		eop_desc = &txr->tx_base[last];
4004 			/* Get new done point */
4005 			if (++last == adapter->num_tx_desc) last = 0;
4006 			done = last;
4007 		} else
4008 			break;
4009         }
4010         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4011             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4012 
4013         txr->next_to_clean = first;
4014 
4015 	/*
4016 	** Hang detection: we know there's work outstanding
4017 	** or the entry return would have been taken, so no
4018 	** descriptor processed here indicates a potential hang.
4019 	** The local timer will examine this and do a reset if needed.
4020 	*/
4021 	if (processed == 0) {
4022 		if (txr->busy != EM_TX_HUNG)
4023 			++txr->busy;
4024 	} else /* At least one descriptor was cleaned */
4025 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4026 
4027         /*
4028          * If we have a minimum free, clear IFF_DRV_OACTIVE
4029          * to tell the stack that it is OK to send packets.
4030 	 * Notice that all writes of OACTIVE happen under the
4031 	 * TX lock which, with a single queue, guarantees
4032 	 * sanity.
4033          */
4034         if (txr->tx_avail >= EM_MAX_SCATTER) {
4035 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4036 	}
4037 
4038 	/* Disable hang detection if all clean */
4039 	if (txr->tx_avail == adapter->num_tx_desc)
4040 		txr->busy = EM_TX_IDLE;
4041 }
4042 
4043 /*********************************************************************
4044  *
4045  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4046  *
4047  **********************************************************************/
4048 static void
4049 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4050 {
4051 	struct adapter		*adapter = rxr->adapter;
4052 	struct mbuf		*m;
4053 	bus_dma_segment_t	segs;
4054 	struct em_rxbuffer	*rxbuf;
4055 	int			i, j, error, nsegs;
4056 	bool			cleaned = FALSE;
4057 
4058 	i = j = rxr->next_to_refresh;
4059 	/*
4060 	** Get one descriptor beyond
4061 	** our work mark to control
4062 	** the loop.
4063 	*/
4064 	if (++j == adapter->num_rx_desc)
4065 		j = 0;
4066 
4067 	while (j != limit) {
4068 		rxbuf = &rxr->rx_buffers[i];
4069 		if (rxbuf->m_head == NULL) {
4070 			m = m_getjcl(M_NOWAIT, MT_DATA,
4071 			    M_PKTHDR, adapter->rx_mbuf_sz);
4072 			/*
4073 			** If we have a temporary resource shortage
4074 			** that causes a failure, just abort refresh
4075 			** for now, we will return to this point when
4076 			** reinvoked from em_rxeof.
4077 			*/
4078 			if (m == NULL)
4079 				goto update;
4080 		} else
4081 			m = rxbuf->m_head;
4082 
4083 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4084 		m->m_flags |= M_PKTHDR;
4085 		m->m_data = m->m_ext.ext_buf;
4086 
4087 		/* Use bus_dma machinery to setup the memory mapping  */
4088 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4089 		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
4090 		if (error != 0) {
4091 			printf("Refresh mbufs: hdr dmamap load"
4092 			    " failure - %d\n", error);
4093 			m_free(m);
4094 			rxbuf->m_head = NULL;
4095 			goto update;
4096 		}
4097 		rxbuf->m_head = m;
4098 		rxbuf->paddr = segs.ds_addr;
4099 		bus_dmamap_sync(rxr->rxtag,
4100 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4101 		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4102 		cleaned = TRUE;
4103 
4104 		i = j; /* Next is precalulated for us */
4105 		rxr->next_to_refresh = i;
4106 		/* Calculate next controlling index */
4107 		if (++j == adapter->num_rx_desc)
4108 			j = 0;
4109 	}
4110 update:
4111 	/*
4112 	** Update the tail pointer only if,
4113 	** and as far as we have refreshed.
4114 	*/
4115 	if (cleaned)
4116 		E1000_WRITE_REG(&adapter->hw,
4117 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4118 
4119 	return;
4120 }
4121 
4122 
4123 /*********************************************************************
4124  *
4125  *  Allocate memory for rx_buffer structures. Since we use one
4126  *  rx_buffer per received packet, the maximum number of rx_buffer's
4127  *  that we'll need is equal to the number of receive descriptors
4128  *  that we've allocated.
4129  *
4130  **********************************************************************/
4131 static int
4132 em_allocate_receive_buffers(struct rx_ring *rxr)
4133 {
4134 	struct adapter		*adapter = rxr->adapter;
4135 	device_t		dev = adapter->dev;
4136 	struct em_rxbuffer	*rxbuf;
4137 	int			error;
4138 
4139 	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4140 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4141 	if (rxr->rx_buffers == NULL) {
4142 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4143 		return (ENOMEM);
4144 	}
4145 
4146 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4147 				1, 0,			/* alignment, bounds */
4148 				BUS_SPACE_MAXADDR,	/* lowaddr */
4149 				BUS_SPACE_MAXADDR,	/* highaddr */
4150 				NULL, NULL,		/* filter, filterarg */
4151 				MJUM9BYTES,		/* maxsize */
4152 				1,			/* nsegments */
4153 				MJUM9BYTES,		/* maxsegsize */
4154 				0,			/* flags */
4155 				NULL,			/* lockfunc */
4156 				NULL,			/* lockarg */
4157 				&rxr->rxtag);
4158 	if (error) {
4159 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4160 		    __func__, error);
4161 		goto fail;
4162 	}
4163 
4164 	rxbuf = rxr->rx_buffers;
4165 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4166 		rxbuf = &rxr->rx_buffers[i];
4167 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4168 		if (error) {
4169 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4170 			    __func__, error);
4171 			goto fail;
4172 		}
4173 	}
4174 
4175 	return (0);
4176 
4177 fail:
4178 	em_free_receive_structures(adapter);
4179 	return (error);
4180 }
4181 
4182 
4183 /*********************************************************************
4184  *
4185  *  Initialize a receive ring and its buffers.
4186  *
4187  **********************************************************************/
4188 static int
4189 em_setup_receive_ring(struct rx_ring *rxr)
4190 {
4191 	struct	adapter 	*adapter = rxr->adapter;
4192 	struct em_rxbuffer	*rxbuf;
4193 	bus_dma_segment_t	seg[1];
4194 	int			rsize, nsegs, error = 0;
4195 #ifdef DEV_NETMAP
4196 	struct netmap_slot *slot;
4197 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4198 #endif
4199 
4200 
4201 	/* Clear the ring contents */
4202 	EM_RX_LOCK(rxr);
4203 	rsize = roundup2(adapter->num_rx_desc *
4204 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4205 	bzero((void *)rxr->rx_base, rsize);
4206 #ifdef DEV_NETMAP
4207 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4208 #endif
4209 
4210 	/*
4211 	** Free current RX buffer structs and their mbufs
4212 	*/
4213 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4214 		rxbuf = &rxr->rx_buffers[i];
4215 		if (rxbuf->m_head != NULL) {
4216 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4217 			    BUS_DMASYNC_POSTREAD);
4218 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4219 			m_freem(rxbuf->m_head);
4220 			rxbuf->m_head = NULL; /* mark as freed */
4221 		}
4222 	}
4223 
4224 	/* Now replenish the mbufs */
4225         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4226 		rxbuf = &rxr->rx_buffers[j];
4227 #ifdef DEV_NETMAP
4228 		if (slot) {
4229 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4230 			uint64_t paddr;
4231 			void *addr;
4232 
4233 			addr = PNMB(na, slot + si, &paddr);
4234 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4235 			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4236 			continue;
4237 		}
4238 #endif /* DEV_NETMAP */
4239 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4240 		    M_PKTHDR, adapter->rx_mbuf_sz);
4241 		if (rxbuf->m_head == NULL) {
4242 			error = ENOBUFS;
4243 			goto fail;
4244 		}
4245 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4246 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4247 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4248 
4249 		/* Get the memory mapping */
4250 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4251 		    rxbuf->map, rxbuf->m_head, seg,
4252 		    &nsegs, BUS_DMA_NOWAIT);
4253 		if (error != 0) {
4254 			m_freem(rxbuf->m_head);
4255 			rxbuf->m_head = NULL;
4256 			goto fail;
4257 		}
4258 		bus_dmamap_sync(rxr->rxtag,
4259 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4260 
4261 		rxbuf->paddr = seg[0].ds_addr;
4262 		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4263 	}
4264 	rxr->next_to_check = 0;
4265 	rxr->next_to_refresh = 0;
4266 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4267 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4268 
4269 fail:
4270 	EM_RX_UNLOCK(rxr);
4271 	return (error);
4272 }
4273 
4274 /*********************************************************************
4275  *
4276  *  Initialize all receive rings.
4277  *
4278  **********************************************************************/
4279 static int
4280 em_setup_receive_structures(struct adapter *adapter)
4281 {
4282 	struct rx_ring *rxr = adapter->rx_rings;
4283 	int q;
4284 
4285 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4286 		if (em_setup_receive_ring(rxr))
4287 			goto fail;
4288 
4289 	return (0);
4290 fail:
4291 	/*
4292 	 * Free RX buffers allocated so far, we will only handle
4293 	 * the rings that completed, the failing case will have
4294 	 * cleaned up for itself. 'q' failed, so its the terminus.
4295 	 */
4296 	for (int i = 0; i < q; ++i) {
4297 		rxr = &adapter->rx_rings[i];
4298 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4299 			struct em_rxbuffer *rxbuf;
4300 			rxbuf = &rxr->rx_buffers[n];
4301 			if (rxbuf->m_head != NULL) {
4302 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4303 			  	  BUS_DMASYNC_POSTREAD);
4304 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4305 				m_freem(rxbuf->m_head);
4306 				rxbuf->m_head = NULL;
4307 			}
4308 		}
4309 		rxr->next_to_check = 0;
4310 		rxr->next_to_refresh = 0;
4311 	}
4312 
4313 	return (ENOBUFS);
4314 }
4315 
4316 /*********************************************************************
4317  *
4318  *  Free all receive rings.
4319  *
4320  **********************************************************************/
4321 static void
4322 em_free_receive_structures(struct adapter *adapter)
4323 {
4324 	struct rx_ring *rxr = adapter->rx_rings;
4325 
4326 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4327 		em_free_receive_buffers(rxr);
4328 		/* Free the ring memory as well */
4329 		em_dma_free(adapter, &rxr->rxdma);
4330 		EM_RX_LOCK_DESTROY(rxr);
4331 	}
4332 
4333 	free(adapter->rx_rings, M_DEVBUF);
4334 }
4335 
4336 
4337 /*********************************************************************
4338  *
4339  *  Free receive ring data structures
4340  *
4341  **********************************************************************/
4342 static void
4343 em_free_receive_buffers(struct rx_ring *rxr)
4344 {
4345 	struct adapter		*adapter = rxr->adapter;
4346 	struct em_rxbuffer	*rxbuf = NULL;
4347 
4348 	INIT_DEBUGOUT("free_receive_buffers: begin");
4349 
4350 	if (rxr->rx_buffers != NULL) {
4351 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4352 			rxbuf = &rxr->rx_buffers[i];
4353 			if (rxbuf->map != NULL) {
4354 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4355 				    BUS_DMASYNC_POSTREAD);
4356 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4357 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4358 			}
4359 			if (rxbuf->m_head != NULL) {
4360 				m_freem(rxbuf->m_head);
4361 				rxbuf->m_head = NULL;
4362 			}
4363 		}
4364 		free(rxr->rx_buffers, M_DEVBUF);
4365 		rxr->rx_buffers = NULL;
4366 		rxr->next_to_check = 0;
4367 		rxr->next_to_refresh = 0;
4368 	}
4369 
4370 	if (rxr->rxtag != NULL) {
4371 		bus_dma_tag_destroy(rxr->rxtag);
4372 		rxr->rxtag = NULL;
4373 	}
4374 
4375 	return;
4376 }
4377 
4378 
4379 /*********************************************************************
4380  *
4381  *  Enable receive unit.
4382  *
4383  **********************************************************************/
4384 
4385 static void
4386 em_initialize_receive_unit(struct adapter *adapter)
4387 {
4388 	struct rx_ring *rxr = adapter->rx_rings;
4389 	if_t ifp = adapter->ifp;
4390 	struct e1000_hw	*hw = &adapter->hw;
4391 	u32	rctl, rxcsum, rfctl;
4392 
4393 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4394 
4395 	/*
4396 	 * Make sure receives are disabled while setting
4397 	 * up the descriptor ring
4398 	 */
4399 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4400 	/* Do not disable if ever enabled on this hardware */
4401 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4402 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4403 
4404 	/* Setup the Receive Control Register */
4405 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4406 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4407 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4408 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4409 
4410 	/* Do not store bad packets */
4411 	rctl &= ~E1000_RCTL_SBP;
4412 
4413 	/* Enable Long Packet receive */
4414 	if (if_getmtu(ifp) > ETHERMTU)
4415 		rctl |= E1000_RCTL_LPE;
4416 	else
4417 		rctl &= ~E1000_RCTL_LPE;
4418 
4419         /* Strip the CRC */
4420         if (!em_disable_crc_stripping)
4421 		rctl |= E1000_RCTL_SECRC;
4422 
4423 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4424 	    adapter->rx_abs_int_delay.value);
4425 
4426 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4427 	    adapter->rx_int_delay.value);
4428 	/*
4429 	 * Set the interrupt throttling rate. Value is calculated
4430 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4431 	 */
4432 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4433 
4434 	/* Use extended rx descriptor formats */
4435 	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4436 	rfctl |= E1000_RFCTL_EXTEN;
4437 	/*
4438 	** When using MSIX interrupts we need to throttle
4439 	** using the EITR register (82574 only)
4440 	*/
4441 	if (hw->mac.type == e1000_82574) {
4442 		for (int i = 0; i < 4; i++)
4443 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4444 			    DEFAULT_ITR);
4445 		/* Disable accelerated acknowledge */
4446 		rfctl |= E1000_RFCTL_ACK_DIS;
4447 	}
4448 	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4449 
4450 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4451 	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4452 #ifdef EM_MULTIQUEUE
4453 		rxcsum |= E1000_RXCSUM_TUOFL |
4454 			  E1000_RXCSUM_IPOFL |
4455 			  E1000_RXCSUM_PCSD;
4456 #else
4457 		rxcsum |= E1000_RXCSUM_TUOFL;
4458 #endif
4459 	} else
4460 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4461 
4462 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4463 
4464 #ifdef EM_MULTIQUEUE
4465 #define RSSKEYLEN 10
4466 	if (adapter->num_queues > 1) {
4467 		uint8_t  rss_key[4 * RSSKEYLEN];
4468 		uint32_t reta = 0;
4469 		int i;
4470 
4471 		/*
4472 		* Configure RSS key
4473 		*/
4474 		arc4rand(rss_key, sizeof(rss_key), 0);
4475 		for (i = 0; i < RSSKEYLEN; ++i) {
4476 			uint32_t rssrk = 0;
4477 
4478 			rssrk = EM_RSSRK_VAL(rss_key, i);
4479 			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4480 		}
4481 
4482 		/*
4483 		* Configure RSS redirect table in following fashion:
4484 		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4485 		*/
4486 		for (i = 0; i < sizeof(reta); ++i) {
4487 			uint32_t q;
4488 
4489 			q = (i % adapter->num_queues) << 7;
4490 			reta |= q << (8 * i);
4491 		}
4492 
4493 		for (i = 0; i < 32; ++i) {
4494 			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4495 		}
4496 
4497 		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4498 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4499 				E1000_MRQC_RSS_FIELD_IPV4 |
4500 				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4501 				E1000_MRQC_RSS_FIELD_IPV6_EX |
4502 				E1000_MRQC_RSS_FIELD_IPV6);
4503 	}
4504 #endif
4505 	/*
4506 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4507 	** long latencies are observed, like Lenovo X60. This
4508 	** change eliminates the problem, but since having positive
4509 	** values in RDTR is a known source of problems on other
4510 	** platforms another solution is being sought.
4511 	*/
4512 	if (hw->mac.type == e1000_82573)
4513 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4514 
4515 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4516 		/* Setup the Base and Length of the Rx Descriptor Ring */
4517 		u64 bus_addr = rxr->rxdma.dma_paddr;
4518 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4519 
4520 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4521 		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4522 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4523 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4524 		/* Setup the Head and Tail Descriptor Pointers */
4525 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4526 #ifdef DEV_NETMAP
4527 		/*
4528 		 * an init() while a netmap client is active must
4529 		 * preserve the rx buffers passed to userspace.
4530 		 */
4531 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4532 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4533 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4534 		}
4535 #endif /* DEV_NETMAP */
4536 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4537 	}
4538 
4539 	/*
4540 	 * Set PTHRESH for improved jumbo performance
4541 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4542 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4543 	 * Only write to RXDCTL(1) if there is a need for different
4544 	 * settings.
4545 	 */
4546 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4547 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4548 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4549 	    (if_getmtu(ifp) > ETHERMTU)) {
4550 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4551 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4552 	} else if (adapter->hw.mac.type == e1000_82574) {
4553 		for (int i = 0; i < adapter->num_queues; i++) {
4554 			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4555 
4556 			rxdctl |= 0x20; /* PTHRESH */
4557 			rxdctl |= 4 << 8; /* HTHRESH */
4558 			rxdctl |= 4 << 16;/* WTHRESH */
4559 			rxdctl |= 1 << 24; /* Switch to granularity */
4560 			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4561 		}
4562 	}
4563 
4564 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4565 		if (if_getmtu(ifp) > ETHERMTU)
4566 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4567 		else
4568 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4569 	}
4570 
4571         /* Make sure VLAN Filters are off */
4572         rctl &= ~E1000_RCTL_VFE;
4573 
4574 	if (adapter->rx_mbuf_sz == MCLBYTES)
4575 		rctl |= E1000_RCTL_SZ_2048;
4576 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4577 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4578 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4579 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4580 
4581 	/* ensure we clear use DTYPE of 00 here */
4582 	rctl &= ~0x00000C00;
4583 	/* Write out the settings */
4584 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4585 
4586 	return;
4587 }
4588 
4589 
4590 /*********************************************************************
4591  *
4592  *  This routine executes in interrupt context. It replenishes
4593  *  the mbufs in the descriptor and sends data which has been
4594  *  dma'ed into host memory to upper layer.
4595  *
4596  *  We loop at most count times if count is > 0, or until done if
4597  *  count < 0.
4598  *
4599  *  For polling we also now return the number of cleaned packets
4600  *********************************************************************/
4601 static bool
4602 em_rxeof(struct rx_ring *rxr, int count, int *done)
4603 {
4604 	struct adapter		*adapter = rxr->adapter;
4605 	if_t ifp = adapter->ifp;
4606 	struct mbuf		*mp, *sendmp;
4607 	u32			status = 0;
4608 	u16 			len;
4609 	int			i, processed, rxdone = 0;
4610 	bool			eop;
4611 	union e1000_rx_desc_extended	*cur;
4612 
4613 	EM_RX_LOCK(rxr);
4614 
4615 	/* Sync the ring */
4616 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4617 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4618 
4619 
4620 #ifdef DEV_NETMAP
4621 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4622 		EM_RX_UNLOCK(rxr);
4623 		return (FALSE);
4624 	}
4625 #endif /* DEV_NETMAP */
4626 
4627 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4628 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4629 			break;
4630 
4631 		cur = &rxr->rx_base[i];
4632 		status = le32toh(cur->wb.upper.status_error);
4633 		mp = sendmp = NULL;
4634 
4635 		if ((status & E1000_RXD_STAT_DD) == 0)
4636 			break;
4637 
4638 		len = le16toh(cur->wb.upper.length);
4639 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4640 
4641 		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4642 		    (rxr->discard == TRUE)) {
4643 			adapter->dropped_pkts++;
4644 			++rxr->rx_discarded;
4645 			if (!eop) /* Catch subsequent segs */
4646 				rxr->discard = TRUE;
4647 			else
4648 				rxr->discard = FALSE;
4649 			em_rx_discard(rxr, i);
4650 			goto next_desc;
4651 		}
4652 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4653 
4654 		/* Assign correct length to the current fragment */
4655 		mp = rxr->rx_buffers[i].m_head;
4656 		mp->m_len = len;
4657 
4658 		/* Trigger for refresh */
4659 		rxr->rx_buffers[i].m_head = NULL;
4660 
4661 		/* First segment? */
4662 		if (rxr->fmp == NULL) {
4663 			mp->m_pkthdr.len = len;
4664 			rxr->fmp = rxr->lmp = mp;
4665 		} else {
4666 			/* Chain mbuf's together */
4667 			mp->m_flags &= ~M_PKTHDR;
4668 			rxr->lmp->m_next = mp;
4669 			rxr->lmp = mp;
4670 			rxr->fmp->m_pkthdr.len += len;
4671 		}
4672 
4673 		if (eop) {
4674 			--count;
4675 			sendmp = rxr->fmp;
4676 			if_setrcvif(sendmp, ifp);
4677 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4678 			em_receive_checksum(status, sendmp);
4679 #ifndef __NO_STRICT_ALIGNMENT
4680 			if (adapter->hw.mac.max_frame_size >
4681 			    (MCLBYTES - ETHER_ALIGN) &&
4682 			    em_fixup_rx(rxr) != 0)
4683 				goto skip;
4684 #endif
4685 			if (status & E1000_RXD_STAT_VP) {
4686 				if_setvtag(sendmp,
4687 				    le16toh(cur->wb.upper.vlan));
4688 				sendmp->m_flags |= M_VLANTAG;
4689 			}
4690 #ifndef __NO_STRICT_ALIGNMENT
4691 skip:
4692 #endif
4693 			rxr->fmp = rxr->lmp = NULL;
4694 		}
4695 next_desc:
4696 		/* Sync the ring */
4697 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4698 	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4699 
4700 		/* Zero out the receive descriptors status. */
4701 		cur->wb.upper.status_error &= htole32(~0xFF);
4702 		++rxdone;	/* cumulative for POLL */
4703 		++processed;
4704 
4705 		/* Advance our pointers to the next descriptor. */
4706 		if (++i == adapter->num_rx_desc)
4707 			i = 0;
4708 
4709 		/* Send to the stack */
4710 		if (sendmp != NULL) {
4711 			rxr->next_to_check = i;
4712 			EM_RX_UNLOCK(rxr);
4713 			if_input(ifp, sendmp);
4714 			EM_RX_LOCK(rxr);
4715 			i = rxr->next_to_check;
4716 		}
4717 
4718 		/* Only refresh mbufs every 8 descriptors */
4719 		if (processed == 8) {
4720 			em_refresh_mbufs(rxr, i);
4721 			processed = 0;
4722 		}
4723 	}
4724 
4725 	/* Catch any remaining refresh work */
4726 	if (e1000_rx_unrefreshed(rxr))
4727 		em_refresh_mbufs(rxr, i);
4728 
4729 	rxr->next_to_check = i;
4730 	if (done != NULL)
4731 		*done = rxdone;
4732 	EM_RX_UNLOCK(rxr);
4733 
4734 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4735 }
4736 
4737 static __inline void
4738 em_rx_discard(struct rx_ring *rxr, int i)
4739 {
4740 	struct em_rxbuffer	*rbuf;
4741 
4742 	rbuf = &rxr->rx_buffers[i];
4743 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4744 
4745 	/* Free any previous pieces */
4746 	if (rxr->fmp != NULL) {
4747 		rxr->fmp->m_flags |= M_PKTHDR;
4748 		m_freem(rxr->fmp);
4749 		rxr->fmp = NULL;
4750 		rxr->lmp = NULL;
4751 	}
4752 	/*
4753 	** Free buffer and allow em_refresh_mbufs()
4754 	** to clean up and recharge buffer.
4755 	*/
4756 	if (rbuf->m_head) {
4757 		m_free(rbuf->m_head);
4758 		rbuf->m_head = NULL;
4759 	}
4760 	return;
4761 }
4762 
4763 #ifndef __NO_STRICT_ALIGNMENT
4764 /*
4765  * When jumbo frames are enabled we should realign entire payload on
4766  * architecures with strict alignment. This is serious design mistake of 8254x
4767  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4768  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4769  * payload. On architecures without strict alignment restrictions 8254x still
4770  * performs unaligned memory access which would reduce the performance too.
4771  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4772  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4773  * existing mbuf chain.
4774  *
4775  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4776  * not used at all on architectures with strict alignment.
4777  */
4778 static int
4779 em_fixup_rx(struct rx_ring *rxr)
4780 {
4781 	struct adapter *adapter = rxr->adapter;
4782 	struct mbuf *m, *n;
4783 	int error;
4784 
4785 	error = 0;
4786 	m = rxr->fmp;
4787 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4788 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4789 		m->m_data += ETHER_HDR_LEN;
4790 	} else {
4791 		MGETHDR(n, M_NOWAIT, MT_DATA);
4792 		if (n != NULL) {
4793 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4794 			m->m_data += ETHER_HDR_LEN;
4795 			m->m_len -= ETHER_HDR_LEN;
4796 			n->m_len = ETHER_HDR_LEN;
4797 			M_MOVE_PKTHDR(n, m);
4798 			n->m_next = m;
4799 			rxr->fmp = n;
4800 		} else {
4801 			adapter->dropped_pkts++;
4802 			m_freem(rxr->fmp);
4803 			rxr->fmp = NULL;
4804 			error = ENOMEM;
4805 		}
4806 	}
4807 
4808 	return (error);
4809 }
4810 #endif
4811 
4812 static void
4813 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4814 {
4815 	rxd->read.buffer_addr = htole64(rxbuf->paddr);
4816 	/* DD bits must be cleared */
4817 	rxd->wb.upper.status_error= 0;
4818 }
4819 
4820 /*********************************************************************
4821  *
4822  *  Verify that the hardware indicated that the checksum is valid.
4823  *  Inform the stack about the status of checksum so that stack
4824  *  doesn't spend time verifying the checksum.
4825  *
4826  *********************************************************************/
4827 static void
4828 em_receive_checksum(uint32_t status, struct mbuf *mp)
4829 {
4830 	mp->m_pkthdr.csum_flags = 0;
4831 
4832 	/* Ignore Checksum bit is set */
4833 	if (status & E1000_RXD_STAT_IXSM)
4834 		return;
4835 
4836 	/* If the IP checksum exists and there is no IP Checksum error */
4837 	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
4838 		E1000_RXD_STAT_IPCS) {
4839 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4840 	}
4841 
4842 	/* TCP or UDP checksum */
4843 	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
4844 	    E1000_RXD_STAT_TCPCS) {
4845 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4846 		mp->m_pkthdr.csum_data = htons(0xffff);
4847 	}
4848 	if (status & E1000_RXD_STAT_UDPCS) {
4849 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4850 		mp->m_pkthdr.csum_data = htons(0xffff);
4851 	}
4852 }
4853 
4854 /*
4855  * This routine is run via an vlan
4856  * config EVENT
4857  */
4858 static void
4859 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4860 {
4861 	struct adapter	*adapter = if_getsoftc(ifp);
4862 	u32		index, bit;
4863 
4864 	if ((void*)adapter !=  arg)   /* Not our event */
4865 		return;
4866 
4867 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4868                 return;
4869 
4870 	EM_CORE_LOCK(adapter);
4871 	index = (vtag >> 5) & 0x7F;
4872 	bit = vtag & 0x1F;
4873 	adapter->shadow_vfta[index] |= (1 << bit);
4874 	++adapter->num_vlans;
4875 	/* Re-init to load the changes */
4876 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4877 		em_init_locked(adapter);
4878 	EM_CORE_UNLOCK(adapter);
4879 }
4880 
4881 /*
4882  * This routine is run via an vlan
4883  * unconfig EVENT
4884  */
4885 static void
4886 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4887 {
4888 	struct adapter	*adapter = if_getsoftc(ifp);
4889 	u32		index, bit;
4890 
4891 	if (adapter != arg)
4892 		return;
4893 
4894 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4895                 return;
4896 
4897 	EM_CORE_LOCK(adapter);
4898 	index = (vtag >> 5) & 0x7F;
4899 	bit = vtag & 0x1F;
4900 	adapter->shadow_vfta[index] &= ~(1 << bit);
4901 	--adapter->num_vlans;
4902 	/* Re-init to load the changes */
4903 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4904 		em_init_locked(adapter);
4905 	EM_CORE_UNLOCK(adapter);
4906 }
4907 
4908 static void
4909 em_setup_vlan_hw_support(struct adapter *adapter)
4910 {
4911 	struct e1000_hw *hw = &adapter->hw;
4912 	u32             reg;
4913 
4914 	/*
4915 	** We get here thru init_locked, meaning
4916 	** a soft reset, this has already cleared
4917 	** the VFTA and other state, so if there
4918 	** have been no vlan's registered do nothing.
4919 	*/
4920 	if (adapter->num_vlans == 0)
4921                 return;
4922 
4923 	/*
4924 	** A soft reset zero's out the VFTA, so
4925 	** we need to repopulate it now.
4926 	*/
4927 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4928                 if (adapter->shadow_vfta[i] != 0)
4929 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4930                             i, adapter->shadow_vfta[i]);
4931 
4932 	reg = E1000_READ_REG(hw, E1000_CTRL);
4933 	reg |= E1000_CTRL_VME;
4934 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4935 
4936 	/* Enable the Filter Table */
4937 	reg = E1000_READ_REG(hw, E1000_RCTL);
4938 	reg &= ~E1000_RCTL_CFIEN;
4939 	reg |= E1000_RCTL_VFE;
4940 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4941 }
4942 
4943 static void
4944 em_enable_intr(struct adapter *adapter)
4945 {
4946 	struct e1000_hw *hw = &adapter->hw;
4947 	u32 ims_mask = IMS_ENABLE_MASK;
4948 
4949 	if (hw->mac.type == e1000_82574) {
4950 		E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
4951 		ims_mask |= adapter->ims;
4952 	}
4953 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4954 }
4955 
4956 static void
4957 em_disable_intr(struct adapter *adapter)
4958 {
4959 	struct e1000_hw *hw = &adapter->hw;
4960 
4961 	if (hw->mac.type == e1000_82574)
4962 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4963 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4964 }
4965 
4966 /*
4967  * Bit of a misnomer, what this really means is
4968  * to enable OS management of the system... aka
4969  * to disable special hardware management features
4970  */
4971 static void
4972 em_init_manageability(struct adapter *adapter)
4973 {
4974 	/* A shared code workaround */
4975 #define E1000_82542_MANC2H E1000_MANC2H
4976 	if (adapter->has_manage) {
4977 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4978 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4979 
4980 		/* disable hardware interception of ARP */
4981 		manc &= ~(E1000_MANC_ARP_EN);
4982 
4983                 /* enable receiving management packets to the host */
4984 		manc |= E1000_MANC_EN_MNG2HOST;
4985 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4986 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4987 		manc2h |= E1000_MNG2HOST_PORT_623;
4988 		manc2h |= E1000_MNG2HOST_PORT_664;
4989 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4990 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4991 	}
4992 }
4993 
4994 /*
4995  * Give control back to hardware management
4996  * controller if there is one.
4997  */
4998 static void
4999 em_release_manageability(struct adapter *adapter)
5000 {
5001 	if (adapter->has_manage) {
5002 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5003 
5004 		/* re-enable hardware interception of ARP */
5005 		manc |= E1000_MANC_ARP_EN;
5006 		manc &= ~E1000_MANC_EN_MNG2HOST;
5007 
5008 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5009 	}
5010 }
5011 
5012 /*
5013  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5014  * For ASF and Pass Through versions of f/w this means
5015  * that the driver is loaded. For AMT version type f/w
5016  * this means that the network i/f is open.
5017  */
5018 static void
5019 em_get_hw_control(struct adapter *adapter)
5020 {
5021 	u32 ctrl_ext, swsm;
5022 
5023 	if (adapter->hw.mac.type == e1000_82573) {
5024 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5025 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5026 		    swsm | E1000_SWSM_DRV_LOAD);
5027 		return;
5028 	}
5029 	/* else */
5030 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5031 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5032 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5033 	return;
5034 }
5035 
5036 /*
5037  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5038  * For ASF and Pass Through versions of f/w this means that
5039  * the driver is no longer loaded. For AMT versions of the
5040  * f/w this means that the network i/f is closed.
5041  */
5042 static void
5043 em_release_hw_control(struct adapter *adapter)
5044 {
5045 	u32 ctrl_ext, swsm;
5046 
5047 	if (!adapter->has_manage)
5048 		return;
5049 
5050 	if (adapter->hw.mac.type == e1000_82573) {
5051 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5052 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5053 		    swsm & ~E1000_SWSM_DRV_LOAD);
5054 		return;
5055 	}
5056 	/* else */
5057 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5058 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5059 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5060 	return;
5061 }
5062 
5063 static int
5064 em_is_valid_ether_addr(u8 *addr)
5065 {
5066 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5067 
5068 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5069 		return (FALSE);
5070 	}
5071 
5072 	return (TRUE);
5073 }
5074 
5075 /*
5076 ** Parse the interface capabilities with regard
5077 ** to both system management and wake-on-lan for
5078 ** later use.
5079 */
5080 static void
5081 em_get_wakeup(device_t dev)
5082 {
5083 	struct adapter	*adapter = device_get_softc(dev);
5084 	u16		eeprom_data = 0, device_id, apme_mask;
5085 
5086 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5087 	apme_mask = EM_EEPROM_APME;
5088 
5089 	switch (adapter->hw.mac.type) {
5090 	case e1000_82573:
5091 	case e1000_82583:
5092 		adapter->has_amt = TRUE;
5093 		/* Falls thru */
5094 	case e1000_82571:
5095 	case e1000_82572:
5096 	case e1000_80003es2lan:
5097 		if (adapter->hw.bus.func == 1) {
5098 			e1000_read_nvm(&adapter->hw,
5099 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5100 			break;
5101 		} else
5102 			e1000_read_nvm(&adapter->hw,
5103 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5104 		break;
5105 	case e1000_ich8lan:
5106 	case e1000_ich9lan:
5107 	case e1000_ich10lan:
5108 	case e1000_pchlan:
5109 	case e1000_pch2lan:
5110 		apme_mask = E1000_WUC_APME;
5111 		adapter->has_amt = TRUE;
5112 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5113 		break;
5114 	default:
5115 		e1000_read_nvm(&adapter->hw,
5116 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5117 		break;
5118 	}
5119 	if (eeprom_data & apme_mask)
5120 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5121 	/*
5122          * We have the eeprom settings, now apply the special cases
5123          * where the eeprom may be wrong or the board won't support
5124          * wake on lan on a particular port
5125 	 */
5126 	device_id = pci_get_device(dev);
5127         switch (device_id) {
5128 	case E1000_DEV_ID_82571EB_FIBER:
5129 		/* Wake events only supported on port A for dual fiber
5130 		 * regardless of eeprom setting */
5131 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5132 		    E1000_STATUS_FUNC_1)
5133 			adapter->wol = 0;
5134 		break;
5135 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5136 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5137 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5138                 /* if quad port adapter, disable WoL on all but port A */
5139 		if (global_quad_port_a != 0)
5140 			adapter->wol = 0;
5141 		/* Reset for multiple quad port adapters */
5142 		if (++global_quad_port_a == 4)
5143 			global_quad_port_a = 0;
5144                 break;
5145 	}
5146 	return;
5147 }
5148 
5149 
5150 /*
5151  * Enable PCI Wake On Lan capability
5152  */
5153 static void
5154 em_enable_wakeup(device_t dev)
5155 {
5156 	struct adapter	*adapter = device_get_softc(dev);
5157 	if_t ifp = adapter->ifp;
5158 	u32		pmc, ctrl, ctrl_ext, rctl;
5159 	u16     	status;
5160 
5161 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5162 		return;
5163 
5164 	/* Advertise the wakeup capability */
5165 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5166 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5167 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5168 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5169 
5170 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5171 	    (adapter->hw.mac.type == e1000_pchlan) ||
5172 	    (adapter->hw.mac.type == e1000_ich9lan) ||
5173 	    (adapter->hw.mac.type == e1000_ich10lan))
5174 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5175 
5176 	/* Keep the laser running on Fiber adapters */
5177 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5178 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5179 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5180 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5181 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5182 	}
5183 
5184 	/*
5185 	** Determine type of Wakeup: note that wol
5186 	** is set with all bits on by default.
5187 	*/
5188 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5189 		adapter->wol &= ~E1000_WUFC_MAG;
5190 
5191 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5192 		adapter->wol &= ~E1000_WUFC_MC;
5193 	else {
5194 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5195 		rctl |= E1000_RCTL_MPE;
5196 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5197 	}
5198 
5199 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5200 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5201 		if (em_enable_phy_wakeup(adapter))
5202 			return;
5203 	} else {
5204 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5205 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5206 	}
5207 
5208 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5209 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5210 
5211         /* Request PME */
5212         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5213 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5214 	if (if_getcapenable(ifp) & IFCAP_WOL)
5215 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5216         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5217 
5218 	return;
5219 }
5220 
5221 /*
5222 ** WOL in the newer chipset interfaces (pchlan)
5223 ** require thing to be copied into the phy
5224 */
5225 static int
5226 em_enable_phy_wakeup(struct adapter *adapter)
5227 {
5228 	struct e1000_hw *hw = &adapter->hw;
5229 	u32 mreg, ret = 0;
5230 	u16 preg;
5231 
5232 	/* copy MAC RARs to PHY RARs */
5233 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5234 
5235 	/* copy MAC MTA to PHY MTA */
5236 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5237 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5238 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5239 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5240 		    (u16)((mreg >> 16) & 0xFFFF));
5241 	}
5242 
5243 	/* configure PHY Rx Control register */
5244 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5245 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5246 	if (mreg & E1000_RCTL_UPE)
5247 		preg |= BM_RCTL_UPE;
5248 	if (mreg & E1000_RCTL_MPE)
5249 		preg |= BM_RCTL_MPE;
5250 	preg &= ~(BM_RCTL_MO_MASK);
5251 	if (mreg & E1000_RCTL_MO_3)
5252 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5253 				<< BM_RCTL_MO_SHIFT);
5254 	if (mreg & E1000_RCTL_BAM)
5255 		preg |= BM_RCTL_BAM;
5256 	if (mreg & E1000_RCTL_PMCF)
5257 		preg |= BM_RCTL_PMCF;
5258 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5259 	if (mreg & E1000_CTRL_RFCE)
5260 		preg |= BM_RCTL_RFCE;
5261 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5262 
5263 	/* enable PHY wakeup in MAC register */
5264 	E1000_WRITE_REG(hw, E1000_WUC,
5265 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5266 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5267 
5268 	/* configure and enable PHY wakeup in PHY registers */
5269 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5270 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5271 
5272 	/* activate PHY wakeup */
5273 	ret = hw->phy.ops.acquire(hw);
5274 	if (ret) {
5275 		printf("Could not acquire PHY\n");
5276 		return ret;
5277 	}
5278 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5279 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5280 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5281 	if (ret) {
5282 		printf("Could not read PHY page 769\n");
5283 		goto out;
5284 	}
5285 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5286 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5287 	if (ret)
5288 		printf("Could not set PHY Host Wakeup bit\n");
5289 out:
5290 	hw->phy.ops.release(hw);
5291 
5292 	return ret;
5293 }
5294 
5295 static void
5296 em_led_func(void *arg, int onoff)
5297 {
5298 	struct adapter	*adapter = arg;
5299 
5300 	EM_CORE_LOCK(adapter);
5301 	if (onoff) {
5302 		e1000_setup_led(&adapter->hw);
5303 		e1000_led_on(&adapter->hw);
5304 	} else {
5305 		e1000_led_off(&adapter->hw);
5306 		e1000_cleanup_led(&adapter->hw);
5307 	}
5308 	EM_CORE_UNLOCK(adapter);
5309 }
5310 
5311 /*
5312 ** Disable the L0S and L1 LINK states
5313 */
5314 static void
5315 em_disable_aspm(struct adapter *adapter)
5316 {
5317 	int		base, reg;
5318 	u16		link_cap,link_ctrl;
5319 	device_t	dev = adapter->dev;
5320 
5321 	switch (adapter->hw.mac.type) {
5322 		case e1000_82573:
5323 		case e1000_82574:
5324 		case e1000_82583:
5325 			break;
5326 		default:
5327 			return;
5328 	}
5329 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5330 		return;
5331 	reg = base + PCIER_LINK_CAP;
5332 	link_cap = pci_read_config(dev, reg, 2);
5333 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5334 		return;
5335 	reg = base + PCIER_LINK_CTL;
5336 	link_ctrl = pci_read_config(dev, reg, 2);
5337 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5338 	pci_write_config(dev, reg, link_ctrl, 2);
5339 	return;
5340 }
5341 
5342 /**********************************************************************
5343  *
5344  *  Update the board statistics counters.
5345  *
5346  **********************************************************************/
5347 static void
5348 em_update_stats_counters(struct adapter *adapter)
5349 {
5350 
5351 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5352 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5353 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5354 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5355 	}
5356 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5357 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5358 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5359 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5360 
5361 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5362 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5363 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5364 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5365 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5366 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5367 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5368 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5369 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5370 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5371 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5372 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5373 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5374 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5375 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5376 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5377 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5378 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5379 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5380 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5381 
5382 	/* For the 64-bit byte counters the low dword must be read first. */
5383 	/* Both registers clear on the read of the high dword */
5384 
5385 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5386 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5387 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5388 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5389 
5390 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5391 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5392 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5393 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5394 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5395 
5396 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5397 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5398 
5399 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5400 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5401 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5402 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5403 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5404 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5405 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5406 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5407 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5408 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5409 
5410 	/* Interrupt Counts */
5411 
5412 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5413 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5414 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5415 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5416 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5417 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5418 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5419 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5420 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5421 
5422 	if (adapter->hw.mac.type >= e1000_82543) {
5423 		adapter->stats.algnerrc +=
5424 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5425 		adapter->stats.rxerrc +=
5426 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5427 		adapter->stats.tncrs +=
5428 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5429 		adapter->stats.cexterr +=
5430 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5431 		adapter->stats.tsctc +=
5432 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5433 		adapter->stats.tsctfc +=
5434 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5435 	}
5436 }
5437 
5438 static uint64_t
5439 em_get_counter(if_t ifp, ift_counter cnt)
5440 {
5441 	struct adapter *adapter;
5442 
5443 	adapter = if_getsoftc(ifp);
5444 
5445 	switch (cnt) {
5446 	case IFCOUNTER_COLLISIONS:
5447 		return (adapter->stats.colc);
5448 	case IFCOUNTER_IERRORS:
5449 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5450 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5451 		    adapter->stats.ruc + adapter->stats.roc +
5452 		    adapter->stats.mpc + adapter->stats.cexterr);
5453 	case IFCOUNTER_OERRORS:
5454 		return (adapter->stats.ecol + adapter->stats.latecol +
5455 		    adapter->watchdog_events);
5456 	default:
5457 		return (if_get_counter_default(ifp, cnt));
5458 	}
5459 }
5460 
5461 /* Export a single 32-bit register via a read-only sysctl. */
5462 static int
5463 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5464 {
5465 	struct adapter *adapter;
5466 	u_int val;
5467 
5468 	adapter = oidp->oid_arg1;
5469 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5470 	return (sysctl_handle_int(oidp, &val, 0, req));
5471 }
5472 
5473 /*
5474  * Add sysctl variables, one per statistic, to the system.
5475  */
5476 static void
5477 em_add_hw_stats(struct adapter *adapter)
5478 {
5479 	device_t dev = adapter->dev;
5480 
5481 	struct tx_ring *txr = adapter->tx_rings;
5482 	struct rx_ring *rxr = adapter->rx_rings;
5483 
5484 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5485 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5486 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5487 	struct e1000_hw_stats *stats = &adapter->stats;
5488 
5489 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5490 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5491 
5492 #define QUEUE_NAME_LEN 32
5493 	char namebuf[QUEUE_NAME_LEN];
5494 
5495 	/* Driver Statistics */
5496 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5497 			CTLFLAG_RD, &adapter->dropped_pkts,
5498 			"Driver dropped packets");
5499 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5500 			CTLFLAG_RD, &adapter->link_irq,
5501 			"Link MSIX IRQ Handled");
5502 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5503 			 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5504 			 "Defragmenting mbuf chain failed");
5505 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5506 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5507 			"Driver tx dma failure in xmit");
5508 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5509 			CTLFLAG_RD, &adapter->rx_overruns,
5510 			"RX overruns");
5511 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5512 			CTLFLAG_RD, &adapter->watchdog_events,
5513 			"Watchdog timeouts");
5514 
5515 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5516 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5517 			em_sysctl_reg_handler, "IU",
5518 			"Device Control Register");
5519 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5520 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5521 			em_sysctl_reg_handler, "IU",
5522 			"Receiver Control Register");
5523 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5524 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5525 			"Flow Control High Watermark");
5526 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5527 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5528 			"Flow Control Low Watermark");
5529 
5530 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5531 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5532 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5533 					    CTLFLAG_RD, NULL, "TX Queue Name");
5534 		queue_list = SYSCTL_CHILDREN(queue_node);
5535 
5536 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5537 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5538 				E1000_TDH(txr->me),
5539 				em_sysctl_reg_handler, "IU",
5540  				"Transmit Descriptor Head");
5541 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5542 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5543 				E1000_TDT(txr->me),
5544 				em_sysctl_reg_handler, "IU",
5545  				"Transmit Descriptor Tail");
5546 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5547 				CTLFLAG_RD, &txr->tx_irq,
5548 				"Queue MSI-X Transmit Interrupts");
5549 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5550 				CTLFLAG_RD, &txr->no_desc_avail,
5551 				"Queue No Descriptor Available");
5552 
5553 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5554 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5555 					    CTLFLAG_RD, NULL, "RX Queue Name");
5556 		queue_list = SYSCTL_CHILDREN(queue_node);
5557 
5558 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5559 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5560 				E1000_RDH(rxr->me),
5561 				em_sysctl_reg_handler, "IU",
5562 				"Receive Descriptor Head");
5563 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5564 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5565 				E1000_RDT(rxr->me),
5566 				em_sysctl_reg_handler, "IU",
5567 				"Receive Descriptor Tail");
5568 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5569 				CTLFLAG_RD, &rxr->rx_irq,
5570 				"Queue MSI-X Receive Interrupts");
5571 	}
5572 
5573 	/* MAC stats get their own sub node */
5574 
5575 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5576 				    CTLFLAG_RD, NULL, "Statistics");
5577 	stat_list = SYSCTL_CHILDREN(stat_node);
5578 
5579 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5580 			CTLFLAG_RD, &stats->ecol,
5581 			"Excessive collisions");
5582 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5583 			CTLFLAG_RD, &stats->scc,
5584 			"Single collisions");
5585 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5586 			CTLFLAG_RD, &stats->mcc,
5587 			"Multiple collisions");
5588 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5589 			CTLFLAG_RD, &stats->latecol,
5590 			"Late collisions");
5591 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5592 			CTLFLAG_RD, &stats->colc,
5593 			"Collision Count");
5594 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5595 			CTLFLAG_RD, &adapter->stats.symerrs,
5596 			"Symbol Errors");
5597 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5598 			CTLFLAG_RD, &adapter->stats.sec,
5599 			"Sequence Errors");
5600 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5601 			CTLFLAG_RD, &adapter->stats.dc,
5602 			"Defer Count");
5603 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5604 			CTLFLAG_RD, &adapter->stats.mpc,
5605 			"Missed Packets");
5606 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5607 			CTLFLAG_RD, &adapter->stats.rnbc,
5608 			"Receive No Buffers");
5609 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5610 			CTLFLAG_RD, &adapter->stats.ruc,
5611 			"Receive Undersize");
5612 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5613 			CTLFLAG_RD, &adapter->stats.rfc,
5614 			"Fragmented Packets Received ");
5615 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5616 			CTLFLAG_RD, &adapter->stats.roc,
5617 			"Oversized Packets Received");
5618 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5619 			CTLFLAG_RD, &adapter->stats.rjc,
5620 			"Recevied Jabber");
5621 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5622 			CTLFLAG_RD, &adapter->stats.rxerrc,
5623 			"Receive Errors");
5624 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5625 			CTLFLAG_RD, &adapter->stats.crcerrs,
5626 			"CRC errors");
5627 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5628 			CTLFLAG_RD, &adapter->stats.algnerrc,
5629 			"Alignment Errors");
5630 	/* On 82575 these are collision counts */
5631 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5632 			CTLFLAG_RD, &adapter->stats.cexterr,
5633 			"Collision/Carrier extension errors");
5634 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5635 			CTLFLAG_RD, &adapter->stats.xonrxc,
5636 			"XON Received");
5637 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5638 			CTLFLAG_RD, &adapter->stats.xontxc,
5639 			"XON Transmitted");
5640 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5641 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5642 			"XOFF Received");
5643 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5644 			CTLFLAG_RD, &adapter->stats.xofftxc,
5645 			"XOFF Transmitted");
5646 
5647 	/* Packet Reception Stats */
5648 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5649 			CTLFLAG_RD, &adapter->stats.tpr,
5650 			"Total Packets Received ");
5651 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5652 			CTLFLAG_RD, &adapter->stats.gprc,
5653 			"Good Packets Received");
5654 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5655 			CTLFLAG_RD, &adapter->stats.bprc,
5656 			"Broadcast Packets Received");
5657 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5658 			CTLFLAG_RD, &adapter->stats.mprc,
5659 			"Multicast Packets Received");
5660 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5661 			CTLFLAG_RD, &adapter->stats.prc64,
5662 			"64 byte frames received ");
5663 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5664 			CTLFLAG_RD, &adapter->stats.prc127,
5665 			"65-127 byte frames received");
5666 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5667 			CTLFLAG_RD, &adapter->stats.prc255,
5668 			"128-255 byte frames received");
5669 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5670 			CTLFLAG_RD, &adapter->stats.prc511,
5671 			"256-511 byte frames received");
5672 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5673 			CTLFLAG_RD, &adapter->stats.prc1023,
5674 			"512-1023 byte frames received");
5675 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5676 			CTLFLAG_RD, &adapter->stats.prc1522,
5677 			"1023-1522 byte frames received");
5678  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5679  			CTLFLAG_RD, &adapter->stats.gorc,
5680  			"Good Octets Received");
5681 
5682 	/* Packet Transmission Stats */
5683  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5684  			CTLFLAG_RD, &adapter->stats.gotc,
5685  			"Good Octets Transmitted");
5686 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5687 			CTLFLAG_RD, &adapter->stats.tpt,
5688 			"Total Packets Transmitted");
5689 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5690 			CTLFLAG_RD, &adapter->stats.gptc,
5691 			"Good Packets Transmitted");
5692 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5693 			CTLFLAG_RD, &adapter->stats.bptc,
5694 			"Broadcast Packets Transmitted");
5695 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5696 			CTLFLAG_RD, &adapter->stats.mptc,
5697 			"Multicast Packets Transmitted");
5698 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5699 			CTLFLAG_RD, &adapter->stats.ptc64,
5700 			"64 byte frames transmitted ");
5701 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5702 			CTLFLAG_RD, &adapter->stats.ptc127,
5703 			"65-127 byte frames transmitted");
5704 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5705 			CTLFLAG_RD, &adapter->stats.ptc255,
5706 			"128-255 byte frames transmitted");
5707 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5708 			CTLFLAG_RD, &adapter->stats.ptc511,
5709 			"256-511 byte frames transmitted");
5710 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5711 			CTLFLAG_RD, &adapter->stats.ptc1023,
5712 			"512-1023 byte frames transmitted");
5713 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5714 			CTLFLAG_RD, &adapter->stats.ptc1522,
5715 			"1024-1522 byte frames transmitted");
5716 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5717 			CTLFLAG_RD, &adapter->stats.tsctc,
5718 			"TSO Contexts Transmitted");
5719 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5720 			CTLFLAG_RD, &adapter->stats.tsctfc,
5721 			"TSO Contexts Failed");
5722 
5723 
5724 	/* Interrupt Stats */
5725 
5726 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5727 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5728 	int_list = SYSCTL_CHILDREN(int_node);
5729 
5730 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5731 			CTLFLAG_RD, &adapter->stats.iac,
5732 			"Interrupt Assertion Count");
5733 
5734 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5735 			CTLFLAG_RD, &adapter->stats.icrxptc,
5736 			"Interrupt Cause Rx Pkt Timer Expire Count");
5737 
5738 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5739 			CTLFLAG_RD, &adapter->stats.icrxatc,
5740 			"Interrupt Cause Rx Abs Timer Expire Count");
5741 
5742 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5743 			CTLFLAG_RD, &adapter->stats.ictxptc,
5744 			"Interrupt Cause Tx Pkt Timer Expire Count");
5745 
5746 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5747 			CTLFLAG_RD, &adapter->stats.ictxatc,
5748 			"Interrupt Cause Tx Abs Timer Expire Count");
5749 
5750 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5751 			CTLFLAG_RD, &adapter->stats.ictxqec,
5752 			"Interrupt Cause Tx Queue Empty Count");
5753 
5754 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5755 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5756 			"Interrupt Cause Tx Queue Min Thresh Count");
5757 
5758 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5759 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5760 			"Interrupt Cause Rx Desc Min Thresh Count");
5761 
5762 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5763 			CTLFLAG_RD, &adapter->stats.icrxoc,
5764 			"Interrupt Cause Receiver Overrun Count");
5765 }
5766 
5767 /**********************************************************************
5768  *
5769  *  This routine provides a way to dump out the adapter eeprom,
5770  *  often a useful debug/service tool. This only dumps the first
5771  *  32 words, stuff that matters is in that extent.
5772  *
5773  **********************************************************************/
5774 static int
5775 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5776 {
5777 	struct adapter *adapter = (struct adapter *)arg1;
5778 	int error;
5779 	int result;
5780 
5781 	result = -1;
5782 	error = sysctl_handle_int(oidp, &result, 0, req);
5783 
5784 	if (error || !req->newptr)
5785 		return (error);
5786 
5787 	/*
5788 	 * This value will cause a hex dump of the
5789 	 * first 32 16-bit words of the EEPROM to
5790 	 * the screen.
5791 	 */
5792 	if (result == 1)
5793 		em_print_nvm_info(adapter);
5794 
5795 	return (error);
5796 }
5797 
5798 static void
5799 em_print_nvm_info(struct adapter *adapter)
5800 {
5801 	u16	eeprom_data;
5802 	int	i, j, row = 0;
5803 
5804 	/* Its a bit crude, but it gets the job done */
5805 	printf("\nInterface EEPROM Dump:\n");
5806 	printf("Offset\n0x0000  ");
5807 	for (i = 0, j = 0; i < 32; i++, j++) {
5808 		if (j == 8) { /* Make the offset block */
5809 			j = 0; ++row;
5810 			printf("\n0x00%x0  ",row);
5811 		}
5812 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5813 		printf("%04x ", eeprom_data);
5814 	}
5815 	printf("\n");
5816 }
5817 
5818 static int
5819 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5820 {
5821 	struct em_int_delay_info *info;
5822 	struct adapter *adapter;
5823 	u32 regval;
5824 	int error, usecs, ticks;
5825 
5826 	info = (struct em_int_delay_info *)arg1;
5827 	usecs = info->value;
5828 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5829 	if (error != 0 || req->newptr == NULL)
5830 		return (error);
5831 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5832 		return (EINVAL);
5833 	info->value = usecs;
5834 	ticks = EM_USECS_TO_TICKS(usecs);
5835 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5836 		ticks *= 4;
5837 
5838 	adapter = info->adapter;
5839 
5840 	EM_CORE_LOCK(adapter);
5841 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5842 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5843 	/* Handle a few special cases. */
5844 	switch (info->offset) {
5845 	case E1000_RDTR:
5846 		break;
5847 	case E1000_TIDV:
5848 		if (ticks == 0) {
5849 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5850 			/* Don't write 0 into the TIDV register. */
5851 			regval++;
5852 		} else
5853 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5854 		break;
5855 	}
5856 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5857 	EM_CORE_UNLOCK(adapter);
5858 	return (0);
5859 }
5860 
5861 static void
5862 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5863 	const char *description, struct em_int_delay_info *info,
5864 	int offset, int value)
5865 {
5866 	info->adapter = adapter;
5867 	info->offset = offset;
5868 	info->value = value;
5869 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5870 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5871 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5872 	    info, 0, em_sysctl_int_delay, "I", description);
5873 }
5874 
5875 static void
5876 em_set_sysctl_value(struct adapter *adapter, const char *name,
5877 	const char *description, int *limit, int value)
5878 {
5879 	*limit = value;
5880 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5881 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5882 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5883 }
5884 
5885 
5886 /*
5887 ** Set flow control using sysctl:
5888 ** Flow control values:
5889 **      0 - off
5890 **      1 - rx pause
5891 **      2 - tx pause
5892 **      3 - full
5893 */
5894 static int
5895 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5896 {
5897         int		error;
5898 	static int	input = 3; /* default is full */
5899         struct adapter	*adapter = (struct adapter *) arg1;
5900 
5901         error = sysctl_handle_int(oidp, &input, 0, req);
5902 
5903         if ((error) || (req->newptr == NULL))
5904                 return (error);
5905 
5906 	if (input == adapter->fc) /* no change? */
5907 		return (error);
5908 
5909         switch (input) {
5910                 case e1000_fc_rx_pause:
5911                 case e1000_fc_tx_pause:
5912                 case e1000_fc_full:
5913                 case e1000_fc_none:
5914                         adapter->hw.fc.requested_mode = input;
5915 			adapter->fc = input;
5916                         break;
5917                 default:
5918 			/* Do nothing */
5919 			return (error);
5920         }
5921 
5922         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5923         e1000_force_mac_fc(&adapter->hw);
5924         return (error);
5925 }
5926 
5927 /*
5928 ** Manage Energy Efficient Ethernet:
5929 ** Control values:
5930 **     0/1 - enabled/disabled
5931 */
5932 static int
5933 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5934 {
5935        struct adapter *adapter = (struct adapter *) arg1;
5936        int             error, value;
5937 
5938        value = adapter->hw.dev_spec.ich8lan.eee_disable;
5939        error = sysctl_handle_int(oidp, &value, 0, req);
5940        if (error || req->newptr == NULL)
5941                return (error);
5942        EM_CORE_LOCK(adapter);
5943        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5944        em_init_locked(adapter);
5945        EM_CORE_UNLOCK(adapter);
5946        return (0);
5947 }
5948 
5949 static int
5950 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5951 {
5952 	struct adapter *adapter;
5953 	int error;
5954 	int result;
5955 
5956 	result = -1;
5957 	error = sysctl_handle_int(oidp, &result, 0, req);
5958 
5959 	if (error || !req->newptr)
5960 		return (error);
5961 
5962 	if (result == 1) {
5963 		adapter = (struct adapter *)arg1;
5964 		em_print_debug_info(adapter);
5965         }
5966 
5967 	return (error);
5968 }
5969 
5970 /*
5971 ** This routine is meant to be fluid, add whatever is
5972 ** needed for debugging a problem.  -jfv
5973 */
5974 static void
5975 em_print_debug_info(struct adapter *adapter)
5976 {
5977 	device_t dev = adapter->dev;
5978 	struct tx_ring *txr = adapter->tx_rings;
5979 	struct rx_ring *rxr = adapter->rx_rings;
5980 
5981 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
5982 		printf("Interface is RUNNING ");
5983 	else
5984 		printf("Interface is NOT RUNNING\n");
5985 
5986 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
5987 		printf("and INACTIVE\n");
5988 	else
5989 		printf("and ACTIVE\n");
5990 
5991 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5992 		device_printf(dev, "TX Queue %d ------\n", i);
5993 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5994 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
5995 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
5996 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
5997 		device_printf(dev, "TX descriptors avail = %d\n",
5998 	    		txr->tx_avail);
5999 		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6000 	    		txr->no_desc_avail);
6001 		device_printf(dev, "RX Queue %d ------\n", i);
6002 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6003 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6004 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6005 		device_printf(dev, "RX discarded packets = %ld\n",
6006 	    		rxr->rx_discarded);
6007 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6008 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6009 	}
6010 }
6011 
6012 #ifdef EM_MULTIQUEUE
6013 /*
6014  * 82574 only:
6015  * Write a new value to the EEPROM increasing the number of MSIX
6016  * vectors from 3 to 5, for proper multiqueue support.
6017  */
6018 static void
6019 em_enable_vectors_82574(struct adapter *adapter)
6020 {
6021 	struct e1000_hw *hw = &adapter->hw;
6022 	device_t dev = adapter->dev;
6023 	u16 edata;
6024 
6025 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6026 	printf("Current cap: %#06x\n", edata);
6027 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6028 		device_printf(dev, "Writing to eeprom: increasing "
6029 		    "reported MSIX vectors from 3 to 5...\n");
6030 		edata &= ~(EM_NVM_MSIX_N_MASK);
6031 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6032 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6033 		e1000_update_nvm_checksum(hw);
6034 		device_printf(dev, "Writing to eeprom: done\n");
6035 	}
6036 }
6037 #endif
6038 
6039 #ifdef DDB
6040 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6041 {
6042 	devclass_t	dc;
6043 	int max_em;
6044 
6045 	dc = devclass_find("em");
6046 	max_em = devclass_get_maxunit(dc);
6047 
6048 	for (int index = 0; index < (max_em - 1); index++) {
6049 		device_t dev;
6050 		dev = devclass_get_device(dc, index);
6051 		if (device_get_driver(dev) == &em_driver) {
6052 			struct adapter *adapter = device_get_softc(dev);
6053 			EM_CORE_LOCK(adapter);
6054 			em_init_locked(adapter);
6055 			EM_CORE_UNLOCK(adapter);
6056 		}
6057 	}
6058 }
6059 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6060 {
6061 	devclass_t	dc;
6062 	int max_em;
6063 
6064 	dc = devclass_find("em");
6065 	max_em = devclass_get_maxunit(dc);
6066 
6067 	for (int index = 0; index < (max_em - 1); index++) {
6068 		device_t dev;
6069 		dev = devclass_get_device(dc, index);
6070 		if (device_get_driver(dev) == &em_driver)
6071 			em_print_debug_info(device_get_softc(dev));
6072 	}
6073 
6074 }
6075 #endif
6076