xref: /freebsd/sys/dev/e1000/if_em.c (revision b5864e6de2f3aa8eb9bb269ec86282598b5201b1)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69 
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77 
78 #include <net/if_types.h>
79 #include <net/if_vlan_var.h>
80 
81 #include <netinet/in_systm.h>
82 #include <netinet/in.h>
83 #include <netinet/if_ether.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip6.h>
86 #include <netinet/tcp.h>
87 #include <netinet/udp.h>
88 
89 #include <machine/in_cksum.h>
90 #include <dev/led/led.h>
91 #include <dev/pci/pcivar.h>
92 #include <dev/pci/pcireg.h>
93 
94 #include "e1000_api.h"
95 #include "e1000_82571.h"
96 #include "if_em.h"
97 
98 /*********************************************************************
99  *  Driver version:
100  *********************************************************************/
101 char em_driver_version[] = "7.6.1-k";
102 
103 /*********************************************************************
104  *  PCI Device ID Table
105  *
106  *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
108  *  Last entry must be all 0s
109  *
110  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
111  *********************************************************************/
112 
/*
 * Table walked by em_probe() to decide whether a PCI device is supported.
 * Every real entry uses vendor 0x8086, PCI_ANY_ID for both sub-IDs (which
 * em_probe() treats as a wildcard) and string index 0, i.e. the single
 * entry of em_strings.  em_probe() stops at the first entry whose
 * vendor_id is 0, so the all-zero terminator must stay last.
 */
static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
205 
206 /*********************************************************************
207  *  Table of branding strings for all supported NICs.
208  *********************************************************************/
209 
/*
 * Indexed by the last ("index") field of an em_vendor_info_array entry;
 * em_probe() combines the selected string with em_driver_version to build
 * the device description.  Only index 0 exists.
 */
static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
213 
214 /*********************************************************************
215  *  Function prototypes
216  *********************************************************************/
/* Device interface methods, wired up in em_methods below. */
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
/* Transmit entry points: which set is declared depends on EM_MULTIQUEUE. */
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t,
		    struct tx_ring *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static uint64_t	em_get_counter(if_t, ift_counter);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);
static void	em_flush_desc_rings(struct adapter *);

/* Transmit ring setup and teardown. */
static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

/* Receive ring setup and teardown. */
static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
/* em_fixup_rx is only declared when __NO_STRICT_ALIGNMENT is not defined. */
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
		    const struct em_rxbuffer *rxbuf);
static void	em_receive_checksum(uint32_t status, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int 	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void	em_enable_vectors_82574(struct adapter *);
#endif

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */
334 
335 /*********************************************************************
336  *  FreeBSD Device Interface Entry Points
337  *********************************************************************/
338 
/*
 * Method table handed to the bus framework through em_driver: maps each
 * device interface operation onto the driver function implementing it.
 */
static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};
349 
/*
 * Driver description: name "em", the method table above, and the size of
 * the per-device softc (struct adapter, as returned by device_get_softc()
 * in em_attach/em_detach).
 */
static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};
353 
/*
 * Register the driver on the pci bus and declare the modules it depends
 * on.  The netmap dependency is only declared when DEV_NETMAP is defined.
 */
devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
361 
362 /*********************************************************************
363  *  Tunable default values.
364  *********************************************************************/
365 
/*
 * Conversions between hardware delay ticks and microseconds.  The
 * formulas treat one tick as 1.024 usec and round to the nearest unit.
 */
#define EM_TICKS_TO_USECS(ticks)	(((ticks) * 1024 + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	(((usecs) * 1000 + 512) / 1024)

/* Interrupt throttling: nanoseconds per second / (rate * 256). */
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000 / (256 * MAX_INTS_PER_SEC))

#define M_TSO_LEN			66
#define TSO_WORKAROUND	4

/* Let common code compile on systems that do not define CSUM_TSO */
#if !defined(CSUM_TSO)
#define CSUM_TSO	0
#endif
379 
/*
 * Driver-wide knobs, all published under the hw.em sysctl node with
 * CTLFLAG_RDTUN.  Several of them are only defaults: em_attach() copies
 * them into per-adapter settings.
 */
static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

/*
 * Interrupt delay defaults, in usecs.  em_attach() passes them to
 * em_add_int_delay_sysctl() as the initial per-adapter values.
 */
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

/*
 * Requested descriptor counts.  em_attach() validates them (alignment and
 * EM_MIN_xXD/EM_MAX_xXD range) and falls back to the defaults if invalid.
 */
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store last used CPU when binding queues
** to CPUs.  The original comment named igb_allocate_msix, which looks
** like a leftover from the igb driver; presumably em_allocate_msix is
** meant -- TODO confirm.  Initialised to -1 here; the original comment
** says it starts at CPU_FIRST and increments when a queue is bound to
** a cpu.
*/
static int em_last_bind_cpu = -1;

/*
 * How many packets rxeof tries to clean at a time.  Used by em_attach()
 * as the default for the per-adapter rx_processing_limit sysctl.
 */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/*
 * Energy efficient ethernet - default to OFF.
 * NOTE(review): em_attach() copies this value into
 * hw->dev_spec.ich8lan.eee_disable, so a non-zero value DISABLES EEE
 * even though the sysctl description below reads "Enable".
 */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;
449 
450 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
451 #include <dev/netmap/if_em_netmap.h>
452 #endif /* DEV_NETMAP */
453 
454 /*********************************************************************
455  *  Device identification routine
456  *
457  *  em_probe determines if the driver should be loaded on
458  *  adapter based on PCI vendor/device id of the adapter.
459  *
460  *  return BUS_PROBE_DEFAULT on success, positive on failure
461  *********************************************************************/
462 
463 static int
464 em_probe(device_t dev)
465 {
466 	char		adapter_name[60];
467 	uint16_t	pci_vendor_id = 0;
468 	uint16_t	pci_device_id = 0;
469 	uint16_t	pci_subvendor_id = 0;
470 	uint16_t	pci_subdevice_id = 0;
471 	em_vendor_info_t *ent;
472 
473 	INIT_DEBUGOUT("em_probe: begin");
474 
475 	pci_vendor_id = pci_get_vendor(dev);
476 	if (pci_vendor_id != EM_VENDOR_ID)
477 		return (ENXIO);
478 
479 	pci_device_id = pci_get_device(dev);
480 	pci_subvendor_id = pci_get_subvendor(dev);
481 	pci_subdevice_id = pci_get_subdevice(dev);
482 
483 	ent = em_vendor_info_array;
484 	while (ent->vendor_id != 0) {
485 		if ((pci_vendor_id == ent->vendor_id) &&
486 		    (pci_device_id == ent->device_id) &&
487 
488 		    ((pci_subvendor_id == ent->subvendor_id) ||
489 		    (ent->subvendor_id == PCI_ANY_ID)) &&
490 
491 		    ((pci_subdevice_id == ent->subdevice_id) ||
492 		    (ent->subdevice_id == PCI_ANY_ID))) {
493 			sprintf(adapter_name, "%s %s",
494 				em_strings[ent->index],
495 				em_driver_version);
496 			device_set_desc_copy(dev, adapter_name);
497 			return (BUS_PROBE_DEFAULT);
498 		}
499 		ent++;
500 	}
501 
502 	return (ENXIO);
503 }
504 
505 /*********************************************************************
506  *  Device initialization routine
507  *
508  *  The attach entry point is called when the driver is being loaded.
509  *  This routine identifies the type of hardware, allocates all resources
510  *  and initializes the hardware.
511  *
512  *  return 0 on success, positive on failure
513  *********************************************************************/
514 
515 static int
516 em_attach(device_t dev)
517 {
518 	struct adapter	*adapter;
519 	struct e1000_hw	*hw;
520 	int		error = 0;
521 
522 	INIT_DEBUGOUT("em_attach: begin");
523 
524 	if (resource_disabled("em", device_get_unit(dev))) {
525 		device_printf(dev, "Disabled by device hint\n");
526 		return (ENXIO);
527 	}
528 
529 	adapter = device_get_softc(dev);
530 	adapter->dev = adapter->osdep.dev = dev;
531 	hw = &adapter->hw;
532 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
533 
534 	/* SYSCTL stuff */
535 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
536 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
537 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
538 	    em_sysctl_nvm_info, "I", "NVM Information");
539 
540 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
541 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
542 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
543 	    em_sysctl_debug_info, "I", "Debug Information");
544 
545 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
546 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
547 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
548 	    em_set_flowcntl, "I", "Flow Control");
549 
550 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
551 
552 	/* Determine hardware and mac info */
553 	em_identify_hardware(adapter);
554 
555 	/* Setup PCI resources */
556 	if (em_allocate_pci_resources(adapter)) {
557 		device_printf(dev, "Allocation of PCI resources failed\n");
558 		error = ENXIO;
559 		goto err_pci;
560 	}
561 
562 	/*
563 	** For ICH8 and family we need to
564 	** map the flash memory, and this
565 	** must happen after the MAC is
566 	** identified
567 	*/
568 	if ((hw->mac.type == e1000_ich8lan) ||
569 	    (hw->mac.type == e1000_ich9lan) ||
570 	    (hw->mac.type == e1000_ich10lan) ||
571 	    (hw->mac.type == e1000_pchlan) ||
572 	    (hw->mac.type == e1000_pch2lan) ||
573 	    (hw->mac.type == e1000_pch_lpt)) {
574 		int rid = EM_BAR_TYPE_FLASH;
575 		adapter->flash = bus_alloc_resource_any(dev,
576 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
577 		if (adapter->flash == NULL) {
578 			device_printf(dev, "Mapping of Flash failed\n");
579 			error = ENXIO;
580 			goto err_pci;
581 		}
582 		/* This is used in the shared code */
583 		hw->flash_address = (u8 *)adapter->flash;
584 		adapter->osdep.flash_bus_space_tag =
585 		    rman_get_bustag(adapter->flash);
586 		adapter->osdep.flash_bus_space_handle =
587 		    rman_get_bushandle(adapter->flash);
588 	}
589 	/*
590 	** In the new SPT device flash is not  a
591 	** separate BAR, rather it is also in BAR0,
592 	** so use the same tag and an offset handle for the
593 	** FLASH read/write macros in the shared code.
594 	*/
595 	else if (hw->mac.type == e1000_pch_spt) {
596 		adapter->osdep.flash_bus_space_tag =
597 		    adapter->osdep.mem_bus_space_tag;
598 		adapter->osdep.flash_bus_space_handle =
599 		    adapter->osdep.mem_bus_space_handle
600 		    + E1000_FLASH_BASE_ADDR;
601 	}
602 
603 	/* Do Shared Code initialization */
604 	error = e1000_setup_init_funcs(hw, TRUE);
605 	if (error) {
606 		device_printf(dev, "Setup of Shared code failed, error %d\n",
607 		    error);
608 		error = ENXIO;
609 		goto err_pci;
610 	}
611 
612 	/*
613 	 * Setup MSI/X or MSI if PCI Express
614 	 */
615 	adapter->msix = em_setup_msix(adapter);
616 
617 	e1000_get_bus_info(hw);
618 
619 	/* Set up some sysctls for the tunable interrupt delays */
620 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
621 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
622 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
623 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
624 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
625 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
626 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
627 	    "receive interrupt delay limit in usecs",
628 	    &adapter->rx_abs_int_delay,
629 	    E1000_REGISTER(hw, E1000_RADV),
630 	    em_rx_abs_int_delay_dflt);
631 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
632 	    "transmit interrupt delay limit in usecs",
633 	    &adapter->tx_abs_int_delay,
634 	    E1000_REGISTER(hw, E1000_TADV),
635 	    em_tx_abs_int_delay_dflt);
636 	em_add_int_delay_sysctl(adapter, "itr",
637 	    "interrupt delay limit in usecs/4",
638 	    &adapter->tx_itr,
639 	    E1000_REGISTER(hw, E1000_ITR),
640 	    DEFAULT_ITR);
641 
642 	/* Sysctl for limiting the amount of work done in the taskqueue */
643 	em_set_sysctl_value(adapter, "rx_processing_limit",
644 	    "max number of rx packets to process", &adapter->rx_process_limit,
645 	    em_rx_process_limit);
646 
647 	/*
648 	 * Validate number of transmit and receive descriptors. It
649 	 * must not exceed hardware maximum, and must be multiple
650 	 * of E1000_DBA_ALIGN.
651 	 */
652 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
653 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
654 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
655 		    EM_DEFAULT_TXD, em_txd);
656 		adapter->num_tx_desc = EM_DEFAULT_TXD;
657 	} else
658 		adapter->num_tx_desc = em_txd;
659 
660 	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
661 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
662 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
663 		    EM_DEFAULT_RXD, em_rxd);
664 		adapter->num_rx_desc = EM_DEFAULT_RXD;
665 	} else
666 		adapter->num_rx_desc = em_rxd;
667 
668 	hw->mac.autoneg = DO_AUTO_NEG;
669 	hw->phy.autoneg_wait_to_complete = FALSE;
670 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
671 
672 	/* Copper options */
673 	if (hw->phy.media_type == e1000_media_type_copper) {
674 		hw->phy.mdix = AUTO_ALL_MODES;
675 		hw->phy.disable_polarity_correction = FALSE;
676 		hw->phy.ms_type = EM_MASTER_SLAVE;
677 	}
678 
679 	/*
680 	 * Set the frame limits assuming
681 	 * standard ethernet sized frames.
682 	 */
683 	adapter->hw.mac.max_frame_size =
684 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
685 
686 	/*
687 	 * This controls when hardware reports transmit completion
688 	 * status.
689 	 */
690 	hw->mac.report_tx_early = 1;
691 
692 	/*
693 	** Get queue/ring memory
694 	*/
695 	if (em_allocate_queues(adapter)) {
696 		error = ENOMEM;
697 		goto err_pci;
698 	}
699 
700 	/* Allocate multicast array memory. */
701 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
702 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
703 	if (adapter->mta == NULL) {
704 		device_printf(dev, "Can not allocate multicast setup array\n");
705 		error = ENOMEM;
706 		goto err_late;
707 	}
708 
709 	/* Check SOL/IDER usage */
710 	if (e1000_check_reset_block(hw))
711 		device_printf(dev, "PHY reset is blocked"
712 		    " due to SOL/IDER session.\n");
713 
714 	/* Sysctl for setting Energy Efficient Ethernet */
715 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
716 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
717 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
718 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
719 	    adapter, 0, em_sysctl_eee, "I",
720 	    "Disable Energy Efficient Ethernet");
721 
722 	/*
723 	** Start from a known state, this is
724 	** important in reading the nvm and
725 	** mac from that.
726 	*/
727 	e1000_reset_hw(hw);
728 
729 
730 	/* Make sure we have a good EEPROM before we read from it */
731 	if (e1000_validate_nvm_checksum(hw) < 0) {
732 		/*
733 		** Some PCI-E parts fail the first check due to
734 		** the link being in sleep state, call it again,
735 		** if it fails a second time its a real issue.
736 		*/
737 		if (e1000_validate_nvm_checksum(hw) < 0) {
738 			device_printf(dev,
739 			    "The EEPROM Checksum Is Not Valid\n");
740 			error = EIO;
741 			goto err_late;
742 		}
743 	}
744 
745 	/* Copy the permanent MAC address out of the EEPROM */
746 	if (e1000_read_mac_addr(hw) < 0) {
747 		device_printf(dev, "EEPROM read error while reading MAC"
748 		    " address\n");
749 		error = EIO;
750 		goto err_late;
751 	}
752 
753 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
754 		device_printf(dev, "Invalid MAC address\n");
755 		error = EIO;
756 		goto err_late;
757 	}
758 
759 	/* Disable ULP support */
760 	e1000_disable_ulp_lpt_lp(hw, TRUE);
761 
762 	/*
763 	**  Do interrupt configuration
764 	*/
765 	if (adapter->msix > 1) /* Do MSIX */
766 		error = em_allocate_msix(adapter);
767 	else  /* MSI or Legacy */
768 		error = em_allocate_legacy(adapter);
769 	if (error)
770 		goto err_late;
771 
772 	/*
773 	 * Get Wake-on-Lan and Management info for later use
774 	 */
775 	em_get_wakeup(dev);
776 
777 	/* Setup OS specific network interface */
778 	if (em_setup_interface(dev, adapter) != 0)
779 		goto err_late;
780 
781 	em_reset(adapter);
782 
783 	/* Initialize statistics */
784 	em_update_stats_counters(adapter);
785 
786 	hw->mac.get_link_status = 1;
787 	em_update_link_status(adapter);
788 
789 	/* Register for VLAN events */
790 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
791 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
792 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
793 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
794 
795 	em_add_hw_stats(adapter);
796 
797 	/* Non-AMT based hardware can now take control from firmware */
798 	if (adapter->has_manage && !adapter->has_amt)
799 		em_get_hw_control(adapter);
800 
801 	/* Tell the stack that the interface is not active */
802 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
803 
804 	adapter->led_dev = led_create(em_led_func, adapter,
805 	    device_get_nameunit(dev));
806 #ifdef DEV_NETMAP
807 	em_netmap_attach(adapter);
808 #endif /* DEV_NETMAP */
809 
810 	INIT_DEBUGOUT("em_attach: end");
811 
812 	return (0);
813 
814 err_late:
815 	em_free_transmit_structures(adapter);
816 	em_free_receive_structures(adapter);
817 	em_release_hw_control(adapter);
818 	if (adapter->ifp != (void *)NULL)
819 		if_free(adapter->ifp);
820 err_pci:
821 	em_free_pci_resources(adapter);
822 	free(adapter->mta, M_DEVBUF);
823 	EM_CORE_LOCK_DESTROY(adapter);
824 
825 	return (error);
826 }
827 
828 /*********************************************************************
829  *  Device removal routine
830  *
831  *  The detach entry point is called when the driver is being removed.
832  *  This routine stops the adapter and deallocates all the resources
833  *  that were allocated for driver operation.
834  *
835  *  return 0 on success, positive on failure
836  *********************************************************************/
837 
838 static int
839 em_detach(device_t dev)
840 {
841 	struct adapter	*adapter = device_get_softc(dev);
842 	if_t ifp = adapter->ifp;
843 
844 	INIT_DEBUGOUT("em_detach: begin");
845 
846 	/* Make sure VLANS are not using driver */
847 	if (if_vlantrunkinuse(ifp)) {
848 		device_printf(dev,"Vlan in use, detach first\n");
849 		return (EBUSY);
850 	}
851 
852 #ifdef DEVICE_POLLING
853 	if (if_getcapenable(ifp) & IFCAP_POLLING)
854 		ether_poll_deregister(ifp);
855 #endif
856 
857 	if (adapter->led_dev != NULL)
858 		led_destroy(adapter->led_dev);
859 
860 	EM_CORE_LOCK(adapter);
861 	adapter->in_detach = 1;
862 	em_stop(adapter);
863 	EM_CORE_UNLOCK(adapter);
864 	EM_CORE_LOCK_DESTROY(adapter);
865 
866 	e1000_phy_hw_reset(&adapter->hw);
867 
868 	em_release_manageability(adapter);
869 	em_release_hw_control(adapter);
870 
871 	/* Unregister VLAN events */
872 	if (adapter->vlan_attach != NULL)
873 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
874 	if (adapter->vlan_detach != NULL)
875 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
876 
877 	ether_ifdetach(adapter->ifp);
878 	callout_drain(&adapter->timer);
879 
880 #ifdef DEV_NETMAP
881 	netmap_detach(ifp);
882 #endif /* DEV_NETMAP */
883 
884 	em_free_pci_resources(adapter);
885 	bus_generic_detach(dev);
886 	if_free(ifp);
887 
888 	em_free_transmit_structures(adapter);
889 	em_free_receive_structures(adapter);
890 
891 	em_release_hw_control(adapter);
892 	free(adapter->mta, M_DEVBUF);
893 
894 	return (0);
895 }
896 
897 /*********************************************************************
898  *
899  *  Shutdown entry point
900  *
901  **********************************************************************/
902 
903 static int
904 em_shutdown(device_t dev)
905 {
906 	return em_suspend(dev);
907 }
908 
909 /*
910  * Suspend/resume device methods.
911  */
912 static int
913 em_suspend(device_t dev)
914 {
915 	struct adapter *adapter = device_get_softc(dev);
916 
917 	EM_CORE_LOCK(adapter);
918 
919         em_release_manageability(adapter);
920 	em_release_hw_control(adapter);
921 	em_enable_wakeup(dev);
922 
923 	EM_CORE_UNLOCK(adapter);
924 
925 	return bus_generic_suspend(dev);
926 }
927 
928 static int
929 em_resume(device_t dev)
930 {
931 	struct adapter *adapter = device_get_softc(dev);
932 	struct tx_ring	*txr = adapter->tx_rings;
933 	if_t ifp = adapter->ifp;
934 
935 	EM_CORE_LOCK(adapter);
936 	if (adapter->hw.mac.type == e1000_pch2lan)
937 		e1000_resume_workarounds_pchlan(&adapter->hw);
938 	em_init_locked(adapter);
939 	em_init_manageability(adapter);
940 
941 	if ((if_getflags(ifp) & IFF_UP) &&
942 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
943 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
944 			EM_TX_LOCK(txr);
945 #ifdef EM_MULTIQUEUE
946 			if (!drbr_empty(ifp, txr->br))
947 				em_mq_start_locked(ifp, txr);
948 #else
949 			if (!if_sendq_empty(ifp))
950 				em_start_locked(ifp, txr);
951 #endif
952 			EM_TX_UNLOCK(txr);
953 		}
954 	}
955 	EM_CORE_UNLOCK(adapter);
956 
957 	return bus_generic_resume(dev);
958 }
959 
960 
961 #ifndef EM_MULTIQUEUE
962 static void
963 em_start_locked(if_t ifp, struct tx_ring *txr)
964 {
965 	struct adapter	*adapter = if_getsoftc(ifp);
966 	struct mbuf	*m_head;
967 
968 	EM_TX_LOCK_ASSERT(txr);
969 
970 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
971 	    IFF_DRV_RUNNING)
972 		return;
973 
974 	if (!adapter->link_active)
975 		return;
976 
977 	while (!if_sendq_empty(ifp)) {
978         	/* Call cleanup if number of TX descriptors low */
979 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
980 			em_txeof(txr);
981 		if (txr->tx_avail < EM_MAX_SCATTER) {
982 			if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
983 			break;
984 		}
985 		m_head = if_dequeue(ifp);
986 		if (m_head == NULL)
987 			break;
988 		/*
989 		 *  Encapsulation can modify our pointer, and or make it
990 		 *  NULL on failure.  In that event, we can't requeue.
991 		 */
992 		if (em_xmit(txr, &m_head)) {
993 			if (m_head == NULL)
994 				break;
995 			if_sendq_prepend(ifp, m_head);
996 			break;
997 		}
998 
999 		/* Mark the queue as having work */
1000 		if (txr->busy == EM_TX_IDLE)
1001 			txr->busy = EM_TX_BUSY;
1002 
1003 		/* Send a copy of the frame to the BPF listener */
1004 		ETHER_BPF_MTAP(ifp, m_head);
1005 
1006 	}
1007 
1008 	return;
1009 }
1010 
1011 static void
1012 em_start(if_t ifp)
1013 {
1014 	struct adapter	*adapter = if_getsoftc(ifp);
1015 	struct tx_ring	*txr = adapter->tx_rings;
1016 
1017 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1018 		EM_TX_LOCK(txr);
1019 		em_start_locked(ifp, txr);
1020 		EM_TX_UNLOCK(txr);
1021 	}
1022 	return;
1023 }
1024 #else /* EM_MULTIQUEUE */
1025 /*********************************************************************
1026  *  Multiqueue Transmit routines
1027  *
1028  *  em_mq_start is called by the stack to initiate a transmit.
1029  *  however, if busy the driver can queue the request rather
1030  *  than do an immediate send. It is this that is an advantage
1031  *  in this driver, rather than also having multiple tx queues.
1032  **********************************************************************/
1033 /*
1034 ** Multiqueue capable stack interface
1035 */
1036 static int
1037 em_mq_start(if_t ifp, struct mbuf *m)
1038 {
1039 	struct adapter	*adapter = if_getsoftc(ifp);
1040 	struct tx_ring	*txr = adapter->tx_rings;
1041 	unsigned int	i, error;
1042 
1043 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1044 		i = m->m_pkthdr.flowid % adapter->num_queues;
1045 	else
1046 		i = curcpu % adapter->num_queues;
1047 
1048 	txr = &adapter->tx_rings[i];
1049 
1050 	error = drbr_enqueue(ifp, txr->br, m);
1051 	if (error)
1052 		return (error);
1053 
1054 	if (EM_TX_TRYLOCK(txr)) {
1055 		em_mq_start_locked(ifp, txr);
1056 		EM_TX_UNLOCK(txr);
1057 	} else
1058 		taskqueue_enqueue(txr->tq, &txr->tx_task);
1059 
1060 	return (0);
1061 }
1062 
1063 static int
1064 em_mq_start_locked(if_t ifp, struct tx_ring *txr)
1065 {
1066 	struct adapter  *adapter = txr->adapter;
1067         struct mbuf     *next;
1068         int             err = 0, enq = 0;
1069 
1070 	EM_TX_LOCK_ASSERT(txr);
1071 
1072 	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
1073 	    adapter->link_active == 0) {
1074 		return (ENETDOWN);
1075 	}
1076 
1077 	/* Process the queue */
1078 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1079 		if ((err = em_xmit(txr, &next)) != 0) {
1080 			if (next == NULL) {
1081 				/* It was freed, move forward */
1082 				drbr_advance(ifp, txr->br);
1083 			} else {
1084 				/*
1085 				 * Still have one left, it may not be
1086 				 * the same since the transmit function
1087 				 * may have changed it.
1088 				 */
1089 				drbr_putback(ifp, txr->br, next);
1090 			}
1091 			break;
1092 		}
1093 		drbr_advance(ifp, txr->br);
1094 		enq++;
1095 		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
1096 		if (next->m_flags & M_MCAST)
1097 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1098 		ETHER_BPF_MTAP(ifp, next);
1099 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
1100                         break;
1101 	}
1102 
1103 	/* Mark the queue as having work */
1104 	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1105 		txr->busy = EM_TX_BUSY;
1106 
1107 	if (txr->tx_avail < EM_MAX_SCATTER)
1108 		em_txeof(txr);
1109 	if (txr->tx_avail < EM_MAX_SCATTER) {
1110 		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
1111 	}
1112 	return (err);
1113 }
1114 
1115 /*
1116 ** Flush all ring buffers
1117 */
1118 static void
1119 em_qflush(if_t ifp)
1120 {
1121 	struct adapter  *adapter = if_getsoftc(ifp);
1122 	struct tx_ring  *txr = adapter->tx_rings;
1123 	struct mbuf     *m;
1124 
1125 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1126 		EM_TX_LOCK(txr);
1127 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1128 			m_freem(m);
1129 		EM_TX_UNLOCK(txr);
1130 	}
1131 	if_qflush(ifp);
1132 }
1133 #endif /* EM_MULTIQUEUE */
1134 
1135 /*********************************************************************
1136  *  Ioctl entry point
1137  *
1138  *  em_ioctl is called when the user wants to configure the
1139  *  interface.
1140  *
1141  *  return 0 on success, positive on failure
1142  **********************************************************************/
1143 
/*
 * Dispatch an interface ioctl.
 *
 *   ifp     - interface the request is for
 *   command - SIOC* request code
 *   data    - request argument; viewed as a struct ifreq for most
 *             commands and as a struct ifaddr for SIOCSIFADDR
 *
 * Returns 0 or an errno value.  Commands not handled here are passed
 * to ether_ioctl().
 */
static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	/* Once detach has started every request is answered with 0 */
	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
		/* Only IPv4/IPv6 address assignments take the fast path */
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			/* Mark the interface up; init only if not yet running */
			if_setflagbits(ifp,IFF_UP,0);
			if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		/* The largest supported frame depends on the MAC type */
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_pch_spt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		/* Reject an MTU whose frame (header + CRC) would not fit */
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		/* Record the new MTU and frame size; re-init if running */
		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
			em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				/*
				 * Already running: reprogram promiscuous
				 * mode only when PROMISC/ALLMULTI changed
				 * since the flags were last recorded.
				 */
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		/* Remember the flags for the next change comparison */
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			/*
			 * Update the multicast setup with interrupts
			 * disabled; they are re-enabled afterwards unless
			 * polling is enabled.
			 */
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			/*
			 * NOTE(review): error is still 0 here, so the caller
			 * sees success although nothing changed - confirm
			 * this is intended.
			 */
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		/* mask holds the capability bits the request wants flipped */
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				/* Turning polling on: register, then mask interrupts */
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		/* Each of these toggles requests a re-init (done below) */
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp,IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp,IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		/* WOL bits are toggled without requesting a re-init */
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		/* Everything else is left to the generic Ethernet handler */
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
1344 
1345 
/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  No value is returned; if the receive structures cannot be set up
 *  the adapter is stopped instead.
 **********************************************************************/
1356 
1357 static void
1358 em_init_locked(struct adapter *adapter)
1359 {
1360 	if_t ifp = adapter->ifp;
1361 	device_t	dev = adapter->dev;
1362 
1363 	INIT_DEBUGOUT("em_init: begin");
1364 
1365 	EM_CORE_LOCK_ASSERT(adapter);
1366 
1367 	em_disable_intr(adapter);
1368 	callout_stop(&adapter->timer);
1369 
1370 	/* Get the latest mac address, User can use a LAA */
1371         bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1372               ETHER_ADDR_LEN);
1373 
1374 	/* Put the address into the Receive Address Array */
1375 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1376 
1377 	/*
1378 	 * With the 82571 adapter, RAR[0] may be overwritten
1379 	 * when the other port is reset, we make a duplicate
1380 	 * in RAR[14] for that eventuality, this assures
1381 	 * the interface continues to function.
1382 	 */
1383 	if (adapter->hw.mac.type == e1000_82571) {
1384 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1385 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1386 		    E1000_RAR_ENTRIES - 1);
1387 	}
1388 
1389 	/* Initialize the hardware */
1390 	em_reset(adapter);
1391 	em_update_link_status(adapter);
1392 
1393 	/* Setup VLAN support, basic and offload if available */
1394 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1395 
1396 	/* Set hardware offload abilities */
1397 	if_clearhwassist(ifp);
1398 	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1399 		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1400 	/*
1401 	** There have proven to be problems with TSO when not
1402 	** at full gigabit speed, so disable the assist automatically
1403 	** when at lower speeds.  -jfv
1404 	*/
1405 	if (if_getcapenable(ifp) & IFCAP_TSO4) {
1406 		if (adapter->link_speed == SPEED_1000)
1407 			if_sethwassistbits(ifp, CSUM_TSO, 0);
1408 	}
1409 
1410 	/* Configure for OS presence */
1411 	em_init_manageability(adapter);
1412 
1413 	/* Prepare transmit descriptors and buffers */
1414 	em_setup_transmit_structures(adapter);
1415 	em_initialize_transmit_unit(adapter);
1416 
1417 	/* Setup Multicast table */
1418 	em_set_multi(adapter);
1419 
1420 	/*
1421 	** Figure out the desired mbuf
1422 	** pool for doing jumbos
1423 	*/
1424 	if (adapter->hw.mac.max_frame_size <= 2048)
1425 		adapter->rx_mbuf_sz = MCLBYTES;
1426 	else if (adapter->hw.mac.max_frame_size <= 4096)
1427 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1428 	else
1429 		adapter->rx_mbuf_sz = MJUM9BYTES;
1430 
1431 	/* Prepare receive descriptors and buffers */
1432 	if (em_setup_receive_structures(adapter)) {
1433 		device_printf(dev, "Could not setup receive structures\n");
1434 		em_stop(adapter);
1435 		return;
1436 	}
1437 	em_initialize_receive_unit(adapter);
1438 
1439 	/* Use real VLAN Filter support? */
1440 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1441 		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1442 			/* Use real VLAN Filter support */
1443 			em_setup_vlan_hw_support(adapter);
1444 		else {
1445 			u32 ctrl;
1446 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1447 			ctrl |= E1000_CTRL_VME;
1448 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1449 		}
1450 	}
1451 
1452 	/* Don't lose promiscuous settings */
1453 	em_set_promisc(adapter);
1454 
1455 	/* Set the interface as ACTIVE */
1456 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1457 
1458 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1459 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1460 
1461 	/* MSI/X configuration for 82574 */
1462 	if (adapter->hw.mac.type == e1000_82574) {
1463 		int tmp;
1464 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1465 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1466 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1467 		/* Set the IVAR - interrupt vector routing. */
1468 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1469 	}
1470 
1471 #ifdef DEVICE_POLLING
1472 	/*
1473 	 * Only enable interrupts if we are not polling, make sure
1474 	 * they are off otherwise.
1475 	 */
1476 	if (if_getcapenable(ifp) & IFCAP_POLLING)
1477 		em_disable_intr(adapter);
1478 	else
1479 #endif /* DEVICE_POLLING */
1480 		em_enable_intr(adapter);
1481 
1482 	/* AMT based hardware can now take control from firmware */
1483 	if (adapter->has_manage && adapter->has_amt)
1484 		em_get_hw_control(adapter);
1485 }
1486 
static void
em_init(void *arg)
{
	struct adapter	*adapter = (struct adapter *)arg;

	/* Locked wrapper: run em_init_locked with the core lock held */
	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}
1496 
1497 
1498 #ifdef DEVICE_POLLING
1499 /*********************************************************************
1500  *
1501  *  Legacy polling routine: note this only works with single queue
1502  *
1503  *********************************************************************/
1504 static int
1505 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1506 {
1507 	struct adapter *adapter = if_getsoftc(ifp);
1508 	struct tx_ring	*txr = adapter->tx_rings;
1509 	struct rx_ring	*rxr = adapter->rx_rings;
1510 	u32		reg_icr;
1511 	int		rx_done;
1512 
1513 	EM_CORE_LOCK(adapter);
1514 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1515 		EM_CORE_UNLOCK(adapter);
1516 		return (0);
1517 	}
1518 
1519 	if (cmd == POLL_AND_CHECK_STATUS) {
1520 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1521 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1522 			callout_stop(&adapter->timer);
1523 			adapter->hw.mac.get_link_status = 1;
1524 			em_update_link_status(adapter);
1525 			callout_reset(&adapter->timer, hz,
1526 			    em_local_timer, adapter);
1527 		}
1528 	}
1529 	EM_CORE_UNLOCK(adapter);
1530 
1531 	em_rxeof(rxr, count, &rx_done);
1532 
1533 	EM_TX_LOCK(txr);
1534 	em_txeof(txr);
1535 #ifdef EM_MULTIQUEUE
1536 	if (!drbr_empty(ifp, txr->br))
1537 		em_mq_start_locked(ifp, txr);
1538 #else
1539 	if (!if_sendq_empty(ifp))
1540 		em_start_locked(ifp, txr);
1541 #endif
1542 	EM_TX_UNLOCK(txr);
1543 
1544 	return (rx_done);
1545 }
1546 #endif /* DEVICE_POLLING */
1547 
1548 
1549 /*********************************************************************
1550  *
1551  *  Fast Legacy/MSI Combined Interrupt Service routine
1552  *
1553  *********************************************************************/
1554 static int
1555 em_irq_fast(void *arg)
1556 {
1557 	struct adapter	*adapter = arg;
1558 	if_t ifp;
1559 	u32		reg_icr;
1560 
1561 	ifp = adapter->ifp;
1562 
1563 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1564 
1565 	/* Hot eject?  */
1566 	if (reg_icr == 0xffffffff)
1567 		return FILTER_STRAY;
1568 
1569 	/* Definitely not our interrupt.  */
1570 	if (reg_icr == 0x0)
1571 		return FILTER_STRAY;
1572 
1573 	/*
1574 	 * Starting with the 82571 chip, bit 31 should be used to
1575 	 * determine whether the interrupt belongs to us.
1576 	 */
1577 	if (adapter->hw.mac.type >= e1000_82571 &&
1578 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1579 		return FILTER_STRAY;
1580 
1581 	em_disable_intr(adapter);
1582 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1583 
1584 	/* Link status change */
1585 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1586 		adapter->hw.mac.get_link_status = 1;
1587 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1588 	}
1589 
1590 	if (reg_icr & E1000_ICR_RXO)
1591 		adapter->rx_overruns++;
1592 	return FILTER_HANDLED;
1593 }
1594 
1595 /* Combined RX/TX handler, used by Legacy and MSI */
1596 static void
1597 em_handle_que(void *context, int pending)
1598 {
1599 	struct adapter	*adapter = context;
1600 	if_t ifp = adapter->ifp;
1601 	struct tx_ring	*txr = adapter->tx_rings;
1602 	struct rx_ring	*rxr = adapter->rx_rings;
1603 
1604 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1605 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1606 
1607 		EM_TX_LOCK(txr);
1608 		em_txeof(txr);
1609 #ifdef EM_MULTIQUEUE
1610 		if (!drbr_empty(ifp, txr->br))
1611 			em_mq_start_locked(ifp, txr);
1612 #else
1613 		if (!if_sendq_empty(ifp))
1614 			em_start_locked(ifp, txr);
1615 #endif
1616 		EM_TX_UNLOCK(txr);
1617 		if (more) {
1618 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1619 			return;
1620 		}
1621 	}
1622 
1623 	em_enable_intr(adapter);
1624 	return;
1625 }
1626 
1627 
1628 /*********************************************************************
1629  *
1630  *  MSIX Interrupt Service Routines
1631  *
1632  **********************************************************************/
1633 static void
1634 em_msix_tx(void *arg)
1635 {
1636 	struct tx_ring *txr = arg;
1637 	struct adapter *adapter = txr->adapter;
1638 	if_t ifp = adapter->ifp;
1639 
1640 	++txr->tx_irq;
1641 	EM_TX_LOCK(txr);
1642 	em_txeof(txr);
1643 #ifdef EM_MULTIQUEUE
1644 	if (!drbr_empty(ifp, txr->br))
1645 		em_mq_start_locked(ifp, txr);
1646 #else
1647 	if (!if_sendq_empty(ifp))
1648 		em_start_locked(ifp, txr);
1649 #endif
1650 
1651 	/* Reenable this interrupt */
1652 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1653 	EM_TX_UNLOCK(txr);
1654 	return;
1655 }
1656 
1657 /*********************************************************************
1658  *
1659  *  MSIX RX Interrupt Service routine
1660  *
1661  **********************************************************************/
1662 
1663 static void
1664 em_msix_rx(void *arg)
1665 {
1666 	struct rx_ring	*rxr = arg;
1667 	struct adapter	*adapter = rxr->adapter;
1668 	bool		more;
1669 
1670 	++rxr->rx_irq;
1671 	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1672 		return;
1673 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1674 	if (more)
1675 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1676 	else {
1677 		/* Reenable this interrupt */
1678 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1679 	}
1680 	return;
1681 }
1682 
1683 /*********************************************************************
1684  *
1685  *  MSIX Link Fast Interrupt Service routine
1686  *
1687  **********************************************************************/
1688 static void
1689 em_msix_link(void *arg)
1690 {
1691 	struct adapter	*adapter = arg;
1692 	u32		reg_icr;
1693 
1694 	++adapter->link_irq;
1695 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1696 
1697 	if (reg_icr & E1000_ICR_RXO)
1698 		adapter->rx_overruns++;
1699 
1700 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1701 		adapter->hw.mac.get_link_status = 1;
1702 		em_handle_link(adapter, 0);
1703 	} else
1704 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1705 		    EM_MSIX_LINK | E1000_IMS_LSC);
1706 	/*
1707  	** Because we must read the ICR for this interrupt
1708  	** it may clear other causes using autoclear, for
1709  	** this reason we simply create a soft interrupt
1710  	** for all these vectors.
1711  	*/
1712 	if (reg_icr) {
1713 		E1000_WRITE_REG(&adapter->hw,
1714 			E1000_ICS, adapter->ims);
1715 	}
1716 	return;
1717 }
1718 
1719 static void
1720 em_handle_rx(void *context, int pending)
1721 {
1722 	struct rx_ring	*rxr = context;
1723 	struct adapter	*adapter = rxr->adapter;
1724         bool            more;
1725 
1726 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1727 	if (more)
1728 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1729 	else {
1730 		/* Reenable this interrupt */
1731 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1732 	}
1733 }
1734 
1735 static void
1736 em_handle_tx(void *context, int pending)
1737 {
1738 	struct tx_ring	*txr = context;
1739 	struct adapter	*adapter = txr->adapter;
1740 	if_t ifp = adapter->ifp;
1741 
1742 	EM_TX_LOCK(txr);
1743 	em_txeof(txr);
1744 #ifdef EM_MULTIQUEUE
1745 	if (!drbr_empty(ifp, txr->br))
1746 		em_mq_start_locked(ifp, txr);
1747 #else
1748 	if (!if_sendq_empty(ifp))
1749 		em_start_locked(ifp, txr);
1750 #endif
1751 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1752 	EM_TX_UNLOCK(txr);
1753 }
1754 
1755 static void
1756 em_handle_link(void *context, int pending)
1757 {
1758 	struct adapter	*adapter = context;
1759 	struct tx_ring	*txr = adapter->tx_rings;
1760 	if_t ifp = adapter->ifp;
1761 
1762 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1763 		return;
1764 
1765 	EM_CORE_LOCK(adapter);
1766 	callout_stop(&adapter->timer);
1767 	em_update_link_status(adapter);
1768 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1769 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1770 	    EM_MSIX_LINK | E1000_IMS_LSC);
1771 	if (adapter->link_active) {
1772 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1773 			EM_TX_LOCK(txr);
1774 #ifdef EM_MULTIQUEUE
1775 			if (!drbr_empty(ifp, txr->br))
1776 				em_mq_start_locked(ifp, txr);
1777 #else
1778 			if (if_sendq_empty(ifp))
1779 				em_start_locked(ifp, txr);
1780 #endif
1781 			EM_TX_UNLOCK(txr);
1782 		}
1783 	}
1784 	EM_CORE_UNLOCK(adapter);
1785 }
1786 
1787 
1788 /*********************************************************************
1789  *
1790  *  Media Ioctl callback
1791  *
1792  *  This routine is called whenever the user queries the status of
1793  *  the interface using ifconfig.
1794  *
1795  **********************************************************************/
1796 static void
1797 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1798 {
1799 	struct adapter *adapter = if_getsoftc(ifp);
1800 	u_char fiber_type = IFM_1000_SX;
1801 
1802 	INIT_DEBUGOUT("em_media_status: begin");
1803 
1804 	EM_CORE_LOCK(adapter);
1805 	em_update_link_status(adapter);
1806 
1807 	ifmr->ifm_status = IFM_AVALID;
1808 	ifmr->ifm_active = IFM_ETHER;
1809 
1810 	if (!adapter->link_active) {
1811 		EM_CORE_UNLOCK(adapter);
1812 		return;
1813 	}
1814 
1815 	ifmr->ifm_status |= IFM_ACTIVE;
1816 
1817 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1818 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1819 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1820 	} else {
1821 		switch (adapter->link_speed) {
1822 		case 10:
1823 			ifmr->ifm_active |= IFM_10_T;
1824 			break;
1825 		case 100:
1826 			ifmr->ifm_active |= IFM_100_TX;
1827 			break;
1828 		case 1000:
1829 			ifmr->ifm_active |= IFM_1000_T;
1830 			break;
1831 		}
1832 		if (adapter->link_duplex == FULL_DUPLEX)
1833 			ifmr->ifm_active |= IFM_FDX;
1834 		else
1835 			ifmr->ifm_active |= IFM_HDX;
1836 	}
1837 	EM_CORE_UNLOCK(adapter);
1838 }
1839 
1840 /*********************************************************************
1841  *
1842  *  Media Ioctl callback
1843  *
1844  *  This routine is called when the user changes speed/duplex using
 *  media/mediaopt option with ifconfig.
1846  *
1847  **********************************************************************/
1848 static int
1849 em_media_change(if_t ifp)
1850 {
1851 	struct adapter *adapter = if_getsoftc(ifp);
1852 	struct ifmedia  *ifm = &adapter->media;
1853 
1854 	INIT_DEBUGOUT("em_media_change: begin");
1855 
1856 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1857 		return (EINVAL);
1858 
1859 	EM_CORE_LOCK(adapter);
1860 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1861 	case IFM_AUTO:
1862 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1863 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1864 		break;
1865 	case IFM_1000_LX:
1866 	case IFM_1000_SX:
1867 	case IFM_1000_T:
1868 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1869 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1870 		break;
1871 	case IFM_100_TX:
1872 		adapter->hw.mac.autoneg = FALSE;
1873 		adapter->hw.phy.autoneg_advertised = 0;
1874 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1875 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1876 		else
1877 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1878 		break;
1879 	case IFM_10_T:
1880 		adapter->hw.mac.autoneg = FALSE;
1881 		adapter->hw.phy.autoneg_advertised = 0;
1882 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1883 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1884 		else
1885 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1886 		break;
1887 	default:
1888 		device_printf(adapter->dev, "Unsupported media type\n");
1889 	}
1890 
1891 	em_init_locked(adapter);
1892 	EM_CORE_UNLOCK(adapter);
1893 
1894 	return (0);
1895 }
1896 
1897 /*********************************************************************
1898  *
1899  *  This routine maps the mbufs to tx descriptors.
1900  *
1901  *  return 0 on success, positive on failure
1902  **********************************************************************/
1903 
1904 static int
1905 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1906 {
1907 	struct adapter		*adapter = txr->adapter;
1908 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1909 	bus_dmamap_t		map;
1910 	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
1911 	struct e1000_tx_desc	*ctxd = NULL;
1912 	struct mbuf		*m_head;
1913 	struct ether_header	*eh;
1914 	struct ip		*ip = NULL;
1915 	struct tcphdr		*tp = NULL;
1916 	u32			txd_upper = 0, txd_lower = 0;
1917 	int			ip_off, poff;
1918 	int			nsegs, i, j, first, last = 0;
1919 	int			error;
1920 	bool			do_tso, tso_desc, remap = TRUE;
1921 
1922 	m_head = *m_headp;
1923 	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1924 	tso_desc = FALSE;
1925 	ip_off = poff = 0;
1926 
1927 	/*
1928 	 * Intel recommends entire IP/TCP header length reside in a single
1929 	 * buffer. If multiple descriptors are used to describe the IP and
1930 	 * TCP header, each descriptor should describe one or more
1931 	 * complete headers; descriptors referencing only parts of headers
1932 	 * are not supported. If all layer headers are not coalesced into
1933 	 * a single buffer, each buffer should not cross a 4KB boundary,
1934 	 * or be larger than the maximum read request size.
1935 	 * Controller also requires modifing IP/TCP header to make TSO work
1936 	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1937 	 * IP/TCP header into a single buffer to meet the requirement of
1938 	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1939 	 * which also has similar restrictions.
1940 	 */
1941 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1942 		if (do_tso || (m_head->m_next != NULL &&
1943 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1944 			if (M_WRITABLE(*m_headp) == 0) {
1945 				m_head = m_dup(*m_headp, M_NOWAIT);
1946 				m_freem(*m_headp);
1947 				if (m_head == NULL) {
1948 					*m_headp = NULL;
1949 					return (ENOBUFS);
1950 				}
1951 				*m_headp = m_head;
1952 			}
1953 		}
1954 		/*
1955 		 * XXX
1956 		 * Assume IPv4, we don't have TSO/checksum offload support
1957 		 * for IPv6 yet.
1958 		 */
1959 		ip_off = sizeof(struct ether_header);
1960 		if (m_head->m_len < ip_off) {
1961 			m_head = m_pullup(m_head, ip_off);
1962 			if (m_head == NULL) {
1963 				*m_headp = NULL;
1964 				return (ENOBUFS);
1965 			}
1966 		}
1967 		eh = mtod(m_head, struct ether_header *);
1968 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1969 			ip_off = sizeof(struct ether_vlan_header);
1970 			if (m_head->m_len < ip_off) {
1971 				m_head = m_pullup(m_head, ip_off);
1972 				if (m_head == NULL) {
1973 					*m_headp = NULL;
1974 					return (ENOBUFS);
1975 				}
1976 			}
1977 		}
1978 		if (m_head->m_len < ip_off + sizeof(struct ip)) {
1979 			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1980 			if (m_head == NULL) {
1981 				*m_headp = NULL;
1982 				return (ENOBUFS);
1983 			}
1984 		}
1985 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1986 		poff = ip_off + (ip->ip_hl << 2);
1987 
1988 		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1989 			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1990 				m_head = m_pullup(m_head, poff +
1991 				    sizeof(struct tcphdr));
1992 				if (m_head == NULL) {
1993 					*m_headp = NULL;
1994 					return (ENOBUFS);
1995 				}
1996 			}
1997 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1998 			/*
1999 			 * TSO workaround:
2000 			 *   pull 4 more bytes of data into it.
2001 			 */
2002 			if (m_head->m_len < poff + (tp->th_off << 2)) {
2003 				m_head = m_pullup(m_head, poff +
2004 				                 (tp->th_off << 2) +
2005 				                 TSO_WORKAROUND);
2006 				if (m_head == NULL) {
2007 					*m_headp = NULL;
2008 					return (ENOBUFS);
2009 				}
2010 			}
2011 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2012 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2013 			if (do_tso) {
2014 				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2015 				                  (ip->ip_hl << 2) +
2016 				                  (tp->th_off << 2));
2017 				ip->ip_sum = 0;
2018 				/*
2019 				 * The pseudo TCP checksum does not include TCP
2020 				 * payload length so driver should recompute
2021 				 * the checksum here what hardware expect to
2022 				 * see. This is adherence of Microsoft's Large
2023 				 * Send specification.
2024 			 	*/
2025 				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2026 				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2027 			}
2028 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2029 			if (m_head->m_len < poff + sizeof(struct udphdr)) {
2030 				m_head = m_pullup(m_head, poff +
2031 				    sizeof(struct udphdr));
2032 				if (m_head == NULL) {
2033 					*m_headp = NULL;
2034 					return (ENOBUFS);
2035 				}
2036 			}
2037 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2038 		}
2039 		*m_headp = m_head;
2040 	}
2041 
2042 	/*
2043 	 * Map the packet for DMA
2044 	 *
2045 	 * Capture the first descriptor index,
2046 	 * this descriptor will have the index
2047 	 * of the EOP which is the only one that
2048 	 * now gets a DONE bit writeback.
2049 	 */
2050 	first = txr->next_avail_desc;
2051 	tx_buffer = &txr->tx_buffers[first];
2052 	tx_buffer_mapped = tx_buffer;
2053 	map = tx_buffer->map;
2054 
2055 retry:
2056 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2057 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2058 
2059 	/*
2060 	 * There are two types of errors we can (try) to handle:
2061 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2062 	 *   out of segments.  Defragment the mbuf chain and try again.
2063 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2064 	 *   at this point in time.  Defer sending and try again later.
2065 	 * All other errors, in particular EINVAL, are fatal and prevent the
2066 	 * mbuf chain from ever going through.  Drop it and report error.
2067 	 */
2068 	if (error == EFBIG && remap) {
2069 		struct mbuf *m;
2070 
2071 		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2072 		if (m == NULL) {
2073 			adapter->mbuf_defrag_failed++;
2074 			m_freem(*m_headp);
2075 			*m_headp = NULL;
2076 			return (ENOBUFS);
2077 		}
2078 		*m_headp = m;
2079 
2080 		/* Try it again, but only once */
2081 		remap = FALSE;
2082 		goto retry;
2083 	} else if (error != 0) {
2084 		adapter->no_tx_dma_setup++;
2085 		m_freem(*m_headp);
2086 		*m_headp = NULL;
2087 		return (error);
2088 	}
2089 
2090 	/*
2091 	 * TSO Hardware workaround, if this packet is not
2092 	 * TSO, and is only a single descriptor long, and
2093 	 * it follows a TSO burst, then we need to add a
2094 	 * sentinel descriptor to prevent premature writeback.
2095 	 */
2096 	if ((!do_tso) && (txr->tx_tso == TRUE)) {
2097 		if (nsegs == 1)
2098 			tso_desc = TRUE;
2099 		txr->tx_tso = FALSE;
2100 	}
2101 
2102         if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2103                 txr->no_desc_avail++;
2104 		bus_dmamap_unload(txr->txtag, map);
2105 		return (ENOBUFS);
2106         }
2107 	m_head = *m_headp;
2108 
2109 	/* Do hardware assists */
2110 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2111 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2112 		    &txd_upper, &txd_lower);
2113 		/* we need to make a final sentinel transmit desc */
2114 		tso_desc = TRUE;
2115 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2116 		em_transmit_checksum_setup(txr, m_head,
2117 		    ip_off, ip, &txd_upper, &txd_lower);
2118 
2119 	if (m_head->m_flags & M_VLANTAG) {
2120 		/* Set the vlan id. */
2121 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2122                 /* Tell hardware to add tag */
2123                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2124         }
2125 
2126 	i = txr->next_avail_desc;
2127 
2128 	/* Set up our transmit descriptors */
2129 	for (j = 0; j < nsegs; j++) {
2130 		bus_size_t seg_len;
2131 		bus_addr_t seg_addr;
2132 
2133 		tx_buffer = &txr->tx_buffers[i];
2134 		ctxd = &txr->tx_base[i];
2135 		seg_addr = segs[j].ds_addr;
2136 		seg_len  = segs[j].ds_len;
2137 		/*
2138 		** TSO Workaround:
2139 		** If this is the last descriptor, we want to
2140 		** split it so we have a small final sentinel
2141 		*/
2142 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2143 			seg_len -= TSO_WORKAROUND;
2144 			ctxd->buffer_addr = htole64(seg_addr);
2145 			ctxd->lower.data = htole32(
2146 				adapter->txd_cmd | txd_lower | seg_len);
2147 			ctxd->upper.data = htole32(txd_upper);
2148 			if (++i == adapter->num_tx_desc)
2149 				i = 0;
2150 
2151 			/* Now make the sentinel */
2152 			txr->tx_avail--;
2153 			ctxd = &txr->tx_base[i];
2154 			tx_buffer = &txr->tx_buffers[i];
2155 			ctxd->buffer_addr =
2156 			    htole64(seg_addr + seg_len);
2157 			ctxd->lower.data = htole32(
2158 			adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2159 			ctxd->upper.data =
2160 			    htole32(txd_upper);
2161 			last = i;
2162 			if (++i == adapter->num_tx_desc)
2163 				i = 0;
2164 		} else {
2165 			ctxd->buffer_addr = htole64(seg_addr);
2166 			ctxd->lower.data = htole32(
2167 			adapter->txd_cmd | txd_lower | seg_len);
2168 			ctxd->upper.data = htole32(txd_upper);
2169 			last = i;
2170 			if (++i == adapter->num_tx_desc)
2171 				i = 0;
2172 		}
2173 		tx_buffer->m_head = NULL;
2174 		tx_buffer->next_eop = -1;
2175 	}
2176 
2177 	txr->next_avail_desc = i;
2178 	txr->tx_avail -= nsegs;
2179 
2180         tx_buffer->m_head = m_head;
2181 	/*
2182 	** Here we swap the map so the last descriptor,
2183 	** which gets the completion interrupt has the
2184 	** real map, and the first descriptor gets the
2185 	** unused map from this descriptor.
2186 	*/
2187 	tx_buffer_mapped->map = tx_buffer->map;
2188 	tx_buffer->map = map;
2189         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2190 
2191         /*
2192          * Last Descriptor of Packet
2193 	 * needs End Of Packet (EOP)
2194 	 * and Report Status (RS)
2195          */
2196         ctxd->lower.data |=
2197 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2198 	/*
2199 	 * Keep track in the first buffer which
2200 	 * descriptor will be written back
2201 	 */
2202 	tx_buffer = &txr->tx_buffers[first];
2203 	tx_buffer->next_eop = last;
2204 
2205 	/*
2206 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2207 	 * that this frame is available to transmit.
2208 	 */
2209 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2210 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2211 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2212 
2213 	return (0);
2214 }
2215 
2216 static void
2217 em_set_promisc(struct adapter *adapter)
2218 {
2219 	if_t ifp = adapter->ifp;
2220 	u32		reg_rctl;
2221 
2222 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2223 
2224 	if (if_getflags(ifp) & IFF_PROMISC) {
2225 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2226 		/* Turn this on if you want to see bad packets */
2227 		if (em_debug_sbp)
2228 			reg_rctl |= E1000_RCTL_SBP;
2229 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2230 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2231 		reg_rctl |= E1000_RCTL_MPE;
2232 		reg_rctl &= ~E1000_RCTL_UPE;
2233 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2234 	}
2235 }
2236 
2237 static void
2238 em_disable_promisc(struct adapter *adapter)
2239 {
2240 	if_t		ifp = adapter->ifp;
2241 	u32		reg_rctl;
2242 	int		mcnt = 0;
2243 
2244 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2245 	reg_rctl &=  (~E1000_RCTL_UPE);
2246 	if (if_getflags(ifp) & IFF_ALLMULTI)
2247 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2248 	else
2249 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2250 	/* Don't disable if in MAX groups */
2251 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2252 		reg_rctl &=  (~E1000_RCTL_MPE);
2253 	reg_rctl &=  (~E1000_RCTL_SBP);
2254 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2255 }
2256 
2257 
2258 /*********************************************************************
2259  *  Multicast Update
2260  *
2261  *  This routine is called whenever multicast address list is updated.
2262  *
2263  **********************************************************************/
2264 
2265 static void
2266 em_set_multi(struct adapter *adapter)
2267 {
2268 	if_t ifp = adapter->ifp;
2269 	u32 reg_rctl = 0;
2270 	u8  *mta; /* Multicast array memory */
2271 	int mcnt = 0;
2272 
2273 	IOCTL_DEBUGOUT("em_set_multi: begin");
2274 
2275 	mta = adapter->mta;
2276 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2277 
2278 	if (adapter->hw.mac.type == e1000_82542 &&
2279 	    adapter->hw.revision_id == E1000_REVISION_2) {
2280 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2281 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2282 			e1000_pci_clear_mwi(&adapter->hw);
2283 		reg_rctl |= E1000_RCTL_RST;
2284 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2285 		msec_delay(5);
2286 	}
2287 
2288 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2289 
2290 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2291 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2292 		reg_rctl |= E1000_RCTL_MPE;
2293 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2294 	} else
2295 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2296 
2297 	if (adapter->hw.mac.type == e1000_82542 &&
2298 	    adapter->hw.revision_id == E1000_REVISION_2) {
2299 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2300 		reg_rctl &= ~E1000_RCTL_RST;
2301 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2302 		msec_delay(5);
2303 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2304 			e1000_pci_set_mwi(&adapter->hw);
2305 	}
2306 }
2307 
2308 
2309 /*********************************************************************
2310  *  Timer routine
2311  *
2312  *  This routine checks for link status and updates statistics.
2313  *
2314  **********************************************************************/
2315 
2316 static void
2317 em_local_timer(void *arg)
2318 {
2319 	struct adapter	*adapter = arg;
2320 	if_t ifp = adapter->ifp;
2321 	struct tx_ring	*txr = adapter->tx_rings;
2322 	struct rx_ring	*rxr = adapter->rx_rings;
2323 	u32		trigger = 0;
2324 
2325 	EM_CORE_LOCK_ASSERT(adapter);
2326 
2327 	em_update_link_status(adapter);
2328 	em_update_stats_counters(adapter);
2329 
2330 	/* Reset LAA into RAR[0] on 82571 */
2331 	if ((adapter->hw.mac.type == e1000_82571) &&
2332 	    e1000_get_laa_state_82571(&adapter->hw))
2333 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2334 
2335 	/* Mask to use in the irq trigger */
2336 	if (adapter->msix_mem) {
2337 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2338 			trigger |= rxr->ims;
2339 		rxr = adapter->rx_rings;
2340 	} else
2341 		trigger = E1000_ICS_RXDMT0;
2342 
2343 	/*
2344 	** Check on the state of the TX queue(s), this
2345 	** can be done without the lock because its RO
2346 	** and the HUNG state will be static if set.
2347 	*/
2348 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2349 		if (txr->busy == EM_TX_HUNG)
2350 			goto hung;
2351 		if (txr->busy >= EM_TX_MAXTRIES)
2352 			txr->busy = EM_TX_HUNG;
2353 		/* Schedule a TX tasklet if needed */
2354 		if (txr->tx_avail <= EM_MAX_SCATTER)
2355 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2356 	}
2357 
2358 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2359 #ifndef DEVICE_POLLING
2360 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2361 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2362 #endif
2363 	return;
2364 hung:
2365 	/* Looks like we're hung */
2366 	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2367 			txr->me);
2368 	em_print_debug_info(adapter);
2369 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2370 	adapter->watchdog_events++;
2371 	em_init_locked(adapter);
2372 }
2373 
2374 
2375 static void
2376 em_update_link_status(struct adapter *adapter)
2377 {
2378 	struct e1000_hw *hw = &adapter->hw;
2379 	if_t ifp = adapter->ifp;
2380 	device_t dev = adapter->dev;
2381 	struct tx_ring *txr = adapter->tx_rings;
2382 	u32 link_check = 0;
2383 
2384 	/* Get the cached link value or read phy for real */
2385 	switch (hw->phy.media_type) {
2386 	case e1000_media_type_copper:
2387 		if (hw->mac.get_link_status) {
2388 			if (hw->mac.type == e1000_pch_spt)
2389 				msec_delay(50);
2390 			/* Do the work to read phy */
2391 			e1000_check_for_link(hw);
2392 			link_check = !hw->mac.get_link_status;
2393 			if (link_check) /* ESB2 fix */
2394 				e1000_cfg_on_link_up(hw);
2395 		} else
2396 			link_check = TRUE;
2397 		break;
2398 	case e1000_media_type_fiber:
2399 		e1000_check_for_link(hw);
2400 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2401                                  E1000_STATUS_LU);
2402 		break;
2403 	case e1000_media_type_internal_serdes:
2404 		e1000_check_for_link(hw);
2405 		link_check = adapter->hw.mac.serdes_has_link;
2406 		break;
2407 	default:
2408 	case e1000_media_type_unknown:
2409 		break;
2410 	}
2411 
2412 	/* Now check for a transition */
2413 	if (link_check && (adapter->link_active == 0)) {
2414 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2415 		    &adapter->link_duplex);
2416 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2417 		if ((adapter->link_speed != SPEED_1000) &&
2418 		    ((hw->mac.type == e1000_82571) ||
2419 		    (hw->mac.type == e1000_82572))) {
2420 			int tarc0;
2421 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2422 			tarc0 &= ~TARC_SPEED_MODE_BIT;
2423 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2424 		}
2425 		if (bootverbose)
2426 			device_printf(dev, "Link is up %d Mbps %s\n",
2427 			    adapter->link_speed,
2428 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2429 			    "Full Duplex" : "Half Duplex"));
2430 		adapter->link_active = 1;
2431 		adapter->smartspeed = 0;
2432 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2433 		if_link_state_change(ifp, LINK_STATE_UP);
2434 	} else if (!link_check && (adapter->link_active == 1)) {
2435 		if_setbaudrate(ifp, 0);
2436 		adapter->link_speed = 0;
2437 		adapter->link_duplex = 0;
2438 		if (bootverbose)
2439 			device_printf(dev, "Link is Down\n");
2440 		adapter->link_active = 0;
2441 		/* Link down, disable hang detection */
2442 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2443 			txr->busy = EM_TX_IDLE;
2444 		if_link_state_change(ifp, LINK_STATE_DOWN);
2445 	}
2446 }
2447 
2448 /*********************************************************************
2449  *
2450  *  This routine disables all traffic on the adapter by issuing a
2451  *  global reset on the MAC and deallocates TX/RX buffers.
2452  *
2453  *  This routine should always be called with BOTH the CORE
2454  *  and TX locks.
2455  **********************************************************************/
2456 
2457 static void
2458 em_stop(void *arg)
2459 {
2460 	struct adapter	*adapter = arg;
2461 	if_t ifp = adapter->ifp;
2462 	struct tx_ring	*txr = adapter->tx_rings;
2463 
2464 	EM_CORE_LOCK_ASSERT(adapter);
2465 
2466 	INIT_DEBUGOUT("em_stop: begin");
2467 
2468 	em_disable_intr(adapter);
2469 	callout_stop(&adapter->timer);
2470 
2471 	/* Tell the stack that the interface is no longer active */
2472 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2473 
2474         /* Disarm Hang Detection. */
2475 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2476 		EM_TX_LOCK(txr);
2477 		txr->busy = EM_TX_IDLE;
2478 		EM_TX_UNLOCK(txr);
2479 	}
2480 
2481 	/* I219 needs some special flushing to avoid hangs */
2482 	if (adapter->hw.mac.type == e1000_pch_spt)
2483 		em_flush_desc_rings(adapter);
2484 
2485 	e1000_reset_hw(&adapter->hw);
2486 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2487 
2488 	e1000_led_off(&adapter->hw);
2489 	e1000_cleanup_led(&adapter->hw);
2490 }
2491 
2492 
2493 /*********************************************************************
2494  *
2495  *  Determine hardware revision.
2496  *
2497  **********************************************************************/
2498 static void
2499 em_identify_hardware(struct adapter *adapter)
2500 {
2501 	device_t dev = adapter->dev;
2502 
2503 	/* Make sure our PCI config space has the necessary stuff set */
2504 	pci_enable_busmaster(dev);
2505 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2506 
2507 	/* Save off the information about this board */
2508 	adapter->hw.vendor_id = pci_get_vendor(dev);
2509 	adapter->hw.device_id = pci_get_device(dev);
2510 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2511 	adapter->hw.subsystem_vendor_id =
2512 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2513 	adapter->hw.subsystem_device_id =
2514 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2515 
2516 	/* Do Shared Code Init and Setup */
2517 	if (e1000_set_mac_type(&adapter->hw)) {
2518 		device_printf(dev, "Setup init failure\n");
2519 		return;
2520 	}
2521 }
2522 
2523 static int
2524 em_allocate_pci_resources(struct adapter *adapter)
2525 {
2526 	device_t	dev = adapter->dev;
2527 	int		rid;
2528 
2529 	rid = PCIR_BAR(0);
2530 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2531 	    &rid, RF_ACTIVE);
2532 	if (adapter->memory == NULL) {
2533 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2534 		return (ENXIO);
2535 	}
2536 	adapter->osdep.mem_bus_space_tag =
2537 	    rman_get_bustag(adapter->memory);
2538 	adapter->osdep.mem_bus_space_handle =
2539 	    rman_get_bushandle(adapter->memory);
2540 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2541 
2542 	adapter->hw.back = &adapter->osdep;
2543 
2544 	return (0);
2545 }
2546 
2547 /*********************************************************************
2548  *
2549  *  Setup the Legacy or MSI Interrupt handler
2550  *
2551  **********************************************************************/
2552 int
2553 em_allocate_legacy(struct adapter *adapter)
2554 {
2555 	device_t dev = adapter->dev;
2556 	struct tx_ring	*txr = adapter->tx_rings;
2557 	int error, rid = 0;
2558 
2559 	/* Manually turn off all interrupts */
2560 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2561 
2562 	if (adapter->msix == 1) /* using MSI */
2563 		rid = 1;
2564 	/* We allocate a single interrupt resource */
2565 	adapter->res = bus_alloc_resource_any(dev,
2566 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2567 	if (adapter->res == NULL) {
2568 		device_printf(dev, "Unable to allocate bus resource: "
2569 		    "interrupt\n");
2570 		return (ENXIO);
2571 	}
2572 
2573 	/*
2574 	 * Allocate a fast interrupt and the associated
2575 	 * deferred processing contexts.
2576 	 */
2577 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2578 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2579 	    taskqueue_thread_enqueue, &adapter->tq);
2580 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2581 	    device_get_nameunit(adapter->dev));
2582 	/* Use a TX only tasklet for local timer */
2583 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2584 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2585 	    taskqueue_thread_enqueue, &txr->tq);
2586 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2587 	    device_get_nameunit(adapter->dev));
2588 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2589 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2590 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2591 		device_printf(dev, "Failed to register fast interrupt "
2592 			    "handler: %d\n", error);
2593 		taskqueue_free(adapter->tq);
2594 		adapter->tq = NULL;
2595 		return (error);
2596 	}
2597 
2598 	return (0);
2599 }
2600 
2601 /*********************************************************************
2602  *
2603  *  Setup the MSIX Interrupt handlers
2604  *   This is not really Multiqueue, rather
2605  *   its just separate interrupt vectors
2606  *   for TX, RX, and Link.
2607  *
2608  **********************************************************************/
2609 int
2610 em_allocate_msix(struct adapter *adapter)
2611 {
2612 	device_t	dev = adapter->dev;
2613 	struct		tx_ring *txr = adapter->tx_rings;
2614 	struct		rx_ring *rxr = adapter->rx_rings;
2615 	int		error, rid, vector = 0;
2616 	int		cpu_id = 0;
2617 
2618 
2619 	/* Make sure all interrupts are disabled */
2620 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2621 
2622 	/* First set up ring resources */
2623 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2624 
2625 		/* RX ring */
2626 		rid = vector + 1;
2627 
2628 		rxr->res = bus_alloc_resource_any(dev,
2629 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2630 		if (rxr->res == NULL) {
2631 			device_printf(dev,
2632 			    "Unable to allocate bus resource: "
2633 			    "RX MSIX Interrupt %d\n", i);
2634 			return (ENXIO);
2635 		}
2636 		if ((error = bus_setup_intr(dev, rxr->res,
2637 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2638 		    rxr, &rxr->tag)) != 0) {
2639 			device_printf(dev, "Failed to register RX handler");
2640 			return (error);
2641 		}
2642 #if __FreeBSD_version >= 800504
2643 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2644 #endif
2645 		rxr->msix = vector;
2646 
2647 		if (em_last_bind_cpu < 0)
2648 			em_last_bind_cpu = CPU_FIRST();
2649 		cpu_id = em_last_bind_cpu;
2650 		bus_bind_intr(dev, rxr->res, cpu_id);
2651 
2652 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2653 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2654 		    taskqueue_thread_enqueue, &rxr->tq);
2655 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2656 		    device_get_nameunit(adapter->dev), cpu_id);
2657 		/*
2658 		** Set the bit to enable interrupt
2659 		** in E1000_IMS -- bits 20 and 21
2660 		** are for RX0 and RX1, note this has
2661 		** NOTHING to do with the MSIX vector
2662 		*/
2663 		rxr->ims = 1 << (20 + i);
2664 		adapter->ims |= rxr->ims;
2665 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2666 
2667 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2668 	}
2669 
2670 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2671 		/* TX ring */
2672 		rid = vector + 1;
2673 		txr->res = bus_alloc_resource_any(dev,
2674 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2675 		if (txr->res == NULL) {
2676 			device_printf(dev,
2677 			    "Unable to allocate bus resource: "
2678 			    "TX MSIX Interrupt %d\n", i);
2679 			return (ENXIO);
2680 		}
2681 		if ((error = bus_setup_intr(dev, txr->res,
2682 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2683 		    txr, &txr->tag)) != 0) {
2684 			device_printf(dev, "Failed to register TX handler");
2685 			return (error);
2686 		}
2687 #if __FreeBSD_version >= 800504
2688 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2689 #endif
2690 		txr->msix = vector;
2691 
2692                 if (em_last_bind_cpu < 0)
2693                         em_last_bind_cpu = CPU_FIRST();
2694                 cpu_id = em_last_bind_cpu;
2695                 bus_bind_intr(dev, txr->res, cpu_id);
2696 
2697 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2698 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2699 		    taskqueue_thread_enqueue, &txr->tq);
2700 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2701 		    device_get_nameunit(adapter->dev), cpu_id);
2702 		/*
2703 		** Set the bit to enable interrupt
2704 		** in E1000_IMS -- bits 22 and 23
2705 		** are for TX0 and TX1, note this has
2706 		** NOTHING to do with the MSIX vector
2707 		*/
2708 		txr->ims = 1 << (22 + i);
2709 		adapter->ims |= txr->ims;
2710 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2711 
2712 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2713 	}
2714 
2715 	/* Link interrupt */
2716 	rid = vector + 1;
2717 	adapter->res = bus_alloc_resource_any(dev,
2718 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2719 	if (!adapter->res) {
2720 		device_printf(dev,"Unable to allocate "
2721 		    "bus resource: Link interrupt [%d]\n", rid);
2722 		return (ENXIO);
2723         }
2724 	/* Set the link handler function */
2725 	error = bus_setup_intr(dev, adapter->res,
2726 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2727 	    em_msix_link, adapter, &adapter->tag);
2728 	if (error) {
2729 		adapter->res = NULL;
2730 		device_printf(dev, "Failed to register LINK handler");
2731 		return (error);
2732 	}
2733 #if __FreeBSD_version >= 800504
2734 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2735 #endif
2736 	adapter->linkvec = vector;
2737 	adapter->ivars |=  (8 | vector) << 16;
2738 	adapter->ivars |= 0x80000000;
2739 
2740 	return (0);
2741 }
2742 
2743 
2744 static void
2745 em_free_pci_resources(struct adapter *adapter)
2746 {
2747 	device_t	dev = adapter->dev;
2748 	struct tx_ring	*txr;
2749 	struct rx_ring	*rxr;
2750 	int		rid;
2751 
2752 
2753 	/*
2754 	** Release all the queue interrupt resources:
2755 	*/
2756 	for (int i = 0; i < adapter->num_queues; i++) {
2757 		txr = &adapter->tx_rings[i];
2758 		/* an early abort? */
2759 		if (txr == NULL)
2760 			break;
2761 		rid = txr->msix +1;
2762 		if (txr->tag != NULL) {
2763 			bus_teardown_intr(dev, txr->res, txr->tag);
2764 			txr->tag = NULL;
2765 		}
2766 		if (txr->res != NULL)
2767 			bus_release_resource(dev, SYS_RES_IRQ,
2768 			    rid, txr->res);
2769 
2770 		rxr = &adapter->rx_rings[i];
2771 		/* an early abort? */
2772 		if (rxr == NULL)
2773 			break;
2774 		rid = rxr->msix +1;
2775 		if (rxr->tag != NULL) {
2776 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2777 			rxr->tag = NULL;
2778 		}
2779 		if (rxr->res != NULL)
2780 			bus_release_resource(dev, SYS_RES_IRQ,
2781 			    rid, rxr->res);
2782 	}
2783 
2784         if (adapter->linkvec) /* we are doing MSIX */
2785                 rid = adapter->linkvec + 1;
2786         else
2787                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2788 
2789 	if (adapter->tag != NULL) {
2790 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2791 		adapter->tag = NULL;
2792 	}
2793 
2794 	if (adapter->res != NULL)
2795 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2796 
2797 
2798 	if (adapter->msix)
2799 		pci_release_msi(dev);
2800 
2801 	if (adapter->msix_mem != NULL)
2802 		bus_release_resource(dev, SYS_RES_MEMORY,
2803 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2804 
2805 	if (adapter->memory != NULL)
2806 		bus_release_resource(dev, SYS_RES_MEMORY,
2807 		    PCIR_BAR(0), adapter->memory);
2808 
2809 	if (adapter->flash != NULL)
2810 		bus_release_resource(dev, SYS_RES_MEMORY,
2811 		    EM_FLASH, adapter->flash);
2812 }
2813 
2814 /*
2815  * Setup MSI or MSI/X
2816  */
2817 static int
2818 em_setup_msix(struct adapter *adapter)
2819 {
2820 	device_t dev = adapter->dev;
2821 	int val;
2822 
2823 	/* Nearly always going to use one queue */
2824 	adapter->num_queues = 1;
2825 
2826 	/*
2827 	** Try using MSI-X for Hartwell adapters
2828 	*/
2829 	if ((adapter->hw.mac.type == e1000_82574) &&
2830 	    (em_enable_msix == TRUE)) {
2831 #ifdef EM_MULTIQUEUE
2832 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2833 		if (adapter->num_queues > 1)
2834 			em_enable_vectors_82574(adapter);
2835 #endif
2836 		/* Map the MSIX BAR */
2837 		int rid = PCIR_BAR(EM_MSIX_BAR);
2838 		adapter->msix_mem = bus_alloc_resource_any(dev,
2839 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2840        		if (adapter->msix_mem == NULL) {
2841 			/* May not be enabled */
2842                		device_printf(adapter->dev,
2843 			    "Unable to map MSIX table \n");
2844 			goto msi;
2845        		}
2846 		val = pci_msix_count(dev);
2847 
2848 #ifdef EM_MULTIQUEUE
2849 		/* We need 5 vectors in the multiqueue case */
2850 		if (adapter->num_queues > 1 ) {
2851 			if (val >= 5)
2852 				val = 5;
2853 			else {
2854 				adapter->num_queues = 1;
2855 				device_printf(adapter->dev,
2856 				    "Insufficient MSIX vectors for >1 queue, "
2857 				    "using single queue...\n");
2858 				goto msix_one;
2859 			}
2860 		} else {
2861 msix_one:
2862 #endif
2863 			if (val >= 3)
2864 				val = 3;
2865 			else {
2866 				device_printf(adapter->dev,
2867 			    	"Insufficient MSIX vectors, using MSI\n");
2868 				goto msi;
2869 			}
2870 #ifdef EM_MULTIQUEUE
2871 		}
2872 #endif
2873 
2874 		if ((pci_alloc_msix(dev, &val) == 0)) {
2875 			device_printf(adapter->dev,
2876 			    "Using MSIX interrupts "
2877 			    "with %d vectors\n", val);
2878 			return (val);
2879 		}
2880 
2881 		/*
2882 		** If MSIX alloc failed or provided us with
2883 		** less than needed, free and fall through to MSI
2884 		*/
2885 		pci_release_msi(dev);
2886 	}
2887 msi:
2888 	if (adapter->msix_mem != NULL) {
2889 		bus_release_resource(dev, SYS_RES_MEMORY,
2890 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2891 		adapter->msix_mem = NULL;
2892 	}
2893        	val = 1;
2894        	if (pci_alloc_msi(dev, &val) == 0) {
2895                	device_printf(adapter->dev, "Using an MSI interrupt\n");
2896 		return (val);
2897 	}
2898 	/* Should only happen due to manual configuration */
2899 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2900 	return (0);
2901 }
2902 
2903 
2904 /*
2905 ** The 3 following flush routines are used as a workaround in the
2906 ** I219 client parts and only for them.
2907 **
2908 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2909 **
2910 ** We want to clear all pending descriptors from the TX ring.
2911 ** zeroing happens when the HW reads the regs. We  assign the ring itself as
2912 ** the data of the next descriptor. We don't care about the data we are about
2913 ** to reset the HW.
2914 */
2915 static void
2916 em_flush_tx_ring(struct adapter *adapter)
2917 {
2918 	struct e1000_hw		*hw = &adapter->hw;
2919 	struct tx_ring		*txr = adapter->tx_rings;
2920 	struct e1000_tx_desc	*txd;
2921 	u32			tctl, txd_lower = E1000_TXD_CMD_IFCS;
2922 	u16			size = 512;
2923 
2924 	tctl = E1000_READ_REG(hw, E1000_TCTL);
2925 	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2926 
2927 	txd = &txr->tx_base[txr->next_avail_desc++];
2928 	if (txr->next_avail_desc == adapter->num_tx_desc)
2929 		txr->next_avail_desc = 0;
2930 
2931 	/* Just use the ring as a dummy buffer addr */
2932 	txd->buffer_addr = txr->txdma.dma_paddr;
2933 	txd->lower.data = htole32(txd_lower | size);
2934 	txd->upper.data = 0;
2935 
2936 	/* flush descriptors to memory before notifying the HW */
2937 	wmb();
2938 
2939 	E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2940 	mb();
2941 	usec_delay(250);
2942 }
2943 
2944 /*
2945 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2946 **
2947 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
2948 */
2949 static void
2950 em_flush_rx_ring(struct adapter *adapter)
2951 {
2952 	struct e1000_hw	*hw = &adapter->hw;
2953 	u32		rctl, rxdctl;
2954 
2955 	rctl = E1000_READ_REG(hw, E1000_RCTL);
2956 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2957 	E1000_WRITE_FLUSH(hw);
2958 	usec_delay(150);
2959 
2960 	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2961 	/* zero the lower 14 bits (prefetch and host thresholds) */
2962 	rxdctl &= 0xffffc000;
2963 	/*
2964 	 * update thresholds: prefetch threshold to 31, host threshold to 1
2965 	 * and make sure the granularity is "descriptors" and not "cache lines"
2966 	 */
2967 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2968 	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
2969 
2970 	/* momentarily enable the RX ring for the changes to take effect */
2971 	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2972 	E1000_WRITE_FLUSH(hw);
2973 	usec_delay(150);
2974 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2975 }
2976 
2977 /*
2978 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
2979 **
2980 ** In i219, the descriptor rings must be emptied before resetting the HW
2981 ** or before changing the device state to D3 during runtime (runtime PM).
2982 **
2983 ** Failure to do this will cause the HW to enter a unit hang state which can
2984 ** only be released by PCI reset on the device
2985 **
2986 */
2987 static void
2988 em_flush_desc_rings(struct adapter *adapter)
2989 {
2990 	struct e1000_hw	*hw = &adapter->hw;
2991 	device_t	dev = adapter->dev;
2992 	u16		hang_state;
2993 	u32		fext_nvm11, tdlen;
2994 
2995 	/* First, disable MULR fix in FEXTNVM11 */
2996 	fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
2997 	fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
2998 	E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
2999 
3000 	/* do nothing if we're not in faulty state, or if the queue is empty */
3001 	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3002 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3003 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3004 		return;
3005 	em_flush_tx_ring(adapter);
3006 
3007 	/* recheck, maybe the fault is caused by the rx ring */
3008 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3009 	if (hang_state & FLUSH_DESC_REQUIRED)
3010 		em_flush_rx_ring(adapter);
3011 }
3012 
3013 
3014 /*********************************************************************
3015  *
3016  *  Initialize the hardware to a configuration
3017  *  as specified by the adapter structure.
3018  *
3019  **********************************************************************/
3020 static void
3021 em_reset(struct adapter *adapter)
3022 {
3023 	device_t	dev = adapter->dev;
3024 	if_t ifp = adapter->ifp;
3025 	struct e1000_hw	*hw = &adapter->hw;
3026 	u16		rx_buffer_size;
3027 	u32		pba;
3028 
3029 	INIT_DEBUGOUT("em_reset: begin");
3030 
3031 	/* Set up smart power down as default off on newer adapters. */
3032 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3033 	    hw->mac.type == e1000_82572)) {
3034 		u16 phy_tmp = 0;
3035 
3036 		/* Speed up time to link by disabling smart power down. */
3037 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3038 		phy_tmp &= ~IGP02E1000_PM_SPD;
3039 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3040 	}
3041 
3042 	/*
3043 	 * Packet Buffer Allocation (PBA)
3044 	 * Writing PBA sets the receive portion of the buffer
3045 	 * the remainder is used for the transmit buffer.
3046 	 */
3047 	switch (hw->mac.type) {
3048 	/* Total Packet Buffer on these is 48K */
3049 	case e1000_82571:
3050 	case e1000_82572:
3051 	case e1000_80003es2lan:
3052 			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3053 		break;
3054 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3055 			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3056 		break;
3057 	case e1000_82574:
3058 	case e1000_82583:
3059 			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3060 		break;
3061 	case e1000_ich8lan:
3062 		pba = E1000_PBA_8K;
3063 		break;
3064 	case e1000_ich9lan:
3065 	case e1000_ich10lan:
3066 		/* Boost Receive side for jumbo frames */
3067 		if (adapter->hw.mac.max_frame_size > 4096)
3068 			pba = E1000_PBA_14K;
3069 		else
3070 			pba = E1000_PBA_10K;
3071 		break;
3072 	case e1000_pchlan:
3073 	case e1000_pch2lan:
3074 	case e1000_pch_lpt:
3075 	case e1000_pch_spt:
3076 		pba = E1000_PBA_26K;
3077 		break;
3078 	default:
3079 		if (adapter->hw.mac.max_frame_size > 8192)
3080 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3081 		else
3082 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3083 	}
3084 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3085 
3086 	/*
3087 	 * These parameters control the automatic generation (Tx) and
3088 	 * response (Rx) to Ethernet PAUSE frames.
3089 	 * - High water mark should allow for at least two frames to be
3090 	 *   received after sending an XOFF.
3091 	 * - Low water mark works best when it is very near the high water mark.
3092 	 *   This allows the receiver to restart by sending XON when it has
3093 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
3094 	 *   restart after one full frame is pulled from the buffer. There
3095 	 *   could be several smaller frames in the buffer and if so they will
3096 	 *   not trigger the XON until their total number reduces the buffer
3097 	 *   by 1500.
3098 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3099 	 */
3100 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
3101 	hw->fc.high_water = rx_buffer_size -
3102 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
3103 	hw->fc.low_water = hw->fc.high_water - 1500;
3104 
3105 	if (adapter->fc) /* locally set flow control value? */
3106 		hw->fc.requested_mode = adapter->fc;
3107 	else
3108 		hw->fc.requested_mode = e1000_fc_full;
3109 
3110 	if (hw->mac.type == e1000_80003es2lan)
3111 		hw->fc.pause_time = 0xFFFF;
3112 	else
3113 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
3114 
3115 	hw->fc.send_xon = TRUE;
3116 
3117 	/* Device specific overrides/settings */
3118 	switch (hw->mac.type) {
3119 	case e1000_pchlan:
3120 		/* Workaround: no TX flow ctrl for PCH */
3121                 hw->fc.requested_mode = e1000_fc_rx_pause;
3122 		hw->fc.pause_time = 0xFFFF; /* override */
3123 		if (if_getmtu(ifp) > ETHERMTU) {
3124 			hw->fc.high_water = 0x3500;
3125 			hw->fc.low_water = 0x1500;
3126 		} else {
3127 			hw->fc.high_water = 0x5000;
3128 			hw->fc.low_water = 0x3000;
3129 		}
3130 		hw->fc.refresh_time = 0x1000;
3131 		break;
3132 	case e1000_pch2lan:
3133 	case e1000_pch_lpt:
3134 	case e1000_pch_spt:
3135 		hw->fc.high_water = 0x5C20;
3136 		hw->fc.low_water = 0x5048;
3137 		hw->fc.pause_time = 0x0650;
3138 		hw->fc.refresh_time = 0x0400;
3139 		/* Jumbos need adjusted PBA */
3140 		if (if_getmtu(ifp) > ETHERMTU)
3141 			E1000_WRITE_REG(hw, E1000_PBA, 12);
3142 		else
3143 			E1000_WRITE_REG(hw, E1000_PBA, 26);
3144 		break;
3145         case e1000_ich9lan:
3146         case e1000_ich10lan:
3147 		if (if_getmtu(ifp) > ETHERMTU) {
3148 			hw->fc.high_water = 0x2800;
3149 			hw->fc.low_water = hw->fc.high_water - 8;
3150 			break;
3151 		}
3152 		/* else fall thru */
3153 	default:
3154 		if (hw->mac.type == e1000_80003es2lan)
3155 			hw->fc.pause_time = 0xFFFF;
3156 		break;
3157 	}
3158 
3159 	/* I219 needs some special flushing to avoid hangs */
3160 	if (hw->mac.type == e1000_pch_spt)
3161 		em_flush_desc_rings(adapter);
3162 
3163 	/* Issue a global reset */
3164 	e1000_reset_hw(hw);
3165 	E1000_WRITE_REG(hw, E1000_WUC, 0);
3166 	em_disable_aspm(adapter);
3167 	/* and a re-init */
3168 	if (e1000_init_hw(hw) < 0) {
3169 		device_printf(dev, "Hardware Initialization Failed\n");
3170 		return;
3171 	}
3172 
3173 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3174 	e1000_get_phy_info(hw);
3175 	e1000_check_for_link(hw);
3176 	return;
3177 }
3178 
3179 /*********************************************************************
3180  *
3181  *  Setup networking device structure and register an interface.
3182  *
3183  **********************************************************************/
3184 static int
3185 em_setup_interface(device_t dev, struct adapter *adapter)
3186 {
3187 	if_t ifp;
3188 
3189 	INIT_DEBUGOUT("em_setup_interface: begin");
3190 
3191 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3192 	if (ifp == 0) {
3193 		device_printf(dev, "can not allocate ifnet structure\n");
3194 		return (-1);
3195 	}
3196 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3197 	if_setdev(ifp, dev);
3198 	if_setinitfn(ifp, em_init);
3199 	if_setsoftc(ifp, adapter);
3200 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3201 	if_setioctlfn(ifp, em_ioctl);
3202 	if_setgetcounterfn(ifp, em_get_counter);
3203 
3204 	/* TSO parameters */
3205 	ifp->if_hw_tsomax = IP_MAXPACKET;
3206 	/* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */
3207 	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3208 	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3209 
3210 #ifdef EM_MULTIQUEUE
3211 	/* Multiqueue stack interface */
3212 	if_settransmitfn(ifp, em_mq_start);
3213 	if_setqflushfn(ifp, em_qflush);
3214 #else
3215 	if_setstartfn(ifp, em_start);
3216 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3217 	if_setsendqready(ifp);
3218 #endif
3219 
3220 	ether_ifattach(ifp, adapter->hw.mac.addr);
3221 
3222 	if_setcapabilities(ifp, 0);
3223 	if_setcapenable(ifp, 0);
3224 
3225 
3226 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3227 	    IFCAP_TSO4, 0);
3228 	/*
3229 	 * Tell the upper layer(s) we
3230 	 * support full VLAN capability
3231 	 */
3232 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3233 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3234 	    IFCAP_VLAN_MTU, 0);
3235 	if_setcapenable(ifp, if_getcapabilities(ifp));
3236 
3237 	/*
3238 	** Don't turn this on by default, if vlans are
3239 	** created on another pseudo device (eg. lagg)
3240 	** then vlan events are not passed thru, breaking
3241 	** operation, but with HW FILTER off it works. If
3242 	** using vlans directly on the em driver you can
3243 	** enable this and get full hardware tag filtering.
3244 	*/
3245 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3246 
3247 #ifdef DEVICE_POLLING
3248 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3249 #endif
3250 
3251 	/* Enable only WOL MAGIC by default */
3252 	if (adapter->wol) {
3253 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3254 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3255 	}
3256 
3257 	/*
3258 	 * Specify the media types supported by this adapter and register
3259 	 * callbacks to update media and link information
3260 	 */
3261 	ifmedia_init(&adapter->media, IFM_IMASK,
3262 	    em_media_change, em_media_status);
3263 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3264 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3265 		u_char fiber_type = IFM_1000_SX;	/* default type */
3266 
3267 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3268 			    0, NULL);
3269 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3270 	} else {
3271 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3272 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3273 			    0, NULL);
3274 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3275 			    0, NULL);
3276 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3277 			    0, NULL);
3278 		if (adapter->hw.phy.type != e1000_phy_ife) {
3279 			ifmedia_add(&adapter->media,
3280 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3281 			ifmedia_add(&adapter->media,
3282 				IFM_ETHER | IFM_1000_T, 0, NULL);
3283 		}
3284 	}
3285 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3286 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3287 	return (0);
3288 }
3289 
3290 
3291 /*
3292  * Manage DMA'able memory.
3293  */
3294 static void
3295 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3296 {
3297 	if (error)
3298 		return;
3299 	*(bus_addr_t *) arg = segs[0].ds_addr;
3300 }
3301 
3302 static int
3303 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3304         struct em_dma_alloc *dma, int mapflags)
3305 {
3306 	int error;
3307 
3308 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3309 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3310 				BUS_SPACE_MAXADDR,	/* lowaddr */
3311 				BUS_SPACE_MAXADDR,	/* highaddr */
3312 				NULL, NULL,		/* filter, filterarg */
3313 				size,			/* maxsize */
3314 				1,			/* nsegments */
3315 				size,			/* maxsegsize */
3316 				0,			/* flags */
3317 				NULL,			/* lockfunc */
3318 				NULL,			/* lockarg */
3319 				&dma->dma_tag);
3320 	if (error) {
3321 		device_printf(adapter->dev,
3322 		    "%s: bus_dma_tag_create failed: %d\n",
3323 		    __func__, error);
3324 		goto fail_0;
3325 	}
3326 
3327 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3328 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3329 	if (error) {
3330 		device_printf(adapter->dev,
3331 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3332 		    __func__, (uintmax_t)size, error);
3333 		goto fail_2;
3334 	}
3335 
3336 	dma->dma_paddr = 0;
3337 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3338 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3339 	if (error || dma->dma_paddr == 0) {
3340 		device_printf(adapter->dev,
3341 		    "%s: bus_dmamap_load failed: %d\n",
3342 		    __func__, error);
3343 		goto fail_3;
3344 	}
3345 
3346 	return (0);
3347 
3348 fail_3:
3349 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3350 fail_2:
3351 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3352 	bus_dma_tag_destroy(dma->dma_tag);
3353 fail_0:
3354 	dma->dma_tag = NULL;
3355 
3356 	return (error);
3357 }
3358 
3359 static void
3360 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3361 {
3362 	if (dma->dma_tag == NULL)
3363 		return;
3364 	if (dma->dma_paddr != 0) {
3365 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3366 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3367 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3368 		dma->dma_paddr = 0;
3369 	}
3370 	if (dma->dma_vaddr != NULL) {
3371 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3372 		dma->dma_vaddr = NULL;
3373 	}
3374 	bus_dma_tag_destroy(dma->dma_tag);
3375 	dma->dma_tag = NULL;
3376 }
3377 
3378 
3379 /*********************************************************************
3380  *
3381  *  Allocate memory for the transmit and receive rings, and then
3382  *  the descriptors associated with each, called only once at attach.
3383  *
3384  **********************************************************************/
3385 static int
3386 em_allocate_queues(struct adapter *adapter)
3387 {
3388 	device_t		dev = adapter->dev;
3389 	struct tx_ring		*txr = NULL;
3390 	struct rx_ring		*rxr = NULL;
3391 	int rsize, tsize, error = E1000_SUCCESS;
3392 	int txconf = 0, rxconf = 0;
3393 
3394 
3395 	/* Allocate the TX ring struct memory */
3396 	if (!(adapter->tx_rings =
3397 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3398 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3399 		device_printf(dev, "Unable to allocate TX ring memory\n");
3400 		error = ENOMEM;
3401 		goto fail;
3402 	}
3403 
3404 	/* Now allocate the RX */
3405 	if (!(adapter->rx_rings =
3406 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3407 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3408 		device_printf(dev, "Unable to allocate RX ring memory\n");
3409 		error = ENOMEM;
3410 		goto rx_fail;
3411 	}
3412 
3413 	tsize = roundup2(adapter->num_tx_desc *
3414 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3415 	/*
3416 	 * Now set up the TX queues, txconf is needed to handle the
3417 	 * possibility that things fail midcourse and we need to
3418 	 * undo memory gracefully
3419 	 */
3420 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3421 		/* Set up some basics */
3422 		txr = &adapter->tx_rings[i];
3423 		txr->adapter = adapter;
3424 		txr->me = i;
3425 
3426 		/* Initialize the TX lock */
3427 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3428 		    device_get_nameunit(dev), txr->me);
3429 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3430 
3431 		if (em_dma_malloc(adapter, tsize,
3432 			&txr->txdma, BUS_DMA_NOWAIT)) {
3433 			device_printf(dev,
3434 			    "Unable to allocate TX Descriptor memory\n");
3435 			error = ENOMEM;
3436 			goto err_tx_desc;
3437 		}
3438 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3439 		bzero((void *)txr->tx_base, tsize);
3440 
3441         	if (em_allocate_transmit_buffers(txr)) {
3442 			device_printf(dev,
3443 			    "Critical Failure setting up transmit buffers\n");
3444 			error = ENOMEM;
3445 			goto err_tx_desc;
3446         	}
3447 #if __FreeBSD_version >= 800000
3448 		/* Allocate a buf ring */
3449 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3450 		    M_WAITOK, &txr->tx_mtx);
3451 #endif
3452 	}
3453 
3454 	/*
3455 	 * Next the RX queues...
3456 	 */
3457 	rsize = roundup2(adapter->num_rx_desc *
3458 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3459 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3460 		rxr = &adapter->rx_rings[i];
3461 		rxr->adapter = adapter;
3462 		rxr->me = i;
3463 
3464 		/* Initialize the RX lock */
3465 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3466 		    device_get_nameunit(dev), txr->me);
3467 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3468 
3469 		if (em_dma_malloc(adapter, rsize,
3470 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3471 			device_printf(dev,
3472 			    "Unable to allocate RxDescriptor memory\n");
3473 			error = ENOMEM;
3474 			goto err_rx_desc;
3475 		}
3476 		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3477 		bzero((void *)rxr->rx_base, rsize);
3478 
3479         	/* Allocate receive buffers for the ring*/
3480 		if (em_allocate_receive_buffers(rxr)) {
3481 			device_printf(dev,
3482 			    "Critical Failure setting up receive buffers\n");
3483 			error = ENOMEM;
3484 			goto err_rx_desc;
3485 		}
3486 	}
3487 
3488 	return (0);
3489 
3490 err_rx_desc:
3491 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3492 		em_dma_free(adapter, &rxr->rxdma);
3493 err_tx_desc:
3494 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3495 		em_dma_free(adapter, &txr->txdma);
3496 	free(adapter->rx_rings, M_DEVBUF);
3497 rx_fail:
3498 #if __FreeBSD_version >= 800000
3499 	buf_ring_free(txr->br, M_DEVBUF);
3500 #endif
3501 	free(adapter->tx_rings, M_DEVBUF);
3502 fail:
3503 	return (error);
3504 }
3505 
3506 
3507 /*********************************************************************
3508  *
3509  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3510  *  the information needed to transmit a packet on the wire. This is
3511  *  called only once at attach, setup is done every reset.
3512  *
3513  **********************************************************************/
3514 static int
3515 em_allocate_transmit_buffers(struct tx_ring *txr)
3516 {
3517 	struct adapter *adapter = txr->adapter;
3518 	device_t dev = adapter->dev;
3519 	struct em_txbuffer *txbuf;
3520 	int error, i;
3521 
3522 	/*
3523 	 * Setup DMA descriptor areas.
3524 	 */
3525 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3526 			       1, 0,			/* alignment, bounds */
3527 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3528 			       BUS_SPACE_MAXADDR,	/* highaddr */
3529 			       NULL, NULL,		/* filter, filterarg */
3530 			       EM_TSO_SIZE,		/* maxsize */
3531 			       EM_MAX_SCATTER,		/* nsegments */
3532 			       PAGE_SIZE,		/* maxsegsize */
3533 			       0,			/* flags */
3534 			       NULL,			/* lockfunc */
3535 			       NULL,			/* lockfuncarg */
3536 			       &txr->txtag))) {
3537 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3538 		goto fail;
3539 	}
3540 
3541 	if (!(txr->tx_buffers =
3542 	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3543 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3544 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3545 		error = ENOMEM;
3546 		goto fail;
3547 	}
3548 
3549         /* Create the descriptor buffer dma maps */
3550 	txbuf = txr->tx_buffers;
3551 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3552 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3553 		if (error != 0) {
3554 			device_printf(dev, "Unable to create TX DMA map\n");
3555 			goto fail;
3556 		}
3557 	}
3558 
3559 	return 0;
3560 fail:
3561 	/* We free all, it handles case where we are in the middle */
3562 	em_free_transmit_structures(adapter);
3563 	return (error);
3564 }
3565 
3566 /*********************************************************************
3567  *
3568  *  Initialize a transmit ring.
3569  *
3570  **********************************************************************/
3571 static void
3572 em_setup_transmit_ring(struct tx_ring *txr)
3573 {
3574 	struct adapter *adapter = txr->adapter;
3575 	struct em_txbuffer *txbuf;
3576 	int i;
3577 #ifdef DEV_NETMAP
3578 	struct netmap_slot *slot;
3579 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3580 #endif /* DEV_NETMAP */
3581 
3582 	/* Clear the old descriptor contents */
3583 	EM_TX_LOCK(txr);
3584 #ifdef DEV_NETMAP
3585 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3586 #endif /* DEV_NETMAP */
3587 
3588 	bzero((void *)txr->tx_base,
3589 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3590 	/* Reset indices */
3591 	txr->next_avail_desc = 0;
3592 	txr->next_to_clean = 0;
3593 
3594 	/* Free any existing tx buffers. */
3595         txbuf = txr->tx_buffers;
3596 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3597 		if (txbuf->m_head != NULL) {
3598 			bus_dmamap_sync(txr->txtag, txbuf->map,
3599 			    BUS_DMASYNC_POSTWRITE);
3600 			bus_dmamap_unload(txr->txtag, txbuf->map);
3601 			m_freem(txbuf->m_head);
3602 			txbuf->m_head = NULL;
3603 		}
3604 #ifdef DEV_NETMAP
3605 		if (slot) {
3606 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3607 			uint64_t paddr;
3608 			void *addr;
3609 
3610 			addr = PNMB(na, slot + si, &paddr);
3611 			txr->tx_base[i].buffer_addr = htole64(paddr);
3612 			/* reload the map for netmap mode */
3613 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3614 		}
3615 #endif /* DEV_NETMAP */
3616 
3617 		/* clear the watch index */
3618 		txbuf->next_eop = -1;
3619         }
3620 
3621 	/* Set number of descriptors available */
3622 	txr->tx_avail = adapter->num_tx_desc;
3623 	txr->busy = EM_TX_IDLE;
3624 
3625 	/* Clear checksum offload context. */
3626 	txr->last_hw_offload = 0;
3627 	txr->last_hw_ipcss = 0;
3628 	txr->last_hw_ipcso = 0;
3629 	txr->last_hw_tucss = 0;
3630 	txr->last_hw_tucso = 0;
3631 
3632 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3633 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3634 	EM_TX_UNLOCK(txr);
3635 }
3636 
3637 /*********************************************************************
3638  *
3639  *  Initialize all transmit rings.
3640  *
3641  **********************************************************************/
3642 static void
3643 em_setup_transmit_structures(struct adapter *adapter)
3644 {
3645 	struct tx_ring *txr = adapter->tx_rings;
3646 
3647 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3648 		em_setup_transmit_ring(txr);
3649 
3650 	return;
3651 }
3652 
3653 /*********************************************************************
3654  *
3655  *  Enable transmit unit.
3656  *
3657  **********************************************************************/
3658 static void
3659 em_initialize_transmit_unit(struct adapter *adapter)
3660 {
3661 	struct tx_ring	*txr = adapter->tx_rings;
3662 	struct e1000_hw	*hw = &adapter->hw;
3663 	u32	tctl, txdctl = 0, tarc, tipg = 0;
3664 
3665 	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3666 
3667 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3668 		u64 bus_addr = txr->txdma.dma_paddr;
3669 		/* Base and Len of TX Ring */
3670 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3671 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3672 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3673 	    	    (u32)(bus_addr >> 32));
3674 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3675 	    	    (u32)bus_addr);
3676 		/* Init the HEAD/TAIL indices */
3677 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3678 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3679 
3680 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3681 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3682 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3683 
3684 		txr->busy = EM_TX_IDLE;
3685 		txdctl = 0; /* clear txdctl */
3686                 txdctl |= 0x1f; /* PTHRESH */
3687                 txdctl |= 1 << 8; /* HTHRESH */
3688                 txdctl |= 1 << 16;/* WTHRESH */
3689 		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3690 		txdctl |= E1000_TXDCTL_GRAN;
3691                 txdctl |= 1 << 25; /* LWTHRESH */
3692 
3693                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3694 	}
3695 
3696 	/* Set the default values for the Tx Inter Packet Gap timer */
3697 	switch (adapter->hw.mac.type) {
3698 	case e1000_80003es2lan:
3699 		tipg = DEFAULT_82543_TIPG_IPGR1;
3700 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3701 		    E1000_TIPG_IPGR2_SHIFT;
3702 		break;
3703 	default:
3704 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3705 		    (adapter->hw.phy.media_type ==
3706 		    e1000_media_type_internal_serdes))
3707 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3708 		else
3709 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3710 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3711 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3712 	}
3713 
3714 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3715 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3716 
3717 	if(adapter->hw.mac.type >= e1000_82540)
3718 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3719 		    adapter->tx_abs_int_delay.value);
3720 
3721 	if ((adapter->hw.mac.type == e1000_82571) ||
3722 	    (adapter->hw.mac.type == e1000_82572)) {
3723 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3724 		tarc |= TARC_SPEED_MODE_BIT;
3725 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3726 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3727 		/* errata: program both queues to unweighted RR */
3728 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3729 		tarc |= 1;
3730 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3731 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3732 		tarc |= 1;
3733 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3734 	} else if (adapter->hw.mac.type == e1000_82574) {
3735 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3736 		tarc |= TARC_ERRATA_BIT;
3737 		if ( adapter->num_queues > 1) {
3738 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3739 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3740 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3741 		} else
3742 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3743 	}
3744 
3745 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3746 	if (adapter->tx_int_delay.value > 0)
3747 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3748 
3749 	/* Program the Transmit Control Register */
3750 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3751 	tctl &= ~E1000_TCTL_CT;
3752 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3753 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3754 
3755 	if (adapter->hw.mac.type >= e1000_82571)
3756 		tctl |= E1000_TCTL_MULR;
3757 
3758 	/* This write will effectively turn on the transmit unit. */
3759 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3760 
3761 	if (hw->mac.type == e1000_pch_spt) {
3762 		u32 reg;
3763 		reg = E1000_READ_REG(hw, E1000_IOSFPC);
3764 		reg |= E1000_RCTL_RDMTS_HEX;
3765 		E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3766 		reg = E1000_READ_REG(hw, E1000_TARC(0));
3767 		reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3768 		E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3769 	}
3770 }
3771 
3772 
3773 /*********************************************************************
3774  *
3775  *  Free all transmit rings.
3776  *
3777  **********************************************************************/
3778 static void
3779 em_free_transmit_structures(struct adapter *adapter)
3780 {
3781 	struct tx_ring *txr = adapter->tx_rings;
3782 
3783 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3784 		EM_TX_LOCK(txr);
3785 		em_free_transmit_buffers(txr);
3786 		em_dma_free(adapter, &txr->txdma);
3787 		EM_TX_UNLOCK(txr);
3788 		EM_TX_LOCK_DESTROY(txr);
3789 	}
3790 
3791 	free(adapter->tx_rings, M_DEVBUF);
3792 }
3793 
3794 /*********************************************************************
3795  *
3796  *  Free transmit ring related data structures.
3797  *
3798  **********************************************************************/
3799 static void
3800 em_free_transmit_buffers(struct tx_ring *txr)
3801 {
3802 	struct adapter		*adapter = txr->adapter;
3803 	struct em_txbuffer	*txbuf;
3804 
3805 	INIT_DEBUGOUT("free_transmit_ring: begin");
3806 
3807 	if (txr->tx_buffers == NULL)
3808 		return;
3809 
3810 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3811 		txbuf = &txr->tx_buffers[i];
3812 		if (txbuf->m_head != NULL) {
3813 			bus_dmamap_sync(txr->txtag, txbuf->map,
3814 			    BUS_DMASYNC_POSTWRITE);
3815 			bus_dmamap_unload(txr->txtag,
3816 			    txbuf->map);
3817 			m_freem(txbuf->m_head);
3818 			txbuf->m_head = NULL;
3819 			if (txbuf->map != NULL) {
3820 				bus_dmamap_destroy(txr->txtag,
3821 				    txbuf->map);
3822 				txbuf->map = NULL;
3823 			}
3824 		} else if (txbuf->map != NULL) {
3825 			bus_dmamap_unload(txr->txtag,
3826 			    txbuf->map);
3827 			bus_dmamap_destroy(txr->txtag,
3828 			    txbuf->map);
3829 			txbuf->map = NULL;
3830 		}
3831 	}
3832 #if __FreeBSD_version >= 800000
3833 	if (txr->br != NULL)
3834 		buf_ring_free(txr->br, M_DEVBUF);
3835 #endif
3836 	if (txr->tx_buffers != NULL) {
3837 		free(txr->tx_buffers, M_DEVBUF);
3838 		txr->tx_buffers = NULL;
3839 	}
3840 	if (txr->txtag != NULL) {
3841 		bus_dma_tag_destroy(txr->txtag);
3842 		txr->txtag = NULL;
3843 	}
3844 	return;
3845 }
3846 
3847 
3848 /*********************************************************************
3849  *  The offload context is protocol specific (TCP/UDP) and thus
3850  *  only needs to be set when the protocol changes. The occasion
3851  *  of a context change can be a performance detriment, and
3852  *  might be better just disabled. The reason arises in the way
3853  *  in which the controller supports pipelined requests from the
3854  *  Tx data DMA. Up to four requests can be pipelined, and they may
3855  *  belong to the same packet or to multiple packets. However all
3856  *  requests for one packet are issued before a request is issued
3857  *  for a subsequent packet and if a request for the next packet
3858  *  requires a context change, that request will be stalled
3859  *  until the previous request completes. This means setting up
3860  *  a new context effectively disables pipelined Tx data DMA which
3861  *  in turn greatly slow down performance to send small sized
3862  *  frames.
3863  **********************************************************************/
3864 static void
3865 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3866     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3867 {
3868 	struct adapter			*adapter = txr->adapter;
3869 	struct e1000_context_desc	*TXD = NULL;
3870 	struct em_txbuffer		*tx_buffer;
3871 	int				cur, hdr_len;
3872 	u32				cmd = 0;
3873 	u16				offload = 0;
3874 	u8				ipcso, ipcss, tucso, tucss;
3875 
3876 	ipcss = ipcso = tucss = tucso = 0;
3877 	hdr_len = ip_off + (ip->ip_hl << 2);
3878 	cur = txr->next_avail_desc;
3879 
3880 	/* Setup of IP header checksum. */
3881 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3882 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3883 		offload |= CSUM_IP;
3884 		ipcss = ip_off;
3885 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3886 		/*
3887 		 * Start offset for header checksum calculation.
3888 		 * End offset for header checksum calculation.
3889 		 * Offset of place to put the checksum.
3890 		 */
3891 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3892 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3893 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3894 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3895 		cmd |= E1000_TXD_CMD_IP;
3896 	}
3897 
3898 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3899  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3900  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3901  		offload |= CSUM_TCP;
3902  		tucss = hdr_len;
3903  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3904 		/*
3905 		 * The 82574L can only remember the *last* context used
3906 		 * regardless of queue that it was use for.  We cannot reuse
3907 		 * contexts on this hardware platform and must generate a new
3908 		 * context every time.  82574L hardware spec, section 7.2.6,
3909 		 * second note.
3910 		 */
3911 		if (adapter->num_queues < 2) {
3912  			/*
3913  		 	* Setting up new checksum offload context for every
3914 			* frames takes a lot of processing time for hardware.
3915 			* This also reduces performance a lot for small sized
3916 			* frames so avoid it if driver can use previously
3917 			* configured checksum offload context.
3918  		 	*/
3919  			if (txr->last_hw_offload == offload) {
3920  				if (offload & CSUM_IP) {
3921  					if (txr->last_hw_ipcss == ipcss &&
3922  				    	txr->last_hw_ipcso == ipcso &&
3923  				    	txr->last_hw_tucss == tucss &&
3924  				    	txr->last_hw_tucso == tucso)
3925  						return;
3926  				} else {
3927  					if (txr->last_hw_tucss == tucss &&
3928  				    	txr->last_hw_tucso == tucso)
3929  						return;
3930  				}
3931   			}
3932  			txr->last_hw_offload = offload;
3933  			txr->last_hw_tucss = tucss;
3934  			txr->last_hw_tucso = tucso;
3935 		}
3936  		/*
3937  		 * Start offset for payload checksum calculation.
3938  		 * End offset for payload checksum calculation.
3939  		 * Offset of place to put the checksum.
3940  		 */
3941 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3942  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3943  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3944  		TXD->upper_setup.tcp_fields.tucso = tucso;
3945  		cmd |= E1000_TXD_CMD_TCP;
3946  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3947  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3948  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3949  		tucss = hdr_len;
3950  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3951 		/*
3952 		 * The 82574L can only remember the *last* context used
3953 		 * regardless of queue that it was use for.  We cannot reuse
3954 		 * contexts on this hardware platform and must generate a new
3955 		 * context every time.  82574L hardware spec, section 7.2.6,
3956 		 * second note.
3957 		 */
3958 		if (adapter->num_queues < 2) {
3959  			/*
3960  		 	* Setting up new checksum offload context for every
3961 			* frames takes a lot of processing time for hardware.
3962 			* This also reduces performance a lot for small sized
3963 			* frames so avoid it if driver can use previously
3964 			* configured checksum offload context.
3965  		 	*/
3966  			if (txr->last_hw_offload == offload) {
3967  				if (offload & CSUM_IP) {
3968  					if (txr->last_hw_ipcss == ipcss &&
3969  				    	txr->last_hw_ipcso == ipcso &&
3970  				    	txr->last_hw_tucss == tucss &&
3971  				    	txr->last_hw_tucso == tucso)
3972  						return;
3973  				} else {
3974  					if (txr->last_hw_tucss == tucss &&
3975  				    	txr->last_hw_tucso == tucso)
3976  						return;
3977  				}
3978  			}
3979  			txr->last_hw_offload = offload;
3980  			txr->last_hw_tucss = tucss;
3981  			txr->last_hw_tucso = tucso;
3982 		}
3983  		/*
3984  		 * Start offset for header checksum calculation.
3985  		 * End offset for header checksum calculation.
3986  		 * Offset of place to put the checksum.
3987  		 */
3988 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3989  		TXD->upper_setup.tcp_fields.tucss = tucss;
3990  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3991  		TXD->upper_setup.tcp_fields.tucso = tucso;
3992   	}
3993 
3994  	if (offload & CSUM_IP) {
3995  		txr->last_hw_ipcss = ipcss;
3996  		txr->last_hw_ipcso = ipcso;
3997   	}
3998 
3999 	TXD->tcp_seg_setup.data = htole32(0);
4000 	TXD->cmd_and_length =
4001 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4002 	tx_buffer = &txr->tx_buffers[cur];
4003 	tx_buffer->m_head = NULL;
4004 	tx_buffer->next_eop = -1;
4005 
4006 	if (++cur == adapter->num_tx_desc)
4007 		cur = 0;
4008 
4009 	txr->tx_avail--;
4010 	txr->next_avail_desc = cur;
4011 }
4012 
4013 
4014 /**********************************************************************
4015  *
4016  *  Setup work for hardware segmentation offload (TSO)
4017  *
4018  **********************************************************************/
4019 static void
4020 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4021     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4022 {
4023 	struct adapter			*adapter = txr->adapter;
4024 	struct e1000_context_desc	*TXD;
4025 	struct em_txbuffer		*tx_buffer;
4026 	int cur, hdr_len;
4027 
4028 	/*
4029 	 * In theory we can use the same TSO context if and only if
4030 	 * frame is the same type(IP/TCP) and the same MSS. However
4031 	 * checking whether a frame has the same IP/TCP structure is
4032 	 * hard thing so just ignore that and always restablish a
4033 	 * new TSO context.
4034 	 */
4035 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4036 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
4037 		      E1000_TXD_DTYP_D |	/* Data descr type */
4038 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
4039 
4040 	/* IP and/or TCP header checksum calculation and insertion. */
4041 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4042 
4043 	cur = txr->next_avail_desc;
4044 	tx_buffer = &txr->tx_buffers[cur];
4045 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4046 
4047 	/*
4048 	 * Start offset for header checksum calculation.
4049 	 * End offset for header checksum calculation.
4050 	 * Offset of place put the checksum.
4051 	 */
4052 	TXD->lower_setup.ip_fields.ipcss = ip_off;
4053 	TXD->lower_setup.ip_fields.ipcse =
4054 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
4055 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4056 	/*
4057 	 * Start offset for payload checksum calculation.
4058 	 * End offset for payload checksum calculation.
4059 	 * Offset of place to put the checksum.
4060 	 */
4061 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4062 	TXD->upper_setup.tcp_fields.tucse = 0;
4063 	TXD->upper_setup.tcp_fields.tucso =
4064 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4065 	/*
4066 	 * Payload size per packet w/o any headers.
4067 	 * Length of all headers up to payload.
4068 	 */
4069 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4070 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4071 
4072 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
4073 				E1000_TXD_CMD_DEXT |	/* Extended descr */
4074 				E1000_TXD_CMD_TSE |	/* TSE context */
4075 				E1000_TXD_CMD_IP |	/* Do IP csum */
4076 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
4077 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
4078 
4079 	tx_buffer->m_head = NULL;
4080 	tx_buffer->next_eop = -1;
4081 
4082 	if (++cur == adapter->num_tx_desc)
4083 		cur = 0;
4084 
4085 	txr->tx_avail--;
4086 	txr->next_avail_desc = cur;
4087 	txr->tx_tso = TRUE;
4088 }
4089 
4090 
4091 /**********************************************************************
4092  *
4093  *  Examine each tx_buffer in the used queue. If the hardware is done
4094  *  processing the packet then free associated resources. The
4095  *  tx_buffer is put back on the free queue.
4096  *
4097  **********************************************************************/
4098 static void
4099 em_txeof(struct tx_ring *txr)
4100 {
4101 	struct adapter	*adapter = txr->adapter;
4102         int first, last, done, processed;
4103         struct em_txbuffer *tx_buffer;
4104         struct e1000_tx_desc   *tx_desc, *eop_desc;
4105 	if_t ifp = adapter->ifp;
4106 
4107 	EM_TX_LOCK_ASSERT(txr);
4108 #ifdef DEV_NETMAP
4109 	if (netmap_tx_irq(ifp, txr->me))
4110 		return;
4111 #endif /* DEV_NETMAP */
4112 
4113 	/* No work, make sure hang detection is disabled */
4114         if (txr->tx_avail == adapter->num_tx_desc) {
4115 		txr->busy = EM_TX_IDLE;
4116                 return;
4117 	}
4118 
4119 	processed = 0;
4120         first = txr->next_to_clean;
4121         tx_desc = &txr->tx_base[first];
4122         tx_buffer = &txr->tx_buffers[first];
4123 	last = tx_buffer->next_eop;
4124         eop_desc = &txr->tx_base[last];
4125 
4126 	/*
4127 	 * What this does is get the index of the
4128 	 * first descriptor AFTER the EOP of the
4129 	 * first packet, that way we can do the
4130 	 * simple comparison on the inner while loop.
4131 	 */
4132 	if (++last == adapter->num_tx_desc)
4133  		last = 0;
4134 	done = last;
4135 
4136         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4137             BUS_DMASYNC_POSTREAD);
4138 
4139         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4140 		/* We clean the range of the packet */
4141 		while (first != done) {
4142                 	tx_desc->upper.data = 0;
4143                 	tx_desc->lower.data = 0;
4144                 	tx_desc->buffer_addr = 0;
4145                 	++txr->tx_avail;
4146 			++processed;
4147 
4148 			if (tx_buffer->m_head) {
4149 				bus_dmamap_sync(txr->txtag,
4150 				    tx_buffer->map,
4151 				    BUS_DMASYNC_POSTWRITE);
4152 				bus_dmamap_unload(txr->txtag,
4153 				    tx_buffer->map);
4154                         	m_freem(tx_buffer->m_head);
4155                         	tx_buffer->m_head = NULL;
4156                 	}
4157 			tx_buffer->next_eop = -1;
4158 
4159 	                if (++first == adapter->num_tx_desc)
4160 				first = 0;
4161 
4162 	                tx_buffer = &txr->tx_buffers[first];
4163 			tx_desc = &txr->tx_base[first];
4164 		}
4165 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4166 		/* See if we can continue to the next packet */
4167 		last = tx_buffer->next_eop;
4168 		if (last != -1) {
4169         		eop_desc = &txr->tx_base[last];
4170 			/* Get new done point */
4171 			if (++last == adapter->num_tx_desc) last = 0;
4172 			done = last;
4173 		} else
4174 			break;
4175         }
4176         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4177             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4178 
4179         txr->next_to_clean = first;
4180 
4181 	/*
4182 	** Hang detection: we know there's work outstanding
4183 	** or the entry return would have been taken, so no
4184 	** descriptor processed here indicates a potential hang.
4185 	** The local timer will examine this and do a reset if needed.
4186 	*/
4187 	if (processed == 0) {
4188 		if (txr->busy != EM_TX_HUNG)
4189 			++txr->busy;
4190 	} else /* At least one descriptor was cleaned */
4191 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4192 
4193         /*
4194          * If we have a minimum free, clear IFF_DRV_OACTIVE
4195          * to tell the stack that it is OK to send packets.
4196 	 * Notice that all writes of OACTIVE happen under the
4197 	 * TX lock which, with a single queue, guarantees
4198 	 * sanity.
4199          */
4200         if (txr->tx_avail >= EM_MAX_SCATTER) {
4201 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4202 	}
4203 
4204 	/* Disable hang detection if all clean */
4205 	if (txr->tx_avail == adapter->num_tx_desc)
4206 		txr->busy = EM_TX_IDLE;
4207 }
4208 
4209 /*********************************************************************
4210  *
4211  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4212  *
4213  **********************************************************************/
4214 static void
4215 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4216 {
4217 	struct adapter		*adapter = rxr->adapter;
4218 	struct mbuf		*m;
4219 	bus_dma_segment_t	segs;
4220 	struct em_rxbuffer	*rxbuf;
4221 	int			i, j, error, nsegs;
4222 	bool			cleaned = FALSE;
4223 
4224 	i = j = rxr->next_to_refresh;
4225 	/*
4226 	** Get one descriptor beyond
4227 	** our work mark to control
4228 	** the loop.
4229 	*/
4230 	if (++j == adapter->num_rx_desc)
4231 		j = 0;
4232 
4233 	while (j != limit) {
4234 		rxbuf = &rxr->rx_buffers[i];
4235 		if (rxbuf->m_head == NULL) {
4236 			m = m_getjcl(M_NOWAIT, MT_DATA,
4237 			    M_PKTHDR, adapter->rx_mbuf_sz);
4238 			/*
4239 			** If we have a temporary resource shortage
4240 			** that causes a failure, just abort refresh
4241 			** for now, we will return to this point when
4242 			** reinvoked from em_rxeof.
4243 			*/
4244 			if (m == NULL)
4245 				goto update;
4246 		} else
4247 			m = rxbuf->m_head;
4248 
4249 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4250 		m->m_flags |= M_PKTHDR;
4251 		m->m_data = m->m_ext.ext_buf;
4252 
4253 		/* Use bus_dma machinery to setup the memory mapping  */
4254 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4255 		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
4256 		if (error != 0) {
4257 			printf("Refresh mbufs: hdr dmamap load"
4258 			    " failure - %d\n", error);
4259 			m_free(m);
4260 			rxbuf->m_head = NULL;
4261 			goto update;
4262 		}
4263 		rxbuf->m_head = m;
4264 		rxbuf->paddr = segs.ds_addr;
4265 		bus_dmamap_sync(rxr->rxtag,
4266 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4267 		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4268 		cleaned = TRUE;
4269 
4270 		i = j; /* Next is precalulated for us */
4271 		rxr->next_to_refresh = i;
4272 		/* Calculate next controlling index */
4273 		if (++j == adapter->num_rx_desc)
4274 			j = 0;
4275 	}
4276 update:
4277 	/*
4278 	** Update the tail pointer only if,
4279 	** and as far as we have refreshed.
4280 	*/
4281 	if (cleaned)
4282 		E1000_WRITE_REG(&adapter->hw,
4283 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4284 
4285 	return;
4286 }
4287 
4288 
4289 /*********************************************************************
4290  *
4291  *  Allocate memory for rx_buffer structures. Since we use one
4292  *  rx_buffer per received packet, the maximum number of rx_buffer's
4293  *  that we'll need is equal to the number of receive descriptors
4294  *  that we've allocated.
4295  *
4296  **********************************************************************/
4297 static int
4298 em_allocate_receive_buffers(struct rx_ring *rxr)
4299 {
4300 	struct adapter		*adapter = rxr->adapter;
4301 	device_t		dev = adapter->dev;
4302 	struct em_rxbuffer	*rxbuf;
4303 	int			error;
4304 
4305 	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4306 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4307 	if (rxr->rx_buffers == NULL) {
4308 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4309 		return (ENOMEM);
4310 	}
4311 
4312 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4313 				1, 0,			/* alignment, bounds */
4314 				BUS_SPACE_MAXADDR,	/* lowaddr */
4315 				BUS_SPACE_MAXADDR,	/* highaddr */
4316 				NULL, NULL,		/* filter, filterarg */
4317 				MJUM9BYTES,		/* maxsize */
4318 				1,			/* nsegments */
4319 				MJUM9BYTES,		/* maxsegsize */
4320 				0,			/* flags */
4321 				NULL,			/* lockfunc */
4322 				NULL,			/* lockarg */
4323 				&rxr->rxtag);
4324 	if (error) {
4325 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4326 		    __func__, error);
4327 		goto fail;
4328 	}
4329 
4330 	rxbuf = rxr->rx_buffers;
4331 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4332 		rxbuf = &rxr->rx_buffers[i];
4333 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4334 		if (error) {
4335 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4336 			    __func__, error);
4337 			goto fail;
4338 		}
4339 	}
4340 
4341 	return (0);
4342 
4343 fail:
4344 	em_free_receive_structures(adapter);
4345 	return (error);
4346 }
4347 
4348 
4349 /*********************************************************************
4350  *
4351  *  Initialize a receive ring and its buffers.
4352  *
4353  **********************************************************************/
4354 static int
4355 em_setup_receive_ring(struct rx_ring *rxr)
4356 {
4357 	struct	adapter 	*adapter = rxr->adapter;
4358 	struct em_rxbuffer	*rxbuf;
4359 	bus_dma_segment_t	seg[1];
4360 	int			rsize, nsegs, error = 0;
4361 #ifdef DEV_NETMAP
4362 	struct netmap_slot *slot;
4363 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4364 #endif
4365 
4366 
4367 	/* Clear the ring contents */
4368 	EM_RX_LOCK(rxr);
4369 	rsize = roundup2(adapter->num_rx_desc *
4370 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4371 	bzero((void *)rxr->rx_base, rsize);
4372 #ifdef DEV_NETMAP
4373 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4374 #endif
4375 
4376 	/*
4377 	** Free current RX buffer structs and their mbufs
4378 	*/
4379 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4380 		rxbuf = &rxr->rx_buffers[i];
4381 		if (rxbuf->m_head != NULL) {
4382 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4383 			    BUS_DMASYNC_POSTREAD);
4384 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4385 			m_freem(rxbuf->m_head);
4386 			rxbuf->m_head = NULL; /* mark as freed */
4387 		}
4388 	}
4389 
4390 	/* Now replenish the mbufs */
4391         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4392 		rxbuf = &rxr->rx_buffers[j];
4393 #ifdef DEV_NETMAP
4394 		if (slot) {
4395 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4396 			uint64_t paddr;
4397 			void *addr;
4398 
4399 			addr = PNMB(na, slot + si, &paddr);
4400 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4401 			rxbuf->paddr = paddr;
4402 			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4403 			continue;
4404 		}
4405 #endif /* DEV_NETMAP */
4406 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4407 		    M_PKTHDR, adapter->rx_mbuf_sz);
4408 		if (rxbuf->m_head == NULL) {
4409 			error = ENOBUFS;
4410 			goto fail;
4411 		}
4412 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4413 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4414 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4415 
4416 		/* Get the memory mapping */
4417 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4418 		    rxbuf->map, rxbuf->m_head, seg,
4419 		    &nsegs, BUS_DMA_NOWAIT);
4420 		if (error != 0) {
4421 			m_freem(rxbuf->m_head);
4422 			rxbuf->m_head = NULL;
4423 			goto fail;
4424 		}
4425 		bus_dmamap_sync(rxr->rxtag,
4426 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4427 
4428 		rxbuf->paddr = seg[0].ds_addr;
4429 		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4430 	}
4431 	rxr->next_to_check = 0;
4432 	rxr->next_to_refresh = 0;
4433 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4434 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4435 
4436 fail:
4437 	EM_RX_UNLOCK(rxr);
4438 	return (error);
4439 }
4440 
4441 /*********************************************************************
4442  *
4443  *  Initialize all receive rings.
4444  *
4445  **********************************************************************/
4446 static int
4447 em_setup_receive_structures(struct adapter *adapter)
4448 {
4449 	struct rx_ring *rxr = adapter->rx_rings;
4450 	int q;
4451 
4452 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4453 		if (em_setup_receive_ring(rxr))
4454 			goto fail;
4455 
4456 	return (0);
4457 fail:
4458 	/*
4459 	 * Free RX buffers allocated so far, we will only handle
4460 	 * the rings that completed, the failing case will have
4461 	 * cleaned up for itself. 'q' failed, so its the terminus.
4462 	 */
4463 	for (int i = 0; i < q; ++i) {
4464 		rxr = &adapter->rx_rings[i];
4465 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4466 			struct em_rxbuffer *rxbuf;
4467 			rxbuf = &rxr->rx_buffers[n];
4468 			if (rxbuf->m_head != NULL) {
4469 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4470 			  	  BUS_DMASYNC_POSTREAD);
4471 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4472 				m_freem(rxbuf->m_head);
4473 				rxbuf->m_head = NULL;
4474 			}
4475 		}
4476 		rxr->next_to_check = 0;
4477 		rxr->next_to_refresh = 0;
4478 	}
4479 
4480 	return (ENOBUFS);
4481 }
4482 
4483 /*********************************************************************
4484  *
4485  *  Free all receive rings.
4486  *
4487  **********************************************************************/
4488 static void
4489 em_free_receive_structures(struct adapter *adapter)
4490 {
4491 	struct rx_ring *rxr = adapter->rx_rings;
4492 
4493 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4494 		em_free_receive_buffers(rxr);
4495 		/* Free the ring memory as well */
4496 		em_dma_free(adapter, &rxr->rxdma);
4497 		EM_RX_LOCK_DESTROY(rxr);
4498 	}
4499 
4500 	free(adapter->rx_rings, M_DEVBUF);
4501 }
4502 
4503 
4504 /*********************************************************************
4505  *
4506  *  Free receive ring data structures
4507  *
4508  **********************************************************************/
4509 static void
4510 em_free_receive_buffers(struct rx_ring *rxr)
4511 {
4512 	struct adapter		*adapter = rxr->adapter;
4513 	struct em_rxbuffer	*rxbuf = NULL;
4514 
4515 	INIT_DEBUGOUT("free_receive_buffers: begin");
4516 
4517 	if (rxr->rx_buffers != NULL) {
4518 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4519 			rxbuf = &rxr->rx_buffers[i];
4520 			if (rxbuf->map != NULL) {
4521 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4522 				    BUS_DMASYNC_POSTREAD);
4523 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4524 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4525 			}
4526 			if (rxbuf->m_head != NULL) {
4527 				m_freem(rxbuf->m_head);
4528 				rxbuf->m_head = NULL;
4529 			}
4530 		}
4531 		free(rxr->rx_buffers, M_DEVBUF);
4532 		rxr->rx_buffers = NULL;
4533 		rxr->next_to_check = 0;
4534 		rxr->next_to_refresh = 0;
4535 	}
4536 
4537 	if (rxr->rxtag != NULL) {
4538 		bus_dma_tag_destroy(rxr->rxtag);
4539 		rxr->rxtag = NULL;
4540 	}
4541 
4542 	return;
4543 }
4544 
4545 
4546 /*********************************************************************
4547  *
4548  *  Enable receive unit.
4549  *
4550  **********************************************************************/
4551 
4552 static void
4553 em_initialize_receive_unit(struct adapter *adapter)
4554 {
4555 	struct rx_ring *rxr = adapter->rx_rings;
4556 	if_t ifp = adapter->ifp;
4557 	struct e1000_hw	*hw = &adapter->hw;
4558 	u32	rctl, rxcsum, rfctl;
4559 
4560 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4561 
4562 	/*
4563 	 * Make sure receives are disabled while setting
4564 	 * up the descriptor ring
4565 	 */
4566 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4567 	/* Do not disable if ever enabled on this hardware */
4568 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4569 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4570 
4571 	/* Setup the Receive Control Register */
4572 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4573 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4574 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4575 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4576 
4577 	/* Do not store bad packets */
4578 	rctl &= ~E1000_RCTL_SBP;
4579 
4580 	/* Enable Long Packet receive */
4581 	if (if_getmtu(ifp) > ETHERMTU)
4582 		rctl |= E1000_RCTL_LPE;
4583 	else
4584 		rctl &= ~E1000_RCTL_LPE;
4585 
4586         /* Strip the CRC */
4587         if (!em_disable_crc_stripping)
4588 		rctl |= E1000_RCTL_SECRC;
4589 
4590 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4591 	    adapter->rx_abs_int_delay.value);
4592 
4593 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4594 	    adapter->rx_int_delay.value);
4595 	/*
4596 	 * Set the interrupt throttling rate. Value is calculated
4597 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4598 	 */
4599 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4600 
4601 	/* Use extended rx descriptor formats */
4602 	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4603 	rfctl |= E1000_RFCTL_EXTEN;
4604 	/*
4605 	** When using MSIX interrupts we need to throttle
4606 	** using the EITR register (82574 only)
4607 	*/
4608 	if (hw->mac.type == e1000_82574) {
4609 		for (int i = 0; i < 4; i++)
4610 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4611 			    DEFAULT_ITR);
4612 		/* Disable accelerated acknowledge */
4613 		rfctl |= E1000_RFCTL_ACK_DIS;
4614 	}
4615 	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4616 
4617 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4618 	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4619 #ifdef EM_MULTIQUEUE
4620 		rxcsum |= E1000_RXCSUM_TUOFL |
4621 			  E1000_RXCSUM_IPOFL |
4622 			  E1000_RXCSUM_PCSD;
4623 #else
4624 		rxcsum |= E1000_RXCSUM_TUOFL;
4625 #endif
4626 	} else
4627 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4628 
4629 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4630 
4631 #ifdef EM_MULTIQUEUE
4632 #define RSSKEYLEN 10
4633 	if (adapter->num_queues > 1) {
4634 		uint8_t  rss_key[4 * RSSKEYLEN];
4635 		uint32_t reta = 0;
4636 		int i;
4637 
4638 		/*
4639 		* Configure RSS key
4640 		*/
4641 		arc4rand(rss_key, sizeof(rss_key), 0);
4642 		for (i = 0; i < RSSKEYLEN; ++i) {
4643 			uint32_t rssrk = 0;
4644 
4645 			rssrk = EM_RSSRK_VAL(rss_key, i);
4646 			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4647 		}
4648 
4649 		/*
4650 		* Configure RSS redirect table in following fashion:
4651 		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4652 		*/
4653 		for (i = 0; i < sizeof(reta); ++i) {
4654 			uint32_t q;
4655 
4656 			q = (i % adapter->num_queues) << 7;
4657 			reta |= q << (8 * i);
4658 		}
4659 
4660 		for (i = 0; i < 32; ++i) {
4661 			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4662 		}
4663 
4664 		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4665 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4666 				E1000_MRQC_RSS_FIELD_IPV4 |
4667 				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4668 				E1000_MRQC_RSS_FIELD_IPV6_EX |
4669 				E1000_MRQC_RSS_FIELD_IPV6);
4670 	}
4671 #endif
4672 	/*
4673 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4674 	** long latencies are observed, like Lenovo X60. This
4675 	** change eliminates the problem, but since having positive
4676 	** values in RDTR is a known source of problems on other
4677 	** platforms another solution is being sought.
4678 	*/
4679 	if (hw->mac.type == e1000_82573)
4680 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4681 
4682 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4683 		/* Setup the Base and Length of the Rx Descriptor Ring */
4684 		u64 bus_addr = rxr->rxdma.dma_paddr;
4685 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4686 
4687 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4688 		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4689 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4690 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4691 		/* Setup the Head and Tail Descriptor Pointers */
4692 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4693 #ifdef DEV_NETMAP
4694 		/*
4695 		 * an init() while a netmap client is active must
4696 		 * preserve the rx buffers passed to userspace.
4697 		 */
4698 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4699 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4700 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4701 		}
4702 #endif /* DEV_NETMAP */
4703 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4704 	}
4705 
4706 	/*
4707 	 * Set PTHRESH for improved jumbo performance
4708 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4709 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4710 	 * Only write to RXDCTL(1) if there is a need for different
4711 	 * settings.
4712 	 */
4713 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4714 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4715 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4716 	    (if_getmtu(ifp) > ETHERMTU)) {
4717 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4718 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4719 	} else if (adapter->hw.mac.type == e1000_82574) {
4720 		for (int i = 0; i < adapter->num_queues; i++) {
4721 			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4722 
4723 			rxdctl |= 0x20; /* PTHRESH */
4724 			rxdctl |= 4 << 8; /* HTHRESH */
4725 			rxdctl |= 4 << 16;/* WTHRESH */
4726 			rxdctl |= 1 << 24; /* Switch to granularity */
4727 			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4728 		}
4729 	}
4730 
4731 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4732 		if (if_getmtu(ifp) > ETHERMTU)
4733 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4734 		else
4735 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4736 	}
4737 
4738         /* Make sure VLAN Filters are off */
4739         rctl &= ~E1000_RCTL_VFE;
4740 
4741 	if (adapter->rx_mbuf_sz == MCLBYTES)
4742 		rctl |= E1000_RCTL_SZ_2048;
4743 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4744 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4745 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4746 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4747 
4748 	/* ensure we clear use DTYPE of 00 here */
4749 	rctl &= ~0x00000C00;
4750 	/* Write out the settings */
4751 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4752 
4753 	return;
4754 }
4755 
4756 
4757 /*********************************************************************
4758  *
4759  *  This routine executes in interrupt context. It replenishes
4760  *  the mbufs in the descriptor and sends data which has been
4761  *  dma'ed into host memory to upper layer.
4762  *
4763  *  We loop at most count times if count is > 0, or until done if
4764  *  count < 0.
4765  *
4766  *  For polling we also now return the number of cleaned packets
4767  *********************************************************************/
4768 static bool
4769 em_rxeof(struct rx_ring *rxr, int count, int *done)
4770 {
4771 	struct adapter		*adapter = rxr->adapter;
4772 	if_t ifp = adapter->ifp;
4773 	struct mbuf		*mp, *sendmp;
4774 	u32			status = 0;
4775 	u16 			len;
4776 	int			i, processed, rxdone = 0;
4777 	bool			eop;
4778 	union e1000_rx_desc_extended	*cur;
4779 
4780 	EM_RX_LOCK(rxr);
4781 
4782 	/* Sync the ring */
4783 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4784 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4785 
4786 
4787 #ifdef DEV_NETMAP
4788 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4789 		EM_RX_UNLOCK(rxr);
4790 		return (FALSE);
4791 	}
4792 #endif /* DEV_NETMAP */
4793 
4794 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4795 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4796 			break;
4797 
4798 		cur = &rxr->rx_base[i];
4799 		status = le32toh(cur->wb.upper.status_error);
4800 		mp = sendmp = NULL;
4801 
4802 		if ((status & E1000_RXD_STAT_DD) == 0)
4803 			break;
4804 
4805 		len = le16toh(cur->wb.upper.length);
4806 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4807 
4808 		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4809 		    (rxr->discard == TRUE)) {
4810 			adapter->dropped_pkts++;
4811 			++rxr->rx_discarded;
4812 			if (!eop) /* Catch subsequent segs */
4813 				rxr->discard = TRUE;
4814 			else
4815 				rxr->discard = FALSE;
4816 			em_rx_discard(rxr, i);
4817 			goto next_desc;
4818 		}
4819 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4820 
4821 		/* Assign correct length to the current fragment */
4822 		mp = rxr->rx_buffers[i].m_head;
4823 		mp->m_len = len;
4824 
4825 		/* Trigger for refresh */
4826 		rxr->rx_buffers[i].m_head = NULL;
4827 
4828 		/* First segment? */
4829 		if (rxr->fmp == NULL) {
4830 			mp->m_pkthdr.len = len;
4831 			rxr->fmp = rxr->lmp = mp;
4832 		} else {
4833 			/* Chain mbuf's together */
4834 			mp->m_flags &= ~M_PKTHDR;
4835 			rxr->lmp->m_next = mp;
4836 			rxr->lmp = mp;
4837 			rxr->fmp->m_pkthdr.len += len;
4838 		}
4839 
4840 		if (eop) {
4841 			--count;
4842 			sendmp = rxr->fmp;
4843 			if_setrcvif(sendmp, ifp);
4844 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4845 			em_receive_checksum(status, sendmp);
4846 #ifndef __NO_STRICT_ALIGNMENT
4847 			if (adapter->hw.mac.max_frame_size >
4848 			    (MCLBYTES - ETHER_ALIGN) &&
4849 			    em_fixup_rx(rxr) != 0)
4850 				goto skip;
4851 #endif
4852 			if (status & E1000_RXD_STAT_VP) {
4853 				if_setvtag(sendmp,
4854 				    le16toh(cur->wb.upper.vlan));
4855 				sendmp->m_flags |= M_VLANTAG;
4856 			}
4857 #ifndef __NO_STRICT_ALIGNMENT
4858 skip:
4859 #endif
4860 			rxr->fmp = rxr->lmp = NULL;
4861 		}
4862 next_desc:
4863 		/* Sync the ring */
4864 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4865 	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4866 
4867 		/* Zero out the receive descriptors status. */
4868 		cur->wb.upper.status_error &= htole32(~0xFF);
4869 		++rxdone;	/* cumulative for POLL */
4870 		++processed;
4871 
4872 		/* Advance our pointers to the next descriptor. */
4873 		if (++i == adapter->num_rx_desc)
4874 			i = 0;
4875 
4876 		/* Send to the stack */
4877 		if (sendmp != NULL) {
4878 			rxr->next_to_check = i;
4879 			EM_RX_UNLOCK(rxr);
4880 			if_input(ifp, sendmp);
4881 			EM_RX_LOCK(rxr);
4882 			i = rxr->next_to_check;
4883 		}
4884 
4885 		/* Only refresh mbufs every 8 descriptors */
4886 		if (processed == 8) {
4887 			em_refresh_mbufs(rxr, i);
4888 			processed = 0;
4889 		}
4890 	}
4891 
4892 	/* Catch any remaining refresh work */
4893 	if (e1000_rx_unrefreshed(rxr))
4894 		em_refresh_mbufs(rxr, i);
4895 
4896 	rxr->next_to_check = i;
4897 	if (done != NULL)
4898 		*done = rxdone;
4899 	EM_RX_UNLOCK(rxr);
4900 
4901 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4902 }
4903 
4904 static __inline void
4905 em_rx_discard(struct rx_ring *rxr, int i)
4906 {
4907 	struct em_rxbuffer	*rbuf;
4908 
4909 	rbuf = &rxr->rx_buffers[i];
4910 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4911 
4912 	/* Free any previous pieces */
4913 	if (rxr->fmp != NULL) {
4914 		rxr->fmp->m_flags |= M_PKTHDR;
4915 		m_freem(rxr->fmp);
4916 		rxr->fmp = NULL;
4917 		rxr->lmp = NULL;
4918 	}
4919 	/*
4920 	** Free buffer and allow em_refresh_mbufs()
4921 	** to clean up and recharge buffer.
4922 	*/
4923 	if (rbuf->m_head) {
4924 		m_free(rbuf->m_head);
4925 		rbuf->m_head = NULL;
4926 	}
4927 	return;
4928 }
4929 
4930 #ifndef __NO_STRICT_ALIGNMENT
4931 /*
4932  * When jumbo frames are enabled we should realign entire payload on
4933  * architecures with strict alignment. This is serious design mistake of 8254x
4934  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4935  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4936  * payload. On architecures without strict alignment restrictions 8254x still
4937  * performs unaligned memory access which would reduce the performance too.
4938  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4939  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4940  * existing mbuf chain.
4941  *
4942  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4943  * not used at all on architectures with strict alignment.
4944  */
4945 static int
4946 em_fixup_rx(struct rx_ring *rxr)
4947 {
4948 	struct adapter *adapter = rxr->adapter;
4949 	struct mbuf *m, *n;
4950 	int error;
4951 
4952 	error = 0;
4953 	m = rxr->fmp;
4954 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4955 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4956 		m->m_data += ETHER_HDR_LEN;
4957 	} else {
4958 		MGETHDR(n, M_NOWAIT, MT_DATA);
4959 		if (n != NULL) {
4960 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4961 			m->m_data += ETHER_HDR_LEN;
4962 			m->m_len -= ETHER_HDR_LEN;
4963 			n->m_len = ETHER_HDR_LEN;
4964 			M_MOVE_PKTHDR(n, m);
4965 			n->m_next = m;
4966 			rxr->fmp = n;
4967 		} else {
4968 			adapter->dropped_pkts++;
4969 			m_freem(rxr->fmp);
4970 			rxr->fmp = NULL;
4971 			error = ENOMEM;
4972 		}
4973 	}
4974 
4975 	return (error);
4976 }
4977 #endif
4978 
4979 static void
4980 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4981 {
4982 	rxd->read.buffer_addr = htole64(rxbuf->paddr);
4983 	/* DD bits must be cleared */
4984 	rxd->wb.upper.status_error= 0;
4985 }
4986 
4987 /*********************************************************************
4988  *
4989  *  Verify that the hardware indicated that the checksum is valid.
4990  *  Inform the stack about the status of checksum so that stack
4991  *  doesn't spend time verifying the checksum.
4992  *
4993  *********************************************************************/
4994 static void
4995 em_receive_checksum(uint32_t status, struct mbuf *mp)
4996 {
4997 	mp->m_pkthdr.csum_flags = 0;
4998 
4999 	/* Ignore Checksum bit is set */
5000 	if (status & E1000_RXD_STAT_IXSM)
5001 		return;
5002 
5003 	/* If the IP checksum exists and there is no IP Checksum error */
5004 	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5005 		E1000_RXD_STAT_IPCS) {
5006 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5007 	}
5008 
5009 	/* TCP or UDP checksum */
5010 	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5011 	    E1000_RXD_STAT_TCPCS) {
5012 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5013 		mp->m_pkthdr.csum_data = htons(0xffff);
5014 	}
5015 	if (status & E1000_RXD_STAT_UDPCS) {
5016 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5017 		mp->m_pkthdr.csum_data = htons(0xffff);
5018 	}
5019 }
5020 
5021 /*
5022  * This routine is run via an vlan
5023  * config EVENT
5024  */
5025 static void
5026 em_register_vlan(void *arg, if_t ifp, u16 vtag)
5027 {
5028 	struct adapter	*adapter = if_getsoftc(ifp);
5029 	u32		index, bit;
5030 
5031 	if ((void*)adapter !=  arg)   /* Not our event */
5032 		return;
5033 
5034 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5035                 return;
5036 
5037 	EM_CORE_LOCK(adapter);
5038 	index = (vtag >> 5) & 0x7F;
5039 	bit = vtag & 0x1F;
5040 	adapter->shadow_vfta[index] |= (1 << bit);
5041 	++adapter->num_vlans;
5042 	/* Re-init to load the changes */
5043 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5044 		em_init_locked(adapter);
5045 	EM_CORE_UNLOCK(adapter);
5046 }
5047 
5048 /*
5049  * This routine is run via an vlan
5050  * unconfig EVENT
5051  */
5052 static void
5053 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5054 {
5055 	struct adapter	*adapter = if_getsoftc(ifp);
5056 	u32		index, bit;
5057 
5058 	if (adapter != arg)
5059 		return;
5060 
5061 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5062                 return;
5063 
5064 	EM_CORE_LOCK(adapter);
5065 	index = (vtag >> 5) & 0x7F;
5066 	bit = vtag & 0x1F;
5067 	adapter->shadow_vfta[index] &= ~(1 << bit);
5068 	--adapter->num_vlans;
5069 	/* Re-init to load the changes */
5070 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5071 		em_init_locked(adapter);
5072 	EM_CORE_UNLOCK(adapter);
5073 }
5074 
5075 static void
5076 em_setup_vlan_hw_support(struct adapter *adapter)
5077 {
5078 	struct e1000_hw *hw = &adapter->hw;
5079 	u32             reg;
5080 
5081 	/*
5082 	** We get here thru init_locked, meaning
5083 	** a soft reset, this has already cleared
5084 	** the VFTA and other state, so if there
5085 	** have been no vlan's registered do nothing.
5086 	*/
5087 	if (adapter->num_vlans == 0)
5088                 return;
5089 
5090 	/*
5091 	** A soft reset zero's out the VFTA, so
5092 	** we need to repopulate it now.
5093 	*/
5094 	for (int i = 0; i < EM_VFTA_SIZE; i++)
5095                 if (adapter->shadow_vfta[i] != 0)
5096 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5097                             i, adapter->shadow_vfta[i]);
5098 
5099 	reg = E1000_READ_REG(hw, E1000_CTRL);
5100 	reg |= E1000_CTRL_VME;
5101 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5102 
5103 	/* Enable the Filter Table */
5104 	reg = E1000_READ_REG(hw, E1000_RCTL);
5105 	reg &= ~E1000_RCTL_CFIEN;
5106 	reg |= E1000_RCTL_VFE;
5107 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
5108 }
5109 
5110 static void
5111 em_enable_intr(struct adapter *adapter)
5112 {
5113 	struct e1000_hw *hw = &adapter->hw;
5114 	u32 ims_mask = IMS_ENABLE_MASK;
5115 
5116 	if (hw->mac.type == e1000_82574) {
5117 		E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
5118 		ims_mask |= adapter->ims;
5119 	}
5120 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5121 }
5122 
5123 static void
5124 em_disable_intr(struct adapter *adapter)
5125 {
5126 	struct e1000_hw *hw = &adapter->hw;
5127 
5128 	if (hw->mac.type == e1000_82574)
5129 		E1000_WRITE_REG(hw, EM_EIAC, 0);
5130 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5131 }
5132 
5133 /*
5134  * Bit of a misnomer, what this really means is
5135  * to enable OS management of the system... aka
5136  * to disable special hardware management features
5137  */
5138 static void
5139 em_init_manageability(struct adapter *adapter)
5140 {
5141 	/* A shared code workaround */
5142 #define E1000_82542_MANC2H E1000_MANC2H
5143 	if (adapter->has_manage) {
5144 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5145 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5146 
5147 		/* disable hardware interception of ARP */
5148 		manc &= ~(E1000_MANC_ARP_EN);
5149 
5150                 /* enable receiving management packets to the host */
5151 		manc |= E1000_MANC_EN_MNG2HOST;
5152 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5153 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5154 		manc2h |= E1000_MNG2HOST_PORT_623;
5155 		manc2h |= E1000_MNG2HOST_PORT_664;
5156 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5157 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5158 	}
5159 }
5160 
5161 /*
5162  * Give control back to hardware management
5163  * controller if there is one.
5164  */
5165 static void
5166 em_release_manageability(struct adapter *adapter)
5167 {
5168 	if (adapter->has_manage) {
5169 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5170 
5171 		/* re-enable hardware interception of ARP */
5172 		manc |= E1000_MANC_ARP_EN;
5173 		manc &= ~E1000_MANC_EN_MNG2HOST;
5174 
5175 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5176 	}
5177 }
5178 
5179 /*
5180  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5181  * For ASF and Pass Through versions of f/w this means
5182  * that the driver is loaded. For AMT version type f/w
5183  * this means that the network i/f is open.
5184  */
5185 static void
5186 em_get_hw_control(struct adapter *adapter)
5187 {
5188 	u32 ctrl_ext, swsm;
5189 
5190 	if (adapter->hw.mac.type == e1000_82573) {
5191 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5192 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5193 		    swsm | E1000_SWSM_DRV_LOAD);
5194 		return;
5195 	}
5196 	/* else */
5197 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5198 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5199 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5200 	return;
5201 }
5202 
5203 /*
5204  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5205  * For ASF and Pass Through versions of f/w this means that
5206  * the driver is no longer loaded. For AMT versions of the
5207  * f/w this means that the network i/f is closed.
5208  */
5209 static void
5210 em_release_hw_control(struct adapter *adapter)
5211 {
5212 	u32 ctrl_ext, swsm;
5213 
5214 	if (!adapter->has_manage)
5215 		return;
5216 
5217 	if (adapter->hw.mac.type == e1000_82573) {
5218 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5219 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5220 		    swsm & ~E1000_SWSM_DRV_LOAD);
5221 		return;
5222 	}
5223 	/* else */
5224 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5225 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5226 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5227 	return;
5228 }
5229 
5230 static int
5231 em_is_valid_ether_addr(u8 *addr)
5232 {
5233 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5234 
5235 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5236 		return (FALSE);
5237 	}
5238 
5239 	return (TRUE);
5240 }
5241 
5242 /*
5243 ** Parse the interface capabilities with regard
5244 ** to both system management and wake-on-lan for
5245 ** later use.
5246 */
5247 static void
5248 em_get_wakeup(device_t dev)
5249 {
5250 	struct adapter	*adapter = device_get_softc(dev);
5251 	u16		eeprom_data = 0, device_id, apme_mask;
5252 
5253 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5254 	apme_mask = EM_EEPROM_APME;
5255 
5256 	switch (adapter->hw.mac.type) {
5257 	case e1000_82573:
5258 	case e1000_82583:
5259 		adapter->has_amt = TRUE;
5260 		/* Falls thru */
5261 	case e1000_82571:
5262 	case e1000_82572:
5263 	case e1000_80003es2lan:
5264 		if (adapter->hw.bus.func == 1) {
5265 			e1000_read_nvm(&adapter->hw,
5266 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5267 			break;
5268 		} else
5269 			e1000_read_nvm(&adapter->hw,
5270 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5271 		break;
5272 	case e1000_ich8lan:
5273 	case e1000_ich9lan:
5274 	case e1000_ich10lan:
5275 	case e1000_pchlan:
5276 	case e1000_pch2lan:
5277 		apme_mask = E1000_WUC_APME;
5278 		adapter->has_amt = TRUE;
5279 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5280 		break;
5281 	default:
5282 		e1000_read_nvm(&adapter->hw,
5283 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5284 		break;
5285 	}
5286 	if (eeprom_data & apme_mask)
5287 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5288 	/*
5289          * We have the eeprom settings, now apply the special cases
5290          * where the eeprom may be wrong or the board won't support
5291          * wake on lan on a particular port
5292 	 */
5293 	device_id = pci_get_device(dev);
5294         switch (device_id) {
5295 	case E1000_DEV_ID_82571EB_FIBER:
5296 		/* Wake events only supported on port A for dual fiber
5297 		 * regardless of eeprom setting */
5298 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5299 		    E1000_STATUS_FUNC_1)
5300 			adapter->wol = 0;
5301 		break;
5302 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5303 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5304 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5305                 /* if quad port adapter, disable WoL on all but port A */
5306 		if (global_quad_port_a != 0)
5307 			adapter->wol = 0;
5308 		/* Reset for multiple quad port adapters */
5309 		if (++global_quad_port_a == 4)
5310 			global_quad_port_a = 0;
5311                 break;
5312 	}
5313 	return;
5314 }
5315 
5316 
5317 /*
5318  * Enable PCI Wake On Lan capability
5319  */
5320 static void
5321 em_enable_wakeup(device_t dev)
5322 {
5323 	struct adapter	*adapter = device_get_softc(dev);
5324 	if_t ifp = adapter->ifp;
5325 	u32		pmc, ctrl, ctrl_ext, rctl;
5326 	u16     	status;
5327 
5328 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5329 		return;
5330 
5331 	/* Advertise the wakeup capability */
5332 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5333 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5334 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5335 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5336 
5337 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5338 	    (adapter->hw.mac.type == e1000_pchlan) ||
5339 	    (adapter->hw.mac.type == e1000_ich9lan) ||
5340 	    (adapter->hw.mac.type == e1000_ich10lan))
5341 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5342 
5343 	/* Keep the laser running on Fiber adapters */
5344 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5345 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5346 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5347 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5348 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5349 	}
5350 
5351 	/*
5352 	** Determine type of Wakeup: note that wol
5353 	** is set with all bits on by default.
5354 	*/
5355 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5356 		adapter->wol &= ~E1000_WUFC_MAG;
5357 
5358 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5359 		adapter->wol &= ~E1000_WUFC_MC;
5360 	else {
5361 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5362 		rctl |= E1000_RCTL_MPE;
5363 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5364 	}
5365 
5366 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5367 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5368 		if (em_enable_phy_wakeup(adapter))
5369 			return;
5370 	} else {
5371 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5372 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5373 	}
5374 
5375 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5376 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5377 
5378         /* Request PME */
5379         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5380 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5381 	if (if_getcapenable(ifp) & IFCAP_WOL)
5382 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5383         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5384 
5385 	return;
5386 }
5387 
5388 /*
5389 ** WOL in the newer chipset interfaces (pchlan)
5390 ** require thing to be copied into the phy
5391 */
5392 static int
5393 em_enable_phy_wakeup(struct adapter *adapter)
5394 {
5395 	struct e1000_hw *hw = &adapter->hw;
5396 	u32 mreg, ret = 0;
5397 	u16 preg;
5398 
5399 	/* copy MAC RARs to PHY RARs */
5400 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5401 
5402 	/* copy MAC MTA to PHY MTA */
5403 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5404 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5405 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5406 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5407 		    (u16)((mreg >> 16) & 0xFFFF));
5408 	}
5409 
5410 	/* configure PHY Rx Control register */
5411 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5412 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5413 	if (mreg & E1000_RCTL_UPE)
5414 		preg |= BM_RCTL_UPE;
5415 	if (mreg & E1000_RCTL_MPE)
5416 		preg |= BM_RCTL_MPE;
5417 	preg &= ~(BM_RCTL_MO_MASK);
5418 	if (mreg & E1000_RCTL_MO_3)
5419 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5420 				<< BM_RCTL_MO_SHIFT);
5421 	if (mreg & E1000_RCTL_BAM)
5422 		preg |= BM_RCTL_BAM;
5423 	if (mreg & E1000_RCTL_PMCF)
5424 		preg |= BM_RCTL_PMCF;
5425 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5426 	if (mreg & E1000_CTRL_RFCE)
5427 		preg |= BM_RCTL_RFCE;
5428 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5429 
5430 	/* enable PHY wakeup in MAC register */
5431 	E1000_WRITE_REG(hw, E1000_WUC,
5432 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5433 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5434 
5435 	/* configure and enable PHY wakeup in PHY registers */
5436 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5437 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5438 
5439 	/* activate PHY wakeup */
5440 	ret = hw->phy.ops.acquire(hw);
5441 	if (ret) {
5442 		printf("Could not acquire PHY\n");
5443 		return ret;
5444 	}
5445 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5446 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5447 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5448 	if (ret) {
5449 		printf("Could not read PHY page 769\n");
5450 		goto out;
5451 	}
5452 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5453 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5454 	if (ret)
5455 		printf("Could not set PHY Host Wakeup bit\n");
5456 out:
5457 	hw->phy.ops.release(hw);
5458 
5459 	return ret;
5460 }
5461 
5462 static void
5463 em_led_func(void *arg, int onoff)
5464 {
5465 	struct adapter	*adapter = arg;
5466 
5467 	EM_CORE_LOCK(adapter);
5468 	if (onoff) {
5469 		e1000_setup_led(&adapter->hw);
5470 		e1000_led_on(&adapter->hw);
5471 	} else {
5472 		e1000_led_off(&adapter->hw);
5473 		e1000_cleanup_led(&adapter->hw);
5474 	}
5475 	EM_CORE_UNLOCK(adapter);
5476 }
5477 
5478 /*
5479 ** Disable the L0S and L1 LINK states
5480 */
5481 static void
5482 em_disable_aspm(struct adapter *adapter)
5483 {
5484 	int		base, reg;
5485 	u16		link_cap,link_ctrl;
5486 	device_t	dev = adapter->dev;
5487 
5488 	switch (adapter->hw.mac.type) {
5489 		case e1000_82573:
5490 		case e1000_82574:
5491 		case e1000_82583:
5492 			break;
5493 		default:
5494 			return;
5495 	}
5496 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5497 		return;
5498 	reg = base + PCIER_LINK_CAP;
5499 	link_cap = pci_read_config(dev, reg, 2);
5500 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5501 		return;
5502 	reg = base + PCIER_LINK_CTL;
5503 	link_ctrl = pci_read_config(dev, reg, 2);
5504 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5505 	pci_write_config(dev, reg, link_ctrl, 2);
5506 	return;
5507 }
5508 
5509 /**********************************************************************
5510  *
5511  *  Update the board statistics counters.
5512  *
5513  **********************************************************************/
5514 static void
5515 em_update_stats_counters(struct adapter *adapter)
5516 {
5517 
5518 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5519 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5520 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5521 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5522 	}
5523 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5524 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5525 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5526 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5527 
5528 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5529 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5530 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5531 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5532 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5533 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5534 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5535 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5536 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5537 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5538 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5539 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5540 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5541 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5542 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5543 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5544 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5545 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5546 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5547 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5548 
5549 	/* For the 64-bit byte counters the low dword must be read first. */
5550 	/* Both registers clear on the read of the high dword */
5551 
5552 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5553 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5554 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5555 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5556 
5557 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5558 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5559 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5560 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5561 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5562 
5563 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5564 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5565 
5566 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5567 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5568 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5569 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5570 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5571 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5572 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5573 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5574 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5575 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5576 
5577 	/* Interrupt Counts */
5578 
5579 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5580 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5581 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5582 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5583 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5584 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5585 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5586 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5587 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5588 
5589 	if (adapter->hw.mac.type >= e1000_82543) {
5590 		adapter->stats.algnerrc +=
5591 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5592 		adapter->stats.rxerrc +=
5593 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5594 		adapter->stats.tncrs +=
5595 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5596 		adapter->stats.cexterr +=
5597 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5598 		adapter->stats.tsctc +=
5599 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5600 		adapter->stats.tsctfc +=
5601 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5602 	}
5603 }
5604 
5605 static uint64_t
5606 em_get_counter(if_t ifp, ift_counter cnt)
5607 {
5608 	struct adapter *adapter;
5609 
5610 	adapter = if_getsoftc(ifp);
5611 
5612 	switch (cnt) {
5613 	case IFCOUNTER_COLLISIONS:
5614 		return (adapter->stats.colc);
5615 	case IFCOUNTER_IERRORS:
5616 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5617 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5618 		    adapter->stats.ruc + adapter->stats.roc +
5619 		    adapter->stats.mpc + adapter->stats.cexterr);
5620 	case IFCOUNTER_OERRORS:
5621 		return (adapter->stats.ecol + adapter->stats.latecol +
5622 		    adapter->watchdog_events);
5623 	default:
5624 		return (if_get_counter_default(ifp, cnt));
5625 	}
5626 }
5627 
5628 /* Export a single 32-bit register via a read-only sysctl. */
5629 static int
5630 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5631 {
5632 	struct adapter *adapter;
5633 	u_int val;
5634 
5635 	adapter = oidp->oid_arg1;
5636 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5637 	return (sysctl_handle_int(oidp, &val, 0, req));
5638 }
5639 
5640 /*
5641  * Add sysctl variables, one per statistic, to the system.
5642  */
5643 static void
5644 em_add_hw_stats(struct adapter *adapter)
5645 {
5646 	device_t dev = adapter->dev;
5647 
5648 	struct tx_ring *txr = adapter->tx_rings;
5649 	struct rx_ring *rxr = adapter->rx_rings;
5650 
5651 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5652 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5653 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5654 	struct e1000_hw_stats *stats = &adapter->stats;
5655 
5656 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5657 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5658 
5659 #define QUEUE_NAME_LEN 32
5660 	char namebuf[QUEUE_NAME_LEN];
5661 
5662 	/* Driver Statistics */
5663 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5664 			CTLFLAG_RD, &adapter->dropped_pkts,
5665 			"Driver dropped packets");
5666 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5667 			CTLFLAG_RD, &adapter->link_irq,
5668 			"Link MSIX IRQ Handled");
5669 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5670 			 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5671 			 "Defragmenting mbuf chain failed");
5672 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5673 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5674 			"Driver tx dma failure in xmit");
5675 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5676 			CTLFLAG_RD, &adapter->rx_overruns,
5677 			"RX overruns");
5678 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5679 			CTLFLAG_RD, &adapter->watchdog_events,
5680 			"Watchdog timeouts");
5681 
5682 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5683 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5684 			em_sysctl_reg_handler, "IU",
5685 			"Device Control Register");
5686 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5687 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5688 			em_sysctl_reg_handler, "IU",
5689 			"Receiver Control Register");
5690 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5691 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5692 			"Flow Control High Watermark");
5693 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5694 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5695 			"Flow Control Low Watermark");
5696 
5697 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5698 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5699 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5700 					    CTLFLAG_RD, NULL, "TX Queue Name");
5701 		queue_list = SYSCTL_CHILDREN(queue_node);
5702 
5703 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5704 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5705 				E1000_TDH(txr->me),
5706 				em_sysctl_reg_handler, "IU",
5707  				"Transmit Descriptor Head");
5708 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5709 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5710 				E1000_TDT(txr->me),
5711 				em_sysctl_reg_handler, "IU",
5712  				"Transmit Descriptor Tail");
5713 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5714 				CTLFLAG_RD, &txr->tx_irq,
5715 				"Queue MSI-X Transmit Interrupts");
5716 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5717 				CTLFLAG_RD, &txr->no_desc_avail,
5718 				"Queue No Descriptor Available");
5719 
5720 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5721 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5722 					    CTLFLAG_RD, NULL, "RX Queue Name");
5723 		queue_list = SYSCTL_CHILDREN(queue_node);
5724 
5725 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5726 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5727 				E1000_RDH(rxr->me),
5728 				em_sysctl_reg_handler, "IU",
5729 				"Receive Descriptor Head");
5730 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5731 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5732 				E1000_RDT(rxr->me),
5733 				em_sysctl_reg_handler, "IU",
5734 				"Receive Descriptor Tail");
5735 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5736 				CTLFLAG_RD, &rxr->rx_irq,
5737 				"Queue MSI-X Receive Interrupts");
5738 	}
5739 
5740 	/* MAC stats get their own sub node */
5741 
5742 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5743 				    CTLFLAG_RD, NULL, "Statistics");
5744 	stat_list = SYSCTL_CHILDREN(stat_node);
5745 
5746 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5747 			CTLFLAG_RD, &stats->ecol,
5748 			"Excessive collisions");
5749 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5750 			CTLFLAG_RD, &stats->scc,
5751 			"Single collisions");
5752 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5753 			CTLFLAG_RD, &stats->mcc,
5754 			"Multiple collisions");
5755 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5756 			CTLFLAG_RD, &stats->latecol,
5757 			"Late collisions");
5758 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5759 			CTLFLAG_RD, &stats->colc,
5760 			"Collision Count");
5761 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5762 			CTLFLAG_RD, &adapter->stats.symerrs,
5763 			"Symbol Errors");
5764 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5765 			CTLFLAG_RD, &adapter->stats.sec,
5766 			"Sequence Errors");
5767 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5768 			CTLFLAG_RD, &adapter->stats.dc,
5769 			"Defer Count");
5770 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5771 			CTLFLAG_RD, &adapter->stats.mpc,
5772 			"Missed Packets");
5773 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5774 			CTLFLAG_RD, &adapter->stats.rnbc,
5775 			"Receive No Buffers");
5776 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5777 			CTLFLAG_RD, &adapter->stats.ruc,
5778 			"Receive Undersize");
5779 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5780 			CTLFLAG_RD, &adapter->stats.rfc,
5781 			"Fragmented Packets Received ");
5782 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5783 			CTLFLAG_RD, &adapter->stats.roc,
5784 			"Oversized Packets Received");
5785 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5786 			CTLFLAG_RD, &adapter->stats.rjc,
5787 			"Recevied Jabber");
5788 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5789 			CTLFLAG_RD, &adapter->stats.rxerrc,
5790 			"Receive Errors");
5791 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5792 			CTLFLAG_RD, &adapter->stats.crcerrs,
5793 			"CRC errors");
5794 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5795 			CTLFLAG_RD, &adapter->stats.algnerrc,
5796 			"Alignment Errors");
5797 	/* On 82575 these are collision counts */
5798 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5799 			CTLFLAG_RD, &adapter->stats.cexterr,
5800 			"Collision/Carrier extension errors");
5801 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5802 			CTLFLAG_RD, &adapter->stats.xonrxc,
5803 			"XON Received");
5804 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5805 			CTLFLAG_RD, &adapter->stats.xontxc,
5806 			"XON Transmitted");
5807 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5808 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5809 			"XOFF Received");
5810 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5811 			CTLFLAG_RD, &adapter->stats.xofftxc,
5812 			"XOFF Transmitted");
5813 
5814 	/* Packet Reception Stats */
5815 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5816 			CTLFLAG_RD, &adapter->stats.tpr,
5817 			"Total Packets Received ");
5818 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5819 			CTLFLAG_RD, &adapter->stats.gprc,
5820 			"Good Packets Received");
5821 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5822 			CTLFLAG_RD, &adapter->stats.bprc,
5823 			"Broadcast Packets Received");
5824 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5825 			CTLFLAG_RD, &adapter->stats.mprc,
5826 			"Multicast Packets Received");
5827 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5828 			CTLFLAG_RD, &adapter->stats.prc64,
5829 			"64 byte frames received ");
5830 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5831 			CTLFLAG_RD, &adapter->stats.prc127,
5832 			"65-127 byte frames received");
5833 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5834 			CTLFLAG_RD, &adapter->stats.prc255,
5835 			"128-255 byte frames received");
5836 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5837 			CTLFLAG_RD, &adapter->stats.prc511,
5838 			"256-511 byte frames received");
5839 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5840 			CTLFLAG_RD, &adapter->stats.prc1023,
5841 			"512-1023 byte frames received");
5842 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5843 			CTLFLAG_RD, &adapter->stats.prc1522,
5844 			"1023-1522 byte frames received");
5845  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5846  			CTLFLAG_RD, &adapter->stats.gorc,
5847  			"Good Octets Received");
5848 
5849 	/* Packet Transmission Stats */
5850  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5851  			CTLFLAG_RD, &adapter->stats.gotc,
5852  			"Good Octets Transmitted");
5853 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5854 			CTLFLAG_RD, &adapter->stats.tpt,
5855 			"Total Packets Transmitted");
5856 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5857 			CTLFLAG_RD, &adapter->stats.gptc,
5858 			"Good Packets Transmitted");
5859 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5860 			CTLFLAG_RD, &adapter->stats.bptc,
5861 			"Broadcast Packets Transmitted");
5862 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5863 			CTLFLAG_RD, &adapter->stats.mptc,
5864 			"Multicast Packets Transmitted");
5865 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5866 			CTLFLAG_RD, &adapter->stats.ptc64,
5867 			"64 byte frames transmitted ");
5868 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5869 			CTLFLAG_RD, &adapter->stats.ptc127,
5870 			"65-127 byte frames transmitted");
5871 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5872 			CTLFLAG_RD, &adapter->stats.ptc255,
5873 			"128-255 byte frames transmitted");
5874 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5875 			CTLFLAG_RD, &adapter->stats.ptc511,
5876 			"256-511 byte frames transmitted");
5877 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5878 			CTLFLAG_RD, &adapter->stats.ptc1023,
5879 			"512-1023 byte frames transmitted");
5880 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5881 			CTLFLAG_RD, &adapter->stats.ptc1522,
5882 			"1024-1522 byte frames transmitted");
5883 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5884 			CTLFLAG_RD, &adapter->stats.tsctc,
5885 			"TSO Contexts Transmitted");
5886 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5887 			CTLFLAG_RD, &adapter->stats.tsctfc,
5888 			"TSO Contexts Failed");
5889 
5890 
5891 	/* Interrupt Stats */
5892 
5893 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5894 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5895 	int_list = SYSCTL_CHILDREN(int_node);
5896 
5897 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5898 			CTLFLAG_RD, &adapter->stats.iac,
5899 			"Interrupt Assertion Count");
5900 
5901 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5902 			CTLFLAG_RD, &adapter->stats.icrxptc,
5903 			"Interrupt Cause Rx Pkt Timer Expire Count");
5904 
5905 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5906 			CTLFLAG_RD, &adapter->stats.icrxatc,
5907 			"Interrupt Cause Rx Abs Timer Expire Count");
5908 
5909 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5910 			CTLFLAG_RD, &adapter->stats.ictxptc,
5911 			"Interrupt Cause Tx Pkt Timer Expire Count");
5912 
5913 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5914 			CTLFLAG_RD, &adapter->stats.ictxatc,
5915 			"Interrupt Cause Tx Abs Timer Expire Count");
5916 
5917 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5918 			CTLFLAG_RD, &adapter->stats.ictxqec,
5919 			"Interrupt Cause Tx Queue Empty Count");
5920 
5921 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5922 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5923 			"Interrupt Cause Tx Queue Min Thresh Count");
5924 
5925 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5926 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5927 			"Interrupt Cause Rx Desc Min Thresh Count");
5928 
5929 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5930 			CTLFLAG_RD, &adapter->stats.icrxoc,
5931 			"Interrupt Cause Receiver Overrun Count");
5932 }
5933 
5934 /**********************************************************************
5935  *
5936  *  This routine provides a way to dump out the adapter eeprom,
5937  *  often a useful debug/service tool. This only dumps the first
5938  *  32 words, stuff that matters is in that extent.
5939  *
5940  **********************************************************************/
5941 static int
5942 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5943 {
5944 	struct adapter *adapter = (struct adapter *)arg1;
5945 	int error;
5946 	int result;
5947 
5948 	result = -1;
5949 	error = sysctl_handle_int(oidp, &result, 0, req);
5950 
5951 	if (error || !req->newptr)
5952 		return (error);
5953 
5954 	/*
5955 	 * This value will cause a hex dump of the
5956 	 * first 32 16-bit words of the EEPROM to
5957 	 * the screen.
5958 	 */
5959 	if (result == 1)
5960 		em_print_nvm_info(adapter);
5961 
5962 	return (error);
5963 }
5964 
5965 static void
5966 em_print_nvm_info(struct adapter *adapter)
5967 {
5968 	u16	eeprom_data;
5969 	int	i, j, row = 0;
5970 
5971 	/* Its a bit crude, but it gets the job done */
5972 	printf("\nInterface EEPROM Dump:\n");
5973 	printf("Offset\n0x0000  ");
5974 	for (i = 0, j = 0; i < 32; i++, j++) {
5975 		if (j == 8) { /* Make the offset block */
5976 			j = 0; ++row;
5977 			printf("\n0x00%x0  ",row);
5978 		}
5979 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5980 		printf("%04x ", eeprom_data);
5981 	}
5982 	printf("\n");
5983 }
5984 
5985 static int
5986 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5987 {
5988 	struct em_int_delay_info *info;
5989 	struct adapter *adapter;
5990 	u32 regval;
5991 	int error, usecs, ticks;
5992 
5993 	info = (struct em_int_delay_info *)arg1;
5994 	usecs = info->value;
5995 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5996 	if (error != 0 || req->newptr == NULL)
5997 		return (error);
5998 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5999 		return (EINVAL);
6000 	info->value = usecs;
6001 	ticks = EM_USECS_TO_TICKS(usecs);
6002 	if (info->offset == E1000_ITR)	/* units are 256ns here */
6003 		ticks *= 4;
6004 
6005 	adapter = info->adapter;
6006 
6007 	EM_CORE_LOCK(adapter);
6008 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6009 	regval = (regval & ~0xffff) | (ticks & 0xffff);
6010 	/* Handle a few special cases. */
6011 	switch (info->offset) {
6012 	case E1000_RDTR:
6013 		break;
6014 	case E1000_TIDV:
6015 		if (ticks == 0) {
6016 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6017 			/* Don't write 0 into the TIDV register. */
6018 			regval++;
6019 		} else
6020 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6021 		break;
6022 	}
6023 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6024 	EM_CORE_UNLOCK(adapter);
6025 	return (0);
6026 }
6027 
6028 static void
6029 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6030 	const char *description, struct em_int_delay_info *info,
6031 	int offset, int value)
6032 {
6033 	info->adapter = adapter;
6034 	info->offset = offset;
6035 	info->value = value;
6036 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6037 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6038 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6039 	    info, 0, em_sysctl_int_delay, "I", description);
6040 }
6041 
6042 static void
6043 em_set_sysctl_value(struct adapter *adapter, const char *name,
6044 	const char *description, int *limit, int value)
6045 {
6046 	*limit = value;
6047 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6048 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6049 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6050 }
6051 
6052 
6053 /*
6054 ** Set flow control using sysctl:
6055 ** Flow control values:
6056 **      0 - off
6057 **      1 - rx pause
6058 **      2 - tx pause
6059 **      3 - full
6060 */
6061 static int
6062 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6063 {
6064         int		error;
6065 	static int	input = 3; /* default is full */
6066         struct adapter	*adapter = (struct adapter *) arg1;
6067 
6068         error = sysctl_handle_int(oidp, &input, 0, req);
6069 
6070         if ((error) || (req->newptr == NULL))
6071                 return (error);
6072 
6073 	if (input == adapter->fc) /* no change? */
6074 		return (error);
6075 
6076         switch (input) {
6077                 case e1000_fc_rx_pause:
6078                 case e1000_fc_tx_pause:
6079                 case e1000_fc_full:
6080                 case e1000_fc_none:
6081                         adapter->hw.fc.requested_mode = input;
6082 			adapter->fc = input;
6083                         break;
6084                 default:
6085 			/* Do nothing */
6086 			return (error);
6087         }
6088 
6089         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6090         e1000_force_mac_fc(&adapter->hw);
6091         return (error);
6092 }
6093 
6094 /*
6095 ** Manage Energy Efficient Ethernet:
6096 ** Control values:
6097 **     0/1 - enabled/disabled
6098 */
6099 static int
6100 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6101 {
6102        struct adapter *adapter = (struct adapter *) arg1;
6103        int             error, value;
6104 
6105        value = adapter->hw.dev_spec.ich8lan.eee_disable;
6106        error = sysctl_handle_int(oidp, &value, 0, req);
6107        if (error || req->newptr == NULL)
6108                return (error);
6109        EM_CORE_LOCK(adapter);
6110        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6111        em_init_locked(adapter);
6112        EM_CORE_UNLOCK(adapter);
6113        return (0);
6114 }
6115 
6116 static int
6117 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6118 {
6119 	struct adapter *adapter;
6120 	int error;
6121 	int result;
6122 
6123 	result = -1;
6124 	error = sysctl_handle_int(oidp, &result, 0, req);
6125 
6126 	if (error || !req->newptr)
6127 		return (error);
6128 
6129 	if (result == 1) {
6130 		adapter = (struct adapter *)arg1;
6131 		em_print_debug_info(adapter);
6132         }
6133 
6134 	return (error);
6135 }
6136 
6137 /*
6138 ** This routine is meant to be fluid, add whatever is
6139 ** needed for debugging a problem.  -jfv
6140 */
6141 static void
6142 em_print_debug_info(struct adapter *adapter)
6143 {
6144 	device_t dev = adapter->dev;
6145 	struct tx_ring *txr = adapter->tx_rings;
6146 	struct rx_ring *rxr = adapter->rx_rings;
6147 
6148 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
6149 		printf("Interface is RUNNING ");
6150 	else
6151 		printf("Interface is NOT RUNNING\n");
6152 
6153 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
6154 		printf("and INACTIVE\n");
6155 	else
6156 		printf("and ACTIVE\n");
6157 
6158 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6159 		device_printf(dev, "TX Queue %d ------\n", i);
6160 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6161 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6162 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6163 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6164 		device_printf(dev, "TX descriptors avail = %d\n",
6165 	    		txr->tx_avail);
6166 		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6167 	    		txr->no_desc_avail);
6168 		device_printf(dev, "RX Queue %d ------\n", i);
6169 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6170 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6171 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6172 		device_printf(dev, "RX discarded packets = %ld\n",
6173 	    		rxr->rx_discarded);
6174 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6175 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6176 	}
6177 }
6178 
6179 #ifdef EM_MULTIQUEUE
6180 /*
6181  * 82574 only:
6182  * Write a new value to the EEPROM increasing the number of MSIX
6183  * vectors from 3 to 5, for proper multiqueue support.
6184  */
6185 static void
6186 em_enable_vectors_82574(struct adapter *adapter)
6187 {
6188 	struct e1000_hw *hw = &adapter->hw;
6189 	device_t dev = adapter->dev;
6190 	u16 edata;
6191 
6192 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6193 	printf("Current cap: %#06x\n", edata);
6194 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6195 		device_printf(dev, "Writing to eeprom: increasing "
6196 		    "reported MSIX vectors from 3 to 5...\n");
6197 		edata &= ~(EM_NVM_MSIX_N_MASK);
6198 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6199 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6200 		e1000_update_nvm_checksum(hw);
6201 		device_printf(dev, "Writing to eeprom: done\n");
6202 	}
6203 }
6204 #endif
6205 
6206 #ifdef DDB
6207 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6208 {
6209 	devclass_t	dc;
6210 	int max_em;
6211 
6212 	dc = devclass_find("em");
6213 	max_em = devclass_get_maxunit(dc);
6214 
6215 	for (int index = 0; index < (max_em - 1); index++) {
6216 		device_t dev;
6217 		dev = devclass_get_device(dc, index);
6218 		if (device_get_driver(dev) == &em_driver) {
6219 			struct adapter *adapter = device_get_softc(dev);
6220 			EM_CORE_LOCK(adapter);
6221 			em_init_locked(adapter);
6222 			EM_CORE_UNLOCK(adapter);
6223 		}
6224 	}
6225 }
6226 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6227 {
6228 	devclass_t	dc;
6229 	int max_em;
6230 
6231 	dc = devclass_find("em");
6232 	max_em = devclass_get_maxunit(dc);
6233 
6234 	for (int index = 0; index < (max_em - 1); index++) {
6235 		device_t dev;
6236 		dev = devclass_get_device(dc, index);
6237 		if (device_get_driver(dev) == &em_driver)
6238 			em_print_debug_info(device_get_softc(dev));
6239 	}
6240 
6241 }
6242 #endif
6243