/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by em_probe() to select the devices this driver attaches to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
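
/*
 * Support for a new adapter is added by appending an entry above the
 * all-zero terminator; e.g. (E1000_DEV_ID_NEW is a hypothetical
 * placeholder, not a real device ID):
 *
 *	{ 0x8086, E1000_DEV_ID_NEW,	PCI_ANY_ID, PCI_ANY_ID, 0},
 *
 * em_probe() walks this table until it reaches the zero entry.
 */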

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t,
		    struct tx_ring *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static uint64_t	em_get_counter(if_t, ift_counter);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);
static void	em_flush_desc_rings(struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
		    const struct em_rxbuffer *rxbuf);
static void	em_receive_checksum(uint32_t status, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_que(void *context, int pending);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void	em_enable_vectors_82574(struct adapter *);
#endif

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
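
/*
 * The driver can be compiled statically into the kernel ("device em")
 * or loaded as a module at boot, e.g. via /boot/loader.conf:
 *
 *	if_em_load="YES"
 */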

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
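
/*
 * Worked example for the values above: the interrupt delay registers
 * count in 1.024 usec units, so EM_TICKS_TO_USECS(64) =
 * (1024 * 64 + 500) / 1000 = 66 usecs (rounded).  The ITR register
 * counts 256 ns units, so DEFAULT_ITR = 1000000000 / (8000 * 256) =
 * 488, i.e. at most one interrupt per 488 * 256 ns ~= 125 usecs,
 * which is MAX_INTS_PER_SEC interrupts per second.
 */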

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

#define TSO_WORKAROUND	4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
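
/*
 * Each CTLFLAG_RDTUN knob below is a boot-time tunable and can be set
 * from /boot/loader.conf, for example:
 *
 *	hw.em.rxd="2048"
 *	hw.em.txd="2048"
 *	hw.em.rx_int_delay="0"
 */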

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store the last used CPU when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments
** when a queue is bound to a CPU.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy Efficient Ethernet - default to OFF */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");
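/*
 * Note: eee_setting is copied into hw->dev_spec.ich8lan.eee_disable
 * in em_attach(), so the default of 1 leaves EEE disabled.
 */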

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded for a given
 *  adapter based on that adapter's PCI vendor/device id.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to map the flash memory,
	** and this must happen after the MAC is identified.
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}
	/*
	** In the new SPT device the flash is not a separate BAR;
	** rather it lives within BAR0, so use the same tag and an
	** offset handle for the FLASH read/write macros in the
	** shared code.
	*/
	else if (hw->mac.type == e1000_pch_spt) {
		adapter->osdep.flash_bus_space_tag =
		    adapter->osdep.mem_bus_space_tag;
		adapter->osdep.flash_bus_space_handle =
		    adapter->osdep.mem_bus_space_handle
		    + E1000_FLASH_BASE_ADDR;
	}

	/* Do Shared Code initialization */
	error = e1000_setup_init_funcs(hw, TRUE);
	if (error) {
		device_printf(dev, "Setup of Shared code failed, error %d\n",
		    error);
		error = ENXIO;
		goto err_pci;
	}

	/*
	 * Setup MSI/X or MSI if PCI Express
	 */
	adapter->msix = em_setup_msix(adapter);

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.
	 * The ring size in bytes must not exceed the hardware maximum
	 * and must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;
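
	/*
	 * Example of the check above: with EM_DBA_ALIGN (128) and the
	 * 16-byte descriptor size, any count that is a multiple of
	 * 8 (128 / 16) keeps the ring aligned; the 1024-entry
	 * defaults pass, while e.g. 1028 would be rejected.
	 */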

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is important for
	** reading the NVM and MAC address from the hardware.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev, "VLAN in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if the number of TX descriptors is low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Mark the queue as having work */
		if (txr->busy == EM_TX_IDLE)
			txr->busy = EM_TX_BUSY;

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);
	}

	return;
}

static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than doing an immediate send.  It is this queueing,
 *  rather than just having multiple tx queues, that is the
 *  advantage in this driver.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	unsigned int	i, error;

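	/*
	 * Ring selection: packets that carry a stack-computed flowid
	 * (e.g. an RSS hash) are pinned to ring (flowid % num_queues)
	 * so a flow always maps to the same ring; packets without a
	 * hash fall back to the current CPU.  For example, flowid 7
	 * with two queues selects ring 1.
	 */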
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	error = drbr_enqueue(ifp, txr->br, m);
	if (error)
		return (error);

	if (EM_TX_TRYLOCK(txr)) {
		em_mq_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(txr->tq, &txr->tx_task);

	return (0);
}

static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	EM_TX_LOCK_ASSERT(txr);

	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0) {
		return (ENETDOWN);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		ETHER_BPF_MTAP(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	/* Mark the queue as having work */
	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
		txr->busy = EM_TX_BUSY;

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_pch_spt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
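
		/*
		 * The check below accounts for the L2 overhead: with a
		 * 9234-byte maximum frame the largest accepted MTU is
		 * 9234 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4) = 9216.
		 */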
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp, IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp, IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: the stack calls it as the init
 *  entry point in the network interface structure, and the driver
 *  itself uses it as a hw/sw initialization routine to get the
 *  hardware and software to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, the user can use a LAA */
	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	/*
	** There have proven to be problems with TSO when not
	** at full gigabit speed, so disable the assist automatically
	** when at lower speeds.  -jfv
	*/
	if (if_getcapenable(ifp) & IFCAP_TSO4) {
		if (adapter->link_speed == SPEED_1000)
			if_sethwassistbits(ifp, CSUM_TSO, 0);
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}
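
/*
 * em_irq_fast() above is registered as an interrupt filter (hence the
 * FILTER_* return values): it runs in primary interrupt context, only
 * claims or disclaims the interrupt, and defers the actual RX/TX work
 * to the que_task taskqueue drained by em_handle_que() below.
 */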

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else {
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	}
	return;
}
1675 
1676 /*********************************************************************
1677  *
1678  *  MSIX Link Fast Interrupt Service routine
1679  *
1680  **********************************************************************/
1681 static void
1682 em_msix_link(void *arg)
1683 {
1684 	struct adapter	*adapter = arg;
1685 	u32		reg_icr;
1686 
1687 	++adapter->link_irq;
1688 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1689 
1690 	if (reg_icr & E1000_ICR_RXO)
1691 		adapter->rx_overruns++;
1692 
1693 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1694 		adapter->hw.mac.get_link_status = 1;
1695 		em_handle_link(adapter, 0);
1696 	} else
1697 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1698 		    EM_MSIX_LINK | E1000_IMS_LSC);
1699 	/*
1700 	** Because we must read the ICR to handle this interrupt,
1701 	** autoclear may have cleared other pending causes as well;
1702 	** for that reason we simply raise a soft interrupt for
1703 	** all of our vectors.
1704 	*/
1705 	if (reg_icr) {
1706 		E1000_WRITE_REG(&adapter->hw,
1707 			E1000_ICS, adapter->ims);
1708 	}
1709 	return;
1710 }
1711 
1712 static void
1713 em_handle_rx(void *context, int pending)
1714 {
1715 	struct rx_ring	*rxr = context;
1716 	struct adapter	*adapter = rxr->adapter;
1717 	bool		more;
1718 
1719 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1720 	if (more)
1721 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1722 	else {
1723 		/* Reenable this interrupt */
1724 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1725 	}
1726 }
1727 
1728 static void
1729 em_handle_tx(void *context, int pending)
1730 {
1731 	struct tx_ring	*txr = context;
1732 	struct adapter	*adapter = txr->adapter;
1733 	if_t ifp = adapter->ifp;
1734 
1735 	EM_TX_LOCK(txr);
1736 	em_txeof(txr);
1737 #ifdef EM_MULTIQUEUE
1738 	if (!drbr_empty(ifp, txr->br))
1739 		em_mq_start_locked(ifp, txr);
1740 #else
1741 	if (!if_sendq_empty(ifp))
1742 		em_start_locked(ifp, txr);
1743 #endif
1744 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1745 	EM_TX_UNLOCK(txr);
1746 }
1747 
1748 static void
1749 em_handle_link(void *context, int pending)
1750 {
1751 	struct adapter	*adapter = context;
1752 	struct tx_ring	*txr = adapter->tx_rings;
1753 	if_t ifp = adapter->ifp;
1754 
1755 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1756 		return;
1757 
1758 	EM_CORE_LOCK(adapter);
1759 	callout_stop(&adapter->timer);
1760 	em_update_link_status(adapter);
1761 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
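     	/* Re-arm the link vector's interrupt sources */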
1762 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1763 	    EM_MSIX_LINK | E1000_IMS_LSC);
1764 	if (adapter->link_active) {
1765 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1766 			EM_TX_LOCK(txr);
1767 #ifdef EM_MULTIQUEUE
1768 			if (!drbr_empty(ifp, txr->br))
1769 				em_mq_start_locked(ifp, txr);
1770 #else
1771 			if (!if_sendq_empty(ifp))
1772 				em_start_locked(ifp, txr);
1773 #endif
1774 			EM_TX_UNLOCK(txr);
1775 		}
1776 	}
1777 	EM_CORE_UNLOCK(adapter);
1778 }
1779 
1780 
1781 /*********************************************************************
1782  *
1783  *  Media Ioctl callback
1784  *
1785  *  This routine is called whenever the user queries the status of
1786  *  the interface using ifconfig.
1787  *
1788  **********************************************************************/
1789 static void
1790 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1791 {
1792 	struct adapter *adapter = if_getsoftc(ifp);
1793 	u_char fiber_type = IFM_1000_SX;
1794 
1795 	INIT_DEBUGOUT("em_media_status: begin");
1796 
1797 	EM_CORE_LOCK(adapter);
1798 	em_update_link_status(adapter);
1799 
1800 	ifmr->ifm_status = IFM_AVALID;
1801 	ifmr->ifm_active = IFM_ETHER;
1802 
1803 	if (!adapter->link_active) {
1804 		EM_CORE_UNLOCK(adapter);
1805 		return;
1806 	}
1807 
1808 	ifmr->ifm_status |= IFM_ACTIVE;
1809 
1810 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1811 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1812 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1813 	} else {
1814 		switch (adapter->link_speed) {
1815 		case 10:
1816 			ifmr->ifm_active |= IFM_10_T;
1817 			break;
1818 		case 100:
1819 			ifmr->ifm_active |= IFM_100_TX;
1820 			break;
1821 		case 1000:
1822 			ifmr->ifm_active |= IFM_1000_T;
1823 			break;
1824 		}
1825 		if (adapter->link_duplex == FULL_DUPLEX)
1826 			ifmr->ifm_active |= IFM_FDX;
1827 		else
1828 			ifmr->ifm_active |= IFM_HDX;
1829 	}
1830 	EM_CORE_UNLOCK(adapter);
1831 }
1832 
1833 /*********************************************************************
1834  *
1835  *  Media Ioctl callback
1836  *
1837  *  This routine is called when the user changes speed/duplex using
1838  *  media/mediaopt options with ifconfig.
1839  *
1840  **********************************************************************/
1841 static int
1842 em_media_change(if_t ifp)
1843 {
1844 	struct adapter *adapter = if_getsoftc(ifp);
1845 	struct ifmedia  *ifm = &adapter->media;
1846 
1847 	INIT_DEBUGOUT("em_media_change: begin");
1848 
1849 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1850 		return (EINVAL);
1851 
1852 	EM_CORE_LOCK(adapter);
1853 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1854 	case IFM_AUTO:
1855 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1856 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1857 		break;
1858 	case IFM_1000_LX:
1859 	case IFM_1000_SX:
1860 	case IFM_1000_T:
1861 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1862 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1863 		break;
1864 	case IFM_100_TX:
1865 		adapter->hw.mac.autoneg = FALSE;
1866 		adapter->hw.phy.autoneg_advertised = 0;
1867 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1868 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1869 		else
1870 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1871 		break;
1872 	case IFM_10_T:
1873 		adapter->hw.mac.autoneg = FALSE;
1874 		adapter->hw.phy.autoneg_advertised = 0;
1875 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1876 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1877 		else
1878 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1879 		break;
1880 	default:
1881 		device_printf(adapter->dev, "Unsupported media type\n");
1882 	}
1883 
1884 	em_init_locked(adapter);
1885 	EM_CORE_UNLOCK(adapter);
1886 
1887 	return (0);
1888 }
1889 
1890 /*********************************************************************
1891  *
1892  *  This routine maps the mbufs to tx descriptors.
1893  *
1894  *  return 0 on success, positive on failure
1895  **********************************************************************/
1896 
1897 static int
1898 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1899 {
1900 	struct adapter		*adapter = txr->adapter;
1901 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1902 	bus_dmamap_t		map;
1903 	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
1904 	struct e1000_tx_desc	*ctxd = NULL;
1905 	struct mbuf		*m_head;
1906 	struct ether_header	*eh;
1907 	struct ip		*ip = NULL;
1908 	struct tcphdr		*tp = NULL;
1909 	u32			txd_upper = 0, txd_lower = 0;
1910 	int			ip_off, poff;
1911 	int			nsegs, i, j, first, last = 0;
1912 	int			error;
1913 	bool			do_tso, tso_desc, remap = TRUE;
1914 
1915 	m_head = *m_headp;
1916 	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1917 	tso_desc = FALSE;
1918 	ip_off = poff = 0;
1919 
1920 	/*
1921 	 * Intel recommends entire IP/TCP header length reside in a single
1922 	 * buffer. If multiple descriptors are used to describe the IP and
1923 	 * TCP header, each descriptor should describe one or more
1924 	 * complete headers; descriptors referencing only parts of headers
1925 	 * are not supported. If all layer headers are not coalesced into
1926 	 * a single buffer, each buffer should not cross a 4KB boundary,
1927 	 * or be larger than the maximum read request size.
1928 	 * The controller also requires modifying the IP/TCP header to
1929 	 * make TSO work, so we first get a writable mbuf chain, then
1930 	 * coalesce the ethernet/IP/TCP header into a single buffer to
1931 	 * meet the controller's requirement. This also simplifies
1932 	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1933 	 */
1934 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1935 		if (do_tso || (m_head->m_next != NULL &&
1936 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1937 			if (M_WRITABLE(*m_headp) == 0) {
1938 				m_head = m_dup(*m_headp, M_NOWAIT);
1939 				m_freem(*m_headp);
1940 				if (m_head == NULL) {
1941 					*m_headp = NULL;
1942 					return (ENOBUFS);
1943 				}
1944 				*m_headp = m_head;
1945 			}
1946 		}
1947 		/*
1948 		 * XXX
1949 		 * Assume IPv4, we don't have TSO/checksum offload support
1950 		 * for IPv6 yet.
1951 		 */
1952 		ip_off = sizeof(struct ether_header);
1953 		if (m_head->m_len < ip_off) {
1954 			m_head = m_pullup(m_head, ip_off);
1955 			if (m_head == NULL) {
1956 				*m_headp = NULL;
1957 				return (ENOBUFS);
1958 			}
1959 		}
1960 		eh = mtod(m_head, struct ether_header *);
1961 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1962 			ip_off = sizeof(struct ether_vlan_header);
1963 			if (m_head->m_len < ip_off) {
1964 				m_head = m_pullup(m_head, ip_off);
1965 				if (m_head == NULL) {
1966 					*m_headp = NULL;
1967 					return (ENOBUFS);
1968 				}
1969 			}
1970 		}
1971 		if (m_head->m_len < ip_off + sizeof(struct ip)) {
1972 			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1973 			if (m_head == NULL) {
1974 				*m_headp = NULL;
1975 				return (ENOBUFS);
1976 			}
1977 		}
1978 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1979 		poff = ip_off + (ip->ip_hl << 2);
1980 
1981 		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1982 			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1983 				m_head = m_pullup(m_head, poff +
1984 				    sizeof(struct tcphdr));
1985 				if (m_head == NULL) {
1986 					*m_headp = NULL;
1987 					return (ENOBUFS);
1988 				}
1989 			}
1990 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1991 			/*
1992 			 * TSO workaround: pull TSO_WORKAROUND (4)
1993 			 *   more bytes of data into the header mbuf.
1994 			 */
1995 			if (m_head->m_len < poff + (tp->th_off << 2)) {
1996 				m_head = m_pullup(m_head, poff +
1997 				                 (tp->th_off << 2) +
1998 				                 TSO_WORKAROUND);
1999 				if (m_head == NULL) {
2000 					*m_headp = NULL;
2001 					return (ENOBUFS);
2002 				}
2003 			}
2004 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2005 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2006 			if (do_tso) {
2007 				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2008 				                  (ip->ip_hl << 2) +
2009 				                  (tp->th_off << 2));
2010 				ip->ip_sum = 0;
2011 				/*
2012 				 * The pseudo TCP checksum does not include
2013 				 * the TCP payload length, so the driver must
2014 				 * recompute it here as the hardware expects
2015 				 * to see it, per Microsoft's Large Send
2016 				 * specification.
2017 				 */
2018 				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2019 				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2020 			}
2021 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2022 			if (m_head->m_len < poff + sizeof(struct udphdr)) {
2023 				m_head = m_pullup(m_head, poff +
2024 				    sizeof(struct udphdr));
2025 				if (m_head == NULL) {
2026 					*m_headp = NULL;
2027 					return (ENOBUFS);
2028 				}
2029 			}
2030 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2031 		}
2032 		*m_headp = m_head;
2033 	}
2034 
2035 	/*
2036 	 * Map the packet for DMA
2037 	 *
2038 	 * Capture the first descriptor index,
2039 	 * this descriptor will have the index
2040 	 * of the EOP which is the only one that
2041 	 * now gets a DONE bit writeback.
2042 	 */
2043 	first = txr->next_avail_desc;
2044 	tx_buffer = &txr->tx_buffers[first];
2045 	tx_buffer_mapped = tx_buffer;
2046 	map = tx_buffer->map;
2047 
2048 retry:
2049 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2050 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2051 
2052 	/*
2053 	 * There are two types of errors we can (try) to handle:
2054 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2055 	 *   out of segments.  Defragment the mbuf chain and try again.
2056 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2057 	 *   at this point in time.  Defer sending and try again later.
2058 	 * All other errors, in particular EINVAL, are fatal and prevent the
2059 	 * mbuf chain from ever going through.  Drop it and report error.
2060 	 */
2061 	if (error == EFBIG && remap) {
2062 		struct mbuf *m;
2063 
2064 		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2065 		if (m == NULL) {
2066 			adapter->mbuf_defrag_failed++;
2067 			m_freem(*m_headp);
2068 			*m_headp = NULL;
2069 			return (ENOBUFS);
2070 		}
2071 		*m_headp = m;
2072 
2073 		/* Try it again, but only once */
2074 		remap = FALSE;
2075 		goto retry;
2076 	} else if (error != 0) {
2077 		adapter->no_tx_dma_setup++;
2078 		m_freem(*m_headp);
2079 		*m_headp = NULL;
2080 		return (error);
2081 	}
2082 
2083 	/*
2084 	 * TSO Hardware workaround, if this packet is not
2085 	 * TSO, and is only a single descriptor long, and
2086 	 * it follows a TSO burst, then we need to add a
2087 	 * sentinel descriptor to prevent premature writeback.
2088 	 */
2089 	if ((!do_tso) && (txr->tx_tso == TRUE)) {
2090 		if (nsegs == 1)
2091 			tso_desc = TRUE;
2092 		txr->tx_tso = FALSE;
2093 	}
2094 
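     	/*
     	 * Make sure the packet fits in the descriptors still free,
     	 * keeping EM_MAX_SCATTER in reserve; otherwise unload the
     	 * map and report the (transient) shortage.
     	 */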
2095 	if (nsegs > (txr->tx_avail - EM_MAX_SCATTER)) {
2096 		txr->no_desc_avail++;
2097 		bus_dmamap_unload(txr->txtag, map);
2098 		return (ENOBUFS);
2099 	}
2100 	m_head = *m_headp;
2101 
2102 	/* Do hardware assists */
2103 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2104 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2105 		    &txd_upper, &txd_lower);
2106 		/* we need to make a final sentinel transmit desc */
2107 		tso_desc = TRUE;
2108 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2109 		em_transmit_checksum_setup(txr, m_head,
2110 		    ip_off, ip, &txd_upper, &txd_lower);
2111 
2112 	if (m_head->m_flags & M_VLANTAG) {
2113 		/* Set the vlan id. */
2114 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2115 		/* Tell hardware to add tag */
2116 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2117 	}
2118 
2119 	i = txr->next_avail_desc;
2120 
2121 	/* Set up our transmit descriptors */
2122 	for (j = 0; j < nsegs; j++) {
2123 		bus_size_t seg_len;
2124 		bus_addr_t seg_addr;
2125 
2126 		tx_buffer = &txr->tx_buffers[i];
2127 		ctxd = &txr->tx_base[i];
2128 		seg_addr = segs[j].ds_addr;
2129 		seg_len  = segs[j].ds_len;
2130 		/*
2131 		** TSO Workaround:
2132 		** If this is the last descriptor, we want to
2133 		** split it so we have a small final sentinel
2134 		*/
2135 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2136 			seg_len -= TSO_WORKAROUND;
2137 			ctxd->buffer_addr = htole64(seg_addr);
2138 			ctxd->lower.data = htole32(
2139 				adapter->txd_cmd | txd_lower | seg_len);
2140 			ctxd->upper.data = htole32(txd_upper);
2141 			if (++i == adapter->num_tx_desc)
2142 				i = 0;
2143 
2144 			/* Now make the sentinel */
2145 			txr->tx_avail--;
2146 			ctxd = &txr->tx_base[i];
2147 			tx_buffer = &txr->tx_buffers[i];
2148 			ctxd->buffer_addr =
2149 			    htole64(seg_addr + seg_len);
2150 			ctxd->lower.data = htole32(
2151 			    adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2152 			ctxd->upper.data =
2153 			    htole32(txd_upper);
2154 			last = i;
2155 			if (++i == adapter->num_tx_desc)
2156 				i = 0;
2157 		} else {
2158 			ctxd->buffer_addr = htole64(seg_addr);
2159 			ctxd->lower.data = htole32(
2160 			    adapter->txd_cmd | txd_lower | seg_len);
2161 			ctxd->upper.data = htole32(txd_upper);
2162 			last = i;
2163 			if (++i == adapter->num_tx_desc)
2164 				i = 0;
2165 		}
2166 		tx_buffer->m_head = NULL;
2167 		tx_buffer->next_eop = -1;
2168 	}
2169 
2170 	txr->next_avail_desc = i;
2171 	txr->tx_avail -= nsegs;
2172 
2173 	tx_buffer->m_head = m_head;
2174 	/*
2175 	** Here we swap the maps so that the last descriptor,
2176 	** which gets the completion interrupt, has the
2177 	** real map, and the first descriptor gets the
2178 	** unused map from this last buffer.
2179 	*/
2180 	tx_buffer_mapped->map = tx_buffer->map;
2181 	tx_buffer->map = map;
2182 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2183 
2184 	/*
2185 	 * The last descriptor of the packet needs
2186 	 * End Of Packet (EOP) and
2187 	 * Report Status (RS).
2188 	 */
2189 	ctxd->lower.data |=
2190 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2191 	/*
2192 	 * Keep track in the first buffer which
2193 	 * descriptor will be written back
2194 	 */
2195 	tx_buffer = &txr->tx_buffers[first];
2196 	tx_buffer->next_eop = last;
2197 
2198 	/*
2199 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2200 	 * that this frame is available to transmit.
2201 	 */
2202 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2203 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2204 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2205 
2206 	return (0);
2207 }
2208 
2209 static void
2210 em_set_promisc(struct adapter *adapter)
2211 {
2212 	if_t ifp = adapter->ifp;
2213 	u32		reg_rctl;
2214 
2215 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2216 
2217 	if (if_getflags(ifp) & IFF_PROMISC) {
2218 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2219 		/* Turn this on if you want to see bad packets */
2220 		if (em_debug_sbp)
2221 			reg_rctl |= E1000_RCTL_SBP;
2222 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2223 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2224 		reg_rctl |= E1000_RCTL_MPE;
2225 		reg_rctl &= ~E1000_RCTL_UPE;
2226 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2227 	}
2228 }
2229 
2230 static void
2231 em_disable_promisc(struct adapter *adapter)
2232 {
2233 	if_t		ifp = adapter->ifp;
2234 	u32		reg_rctl;
2235 	int		mcnt = 0;
2236 
2237 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2238 	reg_rctl &=  (~E1000_RCTL_UPE);
2239 	if (if_getflags(ifp) & IFF_ALLMULTI)
2240 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2241 	else
2242 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2243 	/* Don't disable if in MAX groups */
2244 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2245 		reg_rctl &=  (~E1000_RCTL_MPE);
2246 	reg_rctl &=  (~E1000_RCTL_SBP);
2247 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2248 }
2249 
2250 
2251 /*********************************************************************
2252  *  Multicast Update
2253  *
2254  *  This routine is called whenever multicast address list is updated.
2255  *
2256  **********************************************************************/
2257 
2258 static void
2259 em_set_multi(struct adapter *adapter)
2260 {
2261 	if_t ifp = adapter->ifp;
2262 	u32 reg_rctl = 0;
2263 	u8  *mta; /* Multicast array memory */
2264 	int mcnt = 0;
2265 
2266 	IOCTL_DEBUGOUT("em_set_multi: begin");
2267 
2268 	mta = adapter->mta;
2269 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2270 
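     	/*
     	 * The 82542 rev 2.0 part requires the receiver to be held in
     	 * reset (RCTL.RST), with MWI off, while the multicast table
     	 * array is rewritten; the mirror-image sequence further down
     	 * releases the reset again.
     	 */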
2271 	if (adapter->hw.mac.type == e1000_82542 &&
2272 	    adapter->hw.revision_id == E1000_REVISION_2) {
2273 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2274 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2275 			e1000_pci_clear_mwi(&adapter->hw);
2276 		reg_rctl |= E1000_RCTL_RST;
2277 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2278 		msec_delay(5);
2279 	}
2280 
2281 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2282 
2283 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2284 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2285 		reg_rctl |= E1000_RCTL_MPE;
2286 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2287 	} else
2288 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2289 
2290 	if (adapter->hw.mac.type == e1000_82542 &&
2291 	    adapter->hw.revision_id == E1000_REVISION_2) {
2292 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2293 		reg_rctl &= ~E1000_RCTL_RST;
2294 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2295 		msec_delay(5);
2296 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2297 			e1000_pci_set_mwi(&adapter->hw);
2298 	}
2299 }
2300 
2301 
2302 /*********************************************************************
2303  *  Timer routine
2304  *
2305  *  This routine checks for link status and updates statistics.
2306  *
2307  **********************************************************************/
2308 
2309 static void
2310 em_local_timer(void *arg)
2311 {
2312 	struct adapter	*adapter = arg;
2313 	if_t ifp = adapter->ifp;
2314 	struct tx_ring	*txr = adapter->tx_rings;
2315 	struct rx_ring	*rxr = adapter->rx_rings;
2316 	u32		trigger = 0;
2317 
2318 	EM_CORE_LOCK_ASSERT(adapter);
2319 
2320 	em_update_link_status(adapter);
2321 	em_update_stats_counters(adapter);
2322 
2323 	/* Reset LAA into RAR[0] on 82571 */
2324 	if ((adapter->hw.mac.type == e1000_82571) &&
2325 	    e1000_get_laa_state_82571(&adapter->hw))
2326 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2327 
2328 	/* Mask to use in the irq trigger */
2329 	if (adapter->msix_mem) {
2330 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2331 			trigger |= rxr->ims;
2332 		rxr = adapter->rx_rings;
2333 	} else
2334 		trigger = E1000_ICS_RXDMT0;
2335 
2336 	/*
2337 	** Check on the state of the TX queue(s); this
2338 	** can be done without the lock because it is
2339 	** read-only and the HUNG state is static once set.
2340 	*/
2341 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2342 		if (txr->busy == EM_TX_HUNG)
2343 			goto hung;
2344 		if (txr->busy >= EM_TX_MAXTRIES)
2345 			txr->busy = EM_TX_HUNG;
2346 		/* Schedule a TX tasklet if needed */
2347 		if (txr->tx_avail <= EM_MAX_SCATTER)
2348 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2349 	}
2350 
2351 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2352 #ifndef DEVICE_POLLING
2353 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2354 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2355 #endif
2356 	return;
2357 hung:
2358 	/* Looks like we're hung */
2359 	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2360 			txr->me);
2361 	em_print_debug_info(adapter);
2362 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2363 	adapter->watchdog_events++;
2364 	em_init_locked(adapter);
2365 }
2366 
2367 
2368 static void
2369 em_update_link_status(struct adapter *adapter)
2370 {
2371 	struct e1000_hw *hw = &adapter->hw;
2372 	if_t ifp = adapter->ifp;
2373 	device_t dev = adapter->dev;
2374 	struct tx_ring *txr = adapter->tx_rings;
2375 	u32 link_check = 0;
2376 
2377 	/* Get the cached link value or read phy for real */
2378 	switch (hw->phy.media_type) {
2379 	case e1000_media_type_copper:
2380 		if (hw->mac.get_link_status) {
2381 			if (hw->mac.type == e1000_pch_spt)
2382 				msec_delay(50);
2383 			/* Do the work to read phy */
2384 			e1000_check_for_link(hw);
2385 			link_check = !hw->mac.get_link_status;
2386 			if (link_check) /* ESB2 fix */
2387 				e1000_cfg_on_link_up(hw);
2388 		} else
2389 			link_check = TRUE;
2390 		break;
2391 	case e1000_media_type_fiber:
2392 		e1000_check_for_link(hw);
2393 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2394                                  E1000_STATUS_LU);
2395 		break;
2396 	case e1000_media_type_internal_serdes:
2397 		e1000_check_for_link(hw);
2398 		link_check = adapter->hw.mac.serdes_has_link;
2399 		break;
2400 	default:
2401 	case e1000_media_type_unknown:
2402 		break;
2403 	}
2404 
2405 	/* Now check for a transition */
2406 	if (link_check && (adapter->link_active == 0)) {
2407 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2408 		    &adapter->link_duplex);
2409 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2410 		if ((adapter->link_speed != SPEED_1000) &&
2411 		    ((hw->mac.type == e1000_82571) ||
2412 		    (hw->mac.type == e1000_82572))) {
2413 			int tarc0;
2414 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2415 			tarc0 &= ~TARC_SPEED_MODE_BIT;
2416 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2417 		}
2418 		if (bootverbose)
2419 			device_printf(dev, "Link is up %d Mbps %s\n",
2420 			    adapter->link_speed,
2421 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2422 			    "Full Duplex" : "Half Duplex"));
2423 		adapter->link_active = 1;
2424 		adapter->smartspeed = 0;
2425 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2426 		if_link_state_change(ifp, LINK_STATE_UP);
2427 	} else if (!link_check && (adapter->link_active == 1)) {
2428 		if_setbaudrate(ifp, 0);
2429 		adapter->link_speed = 0;
2430 		adapter->link_duplex = 0;
2431 		if (bootverbose)
2432 			device_printf(dev, "Link is Down\n");
2433 		adapter->link_active = 0;
2434 		/* Link down, disable hang detection */
2435 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2436 			txr->busy = EM_TX_IDLE;
2437 		if_link_state_change(ifp, LINK_STATE_DOWN);
2438 	}
2439 }
2440 
2441 /*********************************************************************
2442  *
2443  *  This routine disables all traffic on the adapter by issuing a
2444  *  global reset on the MAC and deallocates TX/RX buffers.
2445  *
2446  *  This routine should always be called with BOTH the CORE
2447  *  and TX locks held.
2448  **********************************************************************/
2449 
2450 static void
2451 em_stop(void *arg)
2452 {
2453 	struct adapter	*adapter = arg;
2454 	if_t ifp = adapter->ifp;
2455 	struct tx_ring	*txr = adapter->tx_rings;
2456 
2457 	EM_CORE_LOCK_ASSERT(adapter);
2458 
2459 	INIT_DEBUGOUT("em_stop: begin");
2460 
2461 	em_disable_intr(adapter);
2462 	callout_stop(&adapter->timer);
2463 
2464 	/* Tell the stack that the interface is no longer active */
2465 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2466 
2467 	/* Disarm Hang Detection. */
2468 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2469 		EM_TX_LOCK(txr);
2470 		txr->busy = EM_TX_IDLE;
2471 		EM_TX_UNLOCK(txr);
2472 	}
2473 
2474 	/* I219 needs some special flushing to avoid hangs */
2475 	if (adapter->hw.mac.type == e1000_pch_spt)
2476 		em_flush_desc_rings(adapter);
2477 
2478 	e1000_reset_hw(&adapter->hw);
2479 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2480 
2481 	e1000_led_off(&adapter->hw);
2482 	e1000_cleanup_led(&adapter->hw);
2483 }
2484 
2485 
2486 /*********************************************************************
2487  *
2488  *  Determine hardware revision.
2489  *
2490  **********************************************************************/
2491 static void
2492 em_identify_hardware(struct adapter *adapter)
2493 {
2494 	device_t dev = adapter->dev;
2495 
2496 	/* Make sure our PCI config space has the necessary stuff set */
2497 	pci_enable_busmaster(dev);
2498 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2499 
2500 	/* Save off the information about this board */
2501 	adapter->hw.vendor_id = pci_get_vendor(dev);
2502 	adapter->hw.device_id = pci_get_device(dev);
2503 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2504 	adapter->hw.subsystem_vendor_id =
2505 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2506 	adapter->hw.subsystem_device_id =
2507 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2508 
2509 	/* Do Shared Code Init and Setup */
2510 	if (e1000_set_mac_type(&adapter->hw)) {
2511 		device_printf(dev, "Setup init failure\n");
2512 		return;
2513 	}
2514 }
2515 
2516 static int
2517 em_allocate_pci_resources(struct adapter *adapter)
2518 {
2519 	device_t	dev = adapter->dev;
2520 	int		rid;
2521 
2522 	rid = PCIR_BAR(0);
2523 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2524 	    &rid, RF_ACTIVE);
2525 	if (adapter->memory == NULL) {
2526 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2527 		return (ENXIO);
2528 	}
2529 	adapter->osdep.mem_bus_space_tag =
2530 	    rman_get_bustag(adapter->memory);
2531 	adapter->osdep.mem_bus_space_handle =
2532 	    rman_get_bushandle(adapter->memory);
2533 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2534 
2535 	adapter->hw.back = &adapter->osdep;
2536 
2537 	return (0);
2538 }
2539 
2540 /*********************************************************************
2541  *
2542  *  Setup the Legacy or MSI Interrupt handler
2543  *
2544  **********************************************************************/
2545 int
2546 em_allocate_legacy(struct adapter *adapter)
2547 {
2548 	device_t dev = adapter->dev;
2549 	struct tx_ring	*txr = adapter->tx_rings;
2550 	int error, rid = 0;
2551 
2552 	/* Manually turn off all interrupts */
2553 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2554 
2555 	if (adapter->msix == 1) /* using MSI */
2556 		rid = 1;
2557 	/* We allocate a single interrupt resource */
2558 	adapter->res = bus_alloc_resource_any(dev,
2559 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2560 	if (adapter->res == NULL) {
2561 		device_printf(dev, "Unable to allocate bus resource: "
2562 		    "interrupt\n");
2563 		return (ENXIO);
2564 	}
2565 
2566 	/*
2567 	 * Allocate a fast interrupt and the associated
2568 	 * deferred processing contexts.
2569 	 */
2570 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2571 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2572 	    taskqueue_thread_enqueue, &adapter->tq);
2573 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2574 	    device_get_nameunit(adapter->dev));
2575 	/* Use a TX only tasklet for local timer */
2576 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2577 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2578 	    taskqueue_thread_enqueue, &txr->tq);
2579 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2580 	    device_get_nameunit(adapter->dev));
2581 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2582 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2583 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2584 		device_printf(dev, "Failed to register fast interrupt "
2585 			    "handler: %d\n", error);
2586 		taskqueue_free(adapter->tq);
2587 		adapter->tq = NULL;
2588 		return (error);
2589 	}
2590 
2591 	return (0);
2592 }
2593 
2594 /*********************************************************************
2595  *
2596  *  Setup the MSIX Interrupt handlers
2597  *   This is not really Multiqueue, rather
2598  *   it is just separate interrupt vectors
2599  *   for TX, RX, and Link.
2600  *
2601  **********************************************************************/
2602 int
2603 em_allocate_msix(struct adapter *adapter)
2604 {
2605 	device_t	dev = adapter->dev;
2606 	struct		tx_ring *txr = adapter->tx_rings;
2607 	struct		rx_ring *rxr = adapter->rx_rings;
2608 	int		error, rid, vector = 0;
2609 	int		cpu_id = 0;
2610 
2611 
2612 	/* Make sure all interrupts are disabled */
2613 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2614 
2615 	/* First set up ring resources */
2616 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2617 
2618 		/* RX ring */
2619 		rid = vector + 1;
2620 
2621 		rxr->res = bus_alloc_resource_any(dev,
2622 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2623 		if (rxr->res == NULL) {
2624 			device_printf(dev,
2625 			    "Unable to allocate bus resource: "
2626 			    "RX MSIX Interrupt %d\n", i);
2627 			return (ENXIO);
2628 		}
2629 		if ((error = bus_setup_intr(dev, rxr->res,
2630 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2631 		    rxr, &rxr->tag)) != 0) {
2632 			device_printf(dev, "Failed to register RX handler\n");
2633 			return (error);
2634 		}
2635 #if __FreeBSD_version >= 800504
2636 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2637 #endif
2638 		rxr->msix = vector;
2639 
2640 		if (em_last_bind_cpu < 0)
2641 			em_last_bind_cpu = CPU_FIRST();
2642 		cpu_id = em_last_bind_cpu;
2643 		bus_bind_intr(dev, rxr->res, cpu_id);
2644 
2645 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2646 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2647 		    taskqueue_thread_enqueue, &rxr->tq);
2648 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2649 		    device_get_nameunit(adapter->dev), cpu_id);
2650 		/*
2651 		** Set the bit to enable interrupt
2652 		** in E1000_IMS -- bits 20 and 21
2653 		** are for RX0 and RX1, note this has
2654 		** NOTHING to do with the MSIX vector
2655 		*/
2656 		rxr->ims = 1 << (20 + i);
2657 		adapter->ims |= rxr->ims;
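     		/*
     		 * Each 4-bit IVAR entry on the 82574 has its valid bit
     		 * (the 8) set and its low bits select the MSIX vector.
     		 */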
2658 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2659 
2660 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2661 	}
2662 
2663 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2664 		/* TX ring */
2665 		rid = vector + 1;
2666 		txr->res = bus_alloc_resource_any(dev,
2667 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2668 		if (txr->res == NULL) {
2669 			device_printf(dev,
2670 			    "Unable to allocate bus resource: "
2671 			    "TX MSIX Interrupt %d\n", i);
2672 			return (ENXIO);
2673 		}
2674 		if ((error = bus_setup_intr(dev, txr->res,
2675 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2676 		    txr, &txr->tag)) != 0) {
2677 			device_printf(dev, "Failed to register TX handler\n");
2678 			return (error);
2679 		}
2680 #if __FreeBSD_version >= 800504
2681 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2682 #endif
2683 		txr->msix = vector;
2684 
2685 		if (em_last_bind_cpu < 0)
2686 			em_last_bind_cpu = CPU_FIRST();
2687 		cpu_id = em_last_bind_cpu;
2688 		bus_bind_intr(dev, txr->res, cpu_id);
2689 
2690 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2691 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2692 		    taskqueue_thread_enqueue, &txr->tq);
2693 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2694 		    device_get_nameunit(adapter->dev), cpu_id);
2695 		/*
2696 		** Set the bit to enable interrupt
2697 		** in E1000_IMS -- bits 22 and 23
2698 		** are for TX0 and TX1, note this has
2699 		** NOTHING to do with the MSIX vector
2700 		*/
2701 		txr->ims = 1 << (22 + i);
2702 		adapter->ims |= txr->ims;
2703 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2704 
2705 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2706 	}
2707 
2708 	/* Link interrupt */
2709 	rid = vector + 1;
2710 	adapter->res = bus_alloc_resource_any(dev,
2711 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2712 	if (!adapter->res) {
2713 		device_printf(dev, "Unable to allocate "
2714 		    "bus resource: Link interrupt [%d]\n", rid);
2715 		return (ENXIO);
2716 	}
2717 	/* Set the link handler function */
2718 	error = bus_setup_intr(dev, adapter->res,
2719 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2720 	    em_msix_link, adapter, &adapter->tag);
2721 	if (error) {
2722 		adapter->res = NULL;
2723 		device_printf(dev, "Failed to register LINK handler\n");
2724 		return (error);
2725 	}
2726 #if __FreeBSD_version >= 800504
2727 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2728 #endif
2729 	adapter->linkvec = vector;
2730 	adapter->ivars |=  (8 | vector) << 16;
2731 	adapter->ivars |= 0x80000000;
2732 
2733 	return (0);
2734 }
2735 
2736 
2737 static void
2738 em_free_pci_resources(struct adapter *adapter)
2739 {
2740 	device_t	dev = adapter->dev;
2741 	struct tx_ring	*txr;
2742 	struct rx_ring	*rxr;
2743 	int		rid;
2744 
2745 
2746 	/*
2747 	** Release all the queue interrupt resources:
2748 	*/
2749 	for (int i = 0; i < adapter->num_queues; i++) {
2750 		txr = &adapter->tx_rings[i];
2751 		/* an early abort? */
2752 		if (txr == NULL)
2753 			break;
2754 		rid = txr->msix + 1;
2755 		if (txr->tag != NULL) {
2756 			bus_teardown_intr(dev, txr->res, txr->tag);
2757 			txr->tag = NULL;
2758 		}
2759 		if (txr->res != NULL)
2760 			bus_release_resource(dev, SYS_RES_IRQ,
2761 			    rid, txr->res);
2762 
2763 		rxr = &adapter->rx_rings[i];
2764 		/* an early abort? */
2765 		if (rxr == NULL)
2766 			break;
2767 		rid = rxr->msix + 1;
2768 		if (rxr->tag != NULL) {
2769 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2770 			rxr->tag = NULL;
2771 		}
2772 		if (rxr->res != NULL)
2773 			bus_release_resource(dev, SYS_RES_IRQ,
2774 			    rid, rxr->res);
2775 	}
2776 
2777 	if (adapter->linkvec) /* we are doing MSIX */
2778 		rid = adapter->linkvec + 1;
2779 	else
2780 		rid = (adapter->msix != 0) ? 1 : 0;
2781 
2782 	if (adapter->tag != NULL) {
2783 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2784 		adapter->tag = NULL;
2785 	}
2786 
2787 	if (adapter->res != NULL)
2788 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2789 
2790 
2791 	if (adapter->msix)
2792 		pci_release_msi(dev);
2793 
2794 	if (adapter->msix_mem != NULL)
2795 		bus_release_resource(dev, SYS_RES_MEMORY,
2796 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2797 
2798 	if (adapter->memory != NULL)
2799 		bus_release_resource(dev, SYS_RES_MEMORY,
2800 		    PCIR_BAR(0), adapter->memory);
2801 
2802 	if (adapter->flash != NULL)
2803 		bus_release_resource(dev, SYS_RES_MEMORY,
2804 		    EM_FLASH, adapter->flash);
2805 }
2806 
2807 /*
2808  * Setup MSI or MSI/X
2809  */
2810 static int
2811 em_setup_msix(struct adapter *adapter)
2812 {
2813 	device_t dev = adapter->dev;
2814 	int val;
2815 
2816 	/* Nearly always going to use one queue */
2817 	adapter->num_queues = 1;
2818 
2819 	/*
2820 	** Try using MSI-X for Hartwell adapters
2821 	*/
2822 	if ((adapter->hw.mac.type == e1000_82574) &&
2823 	    (em_enable_msix == TRUE)) {
2824 #ifdef EM_MULTIQUEUE
2825 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2826 		if (adapter->num_queues > 1)
2827 			em_enable_vectors_82574(adapter);
2828 #endif
2829 		/* Map the MSIX BAR */
2830 		int rid = PCIR_BAR(EM_MSIX_BAR);
2831 		adapter->msix_mem = bus_alloc_resource_any(dev,
2832 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2833 		if (adapter->msix_mem == NULL) {
2834 			/* May not be enabled */
2835 			device_printf(adapter->dev,
2836 			    "Unable to map MSIX table\n");
2837 			goto msi;
2838 		}
2839 		val = pci_msix_count(dev);
2840 
2841 #ifdef EM_MULTIQUEUE
2842 		/* The multiqueue case needs 5 vectors: 2 RX, 2 TX and link */
2843 		if (adapter->num_queues > 1) {
2844 			if (val >= 5)
2845 				val = 5;
2846 			else {
2847 				adapter->num_queues = 1;
2848 				device_printf(adapter->dev,
2849 				    "Insufficient MSIX vectors for >1 queue, "
2850 				    "using single queue...\n");
2851 				goto msix_one;
2852 			}
2853 		} else {
2854 msix_one:
2855 #endif
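     			/* A single queue needs 3 vectors: RX, TX and link */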
2856 			if (val >= 3)
2857 				val = 3;
2858 			else {
2859 				device_printf(adapter->dev,
2860 				    "Insufficient MSIX vectors, using MSI\n");
2861 				goto msi;
2862 			}
2863 #ifdef EM_MULTIQUEUE
2864 		}
2865 #endif
2866 
2867 		if ((pci_alloc_msix(dev, &val) == 0)) {
2868 			device_printf(adapter->dev,
2869 			    "Using MSIX interrupts "
2870 			    "with %d vectors\n", val);
2871 			return (val);
2872 		}
2873 
2874 		/*
2875 		** If MSIX alloc failed or provided us with
2876 		** less than needed, free and fall through to MSI
2877 		*/
2878 		pci_release_msi(dev);
2879 	}
2880 msi:
2881 	if (adapter->msix_mem != NULL) {
2882 		bus_release_resource(dev, SYS_RES_MEMORY,
2883 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2884 		adapter->msix_mem = NULL;
2885 	}
2886 	val = 1;
2887 	if (pci_alloc_msi(dev, &val) == 0) {
2888 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2889 		return (val);
2890 	}
2891 	/* Should only happen due to manual configuration */
2892 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2893 	return (0);
2894 }
2895 
2896 
2897 /*
2898 ** The 3 following flush routines are used as a workaround in the
2899 ** I219 client parts and only for them.
2900 **
2901 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2902 **
2903 ** We want to clear all pending descriptors from the TX ring; the
2904 ** zeroing happens when the HW reads the regs. We assign the ring
2905 ** itself as the data of the dummy descriptor; the data does not
2906 ** matter since we are about to reset the HW.
2907 */
2908 static void
2909 em_flush_tx_ring(struct adapter *adapter)
2910 {
2911 	struct e1000_hw		*hw = &adapter->hw;
2912 	struct tx_ring		*txr = adapter->tx_rings;
2913 	struct e1000_tx_desc	*txd;
2914 	u32			tctl, txd_lower = E1000_TXD_CMD_IFCS;
2915 	u16			size = 512;
2916 
2917 	tctl = E1000_READ_REG(hw, E1000_TCTL);
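     	/* The transmitter must be enabled for the HW to fetch the dummy descriptor */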
2918 	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2919 
2920 	txd = &txr->tx_base[txr->next_avail_desc++];
2921 	if (txr->next_avail_desc == adapter->num_tx_desc)
2922 		txr->next_avail_desc = 0;
2923 
2924 	/* Just use the ring as a dummy buffer addr */
2925 	txd->buffer_addr = txr->txdma.dma_paddr;
2926 	txd->lower.data = htole32(txd_lower | size);
2927 	txd->upper.data = 0;
2928 
2929 	/* flush descriptors to memory before notifying the HW */
2930 	wmb();
2931 
2932 	E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2933 	mb();
2934 	usec_delay(250);
2935 }
2936 
2937 /*
2938 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2939 **
2940 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
2941 */
2942 static void
2943 em_flush_rx_ring(struct adapter *adapter)
2944 {
2945 	struct e1000_hw	*hw = &adapter->hw;
2946 	u32		rctl, rxdctl;
2947 
2948 	rctl = E1000_READ_REG(hw, E1000_RCTL);
2949 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2950 	E1000_WRITE_FLUSH(hw);
2951 	usec_delay(150);
2952 
2953 	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2954 	/* zero the lower 14 bits (prefetch and host thresholds) */
2955 	rxdctl &= 0xffffc000;
2956 	/*
2957 	 * update thresholds: prefetch threshold to 31, host threshold to 1
2958 	 * and make sure the granularity is "descriptors" and not "cache lines"
2959 	 */
2960 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2961 	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
2962 
2963 	/* momentarily enable the RX ring for the changes to take effect */
2964 	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2965 	E1000_WRITE_FLUSH(hw);
2966 	usec_delay(150);
2967 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2968 }
2969 
2970 /*
2971 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
2972 **
2973 ** On the I219, the descriptor rings must be emptied before resetting the HW
2974 ** or before changing the device state to D3 during runtime (runtime PM).
2975 **
2976 ** Failure to do this will cause the HW to enter a unit hang state which can
2977 ** only be cleared by a PCI reset of the device.
2978 **
2979 */
2980 static void
2981 em_flush_desc_rings(struct adapter *adapter)
2982 {
2983 	struct e1000_hw	*hw = &adapter->hw;
2984 	device_t	dev = adapter->dev;
2985 	u16		hang_state;
2986 	u32		fext_nvm11, tdlen;
2987 
2988 	/* First, disable MULR fix in FEXTNVM11 */
2989 	fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
2990 	fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
2991 	E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
2992 
2993 	/* do nothing if we're not in faulty state, or if the queue is empty */
2994 	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
2995 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
2996 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
2997 		return;
2998 	em_flush_tx_ring(adapter);
2999 
3000 	/* recheck, maybe the fault is caused by the rx ring */
3001 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3002 	if (hang_state & FLUSH_DESC_REQUIRED)
3003 		em_flush_rx_ring(adapter);
3004 }
3005 
3006 
3007 /*********************************************************************
3008  *
3009  *  Initialize the hardware to a configuration
3010  *  as specified by the adapter structure.
3011  *
3012  **********************************************************************/
3013 static void
3014 em_reset(struct adapter *adapter)
3015 {
3016 	device_t	dev = adapter->dev;
3017 	if_t ifp = adapter->ifp;
3018 	struct e1000_hw	*hw = &adapter->hw;
3019 	u16		rx_buffer_size;
3020 	u32		pba;
3021 
3022 	INIT_DEBUGOUT("em_reset: begin");
3023 
3024 	/* Set up smart power down as default off on newer adapters. */
3025 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3026 	    hw->mac.type == e1000_82572)) {
3027 		u16 phy_tmp = 0;
3028 
3029 		/* Speed up time to link by disabling smart power down. */
3030 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3031 		phy_tmp &= ~IGP02E1000_PM_SPD;
3032 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3033 	}
3034 
3035 	/*
3036 	 * Packet Buffer Allocation (PBA)
3037 	 * Writing PBA sets the receive portion of the buffer;
3038 	 * the remainder is used for the transmit buffer.
3039 	 */
3040 	switch (hw->mac.type) {
3041 	/* Total Packet Buffer on these is 48K */
3042 	case e1000_82571:
3043 	case e1000_82572:
3044 	case e1000_80003es2lan:
3045 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3046 		break;
3047 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3048 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3049 		break;
3050 	case e1000_82574:
3051 	case e1000_82583:
3052 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3053 		break;
3054 	case e1000_ich8lan:
3055 		pba = E1000_PBA_8K;
3056 		break;
3057 	case e1000_ich9lan:
3058 	case e1000_ich10lan:
3059 		/* Boost Receive side for jumbo frames */
3060 		if (adapter->hw.mac.max_frame_size > 4096)
3061 			pba = E1000_PBA_14K;
3062 		else
3063 			pba = E1000_PBA_10K;
3064 		break;
3065 	case e1000_pchlan:
3066 	case e1000_pch2lan:
3067 	case e1000_pch_lpt:
3068 	case e1000_pch_spt:
3069 		pba = E1000_PBA_26K;
3070 		break;
3071 	default:
3072 		if (adapter->hw.mac.max_frame_size > 8192)
3073 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3074 		else
3075 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3076 	}
3077 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3078 
3079 	/*
3080 	 * These parameters control the automatic generation (Tx) and
3081 	 * response (Rx) to Ethernet PAUSE frames.
3082 	 * - High water mark should allow for at least two frames to be
3083 	 *   received after sending an XOFF.
3084 	 * - Low water mark works best when it is very near the high water mark.
3085 	 *   This allows the receiver to restart by sending XON when it has
3086 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
3087 	 *   restart after one full frame is pulled from the buffer. There
3088 	 *   could be several smaller frames in the buffer and if so they will
3089 	 *   not trigger the XON until their total number reduces the buffer
3090 	 *   by 1500.
3091 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3092 	 */
3093 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3094 	hw->fc.high_water = rx_buffer_size -
3095 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
3096 	hw->fc.low_water = hw->fc.high_water - 1500;
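     	/*
     	 * Example: with a 32K Rx PBA and a 1522-byte max frame,
     	 * high_water = 32768 - roundup2(1522, 1024) = 30720 and
     	 * low_water = 30720 - 1500 = 29220 bytes.
     	 */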
3097 
3098 	if (adapter->fc) /* locally set flow control value? */
3099 		hw->fc.requested_mode = adapter->fc;
3100 	else
3101 		hw->fc.requested_mode = e1000_fc_full;
3102 
3103 	if (hw->mac.type == e1000_80003es2lan)
3104 		hw->fc.pause_time = 0xFFFF;
3105 	else
3106 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
3107 
3108 	hw->fc.send_xon = TRUE;
3109 
3110 	/* Device specific overrides/settings */
3111 	switch (hw->mac.type) {
3112 	case e1000_pchlan:
3113 		/* Workaround: no TX flow ctrl for PCH */
3114 		hw->fc.requested_mode = e1000_fc_rx_pause;
3115 		hw->fc.pause_time = 0xFFFF; /* override */
3116 		if (if_getmtu(ifp) > ETHERMTU) {
3117 			hw->fc.high_water = 0x3500;
3118 			hw->fc.low_water = 0x1500;
3119 		} else {
3120 			hw->fc.high_water = 0x5000;
3121 			hw->fc.low_water = 0x3000;
3122 		}
3123 		hw->fc.refresh_time = 0x1000;
3124 		break;
3125 	case e1000_pch2lan:
3126 	case e1000_pch_lpt:
3127 	case e1000_pch_spt:
3128 		hw->fc.high_water = 0x5C20;
3129 		hw->fc.low_water = 0x5048;
3130 		hw->fc.pause_time = 0x0650;
3131 		hw->fc.refresh_time = 0x0400;
3132 		/* Jumbos need adjusted PBA */
3133 		if (if_getmtu(ifp) > ETHERMTU)
3134 			E1000_WRITE_REG(hw, E1000_PBA, 12);
3135 		else
3136 			E1000_WRITE_REG(hw, E1000_PBA, 26);
3137 		break;
3138 	case e1000_ich9lan:
3139 	case e1000_ich10lan:
3140 		if (if_getmtu(ifp) > ETHERMTU) {
3141 			hw->fc.high_water = 0x2800;
3142 			hw->fc.low_water = hw->fc.high_water - 8;
3143 			break;
3144 		}
3145 		/* else fall thru */
3146 	default:
3147 		if (hw->mac.type == e1000_80003es2lan)
3148 			hw->fc.pause_time = 0xFFFF;
3149 		break;
3150 	}
3151 
3152 	/* I219 needs some special flushing to avoid hangs */
3153 	if (hw->mac.type == e1000_pch_spt)
3154 		em_flush_desc_rings(adapter);
3155 
3156 	/* Issue a global reset */
3157 	e1000_reset_hw(hw);
3158 	E1000_WRITE_REG(hw, E1000_WUC, 0);
3159 	em_disable_aspm(adapter);
3160 	/* and a re-init */
3161 	if (e1000_init_hw(hw) < 0) {
3162 		device_printf(dev, "Hardware Initialization Failed\n");
3163 		return;
3164 	}
3165 
3166 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3167 	e1000_get_phy_info(hw);
3168 	e1000_check_for_link(hw);
3169 	return;
3170 }
3171 
3172 /*********************************************************************
3173  *
3174  *  Setup networking device structure and register an interface.
3175  *
3176  **********************************************************************/
3177 static int
3178 em_setup_interface(device_t dev, struct adapter *adapter)
3179 {
3180 	if_t ifp;
3181 
3182 	INIT_DEBUGOUT("em_setup_interface: begin");
3183 
3184 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3185 	if (ifp == NULL) {
3186 		device_printf(dev, "cannot allocate ifnet structure\n");
3187 		return (-1);
3188 	}
3189 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3190 	if_setdev(ifp, dev);
3191 	if_setinitfn(ifp, em_init);
3192 	if_setsoftc(ifp, adapter);
3193 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3194 	if_setioctlfn(ifp, em_ioctl);
3195 	if_setgetcounterfn(ifp, em_get_counter);
3196 	/* TSO parameters */
3197 	ifp->if_hw_tsomax = IP_MAXPACKET;
3198 	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER;
3199 	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3200 
3201 #ifdef EM_MULTIQUEUE
3202 	/* Multiqueue stack interface */
3203 	if_settransmitfn(ifp, em_mq_start);
3204 	if_setqflushfn(ifp, em_qflush);
3205 #else
3206 	if_setstartfn(ifp, em_start);
3207 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3208 	if_setsendqready(ifp);
3209 #endif
3210 
3211 	ether_ifattach(ifp, adapter->hw.mac.addr);
3212 
3213 	if_setcapabilities(ifp, 0);
3214 	if_setcapenable(ifp, 0);
3215 
3216 
3217 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3218 	    IFCAP_TSO4, 0);
3219 	/*
3220 	 * Tell the upper layer(s) we
3221 	 * support full VLAN capability
3222 	 */
3223 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3224 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3225 	    IFCAP_VLAN_MTU, 0);
3226 	if_setcapenable(ifp, if_getcapabilities(ifp));
3227 
3228 	/*
3229 	** Don't turn this on by default: if vlans are
3230 	** created on another pseudo device (e.g. lagg),
3231 	** vlan events are not passed through, breaking
3232 	** operation, whereas with HW FILTER off it works.
3233 	** If using vlans directly on the em driver you can
3234 	** enable this and get full hardware tag filtering.
3235 	*/
3236 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3237 
3238 #ifdef DEVICE_POLLING
3239 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3240 #endif
3241 
3242 	/* Enable only WOL MAGIC by default */
3243 	if (adapter->wol) {
3244 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3245 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3246 	}
3247 
3248 	/*
3249 	 * Specify the media types supported by this adapter and register
3250 	 * callbacks to update media and link information
3251 	 */
3252 	ifmedia_init(&adapter->media, IFM_IMASK,
3253 	    em_media_change, em_media_status);
3254 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3255 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3256 		u_char fiber_type = IFM_1000_SX;	/* default type */
3257 
3258 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3259 			    0, NULL);
3260 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3261 	} else {
3262 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3263 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3264 			    0, NULL);
3265 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3266 			    0, NULL);
3267 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3268 			    0, NULL);
3269 		if (adapter->hw.phy.type != e1000_phy_ife) {
3270 			ifmedia_add(&adapter->media,
3271 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3272 			ifmedia_add(&adapter->media,
3273 				IFM_ETHER | IFM_1000_T, 0, NULL);
3274 		}
3275 	}
3276 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3277 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3278 	return (0);
3279 }
3280 
3281 
3282 /*
3283  * Manage DMA'able memory.
3284  */
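     /*
      * em_dmamap_cb() is the bus_dmamap_load() callback; the tags created
      * below use nsegments = 1, so it simply records the single segment's
      * bus address for the caller.
      */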
3285 static void
3286 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3287 {
3288 	if (error)
3289 		return;
3290 	*(bus_addr_t *) arg = segs[0].ds_addr;
3291 }
3292 
3293 static int
3294 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3295         struct em_dma_alloc *dma, int mapflags)
3296 {
3297 	int error;
3298 
3299 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3300 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3301 				BUS_SPACE_MAXADDR,	/* lowaddr */
3302 				BUS_SPACE_MAXADDR,	/* highaddr */
3303 				NULL, NULL,		/* filter, filterarg */
3304 				size,			/* maxsize */
3305 				1,			/* nsegments */
3306 				size,			/* maxsegsize */
3307 				0,			/* flags */
3308 				NULL,			/* lockfunc */
3309 				NULL,			/* lockarg */
3310 				&dma->dma_tag);
3311 	if (error) {
3312 		device_printf(adapter->dev,
3313 		    "%s: bus_dma_tag_create failed: %d\n",
3314 		    __func__, error);
3315 		goto fail_0;
3316 	}
3317 
3318 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3319 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3320 	if (error) {
3321 		device_printf(adapter->dev,
3322 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3323 		    __func__, (uintmax_t)size, error);
3324 		goto fail_2;
3325 	}
3326 
3327 	dma->dma_paddr = 0;
3328 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3329 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3330 	if (error || dma->dma_paddr == 0) {
3331 		device_printf(adapter->dev,
3332 		    "%s: bus_dmamap_load failed: %d\n",
3333 		    __func__, error);
3334 		goto fail_3;
3335 	}
3336 
3337 	return (0);
3338 
3339 fail_3:
3340 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3341 fail_2:
3342 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3343 	bus_dma_tag_destroy(dma->dma_tag);
3344 fail_0:
3345 	dma->dma_tag = NULL;
3346 
3347 	return (error);
3348 }
3349 
3350 static void
3351 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3352 {
3353 	if (dma->dma_tag == NULL)
3354 		return;
3355 	if (dma->dma_paddr != 0) {
3356 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3357 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3358 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3359 		dma->dma_paddr = 0;
3360 	}
3361 	if (dma->dma_vaddr != NULL) {
3362 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3363 		dma->dma_vaddr = NULL;
3364 	}
3365 	bus_dma_tag_destroy(dma->dma_tag);
3366 	dma->dma_tag = NULL;
3367 }
3368 
3369 
3370 /*********************************************************************
3371  *
3372  *  Allocate memory for the transmit and receive rings, and then
3373  *  the descriptors associated with each, called only once at attach.
3374  *
3375  **********************************************************************/
3376 static int
3377 em_allocate_queues(struct adapter *adapter)
3378 {
3379 	device_t		dev = adapter->dev;
3380 	struct tx_ring		*txr = NULL;
3381 	struct rx_ring		*rxr = NULL;
3382 	int rsize, tsize, error = E1000_SUCCESS;
3383 	int txconf = 0, rxconf = 0;
3384 
3385 
3386 	/* Allocate the TX ring struct memory */
3387 	if (!(adapter->tx_rings =
3388 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3389 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3390 		device_printf(dev, "Unable to allocate TX ring memory\n");
3391 		error = ENOMEM;
3392 		goto fail;
3393 	}
3394 
3395 	/* Now allocate the RX */
3396 	if (!(adapter->rx_rings =
3397 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3398 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3399 		device_printf(dev, "Unable to allocate RX ring memory\n");
3400 		error = ENOMEM;
3401 		goto rx_fail;
3402 	}
3403 
3404 	tsize = roundup2(adapter->num_tx_desc *
3405 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3406 	/*
3407 	 * Now set up the TX queues, txconf is needed to handle the
3408 	 * possibility that things fail midcourse and we need to
3409 	 * undo memory gracefully
3410 	 */
3411 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3412 		/* Set up some basics */
3413 		txr = &adapter->tx_rings[i];
3414 		txr->adapter = adapter;
3415 		txr->me = i;
3416 
3417 		/* Initialize the TX lock */
3418 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3419 		    device_get_nameunit(dev), txr->me);
3420 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3421 
3422 		if (em_dma_malloc(adapter, tsize,
3423 			&txr->txdma, BUS_DMA_NOWAIT)) {
3424 			device_printf(dev,
3425 			    "Unable to allocate TX Descriptor memory\n");
3426 			error = ENOMEM;
3427 			goto err_tx_desc;
3428 		}
3429 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3430 		bzero((void *)txr->tx_base, tsize);
3431 
3432 		if (em_allocate_transmit_buffers(txr)) {
3433 			device_printf(dev,
3434 			    "Critical Failure setting up transmit buffers\n");
3435 			error = ENOMEM;
3436 			goto err_tx_desc;
3437 		}
3438 #if __FreeBSD_version >= 800000
3439 		/* Allocate a buf ring */
3440 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3441 		    M_WAITOK, &txr->tx_mtx);
3442 #endif
3443 	}
3444 
3445 	/*
3446 	 * Next the RX queues...
3447 	 */
3448 	rsize = roundup2(adapter->num_rx_desc *
3449 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3450 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3451 		rxr = &adapter->rx_rings[i];
3452 		rxr->adapter = adapter;
3453 		rxr->me = i;
3454 
3455 		/* Initialize the RX lock */
3456 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3457 		    device_get_nameunit(dev), rxr->me);
3458 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3459 
3460 		if (em_dma_malloc(adapter, rsize,
3461 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3462 			device_printf(dev,
3463 			    "Unable to allocate RX Descriptor memory\n");
3464 			error = ENOMEM;
3465 			goto err_rx_desc;
3466 		}
3467 		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3468 		bzero((void *)rxr->rx_base, rsize);
3469 
3470 		/* Allocate receive buffers for the ring */
3471 		if (em_allocate_receive_buffers(rxr)) {
3472 			device_printf(dev,
3473 			    "Critical Failure setting up receive buffers\n");
3474 			error = ENOMEM;
3475 			goto err_rx_desc;
3476 		}
3477 	}
3478 
3479 	return (0);
3480 
3481 err_rx_desc:
3482 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3483 		em_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
#if __FreeBSD_version >= 800000
		/* buf_ring_alloc used M_WAITOK, so br is valid here */
		buf_ring_free(txr->br, M_DEVBUF);
#endif
		em_dma_free(adapter, &txr->txdma);
	}
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
3493 fail:
3494 	return (error);
3495 }
3496 
3497 
3498 /*********************************************************************
3499  *
3500  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3501  *  the information needed to transmit a packet on the wire. This is
3502  *  called only once at attach, setup is done every reset.
3503  *
3504  **********************************************************************/
3505 static int
3506 em_allocate_transmit_buffers(struct tx_ring *txr)
3507 {
3508 	struct adapter *adapter = txr->adapter;
3509 	device_t dev = adapter->dev;
3510 	struct em_txbuffer *txbuf;
3511 	int error, i;
3512 
3513 	/*
3514 	 * Setup DMA descriptor areas.
3515 	 */
3516 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3517 			       1, 0,			/* alignment, bounds */
3518 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3519 			       BUS_SPACE_MAXADDR,	/* highaddr */
3520 			       NULL, NULL,		/* filter, filterarg */
3521 			       EM_TSO_SIZE,		/* maxsize */
3522 			       EM_MAX_SCATTER,		/* nsegments */
3523 			       PAGE_SIZE,		/* maxsegsize */
3524 			       0,			/* flags */
3525 			       NULL,			/* lockfunc */
3526 			       NULL,			/* lockfuncarg */
3527 			       &txr->txtag))) {
3528 		device_printf(dev, "Unable to allocate TX DMA tag\n");
3529 		goto fail;
3530 	}
3531 
3532 	if (!(txr->tx_buffers =
3533 	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3534 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3535 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3536 		error = ENOMEM;
3537 		goto fail;
3538 	}
3539 
3540 	/* Create the descriptor buffer dma maps */
3541 	txbuf = txr->tx_buffers;
3542 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3543 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3544 		if (error != 0) {
3545 			device_printf(dev, "Unable to create TX DMA map\n");
3546 			goto fail;
3547 		}
3548 	}
3549 
3550 	return (0);
3551 fail:
3552 	/* Free everything; this handles the case where we failed partway */
3553 	em_free_transmit_structures(adapter);
3554 	return (error);
3555 }
3556 
3557 /*********************************************************************
3558  *
3559  *  Initialize a transmit ring.
3560  *
3561  **********************************************************************/
3562 static void
3563 em_setup_transmit_ring(struct tx_ring *txr)
3564 {
3565 	struct adapter *adapter = txr->adapter;
3566 	struct em_txbuffer *txbuf;
3567 	int i;
3568 #ifdef DEV_NETMAP
3569 	struct netmap_slot *slot;
3570 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3571 #endif /* DEV_NETMAP */
3572 
3573 	/* Clear the old descriptor contents */
3574 	EM_TX_LOCK(txr);
3575 #ifdef DEV_NETMAP
3576 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3577 #endif /* DEV_NETMAP */
3578 
3579 	bzero((void *)txr->tx_base,
3580 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3581 	/* Reset indices */
3582 	txr->next_avail_desc = 0;
3583 	txr->next_to_clean = 0;
3584 
3585 	/* Free any existing tx buffers. */
3586 	txbuf = txr->tx_buffers;
3587 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3588 		if (txbuf->m_head != NULL) {
3589 			bus_dmamap_sync(txr->txtag, txbuf->map,
3590 			    BUS_DMASYNC_POSTWRITE);
3591 			bus_dmamap_unload(txr->txtag, txbuf->map);
3592 			m_freem(txbuf->m_head);
3593 			txbuf->m_head = NULL;
3594 		}
3595 #ifdef DEV_NETMAP
3596 		if (slot) {
3597 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3598 			uint64_t paddr;
3599 			void *addr;
3600 
3601 			addr = PNMB(na, slot + si, &paddr);
3602 			txr->tx_base[i].buffer_addr = htole64(paddr);
3603 			/* reload the map for netmap mode */
3604 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3605 		}
3606 #endif /* DEV_NETMAP */
3607 
3608 		/* clear the watch index */
3609 		txbuf->next_eop = -1;
3610 	}
3611 
3612 	/* Set number of descriptors available */
3613 	txr->tx_avail = adapter->num_tx_desc;
3614 	txr->busy = EM_TX_IDLE;
3615 
3616 	/* Clear checksum offload context. */
3617 	txr->last_hw_offload = 0;
3618 	txr->last_hw_ipcss = 0;
3619 	txr->last_hw_ipcso = 0;
3620 	txr->last_hw_tucss = 0;
3621 	txr->last_hw_tucso = 0;
3622 
3623 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3624 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3625 	EM_TX_UNLOCK(txr);
3626 }
3627 
3628 /*********************************************************************
3629  *
3630  *  Initialize all transmit rings.
3631  *
3632  **********************************************************************/
3633 static void
3634 em_setup_transmit_structures(struct adapter *adapter)
3635 {
3636 	struct tx_ring *txr = adapter->tx_rings;
3637 
3638 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3639 		em_setup_transmit_ring(txr);
3640 
3641 	return;
3642 }
3643 
3644 /*********************************************************************
3645  *
3646  *  Enable transmit unit.
3647  *
3648  **********************************************************************/
3649 static void
3650 em_initialize_transmit_unit(struct adapter *adapter)
3651 {
3652 	struct tx_ring	*txr = adapter->tx_rings;
3653 	struct e1000_hw	*hw = &adapter->hw;
3654 	u32	tctl, txdctl = 0, tarc, tipg = 0;
3655 
3656 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3657 
3658 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3659 		u64 bus_addr = txr->txdma.dma_paddr;
3660 		/* Base and Len of TX Ring */
3661 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3662 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3663 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3664 	    	    (u32)(bus_addr >> 32));
3665 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3666 	    	    (u32)bus_addr);
3667 		/* Init the HEAD/TAIL indices */
3668 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3669 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3670 
3671 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3672 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3673 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3674 
3675 		txr->busy = EM_TX_IDLE;
3676 		txdctl = 0; /* clear txdctl */
3677 		txdctl |= 0x1f; /* PTHRESH */
3678 		txdctl |= 1 << 8; /* HTHRESH */
3679 		txdctl |= 1 << 16; /* WTHRESH */
3680 		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3681 		txdctl |= E1000_TXDCTL_GRAN;
3682 		txdctl |= 1 << 25; /* LWTHRESH */
3683 
3684 		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3685 	}
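
	/*
	 * Worked example for the TXDCTL value programmed above, assuming
	 * E1000_TXDCTL_GRAN is bit 24 as in the shared e1000 headers:
	 *   0x1f | 1<<8 | 1<<16 | 1<<22 | 1<<24 | 1<<25 = 0x0341011f
	 * i.e. PTHRESH=31, HTHRESH=1, WTHRESH=1, descriptor granularity
	 * and LWTHRESH=1.
	 */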
3686 
3687 	/* Set the default values for the Tx Inter Packet Gap timer */
3688 	switch (adapter->hw.mac.type) {
3689 	case e1000_80003es2lan:
3690 		tipg = DEFAULT_82543_TIPG_IPGR1;
3691 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3692 		    E1000_TIPG_IPGR2_SHIFT;
3693 		break;
3694 	default:
3695 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3696 		    (adapter->hw.phy.media_type ==
3697 		    e1000_media_type_internal_serdes))
3698 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3699 		else
3700 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3701 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3702 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3703 	}
3704 
3705 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3706 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3707 
3708 	if (adapter->hw.mac.type >= e1000_82540)
3709 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3710 		    adapter->tx_abs_int_delay.value);
3711 
3712 	if ((adapter->hw.mac.type == e1000_82571) ||
3713 	    (adapter->hw.mac.type == e1000_82572)) {
3714 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3715 		tarc |= TARC_SPEED_MODE_BIT;
3716 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3717 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3718 		/* errata: program both queues to unweighted RR */
3719 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3720 		tarc |= 1;
3721 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3722 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3723 		tarc |= 1;
3724 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3725 	} else if (adapter->hw.mac.type == e1000_82574) {
3726 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3727 		tarc |= TARC_ERRATA_BIT;
3728 		if (adapter->num_queues > 1) {
3729 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3730 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3731 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3732 		} else
3733 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3734 	}
3735 
3736 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3737 	if (adapter->tx_int_delay.value > 0)
3738 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3739 
3740 	/* Program the Transmit Control Register */
3741 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3742 	tctl &= ~E1000_TCTL_CT;
3743 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3744 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3745 
3746 	if (adapter->hw.mac.type >= e1000_82571)
3747 		tctl |= E1000_TCTL_MULR;
3748 
3749 	/* This write will effectively turn on the transmit unit. */
3750 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3751 
3752 	if (hw->mac.type == e1000_pch_spt) {
3753 		u32 reg;
3754 		reg = E1000_READ_REG(hw, E1000_IOSFPC);
3755 		reg |= E1000_RCTL_RDMTS_HEX;
3756 		E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3757 		reg = E1000_READ_REG(hw, E1000_TARC(0));
3758 		reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3759 		E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3760 	}
3761 }
3762 
3763 
3764 /*********************************************************************
3765  *
3766  *  Free all transmit rings.
3767  *
3768  **********************************************************************/
3769 static void
3770 em_free_transmit_structures(struct adapter *adapter)
3771 {
3772 	struct tx_ring *txr = adapter->tx_rings;
3773 
3774 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3775 		EM_TX_LOCK(txr);
3776 		em_free_transmit_buffers(txr);
3777 		em_dma_free(adapter, &txr->txdma);
3778 		EM_TX_UNLOCK(txr);
3779 		EM_TX_LOCK_DESTROY(txr);
3780 	}
3781 
3782 	free(adapter->tx_rings, M_DEVBUF);
3783 }
3784 
3785 /*********************************************************************
3786  *
3787  *  Free transmit ring related data structures.
3788  *
3789  **********************************************************************/
3790 static void
3791 em_free_transmit_buffers(struct tx_ring *txr)
3792 {
3793 	struct adapter		*adapter = txr->adapter;
3794 	struct em_txbuffer	*txbuf;
3795 
3796 	INIT_DEBUGOUT("free_transmit_ring: begin");
3797 
3798 	if (txr->tx_buffers == NULL)
3799 		return;
3800 
3801 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3802 		txbuf = &txr->tx_buffers[i];
3803 		if (txbuf->m_head != NULL) {
3804 			bus_dmamap_sync(txr->txtag, txbuf->map,
3805 			    BUS_DMASYNC_POSTWRITE);
3806 			bus_dmamap_unload(txr->txtag,
3807 			    txbuf->map);
3808 			m_freem(txbuf->m_head);
3809 			txbuf->m_head = NULL;
3810 			if (txbuf->map != NULL) {
3811 				bus_dmamap_destroy(txr->txtag,
3812 				    txbuf->map);
3813 				txbuf->map = NULL;
3814 			}
3815 		} else if (txbuf->map != NULL) {
3816 			bus_dmamap_unload(txr->txtag,
3817 			    txbuf->map);
3818 			bus_dmamap_destroy(txr->txtag,
3819 			    txbuf->map);
3820 			txbuf->map = NULL;
3821 		}
3822 	}
3823 #if __FreeBSD_version >= 800000
3824 	if (txr->br != NULL)
3825 		buf_ring_free(txr->br, M_DEVBUF);
3826 #endif
3827 	if (txr->tx_buffers != NULL) {
3828 		free(txr->tx_buffers, M_DEVBUF);
3829 		txr->tx_buffers = NULL;
3830 	}
3831 	if (txr->txtag != NULL) {
3832 		bus_dma_tag_destroy(txr->txtag);
3833 		txr->txtag = NULL;
3834 	}
3835 	return;
3836 }
3837 
3838 
3839 /*********************************************************************
3840  *  The offload context is protocol specific (TCP/UDP) and thus
3841  *  only needs to be set when the protocol changes. A context
3842  *  change can nevertheless be a performance detriment, and might
3843  *  be better just disabled. The reason lies in the way the
3844  *  controller supports pipelined requests from the Tx data DMA.
3845  *  Up to four requests can be pipelined, and they may belong to
3846  *  the same packet or to multiple packets. However, all requests
3847  *  for one packet are issued before a request is issued for a
3848  *  subsequent packet, and if a request for the next packet
3849  *  requires a context change, that request will be stalled
3850  *  until the previous request completes. This means setting up
3851  *  a new context effectively disables pipelined Tx data DMA,
3852  *  which in turn greatly slows down performance when sending
3853  *  small frames.
3854  **********************************************************************/
3855 static void
3856 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3857     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3858 {
3859 	struct adapter			*adapter = txr->adapter;
3860 	struct e1000_context_desc	*TXD = NULL;
3861 	struct em_txbuffer		*tx_buffer;
3862 	int				cur, hdr_len;
3863 	u32				cmd = 0;
3864 	u16				offload = 0;
3865 	u8				ipcso, ipcss, tucso, tucss;
3866 
3867 	ipcss = ipcso = tucss = tucso = 0;
3868 	hdr_len = ip_off + (ip->ip_hl << 2);
3869 	cur = txr->next_avail_desc;
3870 
3871 	/* Setup of IP header checksum. */
3872 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3873 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3874 		offload |= CSUM_IP;
3875 		ipcss = ip_off;
3876 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3877 		/*
3878 		 * Start offset for header checksum calculation.
3879 		 * End offset for header checksum calculation.
3880 		 * Offset of place to put the checksum.
3881 		 */
3882 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3883 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3884 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3885 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3886 		cmd |= E1000_TXD_CMD_IP;
3887 	}
3888 
3889 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3890 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3891 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3892 		offload |= CSUM_TCP;
3893 		tucss = hdr_len;
3894 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3895 		/*
3896 		 * The 82574L can only remember the *last* context used
3897 		 * regardless of the queue it was used for.  We cannot reuse
3898 		 * contexts on this hardware platform and must generate a new
3899 		 * context every time.  82574L hardware spec, section 7.2.6,
3900 		 * second note.
3901 		 */
3902 		if (adapter->num_queues < 2) {
3903 			/*
3904 			 * Setting up a new checksum offload context for every
3905 			 * frame takes a lot of processing time for hardware.
3906 			 * This also reduces performance a lot for small sized
3907 			 * frames, so avoid it if the driver can use a
3908 			 * previously configured checksum offload context.
3909 			 */
3910 			if (txr->last_hw_offload == offload) {
3911 				if (offload & CSUM_IP) {
3912 					if (txr->last_hw_ipcss == ipcss &&
3913 					    txr->last_hw_ipcso == ipcso &&
3914 					    txr->last_hw_tucss == tucss &&
3915 					    txr->last_hw_tucso == tucso)
3916 						return;
3917 				} else {
3918 					if (txr->last_hw_tucss == tucss &&
3919 					    txr->last_hw_tucso == tucso)
3920 						return;
3921 				}
3922 			}
3923 			txr->last_hw_offload = offload;
3924 			txr->last_hw_tucss = tucss;
3925 			txr->last_hw_tucso = tucso;
3926 		}
3927 		/*
3928 		 * Start offset for payload checksum calculation.
3929 		 * End offset for payload checksum calculation.
3930 		 * Offset of place to put the checksum.
3931 		 */
3932 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3933 		TXD->upper_setup.tcp_fields.tucss = tucss;
3934 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3935 		TXD->upper_setup.tcp_fields.tucso = tucso;
3936 		cmd |= E1000_TXD_CMD_TCP;
3937 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3938 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3939 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3940 		tucss = hdr_len;
3941 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3942 		/*
3943 		 * The 82574L can only remember the *last* context used
3944 		 * regardless of the queue it was used for.  We cannot reuse
3945 		 * contexts on this hardware platform and must generate a new
3946 		 * context every time.  82574L hardware spec, section 7.2.6,
3947 		 * second note.
3948 		 */
3949 		if (adapter->num_queues < 2) {
3950 			/*
3951 			 * Setting up a new checksum offload context for every
3952 			 * frame takes a lot of processing time for hardware.
3953 			 * This also reduces performance a lot for small sized
3954 			 * frames, so avoid it if the driver can use a
3955 			 * previously configured checksum offload context.
3956 			 */
3957 			if (txr->last_hw_offload == offload) {
3958 				if (offload & CSUM_IP) {
3959 					if (txr->last_hw_ipcss == ipcss &&
3960 					    txr->last_hw_ipcso == ipcso &&
3961 					    txr->last_hw_tucss == tucss &&
3962 					    txr->last_hw_tucso == tucso)
3963 						return;
3964 				} else {
3965 					if (txr->last_hw_tucss == tucss &&
3966 					    txr->last_hw_tucso == tucso)
3967 						return;
3968 				}
3969 			}
3970 			txr->last_hw_offload = offload;
3971 			txr->last_hw_tucss = tucss;
3972 			txr->last_hw_tucso = tucso;
3973 		}
3974 		/*
3975 		 * Start offset for payload checksum calculation.
3976 		 * End offset for payload checksum calculation.
3977 		 * Offset of place to put the checksum.
3978 		 */
3979 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3980 		TXD->upper_setup.tcp_fields.tucss = tucss;
3981 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3982 		TXD->upper_setup.tcp_fields.tucso = tucso;
3983 	}
3984 
3985 	if (offload & CSUM_IP) {
3986 		txr->last_hw_ipcss = ipcss;
3987 		txr->last_hw_ipcso = ipcso;
3988 	}
3989 
3990 	TXD->tcp_seg_setup.data = htole32(0);
3991 	TXD->cmd_and_length =
3992 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3993 	tx_buffer = &txr->tx_buffers[cur];
3994 	tx_buffer->m_head = NULL;
3995 	tx_buffer->next_eop = -1;
3996 
3997 	if (++cur == adapter->num_tx_desc)
3998 		cur = 0;
3999 
4000 	txr->tx_avail--;
4001 	txr->next_avail_desc = cur;
4002 }
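
/*
 * Worked example for the context offsets computed above (a sketch,
 * not normative): for an untagged IPv4/TCP frame, ip_off = 14 (the
 * Ethernet header) and ip->ip_hl = 5, so hdr_len = 14 + 20 = 34.
 * That yields ipcss = 14, ipcso = 14 + 10 = 24 (offset of ip_sum),
 * tucss = 34 and tucso = 34 + 16 = 50 (offset of th_sum).
 */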
4003 
4004 
4005 /**********************************************************************
4006  *
4007  *  Setup work for hardware segmentation offload (TSO)
4008  *
4009  **********************************************************************/
4010 static void
4011 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4012     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4013 {
4014 	struct adapter			*adapter = txr->adapter;
4015 	struct e1000_context_desc	*TXD;
4016 	struct em_txbuffer		*tx_buffer;
4017 	int cur, hdr_len;
4018 
4019 	/*
4020 	 * In theory we can use the same TSO context if and only if
4021 	 * the frame is the same type (IP/TCP) and has the same MSS.
4022 	 * However, checking whether a frame has the same IP/TCP
4023 	 * structure is hard, so just ignore that and always
4024 	 * establish a new TSO context.
4025 	 */
4026 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4027 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
4028 		      E1000_TXD_DTYP_D |	/* Data descr type */
4029 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
4030 
4031 	/* IP and/or TCP header checksum calculation and insertion. */
4032 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4033 
4034 	cur = txr->next_avail_desc;
4035 	tx_buffer = &txr->tx_buffers[cur];
4036 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4037 
4038 	/*
4039 	 * Start offset for header checksum calculation.
4040 	 * End offset for header checksum calculation.
4041 	 * Offset of place to put the checksum.
4042 	 */
4043 	TXD->lower_setup.ip_fields.ipcss = ip_off;
4044 	TXD->lower_setup.ip_fields.ipcse =
4045 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
4046 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4047 	/*
4048 	 * Start offset for payload checksum calculation.
4049 	 * End offset for payload checksum calculation.
4050 	 * Offset of place to put the checksum.
4051 	 */
4052 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4053 	TXD->upper_setup.tcp_fields.tucse = 0;
4054 	TXD->upper_setup.tcp_fields.tucso =
4055 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4056 	/*
4057 	 * Payload size per packet w/o any headers.
4058 	 * Length of all headers up to payload.
4059 	 */
4060 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4061 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4062 
4063 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
4064 				E1000_TXD_CMD_DEXT |	/* Extended descr */
4065 				E1000_TXD_CMD_TSE |	/* TSE context */
4066 				E1000_TXD_CMD_IP |	/* Do IP csum */
4067 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
4068 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
4069 
4070 	tx_buffer->m_head = NULL;
4071 	tx_buffer->next_eop = -1;
4072 
4073 	if (++cur == adapter->num_tx_desc)
4074 		cur = 0;
4075 
4076 	txr->tx_avail--;
4077 	txr->next_avail_desc = cur;
4078 	txr->tx_tso = TRUE;
4079 }
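
/*
 * Worked example (a sketch, not normative): for an untagged IPv4/TCP
 * frame with no options, hdr_len = 14 + 20 + 20 = 54. With an MSS
 * (tso_segsz) of 1460 and m_pkthdr.len of 32768, the context covers
 * 32768 - 54 = 32714 payload bytes, which the hardware slices into
 * ceil(32714 / 1460) = 23 wire segments.
 */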
4080 
4081 
4082 /**********************************************************************
4083  *
4084  *  Examine each tx_buffer in the used queue. If the hardware is done
4085  *  processing the packet then free associated resources. The
4086  *  tx_buffer is put back on the free queue.
4087  *
4088  **********************************************************************/
4089 static void
4090 em_txeof(struct tx_ring *txr)
4091 {
4092 	struct adapter	*adapter = txr->adapter;
4093 	int first, last, done, processed;
4094 	struct em_txbuffer *tx_buffer;
4095 	struct e1000_tx_desc *tx_desc, *eop_desc;
4096 	if_t ifp = adapter->ifp;
4097 
4098 	EM_TX_LOCK_ASSERT(txr);
4099 #ifdef DEV_NETMAP
4100 	if (netmap_tx_irq(ifp, txr->me))
4101 		return;
4102 #endif /* DEV_NETMAP */
4103 
4104 	/* No work, make sure hang detection is disabled */
4105 	if (txr->tx_avail == adapter->num_tx_desc) {
4106 		txr->busy = EM_TX_IDLE;
4107 		return;
4108 	}
4109 
4110 	processed = 0;
4111 	first = txr->next_to_clean;
4112 	tx_desc = &txr->tx_base[first];
4113 	tx_buffer = &txr->tx_buffers[first];
4114 	last = tx_buffer->next_eop;
4115 	eop_desc = &txr->tx_base[last];
4116 
4117 	/*
4118 	 * Get the index of the first descriptor AFTER the EOP
4119 	 * of the first packet; that way the inner while loop
4120 	 * below can use a simple equality comparison against
4121 	 * 'done' to find the end of the packet's range.
4122 	 */
4123 	if (++last == adapter->num_tx_desc)
4124 		last = 0;
4125 	done = last;
4126 
4127 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4128 	    BUS_DMASYNC_POSTREAD);
4129 
4130 	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4131 		/* We clean the range of the packet */
4132 		while (first != done) {
4133 			tx_desc->upper.data = 0;
4134 			tx_desc->lower.data = 0;
4135 			tx_desc->buffer_addr = 0;
4136 			++txr->tx_avail;
4137 			++processed;
4138 
4139 			if (tx_buffer->m_head) {
4140 				bus_dmamap_sync(txr->txtag,
4141 				    tx_buffer->map,
4142 				    BUS_DMASYNC_POSTWRITE);
4143 				bus_dmamap_unload(txr->txtag,
4144 				    tx_buffer->map);
4145 				m_freem(tx_buffer->m_head);
4146 				tx_buffer->m_head = NULL;
4147 			}
4148 			tx_buffer->next_eop = -1;
4149 
4150 			if (++first == adapter->num_tx_desc)
4151 				first = 0;
4152 			tx_buffer = &txr->tx_buffers[first];
4153 			tx_desc = &txr->tx_base[first];
4154 		}
4155 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4156 		/* See if we can continue to the next packet */
4157 		last = tx_buffer->next_eop;
4158 		if (last != -1) {
4159 			eop_desc = &txr->tx_base[last];
4160 			/* Get new done point */
4161 			if (++last == adapter->num_tx_desc)
4162 				last = 0;
4163 			done = last;
4164 		} else
4165 			break;
4166 	}
4167 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4168 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4169 
4170 	txr->next_to_clean = first;
4171 
4172 	/*
4173 	** Hang detection: we know there's work outstanding
4174 	** or the early return above would have been taken, so
4175 	** no descriptor processed here indicates a potential hang.
4176 	** The local timer will examine this and do a reset if needed.
4177 	*/
4178 	if (processed == 0) {
4179 		if (txr->busy != EM_TX_HUNG)
4180 			++txr->busy;
4181 	} else /* At least one descriptor was cleaned */
4182 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4183 
4184 	/*
4185 	 * If we have a minimum free, clear IFF_DRV_OACTIVE
4186 	 * to tell the stack that it is OK to send packets.
4187 	 * Notice that all writes of OACTIVE happen under the
4188 	 * TX lock which, with a single queue, guarantees
4189 	 * sanity.
4190 	 */
4191 	if (txr->tx_avail >= EM_MAX_SCATTER) {
4192 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4193 	}
4194 
4195 	/* Disable hang detection if all clean */
4196 	if (txr->tx_avail == adapter->num_tx_desc)
4197 		txr->busy = EM_TX_IDLE;
4198 }
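
/*
 * Index walk example for em_txeof() (a sketch, not normative): with
 * num_tx_desc = 8, next_to_clean = 6 and a packet whose EOP is in
 * slot 7, first = 6, last = 7 and done = (7 + 1) % 8 = 0, so the
 * inner loop cleans slots 6 and 7 and stops when 'first' wraps to 0.
 */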
4199 
4200 /*********************************************************************
4201  *
4202  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4203  *
4204  **********************************************************************/
4205 static void
4206 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4207 {
4208 	struct adapter		*adapter = rxr->adapter;
4209 	struct mbuf		*m;
4210 	bus_dma_segment_t	segs;
4211 	struct em_rxbuffer	*rxbuf;
4212 	int			i, j, error, nsegs;
4213 	bool			cleaned = FALSE;
4214 
4215 	i = j = rxr->next_to_refresh;
4216 	/*
4217 	** Get one descriptor beyond
4218 	** our work mark to control
4219 	** the loop.
4220 	*/
4221 	if (++j == adapter->num_rx_desc)
4222 		j = 0;
4223 
4224 	while (j != limit) {
4225 		rxbuf = &rxr->rx_buffers[i];
4226 		if (rxbuf->m_head == NULL) {
4227 			m = m_getjcl(M_NOWAIT, MT_DATA,
4228 			    M_PKTHDR, adapter->rx_mbuf_sz);
4229 			/*
4230 			** If we have a temporary resource shortage
4231 			** that causes a failure, just abort refresh
4232 			** for now, we will return to this point when
4233 			** reinvoked from em_rxeof.
4234 			*/
4235 			if (m == NULL)
4236 				goto update;
4237 		} else
4238 			m = rxbuf->m_head;
4239 
4240 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4241 		m->m_flags |= M_PKTHDR;
4242 		m->m_data = m->m_ext.ext_buf;
4243 
4244 		/* Use bus_dma machinery to setup the memory mapping  */
4245 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4246 		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
4247 		if (error != 0) {
4248 			printf("Refresh mbufs: dmamap load"
4249 			    " failure - %d\n", error);
4250 			m_free(m);
4251 			rxbuf->m_head = NULL;
4252 			goto update;
4253 		}
4254 		rxbuf->m_head = m;
4255 		rxbuf->paddr = segs.ds_addr;
4256 		bus_dmamap_sync(rxr->rxtag,
4257 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4258 		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4259 		cleaned = TRUE;
4260 
4261 		i = j; /* Next is precalculated for us */
4262 		rxr->next_to_refresh = i;
4263 		/* Calculate next controlling index */
4264 		if (++j == adapter->num_rx_desc)
4265 			j = 0;
4266 	}
4267 update:
4268 	/*
4269 	** Update the tail pointer only if we refreshed,
4270 	** and only as far as we have refreshed.
4271 	*/
4272 	if (cleaned)
4273 		E1000_WRITE_REG(&adapter->hw,
4274 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4275 
4276 	return;
4277 }
4278 
4279 
4280 /*********************************************************************
4281  *
4282  *  Allocate memory for rx_buffer structures. Since we use one
4283  *  rx_buffer per received packet, the maximum number of rx_buffer's
4284  *  that we'll need is equal to the number of receive descriptors
4285  *  that we've allocated.
4286  *
4287  **********************************************************************/
4288 static int
4289 em_allocate_receive_buffers(struct rx_ring *rxr)
4290 {
4291 	struct adapter		*adapter = rxr->adapter;
4292 	device_t		dev = adapter->dev;
4293 	struct em_rxbuffer	*rxbuf;
4294 	int			error;
4295 
4296 	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4297 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4298 	if (rxr->rx_buffers == NULL) {
4299 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4300 		return (ENOMEM);
4301 	}
4302 
4303 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4304 				1, 0,			/* alignment, bounds */
4305 				BUS_SPACE_MAXADDR,	/* lowaddr */
4306 				BUS_SPACE_MAXADDR,	/* highaddr */
4307 				NULL, NULL,		/* filter, filterarg */
4308 				MJUM9BYTES,		/* maxsize */
4309 				1,			/* nsegments */
4310 				MJUM9BYTES,		/* maxsegsize */
4311 				0,			/* flags */
4312 				NULL,			/* lockfunc */
4313 				NULL,			/* lockarg */
4314 				&rxr->rxtag);
4315 	if (error) {
4316 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4317 		    __func__, error);
4318 		goto fail;
4319 	}
4320 
4321 	rxbuf = rxr->rx_buffers;
4322 	/* walk the rx_buffers array in lockstep with the index */
4323 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4324 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4325 		if (error) {
4326 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4327 			    __func__, error);
4328 			goto fail;
4329 		}
4330 	}
4331 
4332 	return (0);
4333 
4334 fail:
4335 	em_free_receive_structures(adapter);
4336 	return (error);
4337 }
4338 
4339 
4340 /*********************************************************************
4341  *
4342  *  Initialize a receive ring and its buffers.
4343  *
4344  **********************************************************************/
4345 static int
4346 em_setup_receive_ring(struct rx_ring *rxr)
4347 {
4348 	struct	adapter 	*adapter = rxr->adapter;
4349 	struct em_rxbuffer	*rxbuf;
4350 	bus_dma_segment_t	seg[1];
4351 	int			rsize, nsegs, error = 0;
4352 #ifdef DEV_NETMAP
4353 	struct netmap_slot *slot;
4354 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4355 #endif
4356 
4357 
4358 	/* Clear the ring contents */
4359 	EM_RX_LOCK(rxr);
4360 	rsize = roundup2(adapter->num_rx_desc *
4361 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4362 	bzero((void *)rxr->rx_base, rsize);
4363 #ifdef DEV_NETMAP
4364 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4365 #endif
4366 
4367 	/*
4368 	** Free current RX buffer structs and their mbufs
4369 	*/
4370 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4371 		rxbuf = &rxr->rx_buffers[i];
4372 		if (rxbuf->m_head != NULL) {
4373 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4374 			    BUS_DMASYNC_POSTREAD);
4375 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4376 			m_freem(rxbuf->m_head);
4377 			rxbuf->m_head = NULL; /* mark as freed */
4378 		}
4379 	}
4380 
4381 	/* Now replenish the mbufs */
4382 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4383 		rxbuf = &rxr->rx_buffers[j];
4384 #ifdef DEV_NETMAP
4385 		if (slot) {
4386 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4387 			uint64_t paddr;
4388 			void *addr;
4389 
4390 			addr = PNMB(na, slot + si, &paddr);
4391 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4392 			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4393 			continue;
4394 		}
4395 #endif /* DEV_NETMAP */
4396 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4397 		    M_PKTHDR, adapter->rx_mbuf_sz);
4398 		if (rxbuf->m_head == NULL) {
4399 			error = ENOBUFS;
4400 			goto fail;
4401 		}
4402 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4403 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4404 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4405 
4406 		/* Get the memory mapping */
4407 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4408 		    rxbuf->map, rxbuf->m_head, seg,
4409 		    &nsegs, BUS_DMA_NOWAIT);
4410 		if (error != 0) {
4411 			m_freem(rxbuf->m_head);
4412 			rxbuf->m_head = NULL;
4413 			goto fail;
4414 		}
4415 		bus_dmamap_sync(rxr->rxtag,
4416 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4417 
4418 		rxbuf->paddr = seg[0].ds_addr;
4419 		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4420 	}
4421 	rxr->next_to_check = 0;
4422 	rxr->next_to_refresh = 0;
4423 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4424 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4425 
4426 fail:
4427 	EM_RX_UNLOCK(rxr);
4428 	return (error);
4429 }
4430 
4431 /*********************************************************************
4432  *
4433  *  Initialize all receive rings.
4434  *
4435  **********************************************************************/
4436 static int
4437 em_setup_receive_structures(struct adapter *adapter)
4438 {
4439 	struct rx_ring *rxr = adapter->rx_rings;
4440 	int q;
4441 
4442 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4443 		if (em_setup_receive_ring(rxr))
4444 			goto fail;
4445 
4446 	return (0);
4447 fail:
4448 	/*
4449 	 * Free RX buffers allocated so far, we will only handle
4450 	 * the rings that completed, the failing case will have
4451 	 * cleaned up for itself. 'q' failed, so it's the terminus.
4452 	 */
4453 	for (int i = 0; i < q; ++i) {
4454 		rxr = &adapter->rx_rings[i];
4455 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4456 			struct em_rxbuffer *rxbuf;
4457 			rxbuf = &rxr->rx_buffers[n];
4458 			if (rxbuf->m_head != NULL) {
4459 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4460 			  	  BUS_DMASYNC_POSTREAD);
4461 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4462 				m_freem(rxbuf->m_head);
4463 				rxbuf->m_head = NULL;
4464 			}
4465 		}
4466 		rxr->next_to_check = 0;
4467 		rxr->next_to_refresh = 0;
4468 	}
4469 
4470 	return (ENOBUFS);
4471 }
4472 
4473 /*********************************************************************
4474  *
4475  *  Free all receive rings.
4476  *
4477  **********************************************************************/
4478 static void
4479 em_free_receive_structures(struct adapter *adapter)
4480 {
4481 	struct rx_ring *rxr = adapter->rx_rings;
4482 
4483 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4484 		em_free_receive_buffers(rxr);
4485 		/* Free the ring memory as well */
4486 		em_dma_free(adapter, &rxr->rxdma);
4487 		EM_RX_LOCK_DESTROY(rxr);
4488 	}
4489 
4490 	free(adapter->rx_rings, M_DEVBUF);
4491 }
4492 
4493 
4494 /*********************************************************************
4495  *
4496  *  Free receive ring data structures
4497  *
4498  **********************************************************************/
4499 static void
4500 em_free_receive_buffers(struct rx_ring *rxr)
4501 {
4502 	struct adapter		*adapter = rxr->adapter;
4503 	struct em_rxbuffer	*rxbuf = NULL;
4504 
4505 	INIT_DEBUGOUT("free_receive_buffers: begin");
4506 
4507 	if (rxr->rx_buffers != NULL) {
4508 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4509 			rxbuf = &rxr->rx_buffers[i];
4510 			if (rxbuf->map != NULL) {
4511 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4512 				    BUS_DMASYNC_POSTREAD);
4513 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4514 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4515 			}
4516 			if (rxbuf->m_head != NULL) {
4517 				m_freem(rxbuf->m_head);
4518 				rxbuf->m_head = NULL;
4519 			}
4520 		}
4521 		free(rxr->rx_buffers, M_DEVBUF);
4522 		rxr->rx_buffers = NULL;
4523 		rxr->next_to_check = 0;
4524 		rxr->next_to_refresh = 0;
4525 	}
4526 
4527 	if (rxr->rxtag != NULL) {
4528 		bus_dma_tag_destroy(rxr->rxtag);
4529 		rxr->rxtag = NULL;
4530 	}
4531 
4532 	return;
4533 }
4534 
4535 
4536 /*********************************************************************
4537  *
4538  *  Enable receive unit.
4539  *
4540  **********************************************************************/
4541 
4542 static void
4543 em_initialize_receive_unit(struct adapter *adapter)
4544 {
4545 	struct rx_ring *rxr = adapter->rx_rings;
4546 	if_t ifp = adapter->ifp;
4547 	struct e1000_hw	*hw = &adapter->hw;
4548 	u32	rctl, rxcsum, rfctl;
4549 
4550 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4551 
4552 	/*
4553 	 * Make sure receives are disabled while setting
4554 	 * up the descriptor ring
4555 	 */
4556 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4557 	/* Do not disable if ever enabled on this hardware */
4558 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4559 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4560 
4561 	/* Setup the Receive Control Register */
4562 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4563 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4564 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4565 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4566 
4567 	/* Do not store bad packets */
4568 	rctl &= ~E1000_RCTL_SBP;
4569 
4570 	/* Enable Long Packet receive */
4571 	if (if_getmtu(ifp) > ETHERMTU)
4572 		rctl |= E1000_RCTL_LPE;
4573 	else
4574 		rctl &= ~E1000_RCTL_LPE;
4575 
4576 	/* Strip the CRC */
4577 	if (!em_disable_crc_stripping)
4578 		rctl |= E1000_RCTL_SECRC;
4579 
4580 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4581 	    adapter->rx_abs_int_delay.value);
4582 
4583 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4584 	    adapter->rx_int_delay.value);
4585 	/*
4586 	 * Set the interrupt throttling rate. Value is calculated
4587 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4588 	 */
4589 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
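	/*
	 * Worked example, assuming MAX_INTS_PER_SEC is 8000 as defined
	 * in if_em.h: DEFAULT_ITR = 10^9 / (8000 * 256) ~= 488 units of
	 * 256ns each, capping the device at roughly 8000 interrupts/s.
	 */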
4590 
4591 	/* Use extended rx descriptor formats */
4592 	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4593 	rfctl |= E1000_RFCTL_EXTEN;
4594 	/*
4595 	** When using MSIX interrupts we need to throttle
4596 	** using the EITR register (82574 only)
4597 	*/
4598 	if (hw->mac.type == e1000_82574) {
4599 		for (int i = 0; i < 4; i++)
4600 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4601 			    DEFAULT_ITR);
4602 		/* Disable accelerated acknowledge */
4603 		rfctl |= E1000_RFCTL_ACK_DIS;
4604 	}
4605 	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4606 
4607 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4608 	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4609 #ifdef EM_MULTIQUEUE
4610 		rxcsum |= E1000_RXCSUM_TUOFL |
4611 			  E1000_RXCSUM_IPOFL |
4612 			  E1000_RXCSUM_PCSD;
4613 #else
4614 		rxcsum |= E1000_RXCSUM_TUOFL;
4615 #endif
4616 	} else
4617 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4618 
4619 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4620 
4621 #ifdef EM_MULTIQUEUE
4622 #define RSSKEYLEN 10
4623 	if (adapter->num_queues > 1) {
4624 		uint8_t  rss_key[4 * RSSKEYLEN];
4625 		uint32_t reta = 0;
4626 		int i;
4627 
4628 		/*
4629 		* Configure RSS key
4630 		*/
4631 		arc4rand(rss_key, sizeof(rss_key), 0);
4632 		for (i = 0; i < RSSKEYLEN; ++i) {
4633 			uint32_t rssrk = 0;
4634 
4635 			rssrk = EM_RSSRK_VAL(rss_key, i);
4636 			E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk);
4637 		}
4638 
4639 		/*
4640 		* Configure RSS redirect table in following fashion:
4641 		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4642 		*/
4643 		for (i = 0; i < sizeof(reta); ++i) {
4644 			uint32_t q;
4645 
4646 			q = (i % adapter->num_queues) << 7;
4647 			reta |= q << (8 * i);
4648 		}
4649 
4650 		for (i = 0; i < 32; ++i) {
4651 			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4652 		}
4653 
4654 		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4655 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4656 				E1000_MRQC_RSS_FIELD_IPV4 |
4657 				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4658 				E1000_MRQC_RSS_FIELD_IPV6_EX |
4659 				E1000_MRQC_RSS_FIELD_IPV6);
4660 	}
4661 #endif
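
/*
 * Worked example of the RETA packing above (a sketch for the
 * EM_MULTIQUEUE case with num_queues = 2): each of the four bytes of
 * 'reta' selects a queue via bit 7, so q alternates 0x00/0x80 and the
 * 32-bit value written to every E1000_RETA register is 0x80008000.
 */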
4662 	/*
4663 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4664 	** long latencies are observed, like Lenovo X60. This
4665 	** change eliminates the problem, but since having positive
4666 	** values in RDTR is a known source of problems on other
4667 	** platforms another solution is being sought.
4668 	*/
4669 	if (hw->mac.type == e1000_82573)
4670 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4671 
4672 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4673 		/* Setup the Base and Length of the Rx Descriptor Ring */
4674 		u64 bus_addr = rxr->rxdma.dma_paddr;
4675 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4676 
4677 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4678 		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4679 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4680 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4681 		/* Setup the Head and Tail Descriptor Pointers */
4682 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4683 #ifdef DEV_NETMAP
4684 		/*
4685 		 * an init() while a netmap client is active must
4686 		 * preserve the rx buffers passed to userspace.
4687 		 */
4688 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4689 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4690 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4691 		}
4692 #endif /* DEV_NETMAP */
4693 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4694 	}
4695 
4696 	/*
4697 	 * Set PTHRESH for improved jumbo performance
4698 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4699 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4700 	 * Only write to RXDCTL(1) if there is a need for different
4701 	 * settings.
4702 	 */
4703 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4704 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4705 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4706 	    (if_getmtu(ifp) > ETHERMTU)) {
4707 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4708 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4709 	} else if (adapter->hw.mac.type == e1000_82574) {
4710 		for (int i = 0; i < adapter->num_queues; i++) {
4711 			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4712 
4713 			rxdctl |= 0x20; /* PTHRESH */
4714 			rxdctl |= 4 << 8; /* HTHRESH */
4715 			rxdctl |= 4 << 16;/* WTHRESH */
4716 			rxdctl |= 1 << 24; /* Switch to granularity */
4717 			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4718 		}
4719 	}
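
	/*
	 * Worked example for the 82574 RXDCTL value above (a sketch,
	 * not normative): 0x20 | 4<<8 | 4<<16 | 1<<24 = 0x01040420,
	 * i.e. PTHRESH=32, HTHRESH=4, WTHRESH=4 with descriptor
	 * granularity selected.
	 */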
4720 
4721 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4722 		if (if_getmtu(ifp) > ETHERMTU)
4723 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4724 		else
4725 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4726 	}
4727 
4728         /* Make sure VLAN Filters are off */
4729         rctl &= ~E1000_RCTL_VFE;
4730 
4731 	if (adapter->rx_mbuf_sz == MCLBYTES)
4732 		rctl |= E1000_RCTL_SZ_2048;
4733 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4734 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4735 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4736 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4737 
4738 	/* Ensure we use a DTYPE of 00 here by clearing bits 11:10 */
4739 	rctl &= ~0x00000C00;
4740 	/* Write out the settings */
4741 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4742 
4743 	return;
4744 }
4745 
4746 
4747 /*********************************************************************
4748  *
4749  *  This routine executes in interrupt context. It replenishes
4750  *  the mbufs in the descriptor ring and sends data which has been
4751  *  DMA'd into host memory to the upper layer.
4752  *
4753  *  We loop at most count times if count is > 0, or until done if
4754  *  count < 0.
4755  *
4756  *  For polling we also now return the number of cleaned packets
4757  *********************************************************************/
4758 static bool
4759 em_rxeof(struct rx_ring *rxr, int count, int *done)
4760 {
4761 	struct adapter		*adapter = rxr->adapter;
4762 	if_t ifp = adapter->ifp;
4763 	struct mbuf		*mp, *sendmp;
4764 	u32			status = 0;
4765 	u16 			len;
4766 	int			i, processed, rxdone = 0;
4767 	bool			eop;
4768 	union e1000_rx_desc_extended	*cur;
4769 
4770 	EM_RX_LOCK(rxr);
4771 
4772 	/* Sync the ring */
4773 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4774 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4775 
4776 
4777 #ifdef DEV_NETMAP
4778 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4779 		EM_RX_UNLOCK(rxr);
4780 		return (FALSE);
4781 	}
4782 #endif /* DEV_NETMAP */
4783 
4784 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4785 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4786 			break;
4787 
4788 		cur = &rxr->rx_base[i];
4789 		status = le32toh(cur->wb.upper.status_error);
4790 		mp = sendmp = NULL;
4791 
4792 		if ((status & E1000_RXD_STAT_DD) == 0)
4793 			break;
4794 
4795 		len = le16toh(cur->wb.upper.length);
4796 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4797 
4798 		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4799 		    (rxr->discard == TRUE)) {
4800 			adapter->dropped_pkts++;
4801 			++rxr->rx_discarded;
4802 			if (!eop) /* Catch subsequent segs */
4803 				rxr->discard = TRUE;
4804 			else
4805 				rxr->discard = FALSE;
4806 			em_rx_discard(rxr, i);
4807 			goto next_desc;
4808 		}
4809 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4810 
4811 		/* Assign correct length to the current fragment */
4812 		mp = rxr->rx_buffers[i].m_head;
4813 		mp->m_len = len;
4814 
4815 		/* Trigger for refresh */
4816 		rxr->rx_buffers[i].m_head = NULL;
4817 
4818 		/* First segment? */
4819 		if (rxr->fmp == NULL) {
4820 			mp->m_pkthdr.len = len;
4821 			rxr->fmp = rxr->lmp = mp;
4822 		} else {
4823 			/* Chain mbuf's together */
4824 			mp->m_flags &= ~M_PKTHDR;
4825 			rxr->lmp->m_next = mp;
4826 			rxr->lmp = mp;
4827 			rxr->fmp->m_pkthdr.len += len;
4828 		}
4829 
4830 		if (eop) {
4831 			--count;
4832 			sendmp = rxr->fmp;
4833 			if_setrcvif(sendmp, ifp);
4834 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4835 			em_receive_checksum(status, sendmp);
4836 #ifndef __NO_STRICT_ALIGNMENT
4837 			if (adapter->hw.mac.max_frame_size >
4838 			    (MCLBYTES - ETHER_ALIGN) &&
4839 			    em_fixup_rx(rxr) != 0)
4840 				goto skip;
4841 #endif
4842 			if (status & E1000_RXD_STAT_VP) {
4843 				if_setvtag(sendmp,
4844 				    le16toh(cur->wb.upper.vlan));
4845 				sendmp->m_flags |= M_VLANTAG;
4846 			}
4847 #ifndef __NO_STRICT_ALIGNMENT
4848 skip:
4849 #endif
4850 			rxr->fmp = rxr->lmp = NULL;
4851 		}
4852 next_desc:
4853 		/* Sync the ring */
4854 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4855 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4856 
4857 		/* Zero out the receive descriptors status. */
4858 		cur->wb.upper.status_error &= htole32(~0xFF);
4859 		++rxdone;	/* cumulative for POLL */
4860 		++processed;
4861 
4862 		/* Advance our pointers to the next descriptor. */
4863 		if (++i == adapter->num_rx_desc)
4864 			i = 0;
4865 
4866 		/* Send to the stack */
4867 		if (sendmp != NULL) {
4868 			rxr->next_to_check = i;
4869 			EM_RX_UNLOCK(rxr);
4870 			if_input(ifp, sendmp);
4871 			EM_RX_LOCK(rxr);
4872 			i = rxr->next_to_check;
4873 		}
4874 
4875 		/* Only refresh mbufs every 8 descriptors */
4876 		if (processed == 8) {
4877 			em_refresh_mbufs(rxr, i);
4878 			processed = 0;
4879 		}
4880 	}
4881 
4882 	/* Catch any remaining refresh work */
4883 	if (e1000_rx_unrefreshed(rxr))
4884 		em_refresh_mbufs(rxr, i);
4885 
4886 	rxr->next_to_check = i;
4887 	if (done != NULL)
4888 		*done = rxdone;
4889 	EM_RX_UNLOCK(rxr);
4890 
4891 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4892 }
4893 
4894 static __inline void
4895 em_rx_discard(struct rx_ring *rxr, int i)
4896 {
4897 	struct em_rxbuffer	*rbuf;
4898 
4899 	rbuf = &rxr->rx_buffers[i];
4900 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4901 
4902 	/* Free any previous pieces */
4903 	if (rxr->fmp != NULL) {
4904 		rxr->fmp->m_flags |= M_PKTHDR;
4905 		m_freem(rxr->fmp);
4906 		rxr->fmp = NULL;
4907 		rxr->lmp = NULL;
4908 	}
4909 	/*
4910 	** Free buffer and allow em_refresh_mbufs()
4911 	** to clean up and recharge buffer.
4912 	*/
4913 	if (rbuf->m_head) {
4914 		m_free(rbuf->m_head);
4915 		rbuf->m_head = NULL;
4916 	}
4917 	return;
4918 }
4919 
4920 #ifndef __NO_STRICT_ALIGNMENT
4921 /*
4922  * When jumbo frames are enabled we should realign the entire payload on
4923  * architectures with strict alignment. This is a serious design mistake of
4924  * the 8254x, as it nullifies DMA operations: the 8254x only allows RX
4925  * buffer sizes of 2048/4096/8192/16384, while what we really want is
4926  * 2048 - ETHER_ALIGN so that the payload is aligned. On architectures
4927  * without strict alignment the 8254x still performs unaligned memory
4928  * accesses, which reduces performance as well. To avoid copying an entire
4929  * frame just to align it, we allocate a new mbuf and copy the ethernet
4930  * header into it; the new mbuf is prepended onto the existing mbuf chain.
4931  *
4932  * Be aware that best performance from the 8254x is achieved only when
4933  * jumbo frames are not used at all on architectures with strict alignment.
4934  */
4935 static int
4936 em_fixup_rx(struct rx_ring *rxr)
4937 {
4938 	struct adapter *adapter = rxr->adapter;
4939 	struct mbuf *m, *n;
4940 	int error;
4941 
4942 	error = 0;
4943 	m = rxr->fmp;
4944 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4945 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4946 		m->m_data += ETHER_HDR_LEN;
4947 	} else {
4948 		MGETHDR(n, M_NOWAIT, MT_DATA);
4949 		if (n != NULL) {
4950 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4951 			m->m_data += ETHER_HDR_LEN;
4952 			m->m_len -= ETHER_HDR_LEN;
4953 			n->m_len = ETHER_HDR_LEN;
4954 			M_MOVE_PKTHDR(n, m);
4955 			n->m_next = m;
4956 			rxr->fmp = n;
4957 		} else {
4958 			adapter->dropped_pkts++;
4959 			m_freem(rxr->fmp);
4960 			rxr->fmp = NULL;
4961 			error = ENOMEM;
4962 		}
4963 	}
4964 
4965 	return (error);
4966 }
4967 #endif
4968 
4969 static void
4970 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4971 {
4972 	rxd->read.buffer_addr = htole64(rxbuf->paddr);
4973 	/* DD bits must be cleared */
4974 	rxd->wb.upper.status_error = 0;
4975 }
4976 
4977 /*********************************************************************
4978  *
4979  *  Verify that the hardware indicated that the checksum is valid.
4980  *  Inform the stack about the status of the checksum so that the
4981  *  stack doesn't spend time verifying it.
4982  *
4983  *********************************************************************/
4984 static void
4985 em_receive_checksum(uint32_t status, struct mbuf *mp)
4986 {
4987 	mp->m_pkthdr.csum_flags = 0;
4988 
4989 	/* Ignore Checksum bit is set */
4990 	if (status & E1000_RXD_STAT_IXSM)
4991 		return;
4992 
4993 	/* If the IP checksum exists and there is no IP Checksum error */
4994 	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
4995 		E1000_RXD_STAT_IPCS) {
4996 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4997 	}
4998 
4999 	/* TCP or UDP checksum */
5000 	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5001 	    E1000_RXD_STAT_TCPCS) {
5002 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5003 		mp->m_pkthdr.csum_data = htons(0xffff);
5004 	}
5005 	if (status & E1000_RXD_STAT_UDPCS) {
5006 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5007 		mp->m_pkthdr.csum_data = htons(0xffff);
5008 	}
5009 }
5010 
5011 /*
5012  * This routine is run via a vlan
5013  * config EVENT
5014  */
5015 static void
5016 em_register_vlan(void *arg, if_t ifp, u16 vtag)
5017 {
5018 	struct adapter	*adapter = if_getsoftc(ifp);
5019 	u32		index, bit;
5020 
5021 	if ((void *)adapter != arg)	/* Not our event */
5022 		return;
5023 
5024 	if ((vtag == 0) || (vtag > 4095))	/* Invalid ID */
5025 		return;
5026 
5027 	EM_CORE_LOCK(adapter);
5028 	index = (vtag >> 5) & 0x7F;
5029 	bit = vtag & 0x1F;
5030 	adapter->shadow_vfta[index] |= (1 << bit);
5031 	++adapter->num_vlans;
5032 	/* Re-init to load the changes */
5033 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5034 		em_init_locked(adapter);
5035 	EM_CORE_UNLOCK(adapter);
5036 }
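
/*
 * Worked example of the VFTA indexing above: the 4096 possible VLAN
 * IDs map onto 128 32-bit registers, so for vtag = 1000, index =
 * 1000 >> 5 = 31 and bit = 1000 & 0x1F = 8; bit 8 of shadow_vfta[31]
 * is set.
 */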
5037 
5038 /*
5039  * This routine is run via a vlan
5040  * unconfig EVENT
5041  */
5042 static void
5043 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5044 {
5045 	struct adapter	*adapter = if_getsoftc(ifp);
5046 	u32		index, bit;
5047 
5048 	if (adapter != arg)
5049 		return;
5050 
5051 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
5052 		return;
5053 
5054 	EM_CORE_LOCK(adapter);
5055 	index = (vtag >> 5) & 0x7F;
5056 	bit = vtag & 0x1F;
5057 	adapter->shadow_vfta[index] &= ~(1 << bit);
5058 	--adapter->num_vlans;
5059 	/* Re-init to load the changes */
5060 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5061 		em_init_locked(adapter);
5062 	EM_CORE_UNLOCK(adapter);
5063 }
5064 
5065 static void
5066 em_setup_vlan_hw_support(struct adapter *adapter)
5067 {
5068 	struct e1000_hw *hw = &adapter->hw;
5069 	u32             reg;
5070 
5071 	/*
5072 	** We get here through init_locked, meaning a soft
5073 	** reset, which has already cleared the VFTA and
5074 	** other state. If no vlans have been registered,
5075 	** do nothing.
5076 	*/
5077 	if (adapter->num_vlans == 0)
5078 		return;
5079 
5080 	/*
5081 	** A soft reset zeroes out the VFTA, so
5082 	** we need to repopulate it now.
5083 	*/
5084 	for (int i = 0; i < EM_VFTA_SIZE; i++)
5085 		if (adapter->shadow_vfta[i] != 0)
5086 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5087 			    i, adapter->shadow_vfta[i]);
5088 
5089 	reg = E1000_READ_REG(hw, E1000_CTRL);
5090 	reg |= E1000_CTRL_VME;
5091 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5092 
5093 	/* Enable the Filter Table */
5094 	reg = E1000_READ_REG(hw, E1000_RCTL);
5095 	reg &= ~E1000_RCTL_CFIEN;
5096 	reg |= E1000_RCTL_VFE;
5097 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
5098 }
5099 
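/*
 * Unmask interrupts.  On the 82574, the only MSI-X capable part this
 * driver handles, the queue causes accumulated in adapter->ims are
 * also written to EIAC so that they auto-clear when a vector fires.
 */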
5100 static void
5101 em_enable_intr(struct adapter *adapter)
5102 {
5103 	struct e1000_hw *hw = &adapter->hw;
5104 	u32 ims_mask = IMS_ENABLE_MASK;
5105 
5106 	if (hw->mac.type == e1000_82574) {
5107 		E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
5108 		ims_mask |= adapter->ims;
5109 	}
5110 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5111 }
5112 
5113 static void
5114 em_disable_intr(struct adapter *adapter)
5115 {
5116 	struct e1000_hw *hw = &adapter->hw;
5117 
5118 	if (hw->mac.type == e1000_82574)
5119 		E1000_WRITE_REG(hw, EM_EIAC, 0);
5120 	E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
5121 }
5122 
5123 /*
5124  * Bit of a misnomer: what this really does is enable
5125  * OS management of the system, i.e. it disables the
5126  * special hardware management features.
5127  */
5128 static void
5129 em_init_manageability(struct adapter *adapter)
5130 {
5131 	/* A shared code workaround */
5132 #define E1000_82542_MANC2H E1000_MANC2H
5133 	if (adapter->has_manage) {
5134 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5135 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5136 
5137 		/* disable hardware interception of ARP */
5138 		manc &= ~(E1000_MANC_ARP_EN);
5139 
5140 		/* enable receiving management packets to the host */
5141 		manc |= E1000_MANC_EN_MNG2HOST;
5142 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5143 #define E1000_MNG2HOST_PORT_664 (1 << 6)
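		/*
		 * Bits 5 and 6 of MANC2H correspond to the standard
		 * remote-management UDP ports 623 (RMCP/ASF) and 664
		 * (secure RMCP); setting them lets those packets reach
		 * the host instead of only the management controller.
		 */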
5144 		manc2h |= E1000_MNG2HOST_PORT_623;
5145 		manc2h |= E1000_MNG2HOST_PORT_664;
5146 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5147 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5148 	}
5149 }
5150 
5151 /*
5152  * Give control back to hardware management
5153  * controller if there is one.
5154  */
5155 static void
5156 em_release_manageability(struct adapter *adapter)
5157 {
5158 	if (adapter->has_manage) {
5159 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5160 
5161 		/* re-enable hardware interception of ARP */
5162 		manc |= E1000_MANC_ARP_EN;
5163 		manc &= ~E1000_MANC_EN_MNG2HOST;
5164 
5165 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5166 	}
5167 }
5168 
5169 /*
5170  * em_get_hw_control sets the {CTRL_EXT|SWSM}:DRV_LOAD bit.
5171  * For ASF and Pass Through versions of f/w this means
5172  * that the driver is loaded. For AMT version type f/w
5173  * this means that the network i/f is open.
5174  */
5175 static void
5176 em_get_hw_control(struct adapter *adapter)
5177 {
5178 	u32 ctrl_ext, swsm;
5179 
5180 	if (adapter->hw.mac.type == e1000_82573) {
5181 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5182 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5183 		    swsm | E1000_SWSM_DRV_LOAD);
5184 		return;
5185 	}
5186 	/* else */
5187 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5188 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5189 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5190 	return;
5191 }
5192 
5193 /*
5194  * em_release_hw_control resets the {CTRL_EXT|SWSM}:DRV_LOAD bit.
5195  * For ASF and Pass Through versions of f/w this means that
5196  * the driver is no longer loaded. For AMT versions of the
5197  * f/w this means that the network i/f is closed.
5198  */
5199 static void
5200 em_release_hw_control(struct adapter *adapter)
5201 {
5202 	u32 ctrl_ext, swsm;
5203 
5204 	if (!adapter->has_manage)
5205 		return;
5206 
5207 	if (adapter->hw.mac.type == e1000_82573) {
5208 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5209 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5210 		    swsm & ~E1000_SWSM_DRV_LOAD);
5211 		return;
5212 	}
5213 	/* else */
5214 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5215 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5216 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5217 	return;
5218 }
5219 
5220 static int
5221 em_is_valid_ether_addr(u8 *addr)
5222 {
5223 	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
5224 
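	/* Reject group (multicast/broadcast) addresses and all-zeros. */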
5225 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5226 		return (FALSE);
5227 	}
5228 
5229 	return (TRUE);
5230 }
5231 
5232 /*
5233 ** Parse the interface capabilities with regard
5234 ** to both system management and wake-on-lan for
5235 ** later use.
5236 */
5237 static void
5238 em_get_wakeup(device_t dev)
5239 {
5240 	struct adapter	*adapter = device_get_softc(dev);
5241 	u16		eeprom_data = 0, device_id, apme_mask;
5242 
5243 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5244 	apme_mask = EM_EEPROM_APME;
5245 
5246 	switch (adapter->hw.mac.type) {
5247 	case e1000_82573:
5248 	case e1000_82583:
5249 		adapter->has_amt = TRUE;
5250 		/* Falls thru */
5251 	case e1000_82571:
5252 	case e1000_82572:
5253 	case e1000_80003es2lan:
5254 		if (adapter->hw.bus.func == 1) {
5255 			e1000_read_nvm(&adapter->hw,
5256 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5257 			break;
5258 		} else
5259 			e1000_read_nvm(&adapter->hw,
5260 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5261 		break;
5262 	case e1000_ich8lan:
5263 	case e1000_ich9lan:
5264 	case e1000_ich10lan:
5265 	case e1000_pchlan:
5266 	case e1000_pch2lan:
5267 		apme_mask = E1000_WUC_APME;
5268 		adapter->has_amt = TRUE;
5269 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5270 		break;
5271 	default:
5272 		e1000_read_nvm(&adapter->hw,
5273 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5274 		break;
5275 	}
5276 	if (eeprom_data & apme_mask)
5277 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5278 	/*
5279 	 * We have the eeprom settings; now apply the special cases
5280 	 * where the eeprom may be wrong or the board won't support
5281 	 * wake-on-lan on a particular port.
5282 	 */
5283 	device_id = pci_get_device(dev);
5284 	switch (device_id) {
5285 	case E1000_DEV_ID_82571EB_FIBER:
5286 		/* Wake events are only supported on port A of dual-fiber
5287 		 * adapters, regardless of the eeprom setting. */
5288 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5289 		    E1000_STATUS_FUNC_1)
5290 			adapter->wol = 0;
5291 		break;
5292 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5293 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5294 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5295 		/* if quad port adapter, disable WoL on all but port A */
5296 		if (global_quad_port_a != 0)
5297 			adapter->wol = 0;
5298 		/* Reset for multiple quad port adapters */
5299 		if (++global_quad_port_a == 4)
5300 			global_quad_port_a = 0;
5301 		break;
5302 	}
5303 	return;
5304 }
5305 
5306 
5307 /*
5308  * Enable PCI Wake On Lan capability
5309  */
5310 static void
5311 em_enable_wakeup(device_t dev)
5312 {
5313 	struct adapter	*adapter = device_get_softc(dev);
5314 	if_t ifp = adapter->ifp;
5315 	u32		pmc, ctrl, ctrl_ext, rctl;
5316 	u16     	status;
5317 
5318 	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5319 		return;
5320 
5321 	/* Advertise the wakeup capability */
5322 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5323 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5324 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5325 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5326 
5327 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5328 	    (adapter->hw.mac.type == e1000_pchlan) ||
5329 	    (adapter->hw.mac.type == e1000_ich9lan) ||
5330 	    (adapter->hw.mac.type == e1000_ich10lan))
5331 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5332 
5333 	/* Keep the laser running on Fiber adapters */
5334 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5335 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5336 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5337 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5338 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5339 	}
5340 
5341 	/*
5342 	** Determine the type of wakeup: wol arrives here from
5343 	** em_get_wakeup() with the magic and multicast bits set.
5344 	*/
5345 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5346 		adapter->wol &= ~E1000_WUFC_MAG;
5347 
5348 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5349 		adapter->wol &= ~E1000_WUFC_MC;
5350 	else {
5351 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5352 		rctl |= E1000_RCTL_MPE;
5353 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5354 	}
5355 
5356 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5357 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5358 		if (em_enable_phy_wakeup(adapter))
5359 			return;
5360 	} else {
5361 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5362 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5363 	}
5364 
5365 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5366 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5367 
5368 	/* Request PME */
5369 	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5370 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5371 	if (if_getcapenable(ifp) & IFCAP_WOL)
5372 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5373 	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5374 
5375 	return;
5376 }
5377 
5378 /*
5379 ** WOL on the newer chipsets (pchlan) requires the wake
5380 ** configuration to be copied into the PHY registers
5381 */
5382 static int
5383 em_enable_phy_wakeup(struct adapter *adapter)
5384 {
5385 	struct e1000_hw *hw = &adapter->hw;
5386 	u32 mreg, ret = 0;
5387 	u16 preg;
5388 
5389 	/* copy MAC RARs to PHY RARs */
5390 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5391 
5392 	/* copy MAC MTA to PHY MTA */
5393 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5394 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5395 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5396 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5397 		    (u16)((mreg >> 16) & 0xFFFF));
5398 	}
5399 
5400 	/* configure PHY Rx Control register */
5401 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5402 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5403 	if (mreg & E1000_RCTL_UPE)
5404 		preg |= BM_RCTL_UPE;
5405 	if (mreg & E1000_RCTL_MPE)
5406 		preg |= BM_RCTL_MPE;
5407 	preg &= ~(BM_RCTL_MO_MASK);
5408 	if (mreg & E1000_RCTL_MO_3)
5409 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5410 				<< BM_RCTL_MO_SHIFT);
5411 	if (mreg & E1000_RCTL_BAM)
5412 		preg |= BM_RCTL_BAM;
5413 	if (mreg & E1000_RCTL_PMCF)
5414 		preg |= BM_RCTL_PMCF;
5415 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5416 	if (mreg & E1000_CTRL_RFCE)
5417 		preg |= BM_RCTL_RFCE;
5418 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5419 
5420 	/* enable PHY wakeup in MAC register */
5421 	E1000_WRITE_REG(hw, E1000_WUC,
5422 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5423 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5424 
5425 	/* configure and enable PHY wakeup in PHY registers */
5426 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5427 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5428 
5429 	/* activate PHY wakeup */
5430 	ret = hw->phy.ops.acquire(hw);
5431 	if (ret) {
5432 		printf("Could not acquire PHY\n");
5433 		return ret;
5434 	}
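	/*
	 * The wakeup enable bits live on PHY page 769
	 * (BM_WUC_ENABLE_PAGE): select that page, then set the
	 * enable and host-wakeup bits in BM_WUC_ENABLE_REG.
	 */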
5435 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5436 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5437 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5438 	if (ret) {
5439 		printf("Could not read PHY page 769\n");
5440 		goto out;
5441 	}
5442 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5443 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5444 	if (ret)
5445 		printf("Could not set PHY Host Wakeup bit\n");
5446 out:
5447 	hw->phy.ops.release(hw);
5448 
5449 	return ret;
5450 }
5451 
5452 static void
5453 em_led_func(void *arg, int onoff)
5454 {
5455 	struct adapter	*adapter = arg;
5456 
5457 	EM_CORE_LOCK(adapter);
5458 	if (onoff) {
5459 		e1000_setup_led(&adapter->hw);
5460 		e1000_led_on(&adapter->hw);
5461 	} else {
5462 		e1000_led_off(&adapter->hw);
5463 		e1000_cleanup_led(&adapter->hw);
5464 	}
5465 	EM_CORE_UNLOCK(adapter);
5466 }
5467 
5468 /*
5469 ** Disable the L0s and L1 PCIe link power states
5470 */
5471 static void
5472 em_disable_aspm(struct adapter *adapter)
5473 {
5474 	int		base, reg;
5475 	u16		link_cap, link_ctrl;
5476 	device_t	dev = adapter->dev;
5477 
5478 	switch (adapter->hw.mac.type) {
5479 		case e1000_82573:
5480 		case e1000_82574:
5481 		case e1000_82583:
5482 			break;
5483 		default:
5484 			return;
5485 	}
5486 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5487 		return;
5488 	reg = base + PCIER_LINK_CAP;
5489 	link_cap = pci_read_config(dev, reg, 2);
5490 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5491 		return;
5492 	reg = base + PCIER_LINK_CTL;
5493 	link_ctrl = pci_read_config(dev, reg, 2);
5494 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5495 	pci_write_config(dev, reg, link_ctrl, 2);
5496 	return;
5497 }
5498 
5499 /**********************************************************************
5500  *
5501  *  Update the board statistics counters.
5502  *
5503  **********************************************************************/
5504 static void
5505 em_update_stats_counters(struct adapter *adapter)
5506 {
5507 
5508 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5509 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5510 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5511 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5512 	}
5513 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5514 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5515 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5516 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5517 
5518 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5519 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5520 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5521 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5522 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5523 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5524 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5525 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5526 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5527 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5528 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5529 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5530 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5531 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5532 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5533 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5534 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5535 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5536 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5537 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5538 
5539 	/* For the 64-bit byte counters the low dword must be read first. */
5540 	/* Both registers clear on the read of the high dword */
5541 
5542 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5543 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5544 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5545 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5546 
5547 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5548 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5549 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5550 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5551 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5552 
5553 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5554 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5555 
5556 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5557 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5558 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5559 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5560 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5561 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5562 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5563 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5564 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5565 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5566 
5567 	/* Interrupt Counts */
5568 
5569 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5570 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5571 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5572 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5573 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5574 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5575 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5576 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5577 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5578 
5579 	if (adapter->hw.mac.type >= e1000_82543) {
5580 		adapter->stats.algnerrc +=
5581 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5582 		adapter->stats.rxerrc +=
5583 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5584 		adapter->stats.tncrs +=
5585 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5586 		adapter->stats.cexterr +=
5587 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5588 		adapter->stats.tsctc +=
5589 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5590 		adapter->stats.tsctfc +=
5591 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5592 	}
5593 }
5594 
5595 static uint64_t
5596 em_get_counter(if_t ifp, ift_counter cnt)
5597 {
5598 	struct adapter *adapter;
5599 
5600 	adapter = if_getsoftc(ifp);
5601 
5602 	switch (cnt) {
5603 	case IFCOUNTER_COLLISIONS:
5604 		return (adapter->stats.colc);
5605 	case IFCOUNTER_IERRORS:
5606 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5607 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5608 		    adapter->stats.ruc + adapter->stats.roc +
5609 		    adapter->stats.mpc + adapter->stats.cexterr);
5610 	case IFCOUNTER_OERRORS:
5611 		return (adapter->stats.ecol + adapter->stats.latecol +
5612 		    adapter->watchdog_events);
5613 	default:
5614 		return (if_get_counter_default(ifp, cnt));
5615 	}
5616 }
5617 
5618 /* Export a single 32-bit register via a read-only sysctl. */
5619 static int
5620 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5621 {
5622 	struct adapter *adapter;
5623 	u_int val;
5624 
5625 	adapter = oidp->oid_arg1;
5626 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5627 	return (sysctl_handle_int(oidp, &val, 0, req));
5628 }
5629 
5630 /*
5631  * Add sysctl variables, one per statistic, to the system.
5632  */
5633 static void
5634 em_add_hw_stats(struct adapter *adapter)
5635 {
5636 	device_t dev = adapter->dev;
5637 
5638 	struct tx_ring *txr = adapter->tx_rings;
5639 	struct rx_ring *rxr = adapter->rx_rings;
5640 
5641 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5642 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5643 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5644 	struct e1000_hw_stats *stats = &adapter->stats;
5645 
5646 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5647 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5648 
5649 #define QUEUE_NAME_LEN 32
5650 	char namebuf[QUEUE_NAME_LEN];
5651 
5652 	/* Driver Statistics */
5653 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5654 			CTLFLAG_RD, &adapter->dropped_pkts,
5655 			"Driver dropped packets");
5656 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5657 			CTLFLAG_RD, &adapter->link_irq,
5658 			"Link MSIX IRQ Handled");
5659 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5660 			 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5661 			 "Defragmenting mbuf chain failed");
5662 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5663 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5664 			"Driver tx dma failure in xmit");
5665 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5666 			CTLFLAG_RD, &adapter->rx_overruns,
5667 			"RX overruns");
5668 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5669 			CTLFLAG_RD, &adapter->watchdog_events,
5670 			"Watchdog timeouts");
5671 
5672 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5673 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5674 			em_sysctl_reg_handler, "IU",
5675 			"Device Control Register");
5676 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5677 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5678 			em_sysctl_reg_handler, "IU",
5679 			"Receiver Control Register");
5680 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5681 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5682 			"Flow Control High Watermark");
5683 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5684 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5685 			"Flow Control Low Watermark");
5686 
5687 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5688 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5689 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5690 					    CTLFLAG_RD, NULL, "TX Queue Name");
5691 		queue_list = SYSCTL_CHILDREN(queue_node);
5692 
5693 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5694 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5695 				E1000_TDH(txr->me),
5696 				em_sysctl_reg_handler, "IU",
5697  				"Transmit Descriptor Head");
5698 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5699 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5700 				E1000_TDT(txr->me),
5701 				em_sysctl_reg_handler, "IU",
5702  				"Transmit Descriptor Tail");
5703 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5704 				CTLFLAG_RD, &txr->tx_irq,
5705 				"Queue MSI-X Transmit Interrupts");
5706 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5707 				CTLFLAG_RD, &txr->no_desc_avail,
5708 				"Queue No Descriptor Available");
5709 
5710 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5711 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5712 					    CTLFLAG_RD, NULL, "RX Queue Name");
5713 		queue_list = SYSCTL_CHILDREN(queue_node);
5714 
5715 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5716 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5717 				E1000_RDH(rxr->me),
5718 				em_sysctl_reg_handler, "IU",
5719 				"Receive Descriptor Head");
5720 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5721 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5722 				E1000_RDT(rxr->me),
5723 				em_sysctl_reg_handler, "IU",
5724 				"Receive Descriptor Tail");
5725 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5726 				CTLFLAG_RD, &rxr->rx_irq,
5727 				"Queue MSI-X Receive Interrupts");
5728 	}
5729 
5730 	/* MAC stats get their own sub node */
5731 
5732 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5733 				    CTLFLAG_RD, NULL, "Statistics");
5734 	stat_list = SYSCTL_CHILDREN(stat_node);
5735 
5736 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5737 			CTLFLAG_RD, &stats->ecol,
5738 			"Excessive collisions");
5739 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5740 			CTLFLAG_RD, &stats->scc,
5741 			"Single collisions");
5742 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5743 			CTLFLAG_RD, &stats->mcc,
5744 			"Multiple collisions");
5745 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5746 			CTLFLAG_RD, &stats->latecol,
5747 			"Late collisions");
5748 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5749 			CTLFLAG_RD, &stats->colc,
5750 			"Collision Count");
5751 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5752 			CTLFLAG_RD, &adapter->stats.symerrs,
5753 			"Symbol Errors");
5754 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5755 			CTLFLAG_RD, &adapter->stats.sec,
5756 			"Sequence Errors");
5757 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5758 			CTLFLAG_RD, &adapter->stats.dc,
5759 			"Defer Count");
5760 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5761 			CTLFLAG_RD, &adapter->stats.mpc,
5762 			"Missed Packets");
5763 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5764 			CTLFLAG_RD, &adapter->stats.rnbc,
5765 			"Receive No Buffers");
5766 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5767 			CTLFLAG_RD, &adapter->stats.ruc,
5768 			"Receive Undersize");
5769 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5770 			CTLFLAG_RD, &adapter->stats.rfc,
5771 			"Fragmented Packets Received");
5772 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5773 			CTLFLAG_RD, &adapter->stats.roc,
5774 			"Oversized Packets Received");
5775 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5776 			CTLFLAG_RD, &adapter->stats.rjc,
5777 			"Received Jabber");
5778 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5779 			CTLFLAG_RD, &adapter->stats.rxerrc,
5780 			"Receive Errors");
5781 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5782 			CTLFLAG_RD, &adapter->stats.crcerrs,
5783 			"CRC errors");
5784 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5785 			CTLFLAG_RD, &adapter->stats.algnerrc,
5786 			"Alignment Errors");
5787 	/* On 82575 these are collision counts */
5788 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5789 			CTLFLAG_RD, &adapter->stats.cexterr,
5790 			"Collision/Carrier extension errors");
5791 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5792 			CTLFLAG_RD, &adapter->stats.xonrxc,
5793 			"XON Received");
5794 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5795 			CTLFLAG_RD, &adapter->stats.xontxc,
5796 			"XON Transmitted");
5797 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5798 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5799 			"XOFF Received");
5800 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5801 			CTLFLAG_RD, &adapter->stats.xofftxc,
5802 			"XOFF Transmitted");
5803 
5804 	/* Packet Reception Stats */
5805 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5806 			CTLFLAG_RD, &adapter->stats.tpr,
5807 			"Total Packets Received");
5808 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5809 			CTLFLAG_RD, &adapter->stats.gprc,
5810 			"Good Packets Received");
5811 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5812 			CTLFLAG_RD, &adapter->stats.bprc,
5813 			"Broadcast Packets Received");
5814 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5815 			CTLFLAG_RD, &adapter->stats.mprc,
5816 			"Multicast Packets Received");
5817 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5818 			CTLFLAG_RD, &adapter->stats.prc64,
5819 			"64 byte frames received");
5820 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5821 			CTLFLAG_RD, &adapter->stats.prc127,
5822 			"65-127 byte frames received");
5823 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5824 			CTLFLAG_RD, &adapter->stats.prc255,
5825 			"128-255 byte frames received");
5826 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5827 			CTLFLAG_RD, &adapter->stats.prc511,
5828 			"256-511 byte frames received");
5829 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5830 			CTLFLAG_RD, &adapter->stats.prc1023,
5831 			"512-1023 byte frames received");
5832 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5833 			CTLFLAG_RD, &adapter->stats.prc1522,
5834 			"1024-1522 byte frames received");
5835  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5836  			CTLFLAG_RD, &adapter->stats.gorc,
5837  			"Good Octets Received");
5838 
5839 	/* Packet Transmission Stats */
5840  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5841  			CTLFLAG_RD, &adapter->stats.gotc,
5842  			"Good Octets Transmitted");
5843 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5844 			CTLFLAG_RD, &adapter->stats.tpt,
5845 			"Total Packets Transmitted");
5846 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5847 			CTLFLAG_RD, &adapter->stats.gptc,
5848 			"Good Packets Transmitted");
5849 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5850 			CTLFLAG_RD, &adapter->stats.bptc,
5851 			"Broadcast Packets Transmitted");
5852 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5853 			CTLFLAG_RD, &adapter->stats.mptc,
5854 			"Multicast Packets Transmitted");
5855 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5856 			CTLFLAG_RD, &adapter->stats.ptc64,
5857 			"64 byte frames transmitted");
5858 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5859 			CTLFLAG_RD, &adapter->stats.ptc127,
5860 			"65-127 byte frames transmitted");
5861 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5862 			CTLFLAG_RD, &adapter->stats.ptc255,
5863 			"128-255 byte frames transmitted");
5864 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5865 			CTLFLAG_RD, &adapter->stats.ptc511,
5866 			"256-511 byte frames transmitted");
5867 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5868 			CTLFLAG_RD, &adapter->stats.ptc1023,
5869 			"512-1023 byte frames transmitted");
5870 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5871 			CTLFLAG_RD, &adapter->stats.ptc1522,
5872 			"1024-1522 byte frames transmitted");
5873 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5874 			CTLFLAG_RD, &adapter->stats.tsctc,
5875 			"TSO Contexts Transmitted");
5876 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5877 			CTLFLAG_RD, &adapter->stats.tsctfc,
5878 			"TSO Contexts Failed");
5879 
5880 
5881 	/* Interrupt Stats */
5882 
5883 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5884 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5885 	int_list = SYSCTL_CHILDREN(int_node);
5886 
5887 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5888 			CTLFLAG_RD, &adapter->stats.iac,
5889 			"Interrupt Assertion Count");
5890 
5891 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5892 			CTLFLAG_RD, &adapter->stats.icrxptc,
5893 			"Interrupt Cause Rx Pkt Timer Expire Count");
5894 
5895 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5896 			CTLFLAG_RD, &adapter->stats.icrxatc,
5897 			"Interrupt Cause Rx Abs Timer Expire Count");
5898 
5899 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5900 			CTLFLAG_RD, &adapter->stats.ictxptc,
5901 			"Interrupt Cause Tx Pkt Timer Expire Count");
5902 
5903 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5904 			CTLFLAG_RD, &adapter->stats.ictxatc,
5905 			"Interrupt Cause Tx Abs Timer Expire Count");
5906 
5907 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5908 			CTLFLAG_RD, &adapter->stats.ictxqec,
5909 			"Interrupt Cause Tx Queue Empty Count");
5910 
5911 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5912 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5913 			"Interrupt Cause Tx Queue Min Thresh Count");
5914 
5915 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5916 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5917 			"Interrupt Cause Rx Desc Min Thresh Count");
5918 
5919 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5920 			CTLFLAG_RD, &adapter->stats.icrxoc,
5921 			"Interrupt Cause Receiver Overrun Count");
5922 }
5923 
5924 /**********************************************************************
5925  *
5926  *  This routine provides a way to dump out the adapter eeprom,
5927  *  often a useful debug/service tool. This only dumps the first
5928  *  often a useful debug/service tool. Only the first 32 words
5929  *  are dumped; everything of interest lives in that range.
5930  **********************************************************************/
5931 static int
5932 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5933 {
5934 	struct adapter *adapter = (struct adapter *)arg1;
5935 	int error;
5936 	int result;
5937 
5938 	result = -1;
5939 	error = sysctl_handle_int(oidp, &result, 0, req);
5940 
5941 	if (error || !req->newptr)
5942 		return (error);
5943 
5944 	/*
5945 	 * This value will cause a hex dump of the
5946 	 * first 32 16-bit words of the EEPROM to
5947 	 * the screen.
5948 	 */
5949 	if (result == 1)
5950 		em_print_nvm_info(adapter);
5951 
5952 	return (error);
5953 }
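/*
 * Example, assuming the handler is attached elsewhere in this file
 * as dev.em.<unit>.nvm:
 *
 *	# sysctl dev.em.0.nvm=1
 */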
5954 
5955 static void
5956 em_print_nvm_info(struct adapter *adapter)
5957 {
5958 	u16	eeprom_data;
5959 	int	i, j, row = 0;
5960 
5961 	/* It's a bit crude, but it gets the job done */
5962 	printf("\nInterface EEPROM Dump:\n");
5963 	printf("Offset\n0x0000  ");
5964 	for (i = 0, j = 0; i < 32; i++, j++) {
5965 		if (j == 8) { /* start a new row of eight words */
5966 			j = 0; ++row;
5967 			printf("\n0x00%x0  ", row);
5968 		}
5969 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5970 		printf("%04x ", eeprom_data);
5971 	}
5972 	printf("\n");
5973 }
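/*
 * The resulting dump looks like this (byte offsets at the left,
 * eight 16-bit words per row):
 *
 * Offset
 * 0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 * 0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 */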
5974 
5975 static int
5976 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5977 {
5978 	struct em_int_delay_info *info;
5979 	struct adapter *adapter;
5980 	u32 regval;
5981 	int error, usecs, ticks;
5982 
5983 	info = (struct em_int_delay_info *)arg1;
5984 	usecs = info->value;
5985 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5986 	if (error != 0 || req->newptr == NULL)
5987 		return (error);
5988 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5989 		return (EINVAL);
5990 	info->value = usecs;
5991 	ticks = EM_USECS_TO_TICKS(usecs);
5992 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5993 		ticks *= 4;
5994 
5995 	adapter = info->adapter;
5996 
5997 	EM_CORE_LOCK(adapter);
5998 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5999 	regval = (regval & ~0xffff) | (ticks & 0xffff);
6000 	/* Handle a few special cases. */
6001 	switch (info->offset) {
6002 	case E1000_RDTR:
6003 		break;
6004 	case E1000_TIDV:
6005 		if (ticks == 0) {
6006 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6007 			/* Don't write 0 into the TIDV register. */
6008 			regval++;
6009 		} else
6010 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6011 		break;
6012 	}
6013 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6014 	EM_CORE_UNLOCK(adapter);
6015 	return (0);
6016 }
6017 
6018 static void
6019 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6020 	const char *description, struct em_int_delay_info *info,
6021 	int offset, int value)
6022 {
6023 	info->adapter = adapter;
6024 	info->offset = offset;
6025 	info->value = value;
6026 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6027 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6028 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6029 	    info, 0, em_sysctl_int_delay, "I", description);
6030 }
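/*
 * Example, assuming the standard delay oids (registered elsewhere in
 * this file) such as dev.em.<unit>.rx_int_delay:
 *
 *	# sysctl dev.em.0.rx_int_delay=32
 */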
6031 
6032 static void
6033 em_set_sysctl_value(struct adapter *adapter, const char *name,
6034 	const char *description, int *limit, int value)
6035 {
6036 	*limit = value;
6037 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6038 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6039 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6040 }
6041 
6042 
6043 /*
6044 ** Set flow control using sysctl:
6045 ** Flow control values:
6046 **      0 - off
6047 **      1 - rx pause
6048 **      2 - tx pause
6049 **      3 - full
6050 */
6051 static int
6052 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6053 {
6054 	int		error;
6055 	static int	input = 3; /* default is full */
6056 	struct adapter	*adapter = (struct adapter *) arg1;
6057 
6058 	error = sysctl_handle_int(oidp, &input, 0, req);
6059 
6060 	if ((error) || (req->newptr == NULL))
6061 		return (error);
6062 
6063 	if (input == adapter->fc) /* no change? */
6064 		return (error);
6065 
6066 	switch (input) {
6067 	case e1000_fc_rx_pause:
6068 	case e1000_fc_tx_pause:
6069 	case e1000_fc_full:
6070 	case e1000_fc_none:
6071 		adapter->hw.fc.requested_mode = input;
6072 		adapter->fc = input;
6073 		break;
6074 	default:
6075 		/* Do nothing */
6076 		return (error);
6077 	}
6078 
6079 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6080 	e1000_force_mac_fc(&adapter->hw);
6081 	return (error);
6082 }
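/*
 * Example, assuming the oid is attached elsewhere in this file as
 * dev.em.<unit>.fc; select full flow control:
 *
 *	# sysctl dev.em.0.fc=3
 */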
6083 
6084 /*
6085 ** Manage Energy Efficient Ethernet:
6086 ** Control value is the "disable" flag:
6087 **     0 - EEE enabled, 1 - EEE disabled
6088 */
6089 static int
6090 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6091 {
6092 	struct adapter	*adapter = (struct adapter *) arg1;
6093 	int		error, value;
6094 
6095 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
6096 	error = sysctl_handle_int(oidp, &value, 0, req);
6097 	if (error || req->newptr == NULL)
6098 		return (error);
6099 	EM_CORE_LOCK(adapter);
6100 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6101 	em_init_locked(adapter);
6102 	EM_CORE_UNLOCK(adapter);
6103 	return (0);
6104 }
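/*
 * Example, assuming the oid is attached elsewhere in this file as
 * dev.em.<unit>.eee_control; writing 1 disables EEE:
 *
 *	# sysctl dev.em.0.eee_control=1
 */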
6105 
6106 static int
6107 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6108 {
6109 	struct adapter *adapter;
6110 	int error;
6111 	int result;
6112 
6113 	result = -1;
6114 	error = sysctl_handle_int(oidp, &result, 0, req);
6115 
6116 	if (error || !req->newptr)
6117 		return (error);
6118 
6119 	if (result == 1) {
6120 		adapter = (struct adapter *)arg1;
6121 		em_print_debug_info(adapter);
6122 	}
6123 
6124 	return (error);
6125 }
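/*
 * Example, assuming the oid is attached elsewhere in this file as
 * dev.em.<unit>.debug:
 *
 *	# sysctl dev.em.0.debug=1
 */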
6126 
6127 /*
6128 ** This routine is meant to be fluid, add whatever is
6129 ** needed for debugging a problem.  -jfv
6130 */
6131 static void
6132 em_print_debug_info(struct adapter *adapter)
6133 {
6134 	device_t dev = adapter->dev;
6135 	struct tx_ring *txr = adapter->tx_rings;
6136 	struct rx_ring *rxr = adapter->rx_rings;
6137 
6138 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
6139 		printf("Interface is RUNNING ");
6140 	else
6141 		printf("Interface is NOT RUNNING ");
6142 
6143 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
6144 		printf("and INACTIVE\n");
6145 	else
6146 		printf("and ACTIVE\n");
6147 
6148 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6149 		device_printf(dev, "TX Queue %d ------\n", i);
6150 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6151 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6152 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6153 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6154 		device_printf(dev, "TX descriptors avail = %d\n",
6155 	    		txr->tx_avail);
6156 		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6157 	    		txr->no_desc_avail);
6158 		device_printf(dev, "RX Queue %d ------\n", i);
6159 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6160 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6161 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6162 		device_printf(dev, "RX discarded packets = %ld\n",
6163 	    		rxr->rx_discarded);
6164 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6165 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6166 	}
6167 }
6168 
6169 #ifdef EM_MULTIQUEUE
6170 /*
6171  * 82574 only:
6172  * Write a new value to the EEPROM increasing the number of MSIX
6173  * vectors from 3 to 5, for proper multiqueue support.
6174  */
6175 static void
6176 em_enable_vectors_82574(struct adapter *adapter)
6177 {
6178 	struct e1000_hw *hw = &adapter->hw;
6179 	device_t dev = adapter->dev;
6180 	u16 edata;
6181 
6182 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6183 	device_printf(dev, "Current cap: %#06x\n", edata);
6184 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6185 		device_printf(dev, "Writing to eeprom: increasing "
6186 		    "reported MSIX vectors from 3 to 5...\n");
6187 		edata &= ~(EM_NVM_MSIX_N_MASK);
6188 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6189 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6190 		e1000_update_nvm_checksum(hw);
6191 		device_printf(dev, "Writing to eeprom: done\n");
6192 	}
6193 }
6194 #endif
6195 
6196 #ifdef DDB
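/*
 * ddb(4) helper commands, usable from the debugger prompt:
 *
 *	db> em_reset_dev
 *	db> em_dump_queue
 */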
6197 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6198 {
6199 	devclass_t	dc;
6200 	int max_em;
6201 
6202 	dc = devclass_find("em");
6203 	max_em = devclass_get_maxunit(dc);
6204 
6205 	for (int index = 0; index < max_em; index++) {
6206 		device_t dev;
6207 		dev = devclass_get_device(dc, index);
6208 		if (dev != NULL && device_get_driver(dev) == &em_driver) {
6209 			struct adapter *adapter = device_get_softc(dev);
6210 			EM_CORE_LOCK(adapter);
6211 			em_init_locked(adapter);
6212 			EM_CORE_UNLOCK(adapter);
6213 		}
6214 	}
6215 }
6216 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6217 {
6218 	devclass_t	dc;
6219 	int max_em;
6220 
6221 	dc = devclass_find("em");
6222 	max_em = devclass_get_maxunit(dc);
6223 
6224 	for (int index = 0; index < max_em; index++) {
6225 		device_t dev;
6226 		dev = devclass_get_device(dc, index);
6227 		if (dev != NULL && device_get_driver(dev) == &em_driver)
6228 			em_print_debug_info(device_get_softc(dev));
6229 	}
6231 }
6232 #endif
6233