xref: /freebsd/sys/dev/e1000/if_em.c (revision f4dc9bf43457515e5c88d1400d4f5ff70a82d9c7)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69 
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77 
78 #include <net/if_types.h>
79 #include <net/if_vlan_var.h>
80 
81 #include <netinet/in_systm.h>
82 #include <netinet/in.h>
83 #include <netinet/if_ether.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip6.h>
86 #include <netinet/tcp.h>
87 #include <netinet/udp.h>
88 
89 #include <machine/in_cksum.h>
90 #include <dev/led/led.h>
91 #include <dev/pci/pcivar.h>
92 #include <dev/pci/pcireg.h>
93 
94 #include "e1000_api.h"
95 #include "e1000_82571.h"
96 #include "if_em.h"
97 
98 /*********************************************************************
99  *  Driver version:
100  *********************************************************************/
101 char em_driver_version[] = "7.6.1-k";
102 
103 /*********************************************************************
104  *  PCI Device ID Table
105  *
106  *  Used by probe to select devices to load on
107  *  Last field stores an index into e1000_strings
108  *  Last entry must be all 0s
109  *
110  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
111  *********************************************************************/
112 
/*
 * PCI match table consumed by em_probe().  Each row is
 * { vendor, device, subvendor, subdevice, e1000_strings index };
 * PCI_ANY_ID wildcards the sub-IDs.  The all-zero row is the
 * mandatory terminator that stops the probe-time scan.
 */
static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
199 
200 /*********************************************************************
201  *  Table of branding strings for all supported NICs.
202  *********************************************************************/
203 
/*
 * Branding strings; indexed by the last field of each
 * em_vendor_info_array row (currently every row uses index 0).
 */
static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
207 
208 /*********************************************************************
209  *  Function prototypes
210  *********************************************************************/
211 static int	em_probe(device_t);
212 static int	em_attach(device_t);
213 static int	em_detach(device_t);
214 static int	em_shutdown(device_t);
215 static int	em_suspend(device_t);
216 static int	em_resume(device_t);
217 #ifdef EM_MULTIQUEUE
218 static int	em_mq_start(if_t, struct mbuf *);
219 static int	em_mq_start_locked(if_t,
220 		    struct tx_ring *);
221 static void	em_qflush(if_t);
222 #else
223 static void	em_start(if_t);
224 static void	em_start_locked(if_t, struct tx_ring *);
225 #endif
226 static int	em_ioctl(if_t, u_long, caddr_t);
227 static uint64_t	em_get_counter(if_t, ift_counter);
228 static void	em_init(void *);
229 static void	em_init_locked(struct adapter *);
230 static void	em_stop(void *);
231 static void	em_media_status(if_t, struct ifmediareq *);
232 static int	em_media_change(if_t);
233 static void	em_identify_hardware(struct adapter *);
234 static int	em_allocate_pci_resources(struct adapter *);
235 static int	em_allocate_legacy(struct adapter *);
236 static int	em_allocate_msix(struct adapter *);
237 static int	em_allocate_queues(struct adapter *);
238 static int	em_setup_msix(struct adapter *);
239 static void	em_free_pci_resources(struct adapter *);
240 static void	em_local_timer(void *);
241 static void	em_reset(struct adapter *);
242 static int	em_setup_interface(device_t, struct adapter *);
243 static void	em_flush_desc_rings(struct adapter *);
244 
245 static void	em_setup_transmit_structures(struct adapter *);
246 static void	em_initialize_transmit_unit(struct adapter *);
247 static int	em_allocate_transmit_buffers(struct tx_ring *);
248 static void	em_free_transmit_structures(struct adapter *);
249 static void	em_free_transmit_buffers(struct tx_ring *);
250 
251 static int	em_setup_receive_structures(struct adapter *);
252 static int	em_allocate_receive_buffers(struct rx_ring *);
253 static void	em_initialize_receive_unit(struct adapter *);
254 static void	em_free_receive_structures(struct adapter *);
255 static void	em_free_receive_buffers(struct rx_ring *);
256 
257 static void	em_enable_intr(struct adapter *);
258 static void	em_disable_intr(struct adapter *);
259 static void	em_update_stats_counters(struct adapter *);
260 static void	em_add_hw_stats(struct adapter *adapter);
261 static void	em_txeof(struct tx_ring *);
262 static bool	em_rxeof(struct rx_ring *, int, int *);
263 #ifndef __NO_STRICT_ALIGNMENT
264 static int	em_fixup_rx(struct rx_ring *);
265 #endif
266 static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
267 		    const struct em_rxbuffer *rxbuf);
268 static void	em_receive_checksum(uint32_t status, struct mbuf *);
269 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
270 		    struct ip *, u32 *, u32 *);
271 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
272 		    struct tcphdr *, u32 *, u32 *);
273 static void	em_set_promisc(struct adapter *);
274 static void	em_disable_promisc(struct adapter *);
275 static void	em_set_multi(struct adapter *);
276 static void	em_update_link_status(struct adapter *);
277 static void	em_refresh_mbufs(struct rx_ring *, int);
278 static void	em_register_vlan(void *, if_t, u16);
279 static void	em_unregister_vlan(void *, if_t, u16);
280 static void	em_setup_vlan_hw_support(struct adapter *);
281 static int	em_xmit(struct tx_ring *, struct mbuf **);
282 static int	em_dma_malloc(struct adapter *, bus_size_t,
283 		    struct em_dma_alloc *, int);
284 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
285 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
286 static void	em_print_nvm_info(struct adapter *);
287 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
288 static void	em_print_debug_info(struct adapter *);
289 static int 	em_is_valid_ether_addr(u8 *);
290 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
291 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
292 		    const char *, struct em_int_delay_info *, int, int);
293 /* Management and WOL Support */
294 static void	em_init_manageability(struct adapter *);
295 static void	em_release_manageability(struct adapter *);
296 static void     em_get_hw_control(struct adapter *);
297 static void     em_release_hw_control(struct adapter *);
298 static void	em_get_wakeup(device_t);
299 static void     em_enable_wakeup(device_t);
300 static int	em_enable_phy_wakeup(struct adapter *);
301 static void	em_led_func(void *, int);
302 static void	em_disable_aspm(struct adapter *);
303 
304 static int	em_irq_fast(void *);
305 
306 /* MSIX handlers */
307 static void	em_msix_tx(void *);
308 static void	em_msix_rx(void *);
309 static void	em_msix_link(void *);
310 static void	em_handle_tx(void *context, int pending);
311 static void	em_handle_rx(void *context, int pending);
312 static void	em_handle_link(void *context, int pending);
313 
314 #ifdef EM_MULTIQUEUE
315 static void	em_enable_vectors_82574(struct adapter *);
316 #endif
317 
318 static void	em_set_sysctl_value(struct adapter *, const char *,
319 		    const char *, int *, int);
320 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
321 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
322 
323 static __inline void em_rx_discard(struct rx_ring *, int);
324 
325 #ifdef DEVICE_POLLING
326 static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */
328 
329 /*********************************************************************
330  *  FreeBSD Device Interface Entry Points
331  *********************************************************************/
332 
/* Newbus method table: maps the generic device_* entry points
 * onto this driver's implementations. */
static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

/* Driver declaration: softc is the per-instance struct adapter. */
static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
/* Register on the PCI bus and declare load-order dependencies. */
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
355 
356 /*********************************************************************
357  *  Tunable default values.
358  *********************************************************************/
359 
360 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
361 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
362 #define M_TSO_LEN			66
363 
364 #define MAX_INTS_PER_SEC	8000
365 #define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
366 
367 /* Allow common code without TSO */
368 #ifndef CSUM_TSO
369 #define CSUM_TSO	0
370 #endif
371 
372 #define TSO_WORKAROUND	4
373 
374 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
375 
376 static int em_disable_crc_stripping = 0;
377 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
378     &em_disable_crc_stripping, 0, "Disable CRC Stripping");
379 
380 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
381 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
382 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
383     0, "Default transmit interrupt delay in usecs");
384 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
385     0, "Default receive interrupt delay in usecs");
386 
387 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
388 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
389 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
390     &em_tx_abs_int_delay_dflt, 0,
391     "Default transmit interrupt delay limit in usecs");
392 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
393     &em_rx_abs_int_delay_dflt, 0,
394     "Default receive interrupt delay limit in usecs");
395 
396 static int em_rxd = EM_DEFAULT_RXD;
397 static int em_txd = EM_DEFAULT_TXD;
398 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
399     "Number of receive descriptors per queue");
400 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
401     "Number of transmit descriptors per queue");
402 
403 static int em_smart_pwr_down = FALSE;
404 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
405     0, "Set to true to leave smart power down enabled on newer adapters");
406 
407 /* Controls whether promiscuous also shows bad packets */
408 static int em_debug_sbp = FALSE;
409 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
410     "Show bad packets in promiscuous mode");
411 
412 static int em_enable_msix = TRUE;
413 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
414     "Enable MSI-X interrupts");
415 
416 #ifdef EM_MULTIQUEUE
417 static int em_num_queues = 1;
418 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
419     "82574 only: Number of queues to configure, 0 indicates autoconfigure");
420 #endif
421 
/*
** Global variable to store last used CPU when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
427 static int em_last_bind_cpu = -1;
428 
429 /* How many packets rxeof tries to clean at a time */
430 static int em_rx_process_limit = 100;
431 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
432     &em_rx_process_limit, 0,
433     "Maximum number of received packets to process "
434     "at a time, -1 means unlimited");
435 
436 /* Energy efficient ethernet - default to OFF */
437 static int eee_setting = 1;
438 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
439     "Enable Energy Efficient Ethernet");
440 
441 /* Global used in WOL setup with multiport cards */
442 static int global_quad_port_a = 0;
443 
444 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
445 #include <dev/netmap/if_em_netmap.h>
446 #endif /* DEV_NETMAP */
447 
448 /*********************************************************************
449  *  Device identification routine
450  *
451  *  em_probe determines if the driver should be loaded on
452  *  adapter based on PCI vendor/device id of the adapter.
453  *
454  *  return BUS_PROBE_DEFAULT on success, positive on failure
455  *********************************************************************/
456 
457 static int
458 em_probe(device_t dev)
459 {
460 	char		adapter_name[60];
461 	uint16_t	pci_vendor_id = 0;
462 	uint16_t	pci_device_id = 0;
463 	uint16_t	pci_subvendor_id = 0;
464 	uint16_t	pci_subdevice_id = 0;
465 	em_vendor_info_t *ent;
466 
467 	INIT_DEBUGOUT("em_probe: begin");
468 
469 	pci_vendor_id = pci_get_vendor(dev);
470 	if (pci_vendor_id != EM_VENDOR_ID)
471 		return (ENXIO);
472 
473 	pci_device_id = pci_get_device(dev);
474 	pci_subvendor_id = pci_get_subvendor(dev);
475 	pci_subdevice_id = pci_get_subdevice(dev);
476 
477 	ent = em_vendor_info_array;
478 	while (ent->vendor_id != 0) {
479 		if ((pci_vendor_id == ent->vendor_id) &&
480 		    (pci_device_id == ent->device_id) &&
481 
482 		    ((pci_subvendor_id == ent->subvendor_id) ||
483 		    (ent->subvendor_id == PCI_ANY_ID)) &&
484 
485 		    ((pci_subdevice_id == ent->subdevice_id) ||
486 		    (ent->subdevice_id == PCI_ANY_ID))) {
487 			sprintf(adapter_name, "%s %s",
488 				em_strings[ent->index],
489 				em_driver_version);
490 			device_set_desc_copy(dev, adapter_name);
491 			return (BUS_PROBE_DEFAULT);
492 		}
493 		ent++;
494 	}
495 
496 	return (ENXIO);
497 }
498 
499 /*********************************************************************
500  *  Device initialization routine
501  *
502  *  The attach entry point is called when the driver is being loaded.
503  *  This routine identifies the type of hardware, allocates all resources
504  *  and initializes the hardware.
505  *
506  *  return 0 on success, positive on failure
507  *********************************************************************/
508 
static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	/* Honor device hints that disable this unit. */
	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	/* Core mutex must exist before the callout and sysctls below. */
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	/* Local timer ticks under the core mutex. */
	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}
	/*
	** In the new SPT device flash is not  a
	** separate BAR, rather it is also in BAR0,
	** so use the same tag and an offset handle for the
	** FLASH read/write macros in the shared code.
	*/
	else if (hw->mac.type == e1000_pch_spt) {
		adapter->osdep.flash_bus_space_tag =
		    adapter->osdep.mem_bus_space_tag;
		adapter->osdep.flash_bus_space_handle =
		    adapter->osdep.mem_bus_space_handle
		    + E1000_FLASH_BASE_ADDR;
	}

	/* Do Shared Code initialization */
	error = e1000_setup_init_funcs(hw, TRUE);
	if (error) {
		device_printf(dev, "Setup of Shared code failed, error %d\n",
		    error);
		error = ENXIO;
		goto err_pci;
	}

	/*
	 * Setup MSI/X or MSI if PCI Express
	 */
	adapter->msix = em_setup_msix(adapter);

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.
	 */
	/* Out-of-range tunables fall back to the defaults with a warning. */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	/* NOTE: eee_disable semantics — nonzero disables EEE. */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state, this is
	** important in reading the nvm and
	** mac from that.
	*/
	e1000_reset_hw(hw);


	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state, call it again,
		** if it fails a second time its a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	/* NOTE(review): this path falls through to err_late without
	 * setting a nonzero error; presumably error is set by a prior
	 * call — confirm em_setup_interface's contract. */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

	/* Unwind in reverse order of acquisition. */
err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != (void *)NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}
821 
822 /*********************************************************************
823  *  Device removal routine
824  *
825  *  The detach entry point is called when the driver is being removed.
826  *  This routine stops the adapter and deallocates all the resources
827  *  that were allocated for driver operation.
828  *
829  *  return 0 on success, positive on failure
830  *********************************************************************/
831 
/*
 * Tear down the driver instance: stop the adapter, detach from the
 * network stack and free every resource taken in em_attach().
 * Refuses with EBUSY while VLANs are still configured on top of us.
 */
static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	/* Flag detach-in-progress so em_ioctl() bails out, then quiesce */
	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Hand manageability / hardware control back to the firmware */
	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	/* NOTE(review): em_release_hw_control() was already called above;
	 * this second call appears redundant but harmless — confirm. */
	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}
890 
891 /*********************************************************************
892  *
893  *  Shutdown entry point
894  *
895  **********************************************************************/
896 
897 static int
898 em_shutdown(device_t dev)
899 {
900 	return em_suspend(dev);
901 }
902 
903 /*
904  * Suspend/resume device methods.
905  */
906 static int
907 em_suspend(device_t dev)
908 {
909 	struct adapter *adapter = device_get_softc(dev);
910 
911 	EM_CORE_LOCK(adapter);
912 
913         em_release_manageability(adapter);
914 	em_release_hw_control(adapter);
915 	em_enable_wakeup(dev);
916 
917 	EM_CORE_UNLOCK(adapter);
918 
919 	return bus_generic_suspend(dev);
920 }
921 
/*
 * Resume: apply PCH-specific workarounds where needed, reinitialize
 * the adapter and restart any transmit work that queued up while we
 * were suspended.
 */
static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	/* pch2lan parts require extra PHY work after resume */
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	/* Kick the TX path on every ring if the interface is up */
	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}
953 
954 
955 #ifndef EM_MULTIQUEUE
/*
 * Drain the legacy (single-queue) send path.  Called with the TX
 * ring lock held: dequeues mbufs from the interface send queue,
 * maps them onto TX descriptors via em_xmit() and hands a copy of
 * each transmitted frame to BPF listeners.
 */
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	/* Nothing to do unless running and not flow-blocked */
	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
        	/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			/* Ring is full: tell the stack to back off */
			if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			/* Put the untransmitted frame back for later */
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Mark the queue as having work */
		if (txr->busy == EM_TX_IDLE)
			txr->busy = EM_TX_BUSY;

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

	}

	return;
}
1004 
1005 static void
1006 em_start(if_t ifp)
1007 {
1008 	struct adapter	*adapter = if_getsoftc(ifp);
1009 	struct tx_ring	*txr = adapter->tx_rings;
1010 
1011 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1012 		EM_TX_LOCK(txr);
1013 		em_start_locked(ifp, txr);
1014 		EM_TX_UNLOCK(txr);
1015 	}
1016 	return;
1017 }
1018 #else /* EM_MULTIQUEUE */
1019 /*********************************************************************
1020  *  Multiqueue Transmit routines
1021  *
1022  *  em_mq_start is called by the stack to initiate a transmit.
1023  *  however, if busy the driver can queue the request rather
1024  *  than do an immediate send. It is this that is an advantage
1025  *  in this driver, rather than also having multiple tx queues.
1026  **********************************************************************/
1027 /*
1028 ** Multiqueue capable stack interface
1029 */
1030 static int
1031 em_mq_start(if_t ifp, struct mbuf *m)
1032 {
1033 	struct adapter	*adapter = if_getsoftc(ifp);
1034 	struct tx_ring	*txr = adapter->tx_rings;
1035 	unsigned int	i, error;
1036 
1037 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1038 		i = m->m_pkthdr.flowid % adapter->num_queues;
1039 	else
1040 		i = curcpu % adapter->num_queues;
1041 
1042 	txr = &adapter->tx_rings[i];
1043 
1044 	error = drbr_enqueue(ifp, txr->br, m);
1045 	if (error)
1046 		return (error);
1047 
1048 	if (EM_TX_TRYLOCK(txr)) {
1049 		em_mq_start_locked(ifp, txr);
1050 		EM_TX_UNLOCK(txr);
1051 	} else
1052 		taskqueue_enqueue(txr->tq, &txr->tx_task);
1053 
1054 	return (0);
1055 }
1056 
/*
 * Service one TX ring's buf ring.  Called with the ring lock held
 * (from em_mq_start() or the ring's taskqueue task).  Returns 0 or
 * the error from the last failed em_xmit().
 */
static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

	EM_TX_LOCK_ASSERT(txr);

	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0) {
		return (ENETDOWN);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		/* Committed to the ring: consume it from the buf ring */
		drbr_advance(ifp, txr->br);
		enq++;
		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		ETHER_BPF_MTAP(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
                        break;
	}

	/* Mark the queue as having work */
	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
		txr->busy = EM_TX_BUSY;

	/* Try to reclaim descriptors; flow-block the stack if still low */
	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
	}
	return (err);
}
1108 
1109 /*
1110 ** Flush all ring buffers
1111 */
1112 static void
1113 em_qflush(if_t ifp)
1114 {
1115 	struct adapter  *adapter = if_getsoftc(ifp);
1116 	struct tx_ring  *txr = adapter->tx_rings;
1117 	struct mbuf     *m;
1118 
1119 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1120 		EM_TX_LOCK(txr);
1121 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1122 			m_freem(m);
1123 		EM_TX_UNLOCK(txr);
1124 	}
1125 	if_qflush(ifp);
1126 }
1127 #endif /* EM_MULTIQUEUE */
1128 
1129 /*********************************************************************
1130  *  Ioctl entry point
1131  *
1132  *  em_ioctl is called when the user wants to configure the
1133  *  interface.
1134  *
1135  *  return 0 on success, positive on failure
1136  **********************************************************************/
1137 
/*
 * Interface ioctl handler.  Dispatches address, MTU, flag, multicast,
 * media and capability requests; anything unrecognized is passed to
 * ether_ioctl().  Bails out early (returning 0) while a detach is in
 * progress so teardown cannot race configuration changes.
 */
static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	/* Ignore ioctls racing with em_detach() */
	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp,IFF_UP,0);
			if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		/* Jumbo-frame limits vary per MAC generation */
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_pch_spt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		/* Reinit so ring/buffer sizing matches the new MTU */
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
			em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				/* Only reprogram promisc/allmulti changes */
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			/* Leave interrupts off while polling is enabled */
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		/* mask = capabilities whose enable state is changing */
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp,IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp,IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		/* Offload changes require reprogramming the hardware */
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
1338 
1339 
1340 /*********************************************************************
1341  *  Init entry point
1342  *
1343  *  This routine is used in two ways. It is used by the stack as
1344  *  init entry point in network interface structure. It is also used
1345  *  by the driver as a hw/sw initialization routine to get to a
1346  *  consistent state.
1347  *
1348  *  return 0 on success, positive on failure
1349  **********************************************************************/
1350 
/*
 * Bring the adapter to a fully-initialized running state.  Called
 * with the core lock held, both from the stack's if_init path and
 * internally whenever a reinit is needed (MTU/capability changes).
 * On receive-setup failure the adapter is stopped and we return.
 */
static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
        bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, we make a duplicate
	 * in RAR[14] for that eventuality, this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	/*
	** There have proven to be problems with TSO when not
	** at full gigabit speed, so disable the assist automatically
	** when at lower speeds.  -jfv
	*/
	if (if_getcapenable(ifp) & IFCAP_TSO4) {
		if (adapter->link_speed == SPEED_1000)
			if_sethwassistbits(ifp, CSUM_TSO, 0);
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			/* Tag stripping only: set VME in CTRL */
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	/* Start the watchdog/stats timer and clear HW counters */
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}
1480 
/*
 * if_init entry point: wrapper that takes the core lock around
 * em_init_locked().
 */
static void
em_init(void *arg)
{
	struct adapter *sc = arg;

	EM_CORE_LOCK(sc);
	em_init_locked(sc);
	EM_CORE_UNLOCK(sc);
}
1490 
1491 
1492 #ifdef DEVICE_POLLING
1493 /*********************************************************************
1494  *
1495  *  Legacy polling routine: note this only works with single queue
1496  *
1497  *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	/* On a status check, also watch for link-state changes */
	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	/* Clean at most 'count' received packets */
	em_rxeof(rxr, count, &rx_done);

	/* Reclaim TX descriptors and restart any queued transmit work */
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
1540 #endif /* DEVICE_POLLING */
1541 
1542 
1543 /*********************************************************************
1544  *
1545  *  Fast Legacy/MSI Combined Interrupt Service routine
1546  *
1547  *********************************************************************/
1548 static int
1549 em_irq_fast(void *arg)
1550 {
1551 	struct adapter	*adapter = arg;
1552 	if_t ifp;
1553 	u32		reg_icr;
1554 
1555 	ifp = adapter->ifp;
1556 
1557 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1558 
1559 	/* Hot eject?  */
1560 	if (reg_icr == 0xffffffff)
1561 		return FILTER_STRAY;
1562 
1563 	/* Definitely not our interrupt.  */
1564 	if (reg_icr == 0x0)
1565 		return FILTER_STRAY;
1566 
1567 	/*
1568 	 * Starting with the 82571 chip, bit 31 should be used to
1569 	 * determine whether the interrupt belongs to us.
1570 	 */
1571 	if (adapter->hw.mac.type >= e1000_82571 &&
1572 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1573 		return FILTER_STRAY;
1574 
1575 	em_disable_intr(adapter);
1576 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1577 
1578 	/* Link status change */
1579 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1580 		adapter->hw.mac.get_link_status = 1;
1581 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1582 	}
1583 
1584 	if (reg_icr & E1000_ICR_RXO)
1585 		adapter->rx_overruns++;
1586 	return FILTER_HANDLED;
1587 }
1588 
1589 /* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		/* RX first; remember whether more work remains */
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			/* Still work pending: reschedule, keep intr masked */
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	/* All caught up: turn interrupts back on */
	em_enable_intr(adapter);
	return;
}
1620 
1621 
1622 /*********************************************************************
1623  *
1624  *  MSIX Interrupt Service Routines
1625  *
1626  **********************************************************************/
/*
 * MSI-X TX interrupt: reclaim completed descriptors, restart any
 * queued transmit work, then unmask this queue's vector.
 */
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}
1650 
1651 /*********************************************************************
1652  *
1653  *  MSIX RX Interrupt Service routine
1654  *
1655  **********************************************************************/
1656 
1657 static void
1658 em_msix_rx(void *arg)
1659 {
1660 	struct rx_ring	*rxr = arg;
1661 	struct adapter	*adapter = rxr->adapter;
1662 	bool		more;
1663 
1664 	++rxr->rx_irq;
1665 	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1666 		return;
1667 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1668 	if (more)
1669 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1670 	else {
1671 		/* Reenable this interrupt */
1672 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1673 	}
1674 	return;
1675 }
1676 
1677 /*********************************************************************
1678  *
1679  *  MSIX Link Fast Interrupt Service routine
1680  *
1681  **********************************************************************/
/*
 * MSI-X link interrupt.  Handles link-state and RX-overrun causes;
 * since reading ICR can auto-clear other cause bits, any remaining
 * causes are re-posted via ICS so the queue vectors still fire.
 */
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		/* No link event: just re-arm the link vector */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	/*
 	** Because we must read the ICR for this interrupt
 	** it may clear other causes using autoclear, for
 	** this reason we simply create a soft interrupt
 	** for all these vectors.
 	*/
	if (reg_icr) {
		E1000_WRITE_REG(&adapter->hw,
			E1000_ICS, adapter->ims);
	}
	return;
}
1712 
1713 static void
1714 em_handle_rx(void *context, int pending)
1715 {
1716 	struct rx_ring	*rxr = context;
1717 	struct adapter	*adapter = rxr->adapter;
1718         bool            more;
1719 
1720 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1721 	if (more)
1722 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1723 	else {
1724 		/* Reenable this interrupt */
1725 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1726 	}
1727 }
1728 
/*
 * Deferred TX task: reclaim descriptors, push any pending frames,
 * then re-enable this queue's interrupt.
 */
static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	/* Re-arm this queue's interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}
1748 
1749 static void
1750 em_handle_link(void *context, int pending)
1751 {
1752 	struct adapter	*adapter = context;
1753 	struct tx_ring	*txr = adapter->tx_rings;
1754 	if_t ifp = adapter->ifp;
1755 
1756 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1757 		return;
1758 
1759 	EM_CORE_LOCK(adapter);
1760 	callout_stop(&adapter->timer);
1761 	em_update_link_status(adapter);
1762 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1763 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1764 	    EM_MSIX_LINK | E1000_IMS_LSC);
1765 	if (adapter->link_active) {
1766 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1767 			EM_TX_LOCK(txr);
1768 #ifdef EM_MULTIQUEUE
1769 			if (!drbr_empty(ifp, txr->br))
1770 				em_mq_start_locked(ifp, txr);
1771 #else
1772 			if (if_sendq_empty(ifp))
1773 				em_start_locked(ifp, txr);
1774 #endif
1775 			EM_TX_UNLOCK(txr);
1776 		}
1777 	}
1778 	EM_CORE_UNLOCK(adapter);
1779 }
1780 
1781 
1782 /*********************************************************************
1783  *
1784  *  Media Ioctl callback
1785  *
1786  *  This routine is called whenever the user queries the status of
1787  *  the interface using ifconfig.
1788  *
1789  **********************************************************************/
1790 static void
1791 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1792 {
1793 	struct adapter *adapter = if_getsoftc(ifp);
1794 	u_char fiber_type = IFM_1000_SX;
1795 
1796 	INIT_DEBUGOUT("em_media_status: begin");
1797 
1798 	EM_CORE_LOCK(adapter);
1799 	em_update_link_status(adapter);
1800 
1801 	ifmr->ifm_status = IFM_AVALID;
1802 	ifmr->ifm_active = IFM_ETHER;
1803 
1804 	if (!adapter->link_active) {
1805 		EM_CORE_UNLOCK(adapter);
1806 		return;
1807 	}
1808 
1809 	ifmr->ifm_status |= IFM_ACTIVE;
1810 
1811 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1812 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1813 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1814 	} else {
1815 		switch (adapter->link_speed) {
1816 		case 10:
1817 			ifmr->ifm_active |= IFM_10_T;
1818 			break;
1819 		case 100:
1820 			ifmr->ifm_active |= IFM_100_TX;
1821 			break;
1822 		case 1000:
1823 			ifmr->ifm_active |= IFM_1000_T;
1824 			break;
1825 		}
1826 		if (adapter->link_duplex == FULL_DUPLEX)
1827 			ifmr->ifm_active |= IFM_FDX;
1828 		else
1829 			ifmr->ifm_active |= IFM_HDX;
1830 	}
1831 	EM_CORE_UNLOCK(adapter);
1832 }
1833 
1834 /*********************************************************************
1835  *
1836  *  Media Ioctl callback
1837  *
1838  *  This routine is called when the user changes speed/duplex using
1839  *  media/mediopt option with ifconfig.
1840  *
1841  **********************************************************************/
static int
em_media_change(if_t ifp)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		/* Gigabit always negotiates (only full duplex exists) */
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		/* 100Mb/s is forced (no autoneg); honor FDX/HDX request */
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		/* 10Mb/s likewise forced */
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		/* NOTE(review): an unsupported subtype only logs; the code
		 * still reinitializes below and returns 0 — confirm intent */
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* Reinitialize so the new PHY settings take effect */
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}
1890 
1891 /*********************************************************************
1892  *
1893  *  This routine maps the mbufs to tx descriptors.
1894  *
1895  *  return 0 on success, positive on failure
1896  **********************************************************************/
1897 
static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	struct ether_header	*eh;
	struct ip		*ip = NULL;
	struct tcphdr		*tp = NULL;
	u32			txd_upper = 0, txd_lower = 0;
	int			ip_off, poff;
	int			nsegs, i, j, first, last = 0;
	int			error;
	bool			do_tso, tso_desc, remap = TRUE;

	m_head = *m_headp;
	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
	tso_desc = FALSE;
	ip_off = poff = 0;

	/*
	 * Intel recommends entire IP/TCP header length reside in a single
	 * buffer. If multiple descriptors are used to describe the IP and
	 * TCP header, each descriptor should describe one or more
	 * complete headers; descriptors referencing only parts of headers
	 * are not supported. If all layer headers are not coalesced into
	 * a single buffer, each buffer should not cross a 4KB boundary,
	 * or be larger than the maximum read request size.
	 * Controller also requires modifing IP/TCP header to make TSO work
	 * so we firstly get a writable mbuf chain then coalesce ethernet/
	 * IP/TCP header into a single buffer to meet the requirement of
	 * controller. This also simplifies IP/TCP/UDP checksum offloading
	 * which also has similar restrictions.
	 */
	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		if (do_tso || (m_head->m_next != NULL &&
		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
			/*
			 * Headers must be modified in place below, so make
			 * a private writable copy of a read-only chain.
			 */
			if (M_WRITABLE(*m_headp) == 0) {
				m_head = m_dup(*m_headp, M_NOWAIT);
				m_freem(*m_headp);
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
				*m_headp = m_head;
			}
		}
		/*
		 * XXX
		 * Assume IPv4, we don't have TSO/checksum offload support
		 * for IPv6 yet.
		 */
		ip_off = sizeof(struct ether_header);
		/* Pull the Ethernet header into the first mbuf. */
		if (m_head->m_len < ip_off) {
			m_head = m_pullup(m_head, ip_off);
			if (m_head == NULL) {
				*m_headp = NULL;
				return (ENOBUFS);
			}
		}
		eh = mtod(m_head, struct ether_header *);
		/* Account for an 802.1Q tag preceding the IP header. */
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			ip_off = sizeof(struct ether_vlan_header);
			if (m_head->m_len < ip_off) {
				m_head = m_pullup(m_head, ip_off);
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
			}
		}
		/* Pull the fixed-size IP header in as well. */
		if (m_head->m_len < ip_off + sizeof(struct ip)) {
			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
			if (m_head == NULL) {
				*m_headp = NULL;
				return (ENOBUFS);
			}
		}
		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
		poff = ip_off + (ip->ip_hl << 2);

		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
				m_head = m_pullup(m_head, poff +
				    sizeof(struct tcphdr));
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
			}
			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
			/*
			 * TSO workaround:
			 *   pull 4 more bytes of data into it.
			 */
			if (m_head->m_len < poff + (tp->th_off << 2)) {
				m_head = m_pullup(m_head, poff +
				                 (tp->th_off << 2) +
				                 TSO_WORKAROUND);
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
			}
			/* m_pullup may have moved the data; recompute. */
			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
			if (do_tso) {
				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
				                  (ip->ip_hl << 2) +
				                  (tp->th_off << 2));
				ip->ip_sum = 0;
				/*
				 * The pseudo TCP checksum does not include TCP
				 * payload length so driver should recompute
				 * the checksum here what hardware expect to
				 * see. This is adherence of Microsoft's Large
				 * Send specification.
			 	*/
				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
			}
		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
			if (m_head->m_len < poff + sizeof(struct udphdr)) {
				m_head = m_pullup(m_head, poff +
				    sizeof(struct udphdr));
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
			}
			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
		}
		*m_headp = m_head;
	}

	/*
	 * Map the packet for DMA
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

retry:
	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG && remap) {
		struct mbuf *m;

		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
		if (m == NULL) {
			adapter->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again, but only once */
		remap = FALSE;
		goto retry;
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO Hardware workaround, if this packet is not
	 * TSO, and is only a single descriptor long, and
	 * it follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((!do_tso) && (txr->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		txr->tx_tso = FALSE;
	}

	/*
	 * Require headroom beyond nsegs so a later sentinel split cannot
	 * exhaust the ring.  NOTE(review): on this ENOBUFS path the mbuf
	 * is NOT freed here, unlike the DMA error paths above; presumably
	 * the caller requeues it -- confirm against the transmit path.
	 */
        if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
                txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
        }
	m_head = *m_headp;

	/* Do hardware assists */
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		em_tso_setup(txr, m_head, ip_off, ip, tp,
		    &txd_upper, &txd_lower);
		/* we need to make a final sentinel transmit desc */
		tso_desc = TRUE;
	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
		em_transmit_checksum_setup(txr, m_head,
		    ip_off, ip, &txd_upper, &txd_lower);

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		txd_upper |= htole16(if_getvtag(m_head)) << 16;
                /* Tell hardware to add tag */
                txd_lower |= htole32(E1000_TXD_CMD_VLE);
        }

	i = txr->next_avail_desc;

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &txr->tx_buffers[i];
		ctxd = &txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len  = segs[j].ds_len;
		/*
		** TSO Workaround:
		** If this is the last descriptor, we want to
		** split it so we have a small final sentinel
		*/
		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
			/* Shorten the real data descriptor ... */
			seg_len -= TSO_WORKAROUND;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(
				adapter->txd_cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			if (++i == adapter->num_tx_desc)
				i = 0;

			/* Now make the sentinel */
			txr->tx_avail--;
			ctxd = &txr->tx_base[i];
			tx_buffer = &txr->tx_buffers[i];
			/* ... and describe the trailing bytes separately. */
			ctxd->buffer_addr =
			    htole64(seg_addr + seg_len);
			ctxd->lower.data = htole32(
			adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
			ctxd->upper.data =
			    htole32(txd_upper);
			last = i;
			if (++i == adapter->num_tx_desc)
				i = 0;
		} else {
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(
			adapter->txd_cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			last = i;
			if (++i == adapter->num_tx_desc)
				i = 0;
		}
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	txr->next_avail_desc = i;
	txr->tx_avail -= nsegs;

	/* The mbuf is owned by the LAST descriptor's buffer slot. */
        tx_buffer->m_head = m_head;
	/*
	** Here we swap the map so the last descriptor,
	** which gets the completion interrupt has the
	** real map, and the first descriptor gets the
	** unused map from this descriptor.
	*/
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

        /*
         * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
         */
        ctxd->lower.data |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);

	return (0);
}
2209 
2210 static void
2211 em_set_promisc(struct adapter *adapter)
2212 {
2213 	if_t ifp = adapter->ifp;
2214 	u32		reg_rctl;
2215 
2216 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2217 
2218 	if (if_getflags(ifp) & IFF_PROMISC) {
2219 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2220 		/* Turn this on if you want to see bad packets */
2221 		if (em_debug_sbp)
2222 			reg_rctl |= E1000_RCTL_SBP;
2223 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2224 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2225 		reg_rctl |= E1000_RCTL_MPE;
2226 		reg_rctl &= ~E1000_RCTL_UPE;
2227 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2228 	}
2229 }
2230 
2231 static void
2232 em_disable_promisc(struct adapter *adapter)
2233 {
2234 	if_t		ifp = adapter->ifp;
2235 	u32		reg_rctl;
2236 	int		mcnt = 0;
2237 
2238 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2239 	reg_rctl &=  (~E1000_RCTL_UPE);
2240 	if (if_getflags(ifp) & IFF_ALLMULTI)
2241 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2242 	else
2243 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2244 	/* Don't disable if in MAX groups */
2245 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2246 		reg_rctl &=  (~E1000_RCTL_MPE);
2247 	reg_rctl &=  (~E1000_RCTL_SBP);
2248 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2249 }
2250 
2251 
2252 /*********************************************************************
2253  *  Multicast Update
2254  *
2255  *  This routine is called whenever multicast address list is updated.
2256  *
2257  **********************************************************************/
2258 
/*
 * Push the interface's multicast address list down to the hardware.
 * If the list is larger than the hardware filter table, fall back to
 * multicast-promiscuous mode (MPE) instead.  On 82542 rev2 silicon
 * the receiver must be held in reset (RCTL_RST, with MWI disabled)
 * around the table update.
 */
static void
em_set_multi(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	u32 reg_rctl = 0;
	u8  *mta; /* Multicast array memory */
	int mcnt = 0;

	IOCTL_DEBUGOUT("em_set_multi: begin");

	mta = adapter->mta;
	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);

	/* 82542 rev2 workaround: reset the receiver before the update. */
	if (adapter->hw.mac.type == e1000_82542 &&
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_clear_mwi(&adapter->hw);
		reg_rctl |= E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
	}

	/* Gather the current list into the flat mta array. */
	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);

	/* Table overflow: receive all multicast rather than filter. */
	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else
		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);

	/* 82542 rev2 workaround: take the receiver back out of reset. */
	if (adapter->hw.mac.type == e1000_82542 &&
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl &= ~E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_set_mwi(&adapter->hw);
	}
}
2301 
2302 
2303 /*********************************************************************
2304  *  Timer routine
2305  *
2306  *  This routine checks for link status and updates statistics.
2307  *
2308  **********************************************************************/
2309 
/*
 * Once-per-second housekeeping: refresh link state and statistics,
 * advance the per-queue TX hang detector, and kick a TX tasklet or
 * RX interrupt as needed.  Reschedules itself via the adapter
 * callout; on a detected TX hang it reinitializes the adapter
 * instead.  Caller must hold the core lock.
 */
static void
em_local_timer(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		trigger = 0;

	EM_CORE_LOCK_ASSERT(adapter);

	em_update_link_status(adapter);
	em_update_stats_counters(adapter);

	/* Reset LAA into RAR[0] on 82571 */
	if ((adapter->hw.mac.type == e1000_82571) &&
	    e1000_get_laa_state_82571(&adapter->hw))
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Mask to use in the irq trigger */
	if (adapter->msix_mem) {
		/* MSIX: OR together every RX queue's interrupt bit. */
		for (int i = 0; i < adapter->num_queues; i++, rxr++)
			trigger |= rxr->ims;
		rxr = adapter->rx_rings;
	} else
		trigger = E1000_ICS_RXDMT0;

	/*
	** Check on the state of the TX queue(s), this
	** can be done without the lock because its RO
	** and the HUNG state will be static if set.
	*/
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		if (txr->busy == EM_TX_HUNG)
			goto hung;
		/* Too many ticks without progress: declare it hung. */
		if (txr->busy >= EM_TX_MAXTRIES)
			txr->busy = EM_TX_HUNG;
		/* Schedule a TX tasklet if needed */
		if (txr->tx_avail <= EM_MAX_SCATTER)
			taskqueue_enqueue(txr->tq, &txr->tx_task);
	}

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
#ifndef DEVICE_POLLING
	/* Trigger an RX interrupt to guarantee mbuf refresh */
	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
#endif
	return;
hung:
	/* Looks like we're hung */
	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
			txr->me);
	em_print_debug_info(adapter);
	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
	adapter->watchdog_events++;
	em_init_locked(adapter);
}
2367 
2368 
/*
 * Poll the hardware for the current link state (media-type specific)
 * and, on an up/down transition, update the cached speed/duplex,
 * baudrate, and notify the network stack.  On link-down the per-queue
 * hang detection is disarmed.
 */
static void
em_update_link_status(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	if_t ifp = adapter->ifp;
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	u32 link_check = 0;

	/* Get the cached link value or read phy for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* I219 (pch_spt) needs a settle delay before read. */
			if (hw->mac.type == e1000_pch_spt)
				msec_delay(50);
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
			if (link_check) /* ESB2 fix */
				e1000_cfg_on_link_up(hw);
		} else
			link_check = TRUE;
		break;
	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
                                 E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = adapter->hw.mac.serdes_has_link;
		break;
	default:
	case e1000_media_type_unknown:
		break;
	}

	/* Now check for a transition */
	if (link_check && (adapter->link_active == 0)) {
		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
		    &adapter->link_duplex);
		/* Check if we must disable SPEED_MODE bit on PCI-E */
		if ((adapter->link_speed != SPEED_1000) &&
		    ((hw->mac.type == e1000_82571) ||
		    (hw->mac.type == e1000_82572))) {
			int tarc0;
			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
			tarc0 &= ~TARC_SPEED_MODE_BIT;
			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
		}
		if (bootverbose)
			device_printf(dev, "Link is up %d Mbps %s\n",
			    adapter->link_speed,
			    ((adapter->link_duplex == FULL_DUPLEX) ?
			    "Full Duplex" : "Half Duplex"));
		adapter->link_active = 1;
		adapter->smartspeed = 0;
		/* link_speed is in Mbps; if_baudrate wants bits/sec. */
		if_setbaudrate(ifp, adapter->link_speed * 1000000);
		if_link_state_change(ifp, LINK_STATE_UP);
	} else if (!link_check && (adapter->link_active == 1)) {
		if_setbaudrate(ifp, 0);
		adapter->link_speed = 0;
		adapter->link_duplex = 0;
		if (bootverbose)
			device_printf(dev, "Link is Down\n");
		adapter->link_active = 0;
		/* Link down, disable hang detection */
		for (int i = 0; i < adapter->num_queues; i++, txr++)
			txr->busy = EM_TX_IDLE;
		if_link_state_change(ifp, LINK_STATE_DOWN);
	}
}
2441 
2442 /*********************************************************************
2443  *
2444  *  This routine disables all traffic on the adapter by issuing a
2445  *  global reset on the MAC and deallocates TX/RX buffers.
2446  *
2447  *  This routine should always be called with BOTH the CORE
2448  *  and TX locks.
2449  **********************************************************************/
2450 
/*
 * Quiesce the adapter: mask interrupts, stop the housekeeping
 * callout, mark the interface down to the stack, disarm per-queue
 * hang detection, then reset the MAC and clear wake-up control.
 * Caller must hold the core lock (asserted below); per-queue TX
 * locks are taken here for the hang-detection reset.
 */
static void
em_stop(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;

	EM_CORE_LOCK_ASSERT(adapter);

	INIT_DEBUGOUT("em_stop: begin");

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Tell the stack that the interface is no longer active */
	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

        /* Disarm Hang Detection. */
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		txr->busy = EM_TX_IDLE;
		EM_TX_UNLOCK(txr);
	}

	/* I219 needs some special flushing to avoid hangs */
	if (adapter->hw.mac.type == e1000_pch_spt)
		em_flush_desc_rings(adapter);

	e1000_reset_hw(&adapter->hw);
	/* Clear wake-up control so the device does not wake the system. */
	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);

	e1000_led_off(&adapter->hw);
	e1000_cleanup_led(&adapter->hw);
}
2485 
2486 
2487 /*********************************************************************
2488  *
2489  *  Determine hardware revision.
2490  *
2491  **********************************************************************/
2492 static void
2493 em_identify_hardware(struct adapter *adapter)
2494 {
2495 	device_t dev = adapter->dev;
2496 
2497 	/* Make sure our PCI config space has the necessary stuff set */
2498 	pci_enable_busmaster(dev);
2499 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2500 
2501 	/* Save off the information about this board */
2502 	adapter->hw.vendor_id = pci_get_vendor(dev);
2503 	adapter->hw.device_id = pci_get_device(dev);
2504 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2505 	adapter->hw.subsystem_vendor_id =
2506 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2507 	adapter->hw.subsystem_device_id =
2508 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2509 
2510 	/* Do Shared Code Init and Setup */
2511 	if (e1000_set_mac_type(&adapter->hw)) {
2512 		device_printf(dev, "Setup init failure\n");
2513 		return;
2514 	}
2515 }
2516 
2517 static int
2518 em_allocate_pci_resources(struct adapter *adapter)
2519 {
2520 	device_t	dev = adapter->dev;
2521 	int		rid;
2522 
2523 	rid = PCIR_BAR(0);
2524 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2525 	    &rid, RF_ACTIVE);
2526 	if (adapter->memory == NULL) {
2527 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2528 		return (ENXIO);
2529 	}
2530 	adapter->osdep.mem_bus_space_tag =
2531 	    rman_get_bustag(adapter->memory);
2532 	adapter->osdep.mem_bus_space_handle =
2533 	    rman_get_bushandle(adapter->memory);
2534 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2535 
2536 	adapter->hw.back = &adapter->osdep;
2537 
2538 	return (0);
2539 }
2540 
2541 /*********************************************************************
2542  *
2543  *  Setup the Legacy or MSI Interrupt handler
2544  *
2545  **********************************************************************/
2546 int
2547 em_allocate_legacy(struct adapter *adapter)
2548 {
2549 	device_t dev = adapter->dev;
2550 	struct tx_ring	*txr = adapter->tx_rings;
2551 	int error, rid = 0;
2552 
2553 	/* Manually turn off all interrupts */
2554 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2555 
2556 	if (adapter->msix == 1) /* using MSI */
2557 		rid = 1;
2558 	/* We allocate a single interrupt resource */
2559 	adapter->res = bus_alloc_resource_any(dev,
2560 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2561 	if (adapter->res == NULL) {
2562 		device_printf(dev, "Unable to allocate bus resource: "
2563 		    "interrupt\n");
2564 		return (ENXIO);
2565 	}
2566 
2567 	/*
2568 	 * Allocate a fast interrupt and the associated
2569 	 * deferred processing contexts.
2570 	 */
2571 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2572 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2573 	    taskqueue_thread_enqueue, &adapter->tq);
2574 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2575 	    device_get_nameunit(adapter->dev));
2576 	/* Use a TX only tasklet for local timer */
2577 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2578 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2579 	    taskqueue_thread_enqueue, &txr->tq);
2580 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2581 	    device_get_nameunit(adapter->dev));
2582 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2583 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2584 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2585 		device_printf(dev, "Failed to register fast interrupt "
2586 			    "handler: %d\n", error);
2587 		taskqueue_free(adapter->tq);
2588 		adapter->tq = NULL;
2589 		return (error);
2590 	}
2591 
2592 	return (0);
2593 }
2594 
2595 /*********************************************************************
2596  *
2597  *  Setup the MSIX Interrupt handlers
2598  *   This is not really Multiqueue, rather
2599  *   its just separate interrupt vectors
2600  *   for TX, RX, and Link.
2601  *
2602  **********************************************************************/
/*
 * Set up the MSI-X interrupt path.  One vector is allocated per RX
 * ring, then one per TX ring, then a final vector for link events.
 * Each ring vector gets its own taskqueue thread bound round-robin
 * across CPUs, and its IMS/IVAR bits are accumulated in the adapter
 * for later programming.  Returns 0 on success or an errno; note
 * resources allocated before a failure are not released here
 * (detach-path cleanup handles that).
 */
int
em_allocate_msix(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct		tx_ring *txr = adapter->tx_rings;
	struct		rx_ring *rxr = adapter->rx_rings;
	int		error, rid, vector = 0;
	int		cpu_id = 0;


	/* Make sure all interrupts are disabled */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

	/* First set up ring resources */
	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {

		/* RX ring */
		rid = vector + 1;	/* SYS_RES_IRQ rids are 1-based */

		rxr->res = bus_alloc_resource_any(dev,
		    SYS_RES_IRQ, &rid, RF_ACTIVE);
		if (rxr->res == NULL) {
			device_printf(dev,
			    "Unable to allocate bus resource: "
			    "RX MSIX Interrupt %d\n", i);
			return (ENXIO);
		}
		if ((error = bus_setup_intr(dev, rxr->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
		    rxr, &rxr->tag)) != 0) {
			device_printf(dev, "Failed to register RX handler");
			return (error);
		}
#if __FreeBSD_version >= 800504
		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
#endif
		rxr->msix = vector;

		/* Bind each RX vector to the next CPU in sequence. */
		if (em_last_bind_cpu < 0)
			em_last_bind_cpu = CPU_FIRST();
		cpu_id = em_last_bind_cpu;
		bus_bind_intr(dev, rxr->res, cpu_id);

		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
		    taskqueue_thread_enqueue, &rxr->tq);
		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
		    device_get_nameunit(adapter->dev), cpu_id);
		/*
		** Set the bit to enable interrupt
		** in E1000_IMS -- bits 20 and 21
		** are for RX0 and RX1, note this has
		** NOTHING to do with the MSIX vector
		*/
		rxr->ims = 1 << (20 + i);
		adapter->ims |= rxr->ims;
		/* IVAR: route this queue's cause to its vector (bit 3 = valid). */
		adapter->ivars |= (8 | rxr->msix) << (i * 4);

		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
	}

	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
		/* TX ring */
		rid = vector + 1;
		txr->res = bus_alloc_resource_any(dev,
		    SYS_RES_IRQ, &rid, RF_ACTIVE);
		if (txr->res == NULL) {
			device_printf(dev,
			    "Unable to allocate bus resource: "
			    "TX MSIX Interrupt %d\n", i);
			return (ENXIO);
		}
		if ((error = bus_setup_intr(dev, txr->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
		    txr, &txr->tag)) != 0) {
			device_printf(dev, "Failed to register TX handler");
			return (error);
		}
#if __FreeBSD_version >= 800504
		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
#endif
		txr->msix = vector;

                if (em_last_bind_cpu < 0)
                        em_last_bind_cpu = CPU_FIRST();
                cpu_id = em_last_bind_cpu;
                bus_bind_intr(dev, txr->res, cpu_id);

		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
		    taskqueue_thread_enqueue, &txr->tq);
		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
		    device_get_nameunit(adapter->dev), cpu_id);
		/*
		** Set the bit to enable interrupt
		** in E1000_IMS -- bits 22 and 23
		** are for TX0 and TX1, note this has
		** NOTHING to do with the MSIX vector
		*/
		txr->ims = 1 << (22 + i);
		adapter->ims |= txr->ims;
		/* TX IVAR entries start at nibble offset 8. */
		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));

		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
	}

	/* Link interrupt */
	rid = vector + 1;
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (!adapter->res) {
		device_printf(dev,"Unable to allocate "
		    "bus resource: Link interrupt [%d]\n", rid);
		return (ENXIO);
        }
	/* Set the link handler function */
	error = bus_setup_intr(dev, adapter->res,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
	    em_msix_link, adapter, &adapter->tag);
	if (error) {
		adapter->res = NULL;
		device_printf(dev, "Failed to register LINK handler");
		return (error);
	}
#if __FreeBSD_version >= 800504
	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
#endif
	adapter->linkvec = vector;
	/* Route link cause to its vector; bit 31 enables the other-cause IVAR. */
	adapter->ivars |=  (8 | vector) << 16;
	adapter->ivars |= 0x80000000;

	return (0);
}
2736 
2737 
/*
 * Release everything em_allocate_pci_resources / em_setup_msix /
 * em_allocate_{legacy,msix} acquired: per-queue IRQ handlers and
 * resources, the link/legacy interrupt, MSI(-X) state, the MSIX BAR,
 * the register BAR, and the flash mapping.  Safe to call on a
 * partially initialized adapter (each release is NULL-checked).
 */
static void
em_free_pci_resources(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int		rid;


	/*
	** Release all the queue interrupt resources:
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		txr = &adapter->tx_rings[i];
		/*
		 * an early abort?
		 * NOTE(review): &array[i] is never NULL for a valid
		 * tx_rings pointer, so this guard only fires if
		 * adapter->tx_rings itself is NULL and i == 0.
		 */
		if (txr == NULL)
			break;
		rid = txr->msix +1;	/* rid is the 1-based vector */
		if (txr->tag != NULL) {
			bus_teardown_intr(dev, txr->res, txr->tag);
			txr->tag = NULL;
		}
		if (txr->res != NULL)
			bus_release_resource(dev, SYS_RES_IRQ,
			    rid, txr->res);

		rxr = &adapter->rx_rings[i];
		/* an early abort? (same caveat as the txr check above) */
		if (rxr == NULL)
			break;
		rid = rxr->msix +1;
		if (rxr->tag != NULL) {
			bus_teardown_intr(dev, rxr->res, rxr->tag);
			rxr->tag = NULL;
		}
		if (rxr->res != NULL)
			bus_release_resource(dev, SYS_RES_IRQ,
			    rid, rxr->res);
	}

	/* Pick the rid of the link/legacy vector: MSIX, MSI, or INTx. */
        if (adapter->linkvec) /* we are doing MSIX */
                rid = adapter->linkvec + 1;
        else
                (adapter->msix != 0) ? (rid = 1):(rid = 0);

	if (adapter->tag != NULL) {
		bus_teardown_intr(dev, adapter->res, adapter->tag);
		adapter->tag = NULL;
	}

	if (adapter->res != NULL)
		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);


	if (adapter->msix)
		pci_release_msi(dev);

	if (adapter->msix_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);

	if (adapter->memory != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(0), adapter->memory);

	if (adapter->flash != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    EM_FLASH, adapter->flash);
}
2807 
2808 /*
2809  * Setup MSI or MSI/X
2810  */
/*
 * Decide the interrupt strategy and allocate vectors for it.
 * Preference order: MSI-X (82574 only, needs 3 vectors, or 5 in the
 * EM_MULTIQUEUE two-queue case), then MSI, then legacy INTx.
 * Also sets adapter->num_queues.  Returns the number of vectors
 * allocated, or 0 to mean "use a legacy IRQ".
 */
static int
em_setup_msix(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int val;

	/* Nearly always going to use one queue */
	adapter->num_queues = 1;

	/*
	** Try using MSI-X for Hartwell adapters
	*/
	if ((adapter->hw.mac.type == e1000_82574) &&
	    (em_enable_msix == TRUE)) {
#ifdef EM_MULTIQUEUE
		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
		if (adapter->num_queues > 1)
			em_enable_vectors_82574(adapter);
#endif
		/* Map the MSIX BAR */
		int rid = PCIR_BAR(EM_MSIX_BAR);
		adapter->msix_mem = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
       		if (adapter->msix_mem == NULL) {
			/* May not be enabled */
               		device_printf(adapter->dev,
			    "Unable to map MSIX table \n");
			goto msi;
       		}
		val = pci_msix_count(dev);

#ifdef EM_MULTIQUEUE
		/* We need 5 vectors in the multiqueue case */
		if (adapter->num_queues > 1 ) {
			if (val >= 5)
				val = 5;
			else {
				/* Not enough: drop back to a single queue. */
				adapter->num_queues = 1;
				device_printf(adapter->dev,
				    "Insufficient MSIX vectors for >1 queue, "
				    "using single queue...\n");
				goto msix_one;
			}
		} else {
msix_one:
#endif
			/* Single queue needs 3 vectors: RX, TX, link. */
			if (val >= 3)
				val = 3;
			else {
				device_printf(adapter->dev,
			    	"Insufficient MSIX vectors, using MSI\n");
				goto msi;
			}
#ifdef EM_MULTIQUEUE
		}
#endif

		if ((pci_alloc_msix(dev, &val) == 0)) {
			device_printf(adapter->dev,
			    "Using MSIX interrupts "
			    "with %d vectors\n", val);
			return (val);
		}

		/*
		** If MSIX alloc failed or provided us with
		** less than needed, free and fall through to MSI
		*/
		pci_release_msi(dev);
	}
msi:
	/* MSI fallback: drop the MSIX BAR mapping if we took it. */
	if (adapter->msix_mem != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
		adapter->msix_mem = NULL;
	}
       	val = 1;
       	if (pci_alloc_msi(dev, &val) == 0) {
               	device_printf(adapter->dev, "Using an MSI interrupt\n");
		return (val);
	}
	/* Should only happen due to manual configuration */
	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
	return (0);
}
2896 
2897 
2898 /*
2899 ** The 3 following flush routines are used as a workaround in the
2900 ** I219 client parts and only for them.
2901 **
2902 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2903 **
2904 ** We want to clear all pending descriptors from the TX ring.
** Zeroing happens when the HW reads the descriptors. We assign the ring
** itself as the data of the next descriptor; we don't care about the data
** since we are about to reset the HW.
2908 */
2909 static void
2910 em_flush_tx_ring(struct adapter *adapter)
2911 {
2912 	struct e1000_hw		*hw = &adapter->hw;
2913 	struct tx_ring		*txr = adapter->tx_rings;
2914 	struct e1000_tx_desc	*txd;
2915 	u32			tctl, txd_lower = E1000_TXD_CMD_IFCS;
2916 	u16			size = 512;
2917 
2918 	tctl = E1000_READ_REG(hw, E1000_TCTL);
2919 	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2920 
2921 	txd = &txr->tx_base[txr->next_avail_desc++];
2922 	if (txr->next_avail_desc == adapter->num_tx_desc)
2923 		txr->next_avail_desc = 0;
2924 
2925 	/* Just use the ring as a dummy buffer addr */
2926 	txd->buffer_addr = txr->txdma.dma_paddr;
2927 	txd->lower.data = htole32(txd_lower | size);
2928 	txd->upper.data = 0;
2929 
2930 	/* flush descriptors to memory before notifying the HW */
2931 	wmb();
2932 
2933 	E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2934 	mb();
2935 	usec_delay(250);
2936 }
2937 
2938 /*
2939 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2940 **
2941 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
2942 */
2943 static void
2944 em_flush_rx_ring(struct adapter *adapter)
2945 {
2946 	struct e1000_hw	*hw = &adapter->hw;
2947 	u32		rctl, rxdctl;
2948 
2949 	rctl = E1000_READ_REG(hw, E1000_RCTL);
2950 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2951 	E1000_WRITE_FLUSH(hw);
2952 	usec_delay(150);
2953 
2954 	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2955 	/* zero the lower 14 bits (prefetch and host thresholds) */
2956 	rxdctl &= 0xffffc000;
2957 	/*
2958 	 * update thresholds: prefetch threshold to 31, host threshold to 1
2959 	 * and make sure the granularity is "descriptors" and not "cache lines"
2960 	 */
2961 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2962 	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
2963 
2964 	/* momentarily enable the RX ring for the changes to take effect */
2965 	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2966 	E1000_WRITE_FLUSH(hw);
2967 	usec_delay(150);
2968 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2969 }
2970 
2971 /*
2972 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
2973 **
2974 ** In i219, the descriptor rings must be emptied before resetting the HW
2975 ** or before changing the device state to D3 during runtime (runtime PM).
2976 **
2977 ** Failure to do this will cause the HW to enter a unit hang state which can
2978 ** only be released by PCI reset on the device
2979 **
2980 */
2981 static void
2982 em_flush_desc_rings(struct adapter *adapter)
2983 {
2984 	struct e1000_hw	*hw = &adapter->hw;
2985 	device_t	dev = adapter->dev;
2986 	u16		hang_state;
2987 	u32		fext_nvm11, tdlen;
2988 
2989 	/* First, disable MULR fix in FEXTNVM11 */
2990 	fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
2991 	fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
2992 	E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
2993 
2994 	/* do nothing if we're not in faulty state, or if the queue is empty */
2995 	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
2996 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
2997 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
2998 		return;
2999 	em_flush_tx_ring(adapter);
3000 
3001 	/* recheck, maybe the fault is caused by the rx ring */
3002 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3003 	if (hang_state & FLUSH_DESC_REQUIRED)
3004 		em_flush_rx_ring(adapter);
3005 }
3006 
3007 
3008 /*********************************************************************
3009  *
3010  *  Initialize the hardware to a configuration
3011  *  as specified by the adapter structure.
3012  *
3013  **********************************************************************/
static void
em_reset(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	if_t ifp = adapter->ifp;
	struct e1000_hw	*hw = &adapter->hw;
	u16		rx_buffer_size;
	u32		pba;

	INIT_DEBUGOUT("em_reset: begin");

	/* Set up smart power down as default off on newer adapters. */
	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
	    hw->mac.type == e1000_82572)) {
		u16 phy_tmp = 0;

		/* Speed up time to link by disabling smart power down. */
		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
		phy_tmp &= ~IGP02E1000_PM_SPD;
		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
	}

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 * The per-MAC constants below are dictated by each
	 * controller's total packet-buffer size.
	 */
	switch (hw->mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
		/* Boost Receive side for jumbo frames */
		if (adapter->hw.mac.max_frame_size > 4096)
			pba = E1000_PBA_14K;
		else
			pba = E1000_PBA_10K;
		break;
	case e1000_pchlan:
	case e1000_pch2lan:
	case e1000_pch_lpt:
	case e1000_pch_spt:
		pba = E1000_PBA_26K;
		break;
	default:
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit. Here we use an arbitrary value of 1500 which will
	 *   restart after one full frame is pulled from the buffer. There
	 *   could be several smaller frames in the buffer and if so they will
	 *   not trigger the XON until their total number reduces the buffer
	 *   by 1500.
	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
	 */
	/* Read back PBA: low 16 bits are the RX portion in KB, shift to bytes */
	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
	hw->fc.high_water = rx_buffer_size -
	    roundup2(adapter->hw.mac.max_frame_size, 1024);
	hw->fc.low_water = hw->fc.high_water - 1500;

	if (adapter->fc) /* locally set flow control value? */
		hw->fc.requested_mode = adapter->fc;
	else
		hw->fc.requested_mode = e1000_fc_full;

	if (hw->mac.type == e1000_80003es2lan)
		hw->fc.pause_time = 0xFFFF;
	else
		hw->fc.pause_time = EM_FC_PAUSE_TIME;

	hw->fc.send_xon = TRUE;

	/* Device specific overrides/settings */
	switch (hw->mac.type) {
	case e1000_pchlan:
		/* Workaround: no TX flow ctrl for PCH */
                hw->fc.requested_mode = e1000_fc_rx_pause;
		hw->fc.pause_time = 0xFFFF; /* override */
		if (if_getmtu(ifp) > ETHERMTU) {
			hw->fc.high_water = 0x3500;
			hw->fc.low_water = 0x1500;
		} else {
			hw->fc.high_water = 0x5000;
			hw->fc.low_water = 0x3000;
		}
		hw->fc.refresh_time = 0x1000;
		break;
	case e1000_pch2lan:
	case e1000_pch_lpt:
	case e1000_pch_spt:
		hw->fc.high_water = 0x5C20;
		hw->fc.low_water = 0x5048;
		hw->fc.pause_time = 0x0650;
		hw->fc.refresh_time = 0x0400;
		/* Jumbos need adjusted PBA */
		if (if_getmtu(ifp) > ETHERMTU)
			E1000_WRITE_REG(hw, E1000_PBA, 12);
		else
			E1000_WRITE_REG(hw, E1000_PBA, 26);
		break;
        case e1000_ich9lan:
        case e1000_ich10lan:
		if (if_getmtu(ifp) > ETHERMTU) {
			hw->fc.high_water = 0x2800;
			hw->fc.low_water = hw->fc.high_water - 8;
			break;
		}
		/* else fall thru */
	default:
		if (hw->mac.type == e1000_80003es2lan)
			hw->fc.pause_time = 0xFFFF;
		break;
	}

	/* I219 needs some special flushing to avoid hangs */
	if (hw->mac.type == e1000_pch_spt)
		em_flush_desc_rings(adapter);

	/* Issue a global reset */
	e1000_reset_hw(hw);
	/* Clear Wake Up Control: no wake events armed after reset */
	E1000_WRITE_REG(hw, E1000_WUC, 0);
	em_disable_aspm(adapter);
	/* and a re-init */
	if (e1000_init_hw(hw) < 0) {
		device_printf(dev, "Hardware Initialization Failed\n");
		return;
	}

	/* Restore the VLAN Ether Type and refresh PHY/link state */
	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
	e1000_get_phy_info(hw);
	e1000_check_for_link(hw);
	return;
}
3172 
3173 /*********************************************************************
3174  *
3175  *  Setup networking device structure and register an interface.
3176  *
3177  **********************************************************************/
3178 static int
3179 em_setup_interface(device_t dev, struct adapter *adapter)
3180 {
3181 	if_t ifp;
3182 
3183 	INIT_DEBUGOUT("em_setup_interface: begin");
3184 
3185 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3186 	if (ifp == 0) {
3187 		device_printf(dev, "can not allocate ifnet structure\n");
3188 		return (-1);
3189 	}
3190 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3191 	if_setdev(ifp, dev);
3192 	if_setinitfn(ifp, em_init);
3193 	if_setsoftc(ifp, adapter);
3194 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3195 	if_setioctlfn(ifp, em_ioctl);
3196 	if_setgetcounterfn(ifp, em_get_counter);
3197 
3198 	/* TSO parameters */
3199 	ifp->if_hw_tsomax = IP_MAXPACKET;
3200 	/* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */
3201 	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3202 	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3203 
3204 #ifdef EM_MULTIQUEUE
3205 	/* Multiqueue stack interface */
3206 	if_settransmitfn(ifp, em_mq_start);
3207 	if_setqflushfn(ifp, em_qflush);
3208 #else
3209 	if_setstartfn(ifp, em_start);
3210 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3211 	if_setsendqready(ifp);
3212 #endif
3213 
3214 	ether_ifattach(ifp, adapter->hw.mac.addr);
3215 
3216 	if_setcapabilities(ifp, 0);
3217 	if_setcapenable(ifp, 0);
3218 
3219 
3220 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3221 	    IFCAP_TSO4, 0);
3222 	/*
3223 	 * Tell the upper layer(s) we
3224 	 * support full VLAN capability
3225 	 */
3226 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3227 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3228 	    IFCAP_VLAN_MTU, 0);
3229 	if_setcapenable(ifp, if_getcapabilities(ifp));
3230 
3231 	/*
3232 	** Don't turn this on by default, if vlans are
3233 	** created on another pseudo device (eg. lagg)
3234 	** then vlan events are not passed thru, breaking
3235 	** operation, but with HW FILTER off it works. If
3236 	** using vlans directly on the em driver you can
3237 	** enable this and get full hardware tag filtering.
3238 	*/
3239 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3240 
3241 #ifdef DEVICE_POLLING
3242 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3243 #endif
3244 
3245 	/* Enable only WOL MAGIC by default */
3246 	if (adapter->wol) {
3247 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3248 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3249 	}
3250 
3251 	/*
3252 	 * Specify the media types supported by this adapter and register
3253 	 * callbacks to update media and link information
3254 	 */
3255 	ifmedia_init(&adapter->media, IFM_IMASK,
3256 	    em_media_change, em_media_status);
3257 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3258 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3259 		u_char fiber_type = IFM_1000_SX;	/* default type */
3260 
3261 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3262 			    0, NULL);
3263 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3264 	} else {
3265 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3266 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3267 			    0, NULL);
3268 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3269 			    0, NULL);
3270 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3271 			    0, NULL);
3272 		if (adapter->hw.phy.type != e1000_phy_ife) {
3273 			ifmedia_add(&adapter->media,
3274 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3275 			ifmedia_add(&adapter->media,
3276 				IFM_ETHER | IFM_1000_T, 0, NULL);
3277 		}
3278 	}
3279 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3280 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3281 	return (0);
3282 }
3283 
3284 
3285 /*
3286  * Manage DMA'able memory.
3287  */
3288 static void
3289 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3290 {
3291 	if (error)
3292 		return;
3293 	*(bus_addr_t *) arg = segs[0].ds_addr;
3294 }
3295 
3296 static int
3297 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3298         struct em_dma_alloc *dma, int mapflags)
3299 {
3300 	int error;
3301 
3302 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3303 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3304 				BUS_SPACE_MAXADDR,	/* lowaddr */
3305 				BUS_SPACE_MAXADDR,	/* highaddr */
3306 				NULL, NULL,		/* filter, filterarg */
3307 				size,			/* maxsize */
3308 				1,			/* nsegments */
3309 				size,			/* maxsegsize */
3310 				0,			/* flags */
3311 				NULL,			/* lockfunc */
3312 				NULL,			/* lockarg */
3313 				&dma->dma_tag);
3314 	if (error) {
3315 		device_printf(adapter->dev,
3316 		    "%s: bus_dma_tag_create failed: %d\n",
3317 		    __func__, error);
3318 		goto fail_0;
3319 	}
3320 
3321 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3322 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3323 	if (error) {
3324 		device_printf(adapter->dev,
3325 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3326 		    __func__, (uintmax_t)size, error);
3327 		goto fail_2;
3328 	}
3329 
3330 	dma->dma_paddr = 0;
3331 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3332 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3333 	if (error || dma->dma_paddr == 0) {
3334 		device_printf(adapter->dev,
3335 		    "%s: bus_dmamap_load failed: %d\n",
3336 		    __func__, error);
3337 		goto fail_3;
3338 	}
3339 
3340 	return (0);
3341 
3342 fail_3:
3343 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3344 fail_2:
3345 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3346 	bus_dma_tag_destroy(dma->dma_tag);
3347 fail_0:
3348 	dma->dma_tag = NULL;
3349 
3350 	return (error);
3351 }
3352 
/*
 * Release a DMA region allocated by em_dma_malloc(): sync and unload the
 * map if it was loaded, free the memory if it was allocated, and destroy
 * the tag.  Safe to call on a partially-initialized or already-freed
 * em_dma_alloc (all steps are guarded); idempotent because each field is
 * cleared after release.
 */
static void
em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
	if (dma->dma_tag == NULL)
		return;
	if (dma->dma_paddr != 0) {
		/* Complete any outstanding DMA before tearing down the map */
		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
		dma->dma_paddr = 0;
	}
	if (dma->dma_vaddr != NULL) {
		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
		dma->dma_vaddr = NULL;
	}
	bus_dma_tag_destroy(dma->dma_tag);
	dma->dma_tag = NULL;
}
3371 
3372 
3373 /*********************************************************************
3374  *
3375  *  Allocate memory for the transmit and receive rings, and then
3376  *  the descriptors associated with each, called only once at attach.
3377  *
3378  **********************************************************************/
3379 static int
3380 em_allocate_queues(struct adapter *adapter)
3381 {
3382 	device_t		dev = adapter->dev;
3383 	struct tx_ring		*txr = NULL;
3384 	struct rx_ring		*rxr = NULL;
3385 	int rsize, tsize, error = E1000_SUCCESS;
3386 	int txconf = 0, rxconf = 0;
3387 
3388 
3389 	/* Allocate the TX ring struct memory */
3390 	if (!(adapter->tx_rings =
3391 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3392 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3393 		device_printf(dev, "Unable to allocate TX ring memory\n");
3394 		error = ENOMEM;
3395 		goto fail;
3396 	}
3397 
3398 	/* Now allocate the RX */
3399 	if (!(adapter->rx_rings =
3400 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3401 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3402 		device_printf(dev, "Unable to allocate RX ring memory\n");
3403 		error = ENOMEM;
3404 		goto rx_fail;
3405 	}
3406 
3407 	tsize = roundup2(adapter->num_tx_desc *
3408 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3409 	/*
3410 	 * Now set up the TX queues, txconf is needed to handle the
3411 	 * possibility that things fail midcourse and we need to
3412 	 * undo memory gracefully
3413 	 */
3414 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3415 		/* Set up some basics */
3416 		txr = &adapter->tx_rings[i];
3417 		txr->adapter = adapter;
3418 		txr->me = i;
3419 
3420 		/* Initialize the TX lock */
3421 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3422 		    device_get_nameunit(dev), txr->me);
3423 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3424 
3425 		if (em_dma_malloc(adapter, tsize,
3426 			&txr->txdma, BUS_DMA_NOWAIT)) {
3427 			device_printf(dev,
3428 			    "Unable to allocate TX Descriptor memory\n");
3429 			error = ENOMEM;
3430 			goto err_tx_desc;
3431 		}
3432 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3433 		bzero((void *)txr->tx_base, tsize);
3434 
3435         	if (em_allocate_transmit_buffers(txr)) {
3436 			device_printf(dev,
3437 			    "Critical Failure setting up transmit buffers\n");
3438 			error = ENOMEM;
3439 			goto err_tx_desc;
3440         	}
3441 #if __FreeBSD_version >= 800000
3442 		/* Allocate a buf ring */
3443 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3444 		    M_WAITOK, &txr->tx_mtx);
3445 #endif
3446 	}
3447 
3448 	/*
3449 	 * Next the RX queues...
3450 	 */
3451 	rsize = roundup2(adapter->num_rx_desc *
3452 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3453 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3454 		rxr = &adapter->rx_rings[i];
3455 		rxr->adapter = adapter;
3456 		rxr->me = i;
3457 
3458 		/* Initialize the RX lock */
3459 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3460 		    device_get_nameunit(dev), txr->me);
3461 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3462 
3463 		if (em_dma_malloc(adapter, rsize,
3464 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3465 			device_printf(dev,
3466 			    "Unable to allocate RxDescriptor memory\n");
3467 			error = ENOMEM;
3468 			goto err_rx_desc;
3469 		}
3470 		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3471 		bzero((void *)rxr->rx_base, rsize);
3472 
3473         	/* Allocate receive buffers for the ring*/
3474 		if (em_allocate_receive_buffers(rxr)) {
3475 			device_printf(dev,
3476 			    "Critical Failure setting up receive buffers\n");
3477 			error = ENOMEM;
3478 			goto err_rx_desc;
3479 		}
3480 	}
3481 
3482 	return (0);
3483 
3484 err_rx_desc:
3485 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3486 		em_dma_free(adapter, &rxr->rxdma);
3487 err_tx_desc:
3488 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3489 		em_dma_free(adapter, &txr->txdma);
3490 	free(adapter->rx_rings, M_DEVBUF);
3491 rx_fail:
3492 #if __FreeBSD_version >= 800000
3493 	buf_ring_free(txr->br, M_DEVBUF);
3494 #endif
3495 	free(adapter->tx_rings, M_DEVBUF);
3496 fail:
3497 	return (error);
3498 }
3499 
3500 
3501 /*********************************************************************
3502  *
3503  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3504  *  the information needed to transmit a packet on the wire. This is
3505  *  called only once at attach, setup is done every reset.
3506  *
3507  **********************************************************************/
3508 static int
3509 em_allocate_transmit_buffers(struct tx_ring *txr)
3510 {
3511 	struct adapter *adapter = txr->adapter;
3512 	device_t dev = adapter->dev;
3513 	struct em_txbuffer *txbuf;
3514 	int error, i;
3515 
3516 	/*
3517 	 * Setup DMA descriptor areas.
3518 	 */
3519 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3520 			       1, 0,			/* alignment, bounds */
3521 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3522 			       BUS_SPACE_MAXADDR,	/* highaddr */
3523 			       NULL, NULL,		/* filter, filterarg */
3524 			       EM_TSO_SIZE,		/* maxsize */
3525 			       EM_MAX_SCATTER,		/* nsegments */
3526 			       PAGE_SIZE,		/* maxsegsize */
3527 			       0,			/* flags */
3528 			       NULL,			/* lockfunc */
3529 			       NULL,			/* lockfuncarg */
3530 			       &txr->txtag))) {
3531 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3532 		goto fail;
3533 	}
3534 
3535 	if (!(txr->tx_buffers =
3536 	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3537 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3538 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3539 		error = ENOMEM;
3540 		goto fail;
3541 	}
3542 
3543         /* Create the descriptor buffer dma maps */
3544 	txbuf = txr->tx_buffers;
3545 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3546 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3547 		if (error != 0) {
3548 			device_printf(dev, "Unable to create TX DMA map\n");
3549 			goto fail;
3550 		}
3551 	}
3552 
3553 	return 0;
3554 fail:
3555 	/* We free all, it handles case where we are in the middle */
3556 	em_free_transmit_structures(adapter);
3557 	return (error);
3558 }
3559 
3560 /*********************************************************************
3561  *
3562  *  Initialize a transmit ring.
3563  *
3564  **********************************************************************/
static void
em_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct em_txbuffer *txbuf;
	int i;
#ifdef DEV_NETMAP
	struct netmap_slot *slot;
	struct netmap_adapter *na = netmap_getna(adapter->ifp);
#endif /* DEV_NETMAP */

	/* Clear the old descriptor contents */
	EM_TX_LOCK(txr);
#ifdef DEV_NETMAP
	/* Non-NULL slot means the ring is in netmap mode */
	slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */

	bzero((void *)txr->tx_base,
	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
        txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		if (slot) {
			/* Point the descriptor at the netmap-owned buffer */
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + si, &paddr);
			txr->tx_base[i].buffer_addr = htole64(paddr);
			/* reload the map for netmap mode */
			netmap_load_map(na, txr->txtag, txbuf->map, addr);
		}
#endif /* DEV_NETMAP */

		/* clear the watch index */
		txbuf->next_eop = -1;
        }

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;
	txr->busy = EM_TX_IDLE;

	/* Clear checksum offload context. */
	txr->last_hw_offload = 0;
	txr->last_hw_ipcss = 0;
	txr->last_hw_ipcso = 0;
	txr->last_hw_tucss = 0;
	txr->last_hw_tucso = 0;

	/* Push the cleared ring out to memory before the HW touches it */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	EM_TX_UNLOCK(txr);
}
3630 
3631 /*********************************************************************
3632  *
3633  *  Initialize all transmit rings.
3634  *
3635  **********************************************************************/
3636 static void
3637 em_setup_transmit_structures(struct adapter *adapter)
3638 {
3639 	struct tx_ring *txr = adapter->tx_rings;
3640 
3641 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3642 		em_setup_transmit_ring(txr);
3643 
3644 	return;
3645 }
3646 
3647 /*********************************************************************
3648  *
3649  *  Enable transmit unit.
3650  *
3651  **********************************************************************/
static void
em_initialize_transmit_unit(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	struct e1000_hw	*hw = &adapter->hw;
	u32	tctl, txdctl = 0, tarc, tipg = 0;

	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");

	/* Program ring base/length and descriptor thresholds per queue */
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		u64 bus_addr = txr->txdma.dma_paddr;
		/* Base and Len of TX Ring */
		E1000_WRITE_REG(hw, E1000_TDLEN(i),
	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
		E1000_WRITE_REG(hw, E1000_TDBAH(i),
	    	    (u32)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_TDBAL(i),
	    	    (u32)bus_addr);
		/* Init the HEAD/TAIL indices */
		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
		E1000_WRITE_REG(hw, E1000_TDH(i), 0);

		HW_DEBUGOUT2("Base = %x, Length = %x\n",
		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));

		txr->busy = EM_TX_IDLE;
		txdctl = 0; /* clear txdctl */
                txdctl |= 0x1f; /* PTHRESH */
                txdctl |= 1 << 8; /* HTHRESH */
                txdctl |= 1 << 16;/* WTHRESH */
		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
		txdctl |= E1000_TXDCTL_GRAN;
                txdctl |= 1 << 25; /* LWTHRESH */

                E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
	}

	/* Set the default values for the Tx Inter Packet Gap timer */
	switch (adapter->hw.mac.type) {
	case e1000_80003es2lan:
		tipg = DEFAULT_82543_TIPG_IPGR1;
		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
		    E1000_TIPG_IPGR2_SHIFT;
		break;
	default:
		/* Fiber/serdes use a different IPGT than copper */
		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
		    (adapter->hw.phy.media_type ==
		    e1000_media_type_internal_serdes))
			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}

	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);

	if(adapter->hw.mac.type >= e1000_82540)
		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
		    adapter->tx_abs_int_delay.value);

	/* Per-MAC TARC workarounds */
	if ((adapter->hw.mac.type == e1000_82571) ||
	    (adapter->hw.mac.type == e1000_82572)) {
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
		tarc |= TARC_SPEED_MODE_BIT;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
		/* errata: program both queues to unweighted RR */
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
		tarc |= 1;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
		tarc |= 1;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
	} else if (adapter->hw.mac.type == e1000_82574) {
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
		tarc |= TARC_ERRATA_BIT;
		if ( adapter->num_queues > 1) {
			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
		} else
			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
	}

	/* Descriptor command defaults; add interrupt-delay if configured */
	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
	if (adapter->tx_int_delay.value > 0)
		adapter->txd_cmd |= E1000_TXD_CMD_IDE;

	/* Program the Transmit Control Register */
	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

	if (adapter->hw.mac.type >= e1000_82571)
		tctl |= E1000_TCTL_MULR;

	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);

	/* I219 (pch_spt) specific TX tuning */
	if (hw->mac.type == e1000_pch_spt) {
		u32 reg;
		reg = E1000_READ_REG(hw, E1000_IOSFPC);
		reg |= E1000_RCTL_RDMTS_HEX;
		E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
		reg = E1000_READ_REG(hw, E1000_TARC(0));
		reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
		E1000_WRITE_REG(hw, E1000_TARC(0), reg);
	}
}
3765 
3766 
3767 /*********************************************************************
3768  *
3769  *  Free all transmit rings.
3770  *
3771  **********************************************************************/
3772 static void
3773 em_free_transmit_structures(struct adapter *adapter)
3774 {
3775 	struct tx_ring *txr = adapter->tx_rings;
3776 
3777 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3778 		EM_TX_LOCK(txr);
3779 		em_free_transmit_buffers(txr);
3780 		em_dma_free(adapter, &txr->txdma);
3781 		EM_TX_UNLOCK(txr);
3782 		EM_TX_LOCK_DESTROY(txr);
3783 	}
3784 
3785 	free(adapter->tx_rings, M_DEVBUF);
3786 }
3787 
3788 /*********************************************************************
3789  *
3790  *  Free transmit ring related data structures.
3791  *
3792  **********************************************************************/
3793 static void
3794 em_free_transmit_buffers(struct tx_ring *txr)
3795 {
3796 	struct adapter		*adapter = txr->adapter;
3797 	struct em_txbuffer	*txbuf;
3798 
3799 	INIT_DEBUGOUT("free_transmit_ring: begin");
3800 
3801 	if (txr->tx_buffers == NULL)
3802 		return;
3803 
3804 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3805 		txbuf = &txr->tx_buffers[i];
3806 		if (txbuf->m_head != NULL) {
3807 			bus_dmamap_sync(txr->txtag, txbuf->map,
3808 			    BUS_DMASYNC_POSTWRITE);
3809 			bus_dmamap_unload(txr->txtag,
3810 			    txbuf->map);
3811 			m_freem(txbuf->m_head);
3812 			txbuf->m_head = NULL;
3813 			if (txbuf->map != NULL) {
3814 				bus_dmamap_destroy(txr->txtag,
3815 				    txbuf->map);
3816 				txbuf->map = NULL;
3817 			}
3818 		} else if (txbuf->map != NULL) {
3819 			bus_dmamap_unload(txr->txtag,
3820 			    txbuf->map);
3821 			bus_dmamap_destroy(txr->txtag,
3822 			    txbuf->map);
3823 			txbuf->map = NULL;
3824 		}
3825 	}
3826 #if __FreeBSD_version >= 800000
3827 	if (txr->br != NULL)
3828 		buf_ring_free(txr->br, M_DEVBUF);
3829 #endif
3830 	if (txr->tx_buffers != NULL) {
3831 		free(txr->tx_buffers, M_DEVBUF);
3832 		txr->tx_buffers = NULL;
3833 	}
3834 	if (txr->txtag != NULL) {
3835 		bus_dma_tag_destroy(txr->txtag);
3836 		txr->txtag = NULL;
3837 	}
3838 	return;
3839 }
3840 
3841 
3842 /*********************************************************************
3843  *  The offload context is protocol specific (TCP/UDP) and thus
3844  *  only needs to be set when the protocol changes. The occasion
3845  *  of a context change can be a performance detriment, and
3846  *  might be better just disabled. The reason arises in the way
3847  *  in which the controller supports pipelined requests from the
3848  *  Tx data DMA. Up to four requests can be pipelined, and they may
3849  *  belong to the same packet or to multiple packets. However all
3850  *  requests for one packet are issued before a request is issued
3851  *  for a subsequent packet and if a request for the next packet
3852  *  requires a context change, that request will be stalled
3853  *  until the previous request completes. This means setting up
3854  *  a new context effectively disables pipelined Tx data DMA which
3855  *  in turn greatly slow down performance to send small sized
3856  *  frames.
3857  **********************************************************************/
3858 static void
3859 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3860     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3861 {
3862 	struct adapter			*adapter = txr->adapter;
3863 	struct e1000_context_desc	*TXD = NULL;
3864 	struct em_txbuffer		*tx_buffer;
3865 	int				cur, hdr_len;
3866 	u32				cmd = 0;
3867 	u16				offload = 0;
3868 	u8				ipcso, ipcss, tucso, tucss;
3869 
3870 	ipcss = ipcso = tucss = tucso = 0;
3871 	hdr_len = ip_off + (ip->ip_hl << 2);
3872 	cur = txr->next_avail_desc;
3873 
3874 	/* Setup of IP header checksum. */
3875 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3876 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3877 		offload |= CSUM_IP;
3878 		ipcss = ip_off;
3879 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3880 		/*
3881 		 * Start offset for header checksum calculation.
3882 		 * End offset for header checksum calculation.
3883 		 * Offset of place to put the checksum.
3884 		 */
3885 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3886 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3887 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3888 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3889 		cmd |= E1000_TXD_CMD_IP;
3890 	}
3891 
3892 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3893  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3894  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3895  		offload |= CSUM_TCP;
3896  		tucss = hdr_len;
3897  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3898 		/*
3899 		 * The 82574L can only remember the *last* context used
3900 		 * regardless of queue that it was use for.  We cannot reuse
3901 		 * contexts on this hardware platform and must generate a new
3902 		 * context every time.  82574L hardware spec, section 7.2.6,
3903 		 * second note.
3904 		 */
3905 		if (adapter->num_queues < 2) {
3906  			/*
3907  		 	* Setting up new checksum offload context for every
3908 			* frames takes a lot of processing time for hardware.
3909 			* This also reduces performance a lot for small sized
3910 			* frames so avoid it if driver can use previously
3911 			* configured checksum offload context.
3912  		 	*/
3913  			if (txr->last_hw_offload == offload) {
3914  				if (offload & CSUM_IP) {
3915  					if (txr->last_hw_ipcss == ipcss &&
3916  				    	txr->last_hw_ipcso == ipcso &&
3917  				    	txr->last_hw_tucss == tucss &&
3918  				    	txr->last_hw_tucso == tucso)
3919  						return;
3920  				} else {
3921  					if (txr->last_hw_tucss == tucss &&
3922  				    	txr->last_hw_tucso == tucso)
3923  						return;
3924  				}
3925   			}
3926  			txr->last_hw_offload = offload;
3927  			txr->last_hw_tucss = tucss;
3928  			txr->last_hw_tucso = tucso;
3929 		}
3930  		/*
3931  		 * Start offset for payload checksum calculation.
3932  		 * End offset for payload checksum calculation.
3933  		 * Offset of place to put the checksum.
3934  		 */
3935 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3936  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3937  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3938  		TXD->upper_setup.tcp_fields.tucso = tucso;
3939  		cmd |= E1000_TXD_CMD_TCP;
3940  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3941  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3942  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3943  		tucss = hdr_len;
3944  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3945 		/*
3946 		 * The 82574L can only remember the *last* context used
3947 		 * regardless of queue that it was use for.  We cannot reuse
3948 		 * contexts on this hardware platform and must generate a new
3949 		 * context every time.  82574L hardware spec, section 7.2.6,
3950 		 * second note.
3951 		 */
3952 		if (adapter->num_queues < 2) {
3953  			/*
3954  		 	* Setting up new checksum offload context for every
3955 			* frames takes a lot of processing time for hardware.
3956 			* This also reduces performance a lot for small sized
3957 			* frames so avoid it if driver can use previously
3958 			* configured checksum offload context.
3959  		 	*/
3960  			if (txr->last_hw_offload == offload) {
3961  				if (offload & CSUM_IP) {
3962  					if (txr->last_hw_ipcss == ipcss &&
3963  				    	txr->last_hw_ipcso == ipcso &&
3964  				    	txr->last_hw_tucss == tucss &&
3965  				    	txr->last_hw_tucso == tucso)
3966  						return;
3967  				} else {
3968  					if (txr->last_hw_tucss == tucss &&
3969  				    	txr->last_hw_tucso == tucso)
3970  						return;
3971  				}
3972  			}
3973  			txr->last_hw_offload = offload;
3974  			txr->last_hw_tucss = tucss;
3975  			txr->last_hw_tucso = tucso;
3976 		}
3977  		/*
3978  		 * Start offset for header checksum calculation.
3979  		 * End offset for header checksum calculation.
3980  		 * Offset of place to put the checksum.
3981  		 */
3982 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3983  		TXD->upper_setup.tcp_fields.tucss = tucss;
3984  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3985  		TXD->upper_setup.tcp_fields.tucso = tucso;
3986   	}
3987 
3988  	if (offload & CSUM_IP) {
3989  		txr->last_hw_ipcss = ipcss;
3990  		txr->last_hw_ipcso = ipcso;
3991   	}
3992 
3993 	TXD->tcp_seg_setup.data = htole32(0);
3994 	TXD->cmd_and_length =
3995 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3996 	tx_buffer = &txr->tx_buffers[cur];
3997 	tx_buffer->m_head = NULL;
3998 	tx_buffer->next_eop = -1;
3999 
4000 	if (++cur == adapter->num_tx_desc)
4001 		cur = 0;
4002 
4003 	txr->tx_avail--;
4004 	txr->next_avail_desc = cur;
4005 }
4006 
4007 
4008 /**********************************************************************
4009  *
4010  *  Setup work for hardware segmentation offload (TSO)
4011  *
4012  **********************************************************************/
4013 static void
4014 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4015     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4016 {
4017 	struct adapter			*adapter = txr->adapter;
4018 	struct e1000_context_desc	*TXD;
4019 	struct em_txbuffer		*tx_buffer;
4020 	int cur, hdr_len;
4021 
4022 	/*
4023 	 * In theory we can use the same TSO context if and only if
4024 	 * frame is the same type(IP/TCP) and the same MSS. However
4025 	 * checking whether a frame has the same IP/TCP structure is
4026 	 * hard thing so just ignore that and always restablish a
4027 	 * new TSO context.
4028 	 */
4029 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4030 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
4031 		      E1000_TXD_DTYP_D |	/* Data descr type */
4032 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
4033 
4034 	/* IP and/or TCP header checksum calculation and insertion. */
4035 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4036 
4037 	cur = txr->next_avail_desc;
4038 	tx_buffer = &txr->tx_buffers[cur];
4039 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4040 
4041 	/*
4042 	 * Start offset for header checksum calculation.
4043 	 * End offset for header checksum calculation.
4044 	 * Offset of place put the checksum.
4045 	 */
4046 	TXD->lower_setup.ip_fields.ipcss = ip_off;
4047 	TXD->lower_setup.ip_fields.ipcse =
4048 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
4049 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4050 	/*
4051 	 * Start offset for payload checksum calculation.
4052 	 * End offset for payload checksum calculation.
4053 	 * Offset of place to put the checksum.
4054 	 */
4055 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4056 	TXD->upper_setup.tcp_fields.tucse = 0;
4057 	TXD->upper_setup.tcp_fields.tucso =
4058 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4059 	/*
4060 	 * Payload size per packet w/o any headers.
4061 	 * Length of all headers up to payload.
4062 	 */
4063 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4064 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4065 
4066 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
4067 				E1000_TXD_CMD_DEXT |	/* Extended descr */
4068 				E1000_TXD_CMD_TSE |	/* TSE context */
4069 				E1000_TXD_CMD_IP |	/* Do IP csum */
4070 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
4071 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
4072 
4073 	tx_buffer->m_head = NULL;
4074 	tx_buffer->next_eop = -1;
4075 
4076 	if (++cur == adapter->num_tx_desc)
4077 		cur = 0;
4078 
4079 	txr->tx_avail--;
4080 	txr->next_avail_desc = cur;
4081 	txr->tx_tso = TRUE;
4082 }
4083 
4084 
4085 /**********************************************************************
4086  *
4087  *  Examine each tx_buffer in the used queue. If the hardware is done
4088  *  processing the packet then free associated resources. The
4089  *  tx_buffer is put back on the free queue.
4090  *
4091  **********************************************************************/
static void
em_txeof(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
        int first, last, done, processed;
        struct em_txbuffer *tx_buffer;
        struct e1000_tx_desc   *tx_desc, *eop_desc;
	if_t ifp = adapter->ifp;

	EM_TX_LOCK_ASSERT(txr);
#ifdef DEV_NETMAP
	/* In netmap mode completion handling belongs to netmap. */
	if (netmap_tx_irq(ifp, txr->me))
		return;
#endif /* DEV_NETMAP */

	/* No work, make sure hang detection is disabled */
        if (txr->tx_avail == adapter->num_tx_desc) {
		txr->busy = EM_TX_IDLE;
                return;
	}

	processed = 0;
        first = txr->next_to_clean;
        tx_desc = &txr->tx_base[first];
        tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
        eop_desc = &txr->tx_base[last];

	/*
	 * What this does is get the index of the
	 * first descriptor AFTER the EOP of the
	 * first packet, that way we can do the
	 * simple comparison on the inner while loop.
	 */
	if (++last == adapter->num_tx_desc)
 		last = 0;
	done = last;

	/* Pull in the latest hardware write-backs before reading status. */
        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_POSTREAD);

	/* Outer loop: one iteration per completed packet (DD set on EOP). */
        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
                	tx_desc->upper.data = 0;
                	tx_desc->lower.data = 0;
                	tx_desc->buffer_addr = 0;
                	++txr->tx_avail;
			++processed;

			/* Only slots holding an mbuf need unmap and free. */
			if (tx_buffer->m_head) {
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
                        	m_freem(tx_buffer->m_head);
                        	tx_buffer->m_head = NULL;
                	}
			tx_buffer->next_eop = -1;

	                if (++first == adapter->num_tx_desc)
				first = 0;

	                tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
        		eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			break;
        }
        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

        txr->next_to_clean = first;

	/*
	** Hang detection: we know there's work outstanding
	** or the entry return would have been taken, so no
	** descriptor processed here indicates a potential hang.
	** The local timer will examine this and do a reset if needed.
	*/
	if (processed == 0) {
		if (txr->busy != EM_TX_HUNG)
			++txr->busy;
	} else /* At least one descriptor was cleaned */
		txr->busy = EM_TX_BUSY; /* note this clears HUNG */

        /*
         * If we have a minimum free, clear IFF_DRV_OACTIVE
         * to tell the stack that it is OK to send packets.
	 * Notice that all writes of OACTIVE happen under the
	 * TX lock which, with a single queue, guarantees
	 * sanity.
         */
        if (txr->tx_avail >= EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
	}

	/* Disable hang detection if all clean */
	if (txr->tx_avail == adapter->num_tx_desc)
		txr->busy = EM_TX_IDLE;
}
4202 
4203 /*********************************************************************
4204  *
4205  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4206  *
4207  **********************************************************************/
static void
em_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter		*adapter = rxr->adapter;
	struct mbuf		*m;
	bus_dma_segment_t	segs;
	struct em_rxbuffer	*rxbuf;
	int			i, j, error, nsegs;
	bool			cleaned = FALSE;

	/* i walks the slots being refreshed; j runs one slot ahead. */
	i = j = rxr->next_to_refresh;
	/*
	** Get one descriptor beyond
	** our work mark to control
	** the loop.
	*/
	if (++j == adapter->num_rx_desc)
		j = 0;

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->m_head == NULL) {
			m = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, adapter->rx_mbuf_sz);
			/*
			** If we have a temporary resource shortage
			** that causes a failure, just abort refresh
			** for now, we will return to this point when
			** reinvoked from em_rxeof.
			*/
			if (m == NULL)
				goto update;
		} else
			m = rxbuf->m_head;

		/* Reset the (possibly recycled) mbuf to a full cluster. */
		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
		m->m_flags |= M_PKTHDR;
		m->m_data = m->m_ext.ext_buf;

		/* Use bus_dma machinery to setup the memory mapping  */
		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(m);
			rxbuf->m_head = NULL;
			goto update;
		}
		/* Hand the buffer to hardware via the ring descriptor. */
		rxbuf->m_head = m;
		rxbuf->paddr = segs.ds_addr;
		bus_dmamap_sync(rxr->rxtag,
		    rxbuf->map, BUS_DMASYNC_PREREAD);
		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
		cleaned = TRUE;

		i = j; /* Next is precalulated for us */
		rxr->next_to_refresh = i;
		/* Calculate next controlling index */
		if (++j == adapter->num_rx_desc)
			j = 0;
	}
update:
	/*
	** Update the tail pointer only if,
	** and as far as we have refreshed.
	*/
	if (cleaned)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_RDT(rxr->me), rxr->next_to_refresh);

	return;
}
4281 
4282 
4283 /*********************************************************************
4284  *
4285  *  Allocate memory for rx_buffer structures. Since we use one
4286  *  rx_buffer per received packet, the maximum number of rx_buffer's
4287  *  that we'll need is equal to the number of receive descriptors
4288  *  that we've allocated.
4289  *
4290  **********************************************************************/
4291 static int
4292 em_allocate_receive_buffers(struct rx_ring *rxr)
4293 {
4294 	struct adapter		*adapter = rxr->adapter;
4295 	device_t		dev = adapter->dev;
4296 	struct em_rxbuffer	*rxbuf;
4297 	int			error;
4298 
4299 	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4300 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4301 	if (rxr->rx_buffers == NULL) {
4302 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4303 		return (ENOMEM);
4304 	}
4305 
4306 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4307 				1, 0,			/* alignment, bounds */
4308 				BUS_SPACE_MAXADDR,	/* lowaddr */
4309 				BUS_SPACE_MAXADDR,	/* highaddr */
4310 				NULL, NULL,		/* filter, filterarg */
4311 				MJUM9BYTES,		/* maxsize */
4312 				1,			/* nsegments */
4313 				MJUM9BYTES,		/* maxsegsize */
4314 				0,			/* flags */
4315 				NULL,			/* lockfunc */
4316 				NULL,			/* lockarg */
4317 				&rxr->rxtag);
4318 	if (error) {
4319 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4320 		    __func__, error);
4321 		goto fail;
4322 	}
4323 
4324 	rxbuf = rxr->rx_buffers;
4325 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4326 		rxbuf = &rxr->rx_buffers[i];
4327 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4328 		if (error) {
4329 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4330 			    __func__, error);
4331 			goto fail;
4332 		}
4333 	}
4334 
4335 	return (0);
4336 
4337 fail:
4338 	em_free_receive_structures(adapter);
4339 	return (error);
4340 }
4341 
4342 
4343 /*********************************************************************
4344  *
4345  *  Initialize a receive ring and its buffers.
4346  *
4347  **********************************************************************/
static int
em_setup_receive_ring(struct rx_ring *rxr)
{
	struct	adapter 	*adapter = rxr->adapter;
	struct em_rxbuffer	*rxbuf;
	bus_dma_segment_t	seg[1];
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_slot *slot;
	struct netmap_adapter *na = netmap_getna(adapter->ifp);
#endif


	/* Clear the ring contents */
	EM_RX_LOCK(rxr);
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);
#ifdef DEV_NETMAP
	/* Non-NULL slot means a netmap client owns the ring's buffers. */
	slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif

	/*
	** Free current RX buffer structs and their mbufs
	*/
	for (int i = 0; i < adapter->num_rx_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->m_head != NULL) {
			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL; /* mark as freed */
		}
	}

	/* Now replenish the mbufs */
        for (int j = 0; j != adapter->num_rx_desc; ++j) {
		rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
		/* Map the netmap-owned buffer instead of a fresh mbuf. */
		if (slot) {
			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + si, &paddr);
			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
			continue;
		}
#endif /* DEV_NETMAP */
		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (rxbuf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;

		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
		    rxbuf->map, rxbuf->m_head, seg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL;
			goto fail;
		}
		bus_dmamap_sync(rxr->rxtag,
		    rxbuf->map, BUS_DMASYNC_PREREAD);

		rxbuf->paddr = seg[0].ds_addr;
		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
	}
	rxr->next_to_check = 0;
	rxr->next_to_refresh = 0;
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/* NOTE: the success path falls through here with error == 0. */
fail:
	EM_RX_UNLOCK(rxr);
	return (error);
}
4433 
4434 /*********************************************************************
4435  *
4436  *  Initialize all receive rings.
4437  *
4438  **********************************************************************/
4439 static int
4440 em_setup_receive_structures(struct adapter *adapter)
4441 {
4442 	struct rx_ring *rxr = adapter->rx_rings;
4443 	int q;
4444 
4445 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4446 		if (em_setup_receive_ring(rxr))
4447 			goto fail;
4448 
4449 	return (0);
4450 fail:
4451 	/*
4452 	 * Free RX buffers allocated so far, we will only handle
4453 	 * the rings that completed, the failing case will have
4454 	 * cleaned up for itself. 'q' failed, so its the terminus.
4455 	 */
4456 	for (int i = 0; i < q; ++i) {
4457 		rxr = &adapter->rx_rings[i];
4458 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4459 			struct em_rxbuffer *rxbuf;
4460 			rxbuf = &rxr->rx_buffers[n];
4461 			if (rxbuf->m_head != NULL) {
4462 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4463 			  	  BUS_DMASYNC_POSTREAD);
4464 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4465 				m_freem(rxbuf->m_head);
4466 				rxbuf->m_head = NULL;
4467 			}
4468 		}
4469 		rxr->next_to_check = 0;
4470 		rxr->next_to_refresh = 0;
4471 	}
4472 
4473 	return (ENOBUFS);
4474 }
4475 
4476 /*********************************************************************
4477  *
4478  *  Free all receive rings.
4479  *
4480  **********************************************************************/
4481 static void
4482 em_free_receive_structures(struct adapter *adapter)
4483 {
4484 	struct rx_ring *rxr = adapter->rx_rings;
4485 
4486 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4487 		em_free_receive_buffers(rxr);
4488 		/* Free the ring memory as well */
4489 		em_dma_free(adapter, &rxr->rxdma);
4490 		EM_RX_LOCK_DESTROY(rxr);
4491 	}
4492 
4493 	free(adapter->rx_rings, M_DEVBUF);
4494 }
4495 
4496 
4497 /*********************************************************************
4498  *
4499  *  Free receive ring data structures
4500  *
4501  **********************************************************************/
4502 static void
4503 em_free_receive_buffers(struct rx_ring *rxr)
4504 {
4505 	struct adapter		*adapter = rxr->adapter;
4506 	struct em_rxbuffer	*rxbuf = NULL;
4507 
4508 	INIT_DEBUGOUT("free_receive_buffers: begin");
4509 
4510 	if (rxr->rx_buffers != NULL) {
4511 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4512 			rxbuf = &rxr->rx_buffers[i];
4513 			if (rxbuf->map != NULL) {
4514 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4515 				    BUS_DMASYNC_POSTREAD);
4516 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4517 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4518 			}
4519 			if (rxbuf->m_head != NULL) {
4520 				m_freem(rxbuf->m_head);
4521 				rxbuf->m_head = NULL;
4522 			}
4523 		}
4524 		free(rxr->rx_buffers, M_DEVBUF);
4525 		rxr->rx_buffers = NULL;
4526 		rxr->next_to_check = 0;
4527 		rxr->next_to_refresh = 0;
4528 	}
4529 
4530 	if (rxr->rxtag != NULL) {
4531 		bus_dma_tag_destroy(rxr->rxtag);
4532 		rxr->rxtag = NULL;
4533 	}
4534 
4535 	return;
4536 }
4537 
4538 
4539 /*********************************************************************
4540  *
4541  *  Enable receive unit.
4542  *
4543  **********************************************************************/
4544 
static void
em_initialize_receive_unit(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	if_t ifp = adapter->ifp;
	struct e1000_hw	*hw = &adapter->hw;
	u32	rctl, rxcsum, rfctl;

	INIT_DEBUGOUT("em_initialize_receive_units: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	rctl = E1000_READ_REG(hw, E1000_RCTL);
	/* Do not disable if ever enabled on this hardware */
	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	/* Setup the Receive Control Register */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Do not store bad packets */
	rctl &= ~E1000_RCTL_SBP;

	/* Enable Long Packet receive */
	if (if_getmtu(ifp) > ETHERMTU)
		rctl |= E1000_RCTL_LPE;
	else
		rctl &= ~E1000_RCTL_LPE;

        /* Strip the CRC */
        if (!em_disable_crc_stripping)
		rctl |= E1000_RCTL_SECRC;

	/* Program the RX interrupt delay timers from the tunables. */
	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
	    adapter->rx_abs_int_delay.value);

	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
	    adapter->rx_int_delay.value);
	/*
	 * Set the interrupt throttling rate. Value is calculated
	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
	 */
	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);

	/* Use extended rx descriptor formats */
	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
	rfctl |= E1000_RFCTL_EXTEN;
	/*
	** When using MSIX interrupts we need to throttle
	** using the EITR register (82574 only)
	*/
	if (hw->mac.type == e1000_82574) {
		for (int i = 0; i < 4; i++)
			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
			    DEFAULT_ITR);
		/* Disable accelerated acknowledge */
		rfctl |= E1000_RFCTL_ACK_DIS;
	}
	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);

	/* Enable/disable hardware RX checksum per interface capability. */
	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
#ifdef EM_MULTIQUEUE
		rxcsum |= E1000_RXCSUM_TUOFL |
			  E1000_RXCSUM_IPOFL |
			  E1000_RXCSUM_PCSD;
#else
		rxcsum |= E1000_RXCSUM_TUOFL;
#endif
	} else
		rxcsum &= ~E1000_RXCSUM_TUOFL;

	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);

#ifdef EM_MULTIQUEUE
#define RSSKEYLEN 10
	if (adapter->num_queues > 1) {
		uint8_t  rss_key[4 * RSSKEYLEN];
		uint32_t reta = 0;
		int i;

		/*
		* Configure RSS key
		*/
		arc4rand(rss_key, sizeof(rss_key), 0);
		for (i = 0; i < RSSKEYLEN; ++i) {
			uint32_t rssrk = 0;

			rssrk = EM_RSSRK_VAL(rss_key, i);
			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
		}

		/*
		* Configure RSS redirect table in following fashion:
		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
		*/
		for (i = 0; i < sizeof(reta); ++i) {
			uint32_t q;

			q = (i % adapter->num_queues) << 7;
			reta |= q << (8 * i);
		}

		/* Same 4-entry pattern replicated across all RETA regs. */
		for (i = 0; i < 32; ++i) {
			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
		}

		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
				E1000_MRQC_RSS_FIELD_IPV4_TCP |
				E1000_MRQC_RSS_FIELD_IPV4 |
				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
				E1000_MRQC_RSS_FIELD_IPV6_EX |
				E1000_MRQC_RSS_FIELD_IPV6);
	}
#endif
	/*
	** XXX TEMPORARY WORKAROUND: on some systems with 82573
	** long latencies are observed, like Lenovo X60. This
	** change eliminates the problem, but since having positive
	** values in RDTR is a known source of problems on other
	** platforms another solution is being sought.
	*/
	if (hw->mac.type == e1000_82573)
		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		/* Setup the Base and Length of the Rx Descriptor Ring */
		u64 bus_addr = rxr->rxdma.dma_paddr;
		u32 rdt = adapter->num_rx_desc - 1; /* default */

		E1000_WRITE_REG(hw, E1000_RDLEN(i),
		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
		/* Setup the Head and Tail Descriptor Pointers */
		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
#ifdef DEV_NETMAP
		/*
		 * an init() while a netmap client is active must
		 * preserve the rx buffers passed to userspace.
		 */
		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
			struct netmap_adapter *na = netmap_getna(adapter->ifp);
			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
		}
#endif /* DEV_NETMAP */
		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
	}

	/*
	 * Set PTHRESH for improved jumbo performance
	 * According to 10.2.5.11 of Intel 82574 Datasheet,
	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
	 * Only write to RXDCTL(1) if there is a need for different
	 * settings.
	 */
	if (((adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_pch2lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) &&
	    (if_getmtu(ifp) > ETHERMTU)) {
		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
	} else if (adapter->hw.mac.type == e1000_82574) {
		for (int i = 0; i < adapter->num_queues; i++) {
			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));

			rxdctl |= 0x20; /* PTHRESH */
			rxdctl |= 4 << 8; /* HTHRESH */
			rxdctl |= 4 << 16;/* WTHRESH */
			rxdctl |= 1 << 24; /* Switch to granularity */
			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
		}
	}

	if (adapter->hw.mac.type >= e1000_pch2lan) {
		if (if_getmtu(ifp) > ETHERMTU)
			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
		else
			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
	}

        /* Make sure VLAN Filters are off */
        rctl &= ~E1000_RCTL_VFE;

	/* Select buffer size bits to match the configured mbuf size. */
	if (adapter->rx_mbuf_sz == MCLBYTES)
		rctl |= E1000_RCTL_SZ_2048;
	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;

	/* ensure we clear use DTYPE of 00 here */
	rctl &= ~0x00000C00;
	/* Write out the settings */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	return;
}
4748 
4749 
4750 /*********************************************************************
4751  *
4752  *  This routine executes in interrupt context. It replenishes
4753  *  the mbufs in the descriptor and sends data which has been
4754  *  dma'ed into host memory to upper layer.
4755  *
4756  *  We loop at most count times if count is > 0, or until done if
4757  *  count < 0.
4758  *
4759  *  For polling we also now return the number of cleaned packets
4760  *********************************************************************/
static bool
em_rxeof(struct rx_ring *rxr, int count, int *done)
{
	struct adapter		*adapter = rxr->adapter;
	if_t ifp = adapter->ifp;
	struct mbuf		*mp, *sendmp;
	u32			status = 0;
	u16 			len;
	int			i, processed, rxdone = 0;
	bool			eop;
	union e1000_rx_desc_extended	*cur;

	EM_RX_LOCK(rxr);

	/* Sync the ring */
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);


#ifdef DEV_NETMAP
	/* If netmap owns this ring, hand the interrupt to it and bail. */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		EM_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	/*
	 * Walk descriptors starting at next_to_check.  "processed" counts
	 * descriptors consumed since the last mbuf refresh; "rxdone" counts
	 * all descriptors handled and is reported via *done for polling.
	 * A negative count loops until a not-yet-ready descriptor is found.
	 */
	for (i = rxr->next_to_check, processed = 0; count != 0;) {
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;

		cur = &rxr->rx_base[i];
		status = le32toh(cur->wb.upper.status_error);
		mp = sendmp = NULL;

		/* Hardware has not written this descriptor back yet: done. */
		if ((status & E1000_RXD_STAT_DD) == 0)
			break;

		len = le16toh(cur->wb.upper.length);
		eop = (status & E1000_RXD_STAT_EOP) != 0;

		/*
		 * Drop frames with hardware-reported errors, and keep
		 * dropping the remaining fragments of a discarded frame
		 * until EOP is seen.
		 */
		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
		    (rxr->discard == TRUE)) {
			adapter->dropped_pkts++;
			++rxr->rx_discarded;
			if (!eop) /* Catch subsequent segs */
				rxr->discard = TRUE;
			else
				rxr->discard = FALSE;
			em_rx_discard(rxr, i);
			goto next_desc;
		}
		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);

		/* Assign correct length to the current fragment */
		mp = rxr->rx_buffers[i].m_head;
		mp->m_len = len;

		/* Trigger for refresh */
		rxr->rx_buffers[i].m_head = NULL;

		/* First segment? */
		if (rxr->fmp == NULL) {
			mp->m_pkthdr.len = len;
			rxr->fmp = rxr->lmp = mp;
		} else {
			/* Chain mbuf's together */
			mp->m_flags &= ~M_PKTHDR;
			rxr->lmp->m_next = mp;
			rxr->lmp = mp;
			rxr->fmp->m_pkthdr.len += len;
		}

		if (eop) {
			/* Complete frame assembled: prepare to pass it up. */
			--count;
			sendmp = rxr->fmp;
			if_setrcvif(sendmp, ifp);
			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
			em_receive_checksum(status, sendmp);
#ifndef __NO_STRICT_ALIGNMENT
			if (adapter->hw.mac.max_frame_size >
			    (MCLBYTES - ETHER_ALIGN) &&
			    em_fixup_rx(rxr) != 0)
				goto skip;
#endif
			if (status & E1000_RXD_STAT_VP) {
				if_setvtag(sendmp,
				    le16toh(cur->wb.upper.vlan));
				sendmp->m_flags |= M_VLANTAG;
			}
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
			rxr->fmp = rxr->lmp = NULL;
		}
next_desc:
		/* Sync the ring */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		/* Zero out the receive descriptors status. */
		cur->wb.upper.status_error &= htole32(~0xFF);
		++rxdone;	/* cumulative for POLL */
		++processed;

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;

		/*
		 * Send to the stack.  The ring lock is dropped around
		 * if_input(), so next_to_check is re-loaded afterwards in
		 * case the ring moved while we were unlocked.
		 */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			EM_RX_UNLOCK(rxr);
			if_input(ifp, sendmp);
			EM_RX_LOCK(rxr);
			i = rxr->next_to_check;
		}

		/* Only refresh mbufs every 8 descriptors */
		if (processed == 8) {
			em_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Catch any remaining refresh work */
	if (e1000_rx_unrefreshed(rxr))
		em_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;
	if (done != NULL)
		*done = rxdone;
	EM_RX_UNLOCK(rxr);

	/* TRUE when the last descriptor examined was ready (more work). */
	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
}
4896 
4897 static __inline void
4898 em_rx_discard(struct rx_ring *rxr, int i)
4899 {
4900 	struct em_rxbuffer	*rbuf;
4901 
4902 	rbuf = &rxr->rx_buffers[i];
4903 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4904 
4905 	/* Free any previous pieces */
4906 	if (rxr->fmp != NULL) {
4907 		rxr->fmp->m_flags |= M_PKTHDR;
4908 		m_freem(rxr->fmp);
4909 		rxr->fmp = NULL;
4910 		rxr->lmp = NULL;
4911 	}
4912 	/*
4913 	** Free buffer and allow em_refresh_mbufs()
4914 	** to clean up and recharge buffer.
4915 	*/
4916 	if (rbuf->m_head) {
4917 		m_free(rbuf->m_head);
4918 		rbuf->m_head = NULL;
4919 	}
4920 	return;
4921 }
4922 
4923 #ifndef __NO_STRICT_ALIGNMENT
4924 /*
4925  * When jumbo frames are enabled we should realign entire payload on
4926  * architecures with strict alignment. This is serious design mistake of 8254x
4927  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4928  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4929  * payload. On architecures without strict alignment restrictions 8254x still
4930  * performs unaligned memory access which would reduce the performance too.
4931  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4932  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4933  * existing mbuf chain.
4934  *
4935  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4936  * not used at all on architectures with strict alignment.
4937  */
4938 static int
4939 em_fixup_rx(struct rx_ring *rxr)
4940 {
4941 	struct adapter *adapter = rxr->adapter;
4942 	struct mbuf *m, *n;
4943 	int error;
4944 
4945 	error = 0;
4946 	m = rxr->fmp;
4947 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4948 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4949 		m->m_data += ETHER_HDR_LEN;
4950 	} else {
4951 		MGETHDR(n, M_NOWAIT, MT_DATA);
4952 		if (n != NULL) {
4953 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4954 			m->m_data += ETHER_HDR_LEN;
4955 			m->m_len -= ETHER_HDR_LEN;
4956 			n->m_len = ETHER_HDR_LEN;
4957 			M_MOVE_PKTHDR(n, m);
4958 			n->m_next = m;
4959 			rxr->fmp = n;
4960 		} else {
4961 			adapter->dropped_pkts++;
4962 			m_freem(rxr->fmp);
4963 			rxr->fmp = NULL;
4964 			error = ENOMEM;
4965 		}
4966 	}
4967 
4968 	return (error);
4969 }
4970 #endif
4971 
4972 static void
4973 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4974 {
4975 	rxd->read.buffer_addr = htole64(rxbuf->paddr);
4976 	/* DD bits must be cleared */
4977 	rxd->wb.upper.status_error= 0;
4978 }
4979 
4980 /*********************************************************************
4981  *
4982  *  Verify that the hardware indicated that the checksum is valid.
4983  *  Inform the stack about the status of checksum so that stack
4984  *  doesn't spend time verifying the checksum.
4985  *
4986  *********************************************************************/
4987 static void
4988 em_receive_checksum(uint32_t status, struct mbuf *mp)
4989 {
4990 	mp->m_pkthdr.csum_flags = 0;
4991 
4992 	/* Ignore Checksum bit is set */
4993 	if (status & E1000_RXD_STAT_IXSM)
4994 		return;
4995 
4996 	/* If the IP checksum exists and there is no IP Checksum error */
4997 	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
4998 		E1000_RXD_STAT_IPCS) {
4999 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5000 	}
5001 
5002 	/* TCP or UDP checksum */
5003 	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5004 	    E1000_RXD_STAT_TCPCS) {
5005 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5006 		mp->m_pkthdr.csum_data = htons(0xffff);
5007 	}
5008 	if (status & E1000_RXD_STAT_UDPCS) {
5009 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5010 		mp->m_pkthdr.csum_data = htons(0xffff);
5011 	}
5012 }
5013 
5014 /*
5015  * This routine is run via an vlan
5016  * config EVENT
5017  */
5018 static void
5019 em_register_vlan(void *arg, if_t ifp, u16 vtag)
5020 {
5021 	struct adapter	*adapter = if_getsoftc(ifp);
5022 	u32		index, bit;
5023 
5024 	if ((void*)adapter !=  arg)   /* Not our event */
5025 		return;
5026 
5027 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5028                 return;
5029 
5030 	EM_CORE_LOCK(adapter);
5031 	index = (vtag >> 5) & 0x7F;
5032 	bit = vtag & 0x1F;
5033 	adapter->shadow_vfta[index] |= (1 << bit);
5034 	++adapter->num_vlans;
5035 	/* Re-init to load the changes */
5036 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5037 		em_init_locked(adapter);
5038 	EM_CORE_UNLOCK(adapter);
5039 }
5040 
5041 /*
5042  * This routine is run via an vlan
5043  * unconfig EVENT
5044  */
5045 static void
5046 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5047 {
5048 	struct adapter	*adapter = if_getsoftc(ifp);
5049 	u32		index, bit;
5050 
5051 	if (adapter != arg)
5052 		return;
5053 
5054 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5055                 return;
5056 
5057 	EM_CORE_LOCK(adapter);
5058 	index = (vtag >> 5) & 0x7F;
5059 	bit = vtag & 0x1F;
5060 	adapter->shadow_vfta[index] &= ~(1 << bit);
5061 	--adapter->num_vlans;
5062 	/* Re-init to load the changes */
5063 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5064 		em_init_locked(adapter);
5065 	EM_CORE_UNLOCK(adapter);
5066 }
5067 
5068 static void
5069 em_setup_vlan_hw_support(struct adapter *adapter)
5070 {
5071 	struct e1000_hw *hw = &adapter->hw;
5072 	u32             reg;
5073 
5074 	/*
5075 	** We get here thru init_locked, meaning
5076 	** a soft reset, this has already cleared
5077 	** the VFTA and other state, so if there
5078 	** have been no vlan's registered do nothing.
5079 	*/
5080 	if (adapter->num_vlans == 0)
5081                 return;
5082 
5083 	/*
5084 	** A soft reset zero's out the VFTA, so
5085 	** we need to repopulate it now.
5086 	*/
5087 	for (int i = 0; i < EM_VFTA_SIZE; i++)
5088                 if (adapter->shadow_vfta[i] != 0)
5089 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5090                             i, adapter->shadow_vfta[i]);
5091 
5092 	reg = E1000_READ_REG(hw, E1000_CTRL);
5093 	reg |= E1000_CTRL_VME;
5094 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5095 
5096 	/* Enable the Filter Table */
5097 	reg = E1000_READ_REG(hw, E1000_RCTL);
5098 	reg &= ~E1000_RCTL_CFIEN;
5099 	reg |= E1000_RCTL_VFE;
5100 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
5101 }
5102 
5103 static void
5104 em_enable_intr(struct adapter *adapter)
5105 {
5106 	struct e1000_hw *hw = &adapter->hw;
5107 	u32 ims_mask = IMS_ENABLE_MASK;
5108 
5109 	if (hw->mac.type == e1000_82574) {
5110 		E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
5111 		ims_mask |= adapter->ims;
5112 	}
5113 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5114 }
5115 
5116 static void
5117 em_disable_intr(struct adapter *adapter)
5118 {
5119 	struct e1000_hw *hw = &adapter->hw;
5120 
5121 	if (hw->mac.type == e1000_82574)
5122 		E1000_WRITE_REG(hw, EM_EIAC, 0);
5123 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5124 }
5125 
5126 /*
5127  * Bit of a misnomer, what this really means is
5128  * to enable OS management of the system... aka
5129  * to disable special hardware management features
5130  */
static void
em_init_manageability(struct adapter *adapter)
{
	/* A shared code workaround */
#define E1000_82542_MANC2H E1000_MANC2H
	if (adapter->has_manage) {
		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);

		/* disable hardware interception of ARP */
		manc &= ~(E1000_MANC_ARP_EN);

                /* enable receiving management packets to the host */
		manc |= E1000_MANC_EN_MNG2HOST;
#define E1000_MNG2HOST_PORT_623 (1 << 5)
#define E1000_MNG2HOST_PORT_664 (1 << 6)
		/* Forward management traffic on ports 623/664 to the host. */
		manc2h |= E1000_MNG2HOST_PORT_623;
		manc2h |= E1000_MNG2HOST_PORT_664;
		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
	}
}
5153 
5154 /*
5155  * Give control back to hardware management
5156  * controller if there is one.
5157  */
5158 static void
5159 em_release_manageability(struct adapter *adapter)
5160 {
5161 	if (adapter->has_manage) {
5162 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5163 
5164 		/* re-enable hardware interception of ARP */
5165 		manc |= E1000_MANC_ARP_EN;
5166 		manc &= ~E1000_MANC_EN_MNG2HOST;
5167 
5168 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5169 	}
5170 }
5171 
5172 /*
5173  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5174  * For ASF and Pass Through versions of f/w this means
5175  * that the driver is loaded. For AMT version type f/w
5176  * this means that the network i/f is open.
5177  */
5178 static void
5179 em_get_hw_control(struct adapter *adapter)
5180 {
5181 	u32 ctrl_ext, swsm;
5182 
5183 	if (adapter->hw.mac.type == e1000_82573) {
5184 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5185 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5186 		    swsm | E1000_SWSM_DRV_LOAD);
5187 		return;
5188 	}
5189 	/* else */
5190 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5191 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5192 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5193 	return;
5194 }
5195 
5196 /*
5197  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5198  * For ASF and Pass Through versions of f/w this means that
5199  * the driver is no longer loaded. For AMT versions of the
5200  * f/w this means that the network i/f is closed.
5201  */
5202 static void
5203 em_release_hw_control(struct adapter *adapter)
5204 {
5205 	u32 ctrl_ext, swsm;
5206 
5207 	if (!adapter->has_manage)
5208 		return;
5209 
5210 	if (adapter->hw.mac.type == e1000_82573) {
5211 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5212 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5213 		    swsm & ~E1000_SWSM_DRV_LOAD);
5214 		return;
5215 	}
5216 	/* else */
5217 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5218 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5219 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5220 	return;
5221 }
5222 
5223 static int
5224 em_is_valid_ether_addr(u8 *addr)
5225 {
5226 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5227 
5228 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5229 		return (FALSE);
5230 	}
5231 
5232 	return (TRUE);
5233 }
5234 
5235 /*
5236 ** Parse the interface capabilities with regard
5237 ** to both system management and wake-on-lan for
5238 ** later use.
5239 */
static void
em_get_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	u16		eeprom_data = 0, device_id, apme_mask;

	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
	apme_mask = EM_EEPROM_APME;

	/*
	 * Determine where the APME (wake enable) bit lives and read it:
	 * an NVM init-control word on the older MACs, the WUC register
	 * on ICH/PCH parts.
	 */
	switch (adapter->hw.mac.type) {
	case e1000_82573:
	case e1000_82583:
		adapter->has_amt = TRUE;
		/* Falls thru */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		/* Port B (function 1) has its own init-control word. */
		if (adapter->hw.bus.func == 1) {
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
			break;
		} else
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	case e1000_ich8lan:
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
	case e1000_pch2lan:
		apme_mask = E1000_WUC_APME;
		adapter->has_amt = TRUE;
		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
		break;
	default:
		e1000_read_nvm(&adapter->hw,
		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	}
	/* Default WoL triggers: magic packet and multicast. */
	if (eeprom_data & apme_mask)
		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
	/*
         * We have the eeprom settings, now apply the special cases
         * where the eeprom may be wrong or the board won't support
         * wake on lan on a particular port
	 */
	device_id = pci_get_device(dev);
        switch (device_id) {
	case E1000_DEV_ID_82571EB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82571EB_QUAD_COPPER:
	case E1000_DEV_ID_82571EB_QUAD_FIBER:
	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
                /* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
                break;
	}
	return;
}
5308 
5309 
5310 /*
5311  * Enable PCI Wake On Lan capability
5312  */
static void
em_enable_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;
	u32		pmc, ctrl, ctrl_ext, rctl;
	u16     	status;

	/* Bail if the device has no PCI power-management capability. */
	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
		return;

	/* Advertise the wakeup capability */
	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);

	/* ICH/PCH parts need additional suspend-time workarounds. */
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan))
		e1000_suspend_workarounds_ich8lan(&adapter->hw);

	/* Keep the laser running on Fiber adapters */
	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
	}

	/*
	** Determine type of Wakeup: note that wol
	** is set with all bits on by default.
	*/
	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
		adapter->wol &= ~E1000_WUFC_MAG;

	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
		adapter->wol &= ~E1000_WUFC_MC;
	else {
		/* Multicast wakeup needs multicast promiscuous receive. */
		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
	}

	/* PCH parts program wake configuration through the PHY. */
	if ((adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_pch2lan)) {
		if (em_enable_phy_wakeup(adapter))
			return;
	} else {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
	}

	if (adapter->hw.phy.type == e1000_phy_igp_3)
		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);

        /* Request PME */
        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (if_getcapenable(ifp) & IFCAP_WOL)
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);

	return;
}
5380 
5381 /*
5382 ** WOL in the newer chipset interfaces (pchlan)
5383 ** require thing to be copied into the phy
5384 */
static int
em_enable_phy_wakeup(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mreg, ret = 0;
	u16 preg;

	/* copy MAC RARs to PHY RARs */
	e1000_copy_rx_addrs_to_phy_ich8lan(hw);

	/* copy MAC MTA to PHY MTA */
	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
		/* Each 32-bit MTA entry spans two 16-bit PHY registers. */
		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
		    (u16)((mreg >> 16) & 0xFFFF));
	}

	/* configure PHY Rx Control register */
	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
	mreg = E1000_READ_REG(hw, E1000_RCTL);
	/* Mirror the relevant MAC RCTL bits into the PHY-side copy. */
	if (mreg & E1000_RCTL_UPE)
		preg |= BM_RCTL_UPE;
	if (mreg & E1000_RCTL_MPE)
		preg |= BM_RCTL_MPE;
	preg &= ~(BM_RCTL_MO_MASK);
	if (mreg & E1000_RCTL_MO_3)
		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
				<< BM_RCTL_MO_SHIFT);
	if (mreg & E1000_RCTL_BAM)
		preg |= BM_RCTL_BAM;
	if (mreg & E1000_RCTL_PMCF)
		preg |= BM_RCTL_PMCF;
	mreg = E1000_READ_REG(hw, E1000_CTRL);
	if (mreg & E1000_CTRL_RFCE)
		preg |= BM_RCTL_RFCE;
	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);

	/* enable PHY wakeup in MAC register */
	E1000_WRITE_REG(hw, E1000_WUC,
	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);

	/* configure and enable PHY wakeup in PHY registers */
	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);

	/* activate PHY wakeup */
	ret = hw->phy.ops.acquire(hw);
	if (ret) {
		printf("Could not acquire PHY\n");
		return ret;
	}
	/* Select the wakeup-control page and set the host-wakeup bits. */
	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
	if (ret) {
		printf("Could not read PHY page 769\n");
		goto out;
	}
	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
	if (ret)
		printf("Could not set PHY Host Wakeup bit\n");
out:
	/* Always drop the PHY semaphore acquired above. */
	hw->phy.ops.release(hw);

	return ret;
}
5454 
5455 static void
5456 em_led_func(void *arg, int onoff)
5457 {
5458 	struct adapter	*adapter = arg;
5459 
5460 	EM_CORE_LOCK(adapter);
5461 	if (onoff) {
5462 		e1000_setup_led(&adapter->hw);
5463 		e1000_led_on(&adapter->hw);
5464 	} else {
5465 		e1000_led_off(&adapter->hw);
5466 		e1000_cleanup_led(&adapter->hw);
5467 	}
5468 	EM_CORE_UNLOCK(adapter);
5469 }
5470 
5471 /*
5472 ** Disable the L0S and L1 LINK states
5473 */
5474 static void
5475 em_disable_aspm(struct adapter *adapter)
5476 {
5477 	int		base, reg;
5478 	u16		link_cap,link_ctrl;
5479 	device_t	dev = adapter->dev;
5480 
5481 	switch (adapter->hw.mac.type) {
5482 		case e1000_82573:
5483 		case e1000_82574:
5484 		case e1000_82583:
5485 			break;
5486 		default:
5487 			return;
5488 	}
5489 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5490 		return;
5491 	reg = base + PCIER_LINK_CAP;
5492 	link_cap = pci_read_config(dev, reg, 2);
5493 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5494 		return;
5495 	reg = base + PCIER_LINK_CTL;
5496 	link_ctrl = pci_read_config(dev, reg, 2);
5497 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5498 	pci_write_config(dev, reg, link_ctrl, 2);
5499 	return;
5500 }
5501 
5502 /**********************************************************************
5503  *
5504  *  Update the board statistics counters.
5505  *
5506  **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{

	/* Symbol/sequence error counters only when copper media or link up. */
	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);

	/* For the 64-bit byte counters the low dword must be read first. */
	/* Both registers clear on the read of the high dword */

	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);

	/*
	 * NOTE(review): only the high dwords (TORH/TOTH) are accumulated
	 * here; the corresponding low dwords are never read — confirm
	 * whether this is intended.
	 */
	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

	/* Interrupt Counts */

	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);

	/* Counters present only on 82543 and newer MACs. */
	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	}
}
5597 
5598 static uint64_t
5599 em_get_counter(if_t ifp, ift_counter cnt)
5600 {
5601 	struct adapter *adapter;
5602 
5603 	adapter = if_getsoftc(ifp);
5604 
5605 	switch (cnt) {
5606 	case IFCOUNTER_COLLISIONS:
5607 		return (adapter->stats.colc);
5608 	case IFCOUNTER_IERRORS:
5609 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5610 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5611 		    adapter->stats.ruc + adapter->stats.roc +
5612 		    adapter->stats.mpc + adapter->stats.cexterr);
5613 	case IFCOUNTER_OERRORS:
5614 		return (adapter->stats.ecol + adapter->stats.latecol +
5615 		    adapter->watchdog_events);
5616 	default:
5617 		return (if_get_counter_default(ifp, cnt));
5618 	}
5619 }
5620 
5621 /* Export a single 32-bit register via a read-only sysctl. */
5622 static int
5623 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5624 {
5625 	struct adapter *adapter;
5626 	u_int val;
5627 
5628 	adapter = oidp->oid_arg1;
5629 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5630 	return (sysctl_handle_int(oidp, &val, 0, req));
5631 }
5632 
5633 /*
5634  * Add sysctl variables, one per statistic, to the system.
5635  */
5636 static void
5637 em_add_hw_stats(struct adapter *adapter)
5638 {
5639 	device_t dev = adapter->dev;
5640 
5641 	struct tx_ring *txr = adapter->tx_rings;
5642 	struct rx_ring *rxr = adapter->rx_rings;
5643 
5644 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5645 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5646 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5647 	struct e1000_hw_stats *stats = &adapter->stats;
5648 
5649 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5650 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5651 
5652 #define QUEUE_NAME_LEN 32
5653 	char namebuf[QUEUE_NAME_LEN];
5654 
5655 	/* Driver Statistics */
5656 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5657 			CTLFLAG_RD, &adapter->dropped_pkts,
5658 			"Driver dropped packets");
5659 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5660 			CTLFLAG_RD, &adapter->link_irq,
5661 			"Link MSIX IRQ Handled");
5662 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5663 			 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5664 			 "Defragmenting mbuf chain failed");
5665 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5666 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5667 			"Driver tx dma failure in xmit");
5668 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5669 			CTLFLAG_RD, &adapter->rx_overruns,
5670 			"RX overruns");
5671 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5672 			CTLFLAG_RD, &adapter->watchdog_events,
5673 			"Watchdog timeouts");
5674 
5675 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5676 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5677 			em_sysctl_reg_handler, "IU",
5678 			"Device Control Register");
5679 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5680 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5681 			em_sysctl_reg_handler, "IU",
5682 			"Receiver Control Register");
5683 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5684 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5685 			"Flow Control High Watermark");
5686 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5687 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5688 			"Flow Control Low Watermark");
5689 
5690 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5691 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5692 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5693 					    CTLFLAG_RD, NULL, "TX Queue Name");
5694 		queue_list = SYSCTL_CHILDREN(queue_node);
5695 
5696 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5697 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5698 				E1000_TDH(txr->me),
5699 				em_sysctl_reg_handler, "IU",
5700  				"Transmit Descriptor Head");
5701 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5702 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5703 				E1000_TDT(txr->me),
5704 				em_sysctl_reg_handler, "IU",
5705  				"Transmit Descriptor Tail");
5706 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5707 				CTLFLAG_RD, &txr->tx_irq,
5708 				"Queue MSI-X Transmit Interrupts");
5709 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5710 				CTLFLAG_RD, &txr->no_desc_avail,
5711 				"Queue No Descriptor Available");
5712 
5713 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5714 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5715 					    CTLFLAG_RD, NULL, "RX Queue Name");
5716 		queue_list = SYSCTL_CHILDREN(queue_node);
5717 
5718 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5719 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5720 				E1000_RDH(rxr->me),
5721 				em_sysctl_reg_handler, "IU",
5722 				"Receive Descriptor Head");
5723 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5724 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5725 				E1000_RDT(rxr->me),
5726 				em_sysctl_reg_handler, "IU",
5727 				"Receive Descriptor Tail");
5728 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5729 				CTLFLAG_RD, &rxr->rx_irq,
5730 				"Queue MSI-X Receive Interrupts");
5731 	}
5732 
5733 	/* MAC stats get their own sub node */
5734 
5735 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5736 				    CTLFLAG_RD, NULL, "Statistics");
5737 	stat_list = SYSCTL_CHILDREN(stat_node);
5738 
5739 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5740 			CTLFLAG_RD, &stats->ecol,
5741 			"Excessive collisions");
5742 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5743 			CTLFLAG_RD, &stats->scc,
5744 			"Single collisions");
5745 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5746 			CTLFLAG_RD, &stats->mcc,
5747 			"Multiple collisions");
5748 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5749 			CTLFLAG_RD, &stats->latecol,
5750 			"Late collisions");
5751 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5752 			CTLFLAG_RD, &stats->colc,
5753 			"Collision Count");
5754 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5755 			CTLFLAG_RD, &adapter->stats.symerrs,
5756 			"Symbol Errors");
5757 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5758 			CTLFLAG_RD, &adapter->stats.sec,
5759 			"Sequence Errors");
5760 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5761 			CTLFLAG_RD, &adapter->stats.dc,
5762 			"Defer Count");
5763 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5764 			CTLFLAG_RD, &adapter->stats.mpc,
5765 			"Missed Packets");
5766 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5767 			CTLFLAG_RD, &adapter->stats.rnbc,
5768 			"Receive No Buffers");
5769 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5770 			CTLFLAG_RD, &adapter->stats.ruc,
5771 			"Receive Undersize");
5772 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5773 			CTLFLAG_RD, &adapter->stats.rfc,
5774 			"Fragmented Packets Received ");
5775 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5776 			CTLFLAG_RD, &adapter->stats.roc,
5777 			"Oversized Packets Received");
5778 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5779 			CTLFLAG_RD, &adapter->stats.rjc,
5780 			"Recevied Jabber");
5781 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5782 			CTLFLAG_RD, &adapter->stats.rxerrc,
5783 			"Receive Errors");
5784 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5785 			CTLFLAG_RD, &adapter->stats.crcerrs,
5786 			"CRC errors");
5787 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5788 			CTLFLAG_RD, &adapter->stats.algnerrc,
5789 			"Alignment Errors");
5790 	/* On 82575 these are collision counts */
5791 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5792 			CTLFLAG_RD, &adapter->stats.cexterr,
5793 			"Collision/Carrier extension errors");
5794 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5795 			CTLFLAG_RD, &adapter->stats.xonrxc,
5796 			"XON Received");
5797 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5798 			CTLFLAG_RD, &adapter->stats.xontxc,
5799 			"XON Transmitted");
5800 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5801 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5802 			"XOFF Received");
5803 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5804 			CTLFLAG_RD, &adapter->stats.xofftxc,
5805 			"XOFF Transmitted");
5806 
5807 	/* Packet Reception Stats */
5808 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5809 			CTLFLAG_RD, &adapter->stats.tpr,
5810 			"Total Packets Received ");
5811 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5812 			CTLFLAG_RD, &adapter->stats.gprc,
5813 			"Good Packets Received");
5814 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5815 			CTLFLAG_RD, &adapter->stats.bprc,
5816 			"Broadcast Packets Received");
5817 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5818 			CTLFLAG_RD, &adapter->stats.mprc,
5819 			"Multicast Packets Received");
5820 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5821 			CTLFLAG_RD, &adapter->stats.prc64,
5822 			"64 byte frames received ");
5823 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5824 			CTLFLAG_RD, &adapter->stats.prc127,
5825 			"65-127 byte frames received");
5826 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5827 			CTLFLAG_RD, &adapter->stats.prc255,
5828 			"128-255 byte frames received");
5829 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5830 			CTLFLAG_RD, &adapter->stats.prc511,
5831 			"256-511 byte frames received");
5832 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5833 			CTLFLAG_RD, &adapter->stats.prc1023,
5834 			"512-1023 byte frames received");
5835 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5836 			CTLFLAG_RD, &adapter->stats.prc1522,
5837 			"1023-1522 byte frames received");
5838  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5839  			CTLFLAG_RD, &adapter->stats.gorc,
5840  			"Good Octets Received");
5841 
5842 	/* Packet Transmission Stats */
5843  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5844  			CTLFLAG_RD, &adapter->stats.gotc,
5845  			"Good Octets Transmitted");
5846 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5847 			CTLFLAG_RD, &adapter->stats.tpt,
5848 			"Total Packets Transmitted");
5849 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5850 			CTLFLAG_RD, &adapter->stats.gptc,
5851 			"Good Packets Transmitted");
5852 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5853 			CTLFLAG_RD, &adapter->stats.bptc,
5854 			"Broadcast Packets Transmitted");
5855 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5856 			CTLFLAG_RD, &adapter->stats.mptc,
5857 			"Multicast Packets Transmitted");
5858 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5859 			CTLFLAG_RD, &adapter->stats.ptc64,
5860 			"64 byte frames transmitted ");
5861 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5862 			CTLFLAG_RD, &adapter->stats.ptc127,
5863 			"65-127 byte frames transmitted");
5864 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5865 			CTLFLAG_RD, &adapter->stats.ptc255,
5866 			"128-255 byte frames transmitted");
5867 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5868 			CTLFLAG_RD, &adapter->stats.ptc511,
5869 			"256-511 byte frames transmitted");
5870 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5871 			CTLFLAG_RD, &adapter->stats.ptc1023,
5872 			"512-1023 byte frames transmitted");
5873 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5874 			CTLFLAG_RD, &adapter->stats.ptc1522,
5875 			"1024-1522 byte frames transmitted");
5876 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5877 			CTLFLAG_RD, &adapter->stats.tsctc,
5878 			"TSO Contexts Transmitted");
5879 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5880 			CTLFLAG_RD, &adapter->stats.tsctfc,
5881 			"TSO Contexts Failed");
5882 
5883 
5884 	/* Interrupt Stats */
5885 
5886 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5887 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5888 	int_list = SYSCTL_CHILDREN(int_node);
5889 
5890 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5891 			CTLFLAG_RD, &adapter->stats.iac,
5892 			"Interrupt Assertion Count");
5893 
5894 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5895 			CTLFLAG_RD, &adapter->stats.icrxptc,
5896 			"Interrupt Cause Rx Pkt Timer Expire Count");
5897 
5898 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5899 			CTLFLAG_RD, &adapter->stats.icrxatc,
5900 			"Interrupt Cause Rx Abs Timer Expire Count");
5901 
5902 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5903 			CTLFLAG_RD, &adapter->stats.ictxptc,
5904 			"Interrupt Cause Tx Pkt Timer Expire Count");
5905 
5906 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5907 			CTLFLAG_RD, &adapter->stats.ictxatc,
5908 			"Interrupt Cause Tx Abs Timer Expire Count");
5909 
5910 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5911 			CTLFLAG_RD, &adapter->stats.ictxqec,
5912 			"Interrupt Cause Tx Queue Empty Count");
5913 
5914 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5915 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5916 			"Interrupt Cause Tx Queue Min Thresh Count");
5917 
5918 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5919 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5920 			"Interrupt Cause Rx Desc Min Thresh Count");
5921 
5922 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5923 			CTLFLAG_RD, &adapter->stats.icrxoc,
5924 			"Interrupt Cause Receiver Overrun Count");
5925 }
5926 
5927 /**********************************************************************
5928  *
5929  *  This routine provides a way to dump out the adapter eeprom,
5930  *  often a useful debug/service tool. This only dumps the first
5931  *  32 words, stuff that matters is in that extent.
5932  *
5933  **********************************************************************/
5934 static int
5935 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5936 {
5937 	struct adapter *adapter = (struct adapter *)arg1;
5938 	int error;
5939 	int result;
5940 
5941 	result = -1;
5942 	error = sysctl_handle_int(oidp, &result, 0, req);
5943 
5944 	if (error || !req->newptr)
5945 		return (error);
5946 
5947 	/*
5948 	 * This value will cause a hex dump of the
5949 	 * first 32 16-bit words of the EEPROM to
5950 	 * the screen.
5951 	 */
5952 	if (result == 1)
5953 		em_print_nvm_info(adapter);
5954 
5955 	return (error);
5956 }
5957 
5958 static void
5959 em_print_nvm_info(struct adapter *adapter)
5960 {
5961 	u16	eeprom_data;
5962 	int	i, j, row = 0;
5963 
5964 	/* Its a bit crude, but it gets the job done */
5965 	printf("\nInterface EEPROM Dump:\n");
5966 	printf("Offset\n0x0000  ");
5967 	for (i = 0, j = 0; i < 32; i++, j++) {
5968 		if (j == 8) { /* Make the offset block */
5969 			j = 0; ++row;
5970 			printf("\n0x00%x0  ",row);
5971 		}
5972 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5973 		printf("%04x ", eeprom_data);
5974 	}
5975 	printf("\n");
5976 }
5977 
/*
 * Sysctl handler for the interrupt-delay tunables.  Converts the
 * user-supplied value (microseconds) to device ticks and writes it
 * into the low 16 bits of the register named by info->offset, with
 * special handling for ITR and TIDV.  Returns 0 on success or a
 * sysctl/EINVAL error.
 */
static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	u32 regval;
	int error, usecs, ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	/* Reject values that will not fit in the 16-bit register field. */
	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
	ticks = EM_USECS_TO_TICKS(usecs);
	if (info->offset == E1000_ITR)	/* units are 256ns here */
		ticks *= 4;

	adapter = info->adapter;

	EM_CORE_LOCK(adapter);
	/* Read-modify-write: only the low 16 bits carry the delay. */
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
		break;
	case E1000_TIDV:
		if (ticks == 0) {
			/* Stop setting IDE in the tx descriptor command
			 * template, since no delay interrupt is wanted. */
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_CORE_UNLOCK(adapter);
	return (0);
}
6020 
6021 static void
6022 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6023 	const char *description, struct em_int_delay_info *info,
6024 	int offset, int value)
6025 {
6026 	info->adapter = adapter;
6027 	info->offset = offset;
6028 	info->value = value;
6029 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6030 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6031 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6032 	    info, 0, em_sysctl_int_delay, "I", description);
6033 }
6034 
6035 static void
6036 em_set_sysctl_value(struct adapter *adapter, const char *name,
6037 	const char *description, int *limit, int value)
6038 {
6039 	*limit = value;
6040 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6041 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6042 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6043 }
6044 
6045 
6046 /*
6047 ** Set flow control using sysctl:
6048 ** Flow control values:
6049 **      0 - off
6050 **      1 - rx pause
6051 **      2 - tx pause
6052 **      3 - full
6053 */
6054 static int
6055 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6056 {
6057         int		error;
6058 	static int	input = 3; /* default is full */
6059         struct adapter	*adapter = (struct adapter *) arg1;
6060 
6061         error = sysctl_handle_int(oidp, &input, 0, req);
6062 
6063         if ((error) || (req->newptr == NULL))
6064                 return (error);
6065 
6066 	if (input == adapter->fc) /* no change? */
6067 		return (error);
6068 
6069         switch (input) {
6070                 case e1000_fc_rx_pause:
6071                 case e1000_fc_tx_pause:
6072                 case e1000_fc_full:
6073                 case e1000_fc_none:
6074                         adapter->hw.fc.requested_mode = input;
6075 			adapter->fc = input;
6076                         break;
6077                 default:
6078 			/* Do nothing */
6079 			return (error);
6080         }
6081 
6082         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6083         e1000_force_mac_fc(&adapter->hw);
6084         return (error);
6085 }
6086 
6087 /*
6088 ** Manage Energy Efficient Ethernet:
6089 ** Control values:
6090 **     0/1 - enabled/disabled
6091 */
6092 static int
6093 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6094 {
6095        struct adapter *adapter = (struct adapter *) arg1;
6096        int             error, value;
6097 
6098        value = adapter->hw.dev_spec.ich8lan.eee_disable;
6099        error = sysctl_handle_int(oidp, &value, 0, req);
6100        if (error || req->newptr == NULL)
6101                return (error);
6102        EM_CORE_LOCK(adapter);
6103        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6104        em_init_locked(adapter);
6105        EM_CORE_UNLOCK(adapter);
6106        return (0);
6107 }
6108 
6109 static int
6110 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6111 {
6112 	struct adapter *adapter;
6113 	int error;
6114 	int result;
6115 
6116 	result = -1;
6117 	error = sysctl_handle_int(oidp, &result, 0, req);
6118 
6119 	if (error || !req->newptr)
6120 		return (error);
6121 
6122 	if (result == 1) {
6123 		adapter = (struct adapter *)arg1;
6124 		em_print_debug_info(adapter);
6125         }
6126 
6127 	return (error);
6128 }
6129 
6130 /*
6131 ** This routine is meant to be fluid, add whatever is
6132 ** needed for debugging a problem.  -jfv
6133 */
6134 static void
6135 em_print_debug_info(struct adapter *adapter)
6136 {
6137 	device_t dev = adapter->dev;
6138 	struct tx_ring *txr = adapter->tx_rings;
6139 	struct rx_ring *rxr = adapter->rx_rings;
6140 
6141 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
6142 		printf("Interface is RUNNING ");
6143 	else
6144 		printf("Interface is NOT RUNNING\n");
6145 
6146 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
6147 		printf("and INACTIVE\n");
6148 	else
6149 		printf("and ACTIVE\n");
6150 
6151 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6152 		device_printf(dev, "TX Queue %d ------\n", i);
6153 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6154 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6155 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6156 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6157 		device_printf(dev, "TX descriptors avail = %d\n",
6158 	    		txr->tx_avail);
6159 		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6160 	    		txr->no_desc_avail);
6161 		device_printf(dev, "RX Queue %d ------\n", i);
6162 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6163 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6164 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6165 		device_printf(dev, "RX discarded packets = %ld\n",
6166 	    		rxr->rx_discarded);
6167 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6168 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6169 	}
6170 }
6171 
6172 #ifdef EM_MULTIQUEUE
6173 /*
6174  * 82574 only:
6175  * Write a new value to the EEPROM increasing the number of MSIX
6176  * vectors from 3 to 5, for proper multiqueue support.
6177  */
6178 static void
6179 em_enable_vectors_82574(struct adapter *adapter)
6180 {
6181 	struct e1000_hw *hw = &adapter->hw;
6182 	device_t dev = adapter->dev;
6183 	u16 edata;
6184 
6185 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6186 	printf("Current cap: %#06x\n", edata);
6187 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6188 		device_printf(dev, "Writing to eeprom: increasing "
6189 		    "reported MSIX vectors from 3 to 5...\n");
6190 		edata &= ~(EM_NVM_MSIX_N_MASK);
6191 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6192 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6193 		e1000_update_nvm_checksum(hw);
6194 		device_printf(dev, "Writing to eeprom: done\n");
6195 	}
6196 }
6197 #endif
6198 
6199 #ifdef DDB
6200 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6201 {
6202 	devclass_t	dc;
6203 	int max_em;
6204 
6205 	dc = devclass_find("em");
6206 	max_em = devclass_get_maxunit(dc);
6207 
6208 	for (int index = 0; index < (max_em - 1); index++) {
6209 		device_t dev;
6210 		dev = devclass_get_device(dc, index);
6211 		if (device_get_driver(dev) == &em_driver) {
6212 			struct adapter *adapter = device_get_softc(dev);
6213 			EM_CORE_LOCK(adapter);
6214 			em_init_locked(adapter);
6215 			EM_CORE_UNLOCK(adapter);
6216 		}
6217 	}
6218 }
6219 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6220 {
6221 	devclass_t	dc;
6222 	int max_em;
6223 
6224 	dc = devclass_find("em");
6225 	max_em = devclass_get_maxunit(dc);
6226 
6227 	for (int index = 0; index < (max_em - 1); index++) {
6228 		device_t dev;
6229 		dev = devclass_get_device(dc, index);
6230 		if (device_get_driver(dev) == &em_driver)
6231 			em_print_debug_info(device_get_softc(dev));
6232 	}
6233 
6234 }
6235 #endif
6236