1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69 
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77 
78 #include <net/if_types.h>
79 #include <net/if_vlan_var.h>
80 
81 #include <netinet/in_systm.h>
82 #include <netinet/in.h>
83 #include <netinet/if_ether.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip6.h>
86 #include <netinet/tcp.h>
87 #include <netinet/udp.h>
88 
89 #include <machine/in_cksum.h>
90 #include <dev/led/led.h>
91 #include <dev/pci/pcivar.h>
92 #include <dev/pci/pcireg.h>
93 
94 #include "e1000_api.h"
95 #include "e1000_82571.h"
96 #include "if_em.h"
97 
98 /*********************************************************************
99  *  Driver version:
100  *********************************************************************/
101 char em_driver_version[] = "7.6.1-k";
102 
103 /*********************************************************************
104  *  PCI Device ID Table
105  *
106  *  Used by probe to select the devices to attach to.
107  *  The last field stores an index into em_strings.
108  *  The last entry must be all 0s.
109  *
110  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
111  *********************************************************************/
112 
113 static em_vendor_info_t em_vendor_info_array[] =
114 {
115 	/* Intel(R) PRO/1000 Network Connection */
116 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
117 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
118 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
119 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
120 						PCI_ANY_ID, PCI_ANY_ID, 0},
121 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
122 						PCI_ANY_ID, PCI_ANY_ID, 0},
123 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
124 						PCI_ANY_ID, PCI_ANY_ID, 0},
125 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
126 						PCI_ANY_ID, PCI_ANY_ID, 0},
127 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
128 						PCI_ANY_ID, PCI_ANY_ID, 0},
129 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
130 						PCI_ANY_ID, PCI_ANY_ID, 0},
131 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
132 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
133 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
135 
136 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
137 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
138 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
139 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
140 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
141 						PCI_ANY_ID, PCI_ANY_ID, 0},
142 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
143 						PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
145 						PCI_ANY_ID, PCI_ANY_ID, 0},
146 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
147 						PCI_ANY_ID, PCI_ANY_ID, 0},
148 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
150 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
152 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
176 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
177 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
178 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
179 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
180 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
181 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
182 						PCI_ANY_ID, PCI_ANY_ID, 0},
183 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
184 						PCI_ANY_ID, PCI_ANY_ID, 0},
185 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
186 	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
187 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
188 	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
189 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
190 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
191 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
192                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
193 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
194 	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
195 						PCI_ANY_ID, PCI_ANY_ID, 0},
196 	/* required last entry */
197 	{ 0, 0, 0, 0, 0}
198 };
199 
200 /*********************************************************************
201  *  Table of branding strings for all supported NICs.
202  *********************************************************************/
203 
204 static char *em_strings[] = {
205 	"Intel(R) PRO/1000 Network Connection"
206 };
207 
208 /*********************************************************************
209  *  Function prototypes
210  *********************************************************************/
211 static int	em_probe(device_t);
212 static int	em_attach(device_t);
213 static int	em_detach(device_t);
214 static int	em_shutdown(device_t);
215 static int	em_suspend(device_t);
216 static int	em_resume(device_t);
217 #ifdef EM_MULTIQUEUE
218 static int	em_mq_start(if_t, struct mbuf *);
219 static int	em_mq_start_locked(if_t,
220 		    struct tx_ring *);
221 static void	em_qflush(if_t);
222 #else
223 static void	em_start(if_t);
224 static void	em_start_locked(if_t, struct tx_ring *);
225 #endif
226 static int	em_ioctl(if_t, u_long, caddr_t);
227 static uint64_t	em_get_counter(if_t, ift_counter);
228 static void	em_init(void *);
229 static void	em_init_locked(struct adapter *);
230 static void	em_stop(void *);
231 static void	em_media_status(if_t, struct ifmediareq *);
232 static int	em_media_change(if_t);
233 static void	em_identify_hardware(struct adapter *);
234 static int	em_allocate_pci_resources(struct adapter *);
235 static int	em_allocate_legacy(struct adapter *);
236 static int	em_allocate_msix(struct adapter *);
237 static int	em_allocate_queues(struct adapter *);
238 static int	em_setup_msix(struct adapter *);
239 static void	em_free_pci_resources(struct adapter *);
240 static void	em_local_timer(void *);
241 static void	em_reset(struct adapter *);
242 static int	em_setup_interface(device_t, struct adapter *);
243 static void	em_flush_desc_rings(struct adapter *);
244 
245 static void	em_setup_transmit_structures(struct adapter *);
246 static void	em_initialize_transmit_unit(struct adapter *);
247 static int	em_allocate_transmit_buffers(struct tx_ring *);
248 static void	em_free_transmit_structures(struct adapter *);
249 static void	em_free_transmit_buffers(struct tx_ring *);
250 
251 static int	em_setup_receive_structures(struct adapter *);
252 static int	em_allocate_receive_buffers(struct rx_ring *);
253 static void	em_initialize_receive_unit(struct adapter *);
254 static void	em_free_receive_structures(struct adapter *);
255 static void	em_free_receive_buffers(struct rx_ring *);
256 
257 static void	em_enable_intr(struct adapter *);
258 static void	em_disable_intr(struct adapter *);
259 static void	em_update_stats_counters(struct adapter *);
260 static void	em_add_hw_stats(struct adapter *adapter);
261 static void	em_txeof(struct tx_ring *);
262 static bool	em_rxeof(struct rx_ring *, int, int *);
263 #ifndef __NO_STRICT_ALIGNMENT
264 static int	em_fixup_rx(struct rx_ring *);
265 #endif
266 static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
267 		    const struct em_rxbuffer *rxbuf);
268 static void	em_receive_checksum(uint32_t status, struct mbuf *);
269 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
270 		    struct ip *, u32 *, u32 *);
271 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
272 		    struct tcphdr *, u32 *, u32 *);
273 static void	em_set_promisc(struct adapter *);
274 static void	em_disable_promisc(struct adapter *);
275 static void	em_set_multi(struct adapter *);
276 static void	em_update_link_status(struct adapter *);
277 static void	em_refresh_mbufs(struct rx_ring *, int);
278 static void	em_register_vlan(void *, if_t, u16);
279 static void	em_unregister_vlan(void *, if_t, u16);
280 static void	em_setup_vlan_hw_support(struct adapter *);
281 static int	em_xmit(struct tx_ring *, struct mbuf **);
282 static int	em_dma_malloc(struct adapter *, bus_size_t,
283 		    struct em_dma_alloc *, int);
284 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
285 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
286 static void	em_print_nvm_info(struct adapter *);
287 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
288 static void	em_print_debug_info(struct adapter *);
289 static int 	em_is_valid_ether_addr(u8 *);
290 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
291 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
292 		    const char *, struct em_int_delay_info *, int, int);
293 /* Management and WOL Support */
294 static void	em_init_manageability(struct adapter *);
295 static void	em_release_manageability(struct adapter *);
296 static void     em_get_hw_control(struct adapter *);
297 static void     em_release_hw_control(struct adapter *);
298 static void	em_get_wakeup(device_t);
299 static void     em_enable_wakeup(device_t);
300 static int	em_enable_phy_wakeup(struct adapter *);
301 static void	em_led_func(void *, int);
302 static void	em_disable_aspm(struct adapter *);
303 
304 static int	em_irq_fast(void *);
305 
306 /* MSIX handlers */
307 static void	em_msix_tx(void *);
308 static void	em_msix_rx(void *);
309 static void	em_msix_link(void *);
310 static void	em_handle_tx(void *context, int pending);
311 static void	em_handle_rx(void *context, int pending);
312 static void	em_handle_link(void *context, int pending);
313 
314 #ifdef EM_MULTIQUEUE
315 static void	em_enable_vectors_82574(struct adapter *);
316 #endif
317 
318 static void	em_set_sysctl_value(struct adapter *, const char *,
319 		    const char *, int *, int);
320 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
321 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
322 
323 static __inline void em_rx_discard(struct rx_ring *, int);
324 
325 #ifdef DEVICE_POLLING
326 static poll_handler_t em_poll;
327 #endif /* DEVICE_POLLING */
328 
329 /*********************************************************************
330  *  FreeBSD Device Interface Entry Points
331  *********************************************************************/
332 
333 static device_method_t em_methods[] = {
334 	/* Device interface */
335 	DEVMETHOD(device_probe, em_probe),
336 	DEVMETHOD(device_attach, em_attach),
337 	DEVMETHOD(device_detach, em_detach),
338 	DEVMETHOD(device_shutdown, em_shutdown),
339 	DEVMETHOD(device_suspend, em_suspend),
340 	DEVMETHOD(device_resume, em_resume),
341 	DEVMETHOD_END
342 };
343 
344 static driver_t em_driver = {
345 	"em", em_methods, sizeof(struct adapter),
346 };
347 
348 devclass_t em_devclass;
349 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
350 MODULE_DEPEND(em, pci, 1, 1, 1);
351 MODULE_DEPEND(em, ether, 1, 1, 1);
352 #ifdef DEV_NETMAP
353 MODULE_DEPEND(em, netmap, 1, 1, 1);
354 #endif /* DEV_NETMAP */
355 
356 /*********************************************************************
357  *  Tunable default values.
358  *********************************************************************/
359 
360 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
361 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
362 #define M_TSO_LEN			66
363 
364 #define MAX_INTS_PER_SEC	8000
365 #define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
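
/*
 * Worked examples for the macros above (a sketch using the constants as
 * defined): the interrupt delay timers tick in 1.024 us hardware units,
 * so EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 us after
 * rounding.  The ITR register counts 256 ns units, so DEFAULT_ITR =
 * 1000000000 / (8000 * 256) = 488, which caps the device at roughly
 * MAX_INTS_PER_SEC interrupts per second.
 */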
366 
367 /* Allow common code without TSO */
368 #ifndef CSUM_TSO
369 #define CSUM_TSO	0
370 #endif
371 
372 #define TSO_WORKAROUND	4
373 
374 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
375 
376 static int em_disable_crc_stripping = 0;
377 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
378     &em_disable_crc_stripping, 0, "Disable CRC Stripping");
379 
380 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
381 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
382 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
383     0, "Default transmit interrupt delay in usecs");
384 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
385     0, "Default receive interrupt delay in usecs");
386 
387 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
388 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
389 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
390     &em_tx_abs_int_delay_dflt, 0,
391     "Default transmit interrupt delay limit in usecs");
392 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
393     &em_rx_abs_int_delay_dflt, 0,
394     "Default receive interrupt delay limit in usecs");
395 
396 static int em_rxd = EM_DEFAULT_RXD;
397 static int em_txd = EM_DEFAULT_TXD;
398 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
399     "Number of receive descriptors per queue");
400 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
401     "Number of transmit descriptors per queue");
402 
403 static int em_smart_pwr_down = FALSE;
404 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
405     0, "Set to true to leave smart power down enabled on newer adapters");
406 
407 /* Controls whether promiscuous also shows bad packets */
408 static int em_debug_sbp = FALSE;
409 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
410     "Show bad packets in promiscuous mode");
411 
412 static int em_enable_msix = TRUE;
413 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
414     "Enable MSI-X interrupts");
415 
416 #ifdef EM_MULTIQUEUE
417 static int em_num_queues = 1;
418 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
419     "82574 only: Number of queues to configure, 0 indicates autoconfigure");
420 #endif
421 
422 /*
423 ** Global variable to store last used CPU when binding queues
424 ** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments when a
425 ** queue is bound to a CPU.
426 */
427 static int em_last_bind_cpu = -1;
428 
429 /* How many packets rxeof tries to clean at a time */
430 static int em_rx_process_limit = 100;
431 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
432     &em_rx_process_limit, 0,
433     "Maximum number of received packets to process "
434     "at a time, -1 means unlimited");
435 
436 /* Energy efficient ethernet - default to OFF */
437 static int eee_setting = 1;
438 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
439     "Enable Energy Efficient Ethernet");
440 
441 /* Global used in WOL setup with multiport cards */
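/*
 * The CTLFLAG_RDTUN knobs above are read-only sysctls that can be seeded
 * as boot-time tunables.  A minimal /boot/loader.conf sketch (values are
 * illustrative, not recommendations):
 *
 *	hw.em.txd="1024"
 *	hw.em.rxd="1024"
 *	hw.em.enable_msix="1"
 *	hw.em.rx_process_limit="200"
 */
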
442 static int global_quad_port_a = 0;
443 
444 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
445 #include <dev/netmap/if_em_netmap.h>
446 #endif /* DEV_NETMAP */
447 
448 /*********************************************************************
449  *  Device identification routine
450  *
451  *  em_probe determines whether the driver should be loaded on an
452  *  adapter, based on the PCI vendor/device id of that adapter.
453  *
454  *  return BUS_PROBE_DEFAULT on success, positive on failure
455  *********************************************************************/
456 
457 static int
458 em_probe(device_t dev)
459 {
460 	char		adapter_name[60];
461 	uint16_t	pci_vendor_id = 0;
462 	uint16_t	pci_device_id = 0;
463 	uint16_t	pci_subvendor_id = 0;
464 	uint16_t	pci_subdevice_id = 0;
465 	em_vendor_info_t *ent;
466 
467 	INIT_DEBUGOUT("em_probe: begin");
468 
469 	pci_vendor_id = pci_get_vendor(dev);
470 	if (pci_vendor_id != EM_VENDOR_ID)
471 		return (ENXIO);
472 
473 	pci_device_id = pci_get_device(dev);
474 	pci_subvendor_id = pci_get_subvendor(dev);
475 	pci_subdevice_id = pci_get_subdevice(dev);
476 
477 	ent = em_vendor_info_array;
478 	while (ent->vendor_id != 0) {
479 		if ((pci_vendor_id == ent->vendor_id) &&
480 		    (pci_device_id == ent->device_id) &&
481 
482 		    ((pci_subvendor_id == ent->subvendor_id) ||
483 		    (ent->subvendor_id == PCI_ANY_ID)) &&
484 
485 		    ((pci_subdevice_id == ent->subdevice_id) ||
486 		    (ent->subdevice_id == PCI_ANY_ID))) {
487 			sprintf(adapter_name, "%s %s",
488 				em_strings[ent->index],
489 				em_driver_version);
490 			device_set_desc_copy(dev, adapter_name);
491 			return (BUS_PROBE_DEFAULT);
492 		}
493 		ent++;
494 	}
495 
496 	return (ENXIO);
497 }
498 
499 /*********************************************************************
500  *  Device initialization routine
501  *
502  *  The attach entry point is called when the driver is being loaded.
503  *  This routine identifies the type of hardware, allocates all resources
504  *  and initializes the hardware.
505  *
506  *  return 0 on success, positive on failure
507  *********************************************************************/
508 
509 static int
510 em_attach(device_t dev)
511 {
512 	struct adapter	*adapter;
513 	struct e1000_hw	*hw;
514 	int		error = 0;
515 
516 	INIT_DEBUGOUT("em_attach: begin");
517 
518 	if (resource_disabled("em", device_get_unit(dev))) {
519 		device_printf(dev, "Disabled by device hint\n");
520 		return (ENXIO);
521 	}
522 
523 	adapter = device_get_softc(dev);
524 	adapter->dev = adapter->osdep.dev = dev;
525 	hw = &adapter->hw;
526 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
527 
528 	/* SYSCTL stuff */
529 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
530 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
531 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
532 	    em_sysctl_nvm_info, "I", "NVM Information");
533 
534 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
535 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
536 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
537 	    em_sysctl_debug_info, "I", "Debug Information");
538 
539 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
540 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
541 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
542 	    em_set_flowcntl, "I", "Flow Control");
543 
544 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
545 
546 	/* Determine hardware and mac info */
547 	em_identify_hardware(adapter);
548 
549 	/* Setup PCI resources */
550 	if (em_allocate_pci_resources(adapter)) {
551 		device_printf(dev, "Allocation of PCI resources failed\n");
552 		error = ENXIO;
553 		goto err_pci;
554 	}
555 
556 	/*
557 	** For ICH8 and family we need to
558 	** map the flash memory, and this
559 	** must happen after the MAC is
560 	** identified
561 	*/
562 	if ((hw->mac.type == e1000_ich8lan) ||
563 	    (hw->mac.type == e1000_ich9lan) ||
564 	    (hw->mac.type == e1000_ich10lan) ||
565 	    (hw->mac.type == e1000_pchlan) ||
566 	    (hw->mac.type == e1000_pch2lan) ||
567 	    (hw->mac.type == e1000_pch_lpt)) {
568 		int rid = EM_BAR_TYPE_FLASH;
569 		adapter->flash = bus_alloc_resource_any(dev,
570 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
571 		if (adapter->flash == NULL) {
572 			device_printf(dev, "Mapping of Flash failed\n");
573 			error = ENXIO;
574 			goto err_pci;
575 		}
576 		/* This is used in the shared code */
577 		hw->flash_address = (u8 *)adapter->flash;
578 		adapter->osdep.flash_bus_space_tag =
579 		    rman_get_bustag(adapter->flash);
580 		adapter->osdep.flash_bus_space_handle =
581 		    rman_get_bushandle(adapter->flash);
582 	}
583 	/*
584 	** In the new SPT device the flash is not a
585 	** separate BAR; rather, it is also in BAR0,
586 	** so use the same tag and an offset handle for the
587 	** FLASH read/write macros in the shared code.
588 	*/
589 	else if (hw->mac.type == e1000_pch_spt) {
590 		adapter->osdep.flash_bus_space_tag =
591 		    adapter->osdep.mem_bus_space_tag;
592 		adapter->osdep.flash_bus_space_handle =
593 		    adapter->osdep.mem_bus_space_handle
594 		    + E1000_FLASH_BASE_ADDR;
595 	}
596 
597 	/* Do Shared Code initialization */
598 	error = e1000_setup_init_funcs(hw, TRUE);
599 	if (error) {
600 		device_printf(dev, "Setup of Shared code failed, error %d\n",
601 		    error);
602 		error = ENXIO;
603 		goto err_pci;
604 	}
605 
606 	/*
607 	 * Setup MSI/X or MSI if PCI Express
608 	 */
609 	adapter->msix = em_setup_msix(adapter);
610 
611 	e1000_get_bus_info(hw);
612 
613 	/* Set up some sysctls for the tunable interrupt delays */
614 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
615 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
616 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
617 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
618 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
619 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
620 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
621 	    "receive interrupt delay limit in usecs",
622 	    &adapter->rx_abs_int_delay,
623 	    E1000_REGISTER(hw, E1000_RADV),
624 	    em_rx_abs_int_delay_dflt);
625 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
626 	    "transmit interrupt delay limit in usecs",
627 	    &adapter->tx_abs_int_delay,
628 	    E1000_REGISTER(hw, E1000_TADV),
629 	    em_tx_abs_int_delay_dflt);
630 	em_add_int_delay_sysctl(adapter, "itr",
631 	    "interrupt delay limit in usecs/4",
632 	    &adapter->tx_itr,
633 	    E1000_REGISTER(hw, E1000_ITR),
634 	    DEFAULT_ITR);
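	/*
	 * These become per-device sysctls once the adapter attaches, e.g.
	 * (unit number illustrative) "sysctl dev.em.0.rx_int_delay=0"
	 * disables the receive delay timer, trading a higher interrupt
	 * rate for lower latency.
	 */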
635 
636 	/* Sysctl for limiting the amount of work done in the taskqueue */
637 	em_set_sysctl_value(adapter, "rx_processing_limit",
638 	    "max number of rx packets to process", &adapter->rx_process_limit,
639 	    em_rx_process_limit);
640 
641 	/*
642 	 * Validate the number of transmit and receive descriptors.  It
643 	 * must not exceed the hardware maximum, and the ring size in
644 	 * bytes must be a multiple of EM_DBA_ALIGN.
645 	 */
646 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
647 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
648 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
649 		    EM_DEFAULT_TXD, em_txd);
650 		adapter->num_tx_desc = EM_DEFAULT_TXD;
651 	} else
652 		adapter->num_tx_desc = em_txd;
653 
654 	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
655 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
656 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
657 		    EM_DEFAULT_RXD, em_rxd);
658 		adapter->num_rx_desc = EM_DEFAULT_RXD;
659 	} else
660 		adapter->num_rx_desc = em_rxd;
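	/*
	 * Worked example of the alignment check, assuming EM_DBA_ALIGN is
	 * 128 as defined in if_em.h: both descriptor types are 16 bytes,
	 * so a ring must hold a multiple of 128 / 16 = 8 descriptors.
	 * em_txd = 100 would be rejected; 1024 passes.
	 */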
661 
662 	hw->mac.autoneg = DO_AUTO_NEG;
663 	hw->phy.autoneg_wait_to_complete = FALSE;
664 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
665 
666 	/* Copper options */
667 	if (hw->phy.media_type == e1000_media_type_copper) {
668 		hw->phy.mdix = AUTO_ALL_MODES;
669 		hw->phy.disable_polarity_correction = FALSE;
670 		hw->phy.ms_type = EM_MASTER_SLAVE;
671 	}
672 
673 	/*
674 	 * Set the frame limits assuming
675 	 * standard ethernet sized frames.
676 	 */
677 	adapter->hw.mac.max_frame_size =
678 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
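	/*
	 * For a standard frame this is 1500 (ETHERMTU) + 14 (ETHER_HDR_LEN)
	 * + 4 (FCS) = 1518 bytes; the SIOCSIFMTU handler below recomputes
	 * it when the MTU changes.
	 */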
679 
680 	/*
681 	 * This controls when hardware reports transmit completion
682 	 * status.
683 	 */
684 	hw->mac.report_tx_early = 1;
685 
686 	/*
687 	** Get queue/ring memory
688 	*/
689 	if (em_allocate_queues(adapter)) {
690 		error = ENOMEM;
691 		goto err_pci;
692 	}
693 
694 	/* Allocate multicast array memory. */
695 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
696 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
697 	if (adapter->mta == NULL) {
698 		device_printf(dev, "Can not allocate multicast setup array\n");
699 		error = ENOMEM;
700 		goto err_late;
701 	}
702 
703 	/* Check SOL/IDER usage */
704 	if (e1000_check_reset_block(hw))
705 		device_printf(dev, "PHY reset is blocked"
706 		    " due to SOL/IDER session.\n");
707 
708 	/* Sysctl for setting Energy Efficient Ethernet */
709 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
710 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
711 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
712 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
713 	    adapter, 0, em_sysctl_eee, "I",
714 	    "Disable Energy Efficient Ethernet");
715 
716 	/*
717 	** Start from a known state; this is
718 	** important for reading the NVM and
719 	** MAC address afterwards.
720 	*/
721 	e1000_reset_hw(hw);
722 
723 
724 	/* Make sure we have a good EEPROM before we read from it */
725 	if (e1000_validate_nvm_checksum(hw) < 0) {
726 		/*
727 		** Some PCI-E parts fail the first check due to
728 		** the link being in a sleep state; call it again.
729 		** If it fails a second time, it is a real issue.
730 		*/
731 		if (e1000_validate_nvm_checksum(hw) < 0) {
732 			device_printf(dev,
733 			    "The EEPROM Checksum Is Not Valid\n");
734 			error = EIO;
735 			goto err_late;
736 		}
737 	}
738 
739 	/* Copy the permanent MAC address out of the EEPROM */
740 	if (e1000_read_mac_addr(hw) < 0) {
741 		device_printf(dev, "EEPROM read error while reading MAC"
742 		    " address\n");
743 		error = EIO;
744 		goto err_late;
745 	}
746 
747 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
748 		device_printf(dev, "Invalid MAC address\n");
749 		error = EIO;
750 		goto err_late;
751 	}
752 
753 	/* Disable ULP support */
754 	e1000_disable_ulp_lpt_lp(hw, TRUE);
755 
756 	/*
757 	**  Do interrupt configuration
758 	*/
759 	if (adapter->msix > 1) /* Do MSIX */
760 		error = em_allocate_msix(adapter);
761 	else  /* MSI or Legacy */
762 		error = em_allocate_legacy(adapter);
763 	if (error)
764 		goto err_late;
765 
766 	/*
767 	 * Get Wake-on-Lan and Management info for later use
768 	 */
769 	em_get_wakeup(dev);
770 
771 	/* Setup OS specific network interface */
772 	if (em_setup_interface(dev, adapter) != 0)
773 		goto err_late;
774 
775 	em_reset(adapter);
776 
777 	/* Initialize statistics */
778 	em_update_stats_counters(adapter);
779 
780 	hw->mac.get_link_status = 1;
781 	em_update_link_status(adapter);
782 
783 	/* Register for VLAN events */
784 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
785 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
786 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
787 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
788 
789 	em_add_hw_stats(adapter);
790 
791 	/* Non-AMT based hardware can now take control from firmware */
792 	if (adapter->has_manage && !adapter->has_amt)
793 		em_get_hw_control(adapter);
794 
795 	/* Tell the stack that the interface is not active */
796 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
797 
798 	adapter->led_dev = led_create(em_led_func, adapter,
799 	    device_get_nameunit(dev));
800 #ifdef DEV_NETMAP
801 	em_netmap_attach(adapter);
802 #endif /* DEV_NETMAP */
803 
804 	INIT_DEBUGOUT("em_attach: end");
805 
806 	return (0);
807 
808 err_late:
809 	em_free_transmit_structures(adapter);
810 	em_free_receive_structures(adapter);
811 	em_release_hw_control(adapter);
812 	if (adapter->ifp != (void *)NULL)
813 		if_free(adapter->ifp);
814 err_pci:
815 	em_free_pci_resources(adapter);
816 	free(adapter->mta, M_DEVBUF);
817 	EM_CORE_LOCK_DESTROY(adapter);
818 
819 	return (error);
820 }
821 
822 /*********************************************************************
823  *  Device removal routine
824  *
825  *  The detach entry point is called when the driver is being removed.
826  *  This routine stops the adapter and deallocates all the resources
827  *  that were allocated for driver operation.
828  *
829  *  return 0 on success, positive on failure
830  *********************************************************************/
831 
832 static int
833 em_detach(device_t dev)
834 {
835 	struct adapter	*adapter = device_get_softc(dev);
836 	if_t ifp = adapter->ifp;
837 
838 	INIT_DEBUGOUT("em_detach: begin");
839 
840 	/* Make sure VLANS are not using driver */
841 	if (if_vlantrunkinuse(ifp)) {
842 		device_printf(dev, "VLAN in use, detach first\n");
843 		return (EBUSY);
844 	}
845 
846 #ifdef DEVICE_POLLING
847 	if (if_getcapenable(ifp) & IFCAP_POLLING)
848 		ether_poll_deregister(ifp);
849 #endif
850 
851 	if (adapter->led_dev != NULL)
852 		led_destroy(adapter->led_dev);
853 
854 	EM_CORE_LOCK(adapter);
855 	adapter->in_detach = 1;
856 	em_stop(adapter);
857 	EM_CORE_UNLOCK(adapter);
858 	EM_CORE_LOCK_DESTROY(adapter);
859 
860 	e1000_phy_hw_reset(&adapter->hw);
861 
862 	em_release_manageability(adapter);
863 	em_release_hw_control(adapter);
864 
865 	/* Unregister VLAN events */
866 	if (adapter->vlan_attach != NULL)
867 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
868 	if (adapter->vlan_detach != NULL)
869 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
870 
871 	ether_ifdetach(adapter->ifp);
872 	callout_drain(&adapter->timer);
873 
874 #ifdef DEV_NETMAP
875 	netmap_detach(ifp);
876 #endif /* DEV_NETMAP */
877 
878 	em_free_pci_resources(adapter);
879 	bus_generic_detach(dev);
880 	if_free(ifp);
881 
882 	em_free_transmit_structures(adapter);
883 	em_free_receive_structures(adapter);
884 
885 	em_release_hw_control(adapter);
886 	free(adapter->mta, M_DEVBUF);
887 
888 	return (0);
889 }
890 
891 /*********************************************************************
892  *
893  *  Shutdown entry point
894  *
895  **********************************************************************/
896 
897 static int
898 em_shutdown(device_t dev)
899 {
900 	return em_suspend(dev);
901 }
902 
903 /*
904  * Suspend/resume device methods.
905  */
906 static int
907 em_suspend(device_t dev)
908 {
909 	struct adapter *adapter = device_get_softc(dev);
910 
911 	EM_CORE_LOCK(adapter);
912 
913 	em_release_manageability(adapter);
914 	em_release_hw_control(adapter);
915 	em_enable_wakeup(dev);
916 
917 	EM_CORE_UNLOCK(adapter);
918 
919 	return bus_generic_suspend(dev);
920 }
921 
922 static int
923 em_resume(device_t dev)
924 {
925 	struct adapter *adapter = device_get_softc(dev);
926 	struct tx_ring	*txr = adapter->tx_rings;
927 	if_t ifp = adapter->ifp;
928 
929 	EM_CORE_LOCK(adapter);
930 	if (adapter->hw.mac.type == e1000_pch2lan)
931 		e1000_resume_workarounds_pchlan(&adapter->hw);
932 	em_init_locked(adapter);
933 	em_init_manageability(adapter);
934 
935 	if ((if_getflags(ifp) & IFF_UP) &&
936 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
937 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
938 			EM_TX_LOCK(txr);
939 #ifdef EM_MULTIQUEUE
940 			if (!drbr_empty(ifp, txr->br))
941 				em_mq_start_locked(ifp, txr);
942 #else
943 			if (!if_sendq_empty(ifp))
944 				em_start_locked(ifp, txr);
945 #endif
946 			EM_TX_UNLOCK(txr);
947 		}
948 	}
949 	EM_CORE_UNLOCK(adapter);
950 
951 	return bus_generic_resume(dev);
952 }
953 
954 
955 #ifndef EM_MULTIQUEUE
956 static void
957 em_start_locked(if_t ifp, struct tx_ring *txr)
958 {
959 	struct adapter	*adapter = if_getsoftc(ifp);
960 	struct mbuf	*m_head;
961 
962 	EM_TX_LOCK_ASSERT(txr);
963 
964 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
965 	    IFF_DRV_RUNNING)
966 		return;
967 
968 	if (!adapter->link_active)
969 		return;
970 
971 	while (!if_sendq_empty(ifp)) {
972 		/* Call cleanup if the number of free TX descriptors is low */
973 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
974 			em_txeof(txr);
975 		if (txr->tx_avail < EM_MAX_SCATTER) {
976 			if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
977 			break;
978 		}
979 		m_head = if_dequeue(ifp);
980 		if (m_head == NULL)
981 			break;
982 		/*
983 		 *  Encapsulation can modify our pointer, and/or make it
984 		 *  NULL on failure.  In that event, we can't requeue.
985 		 */
986 		if (em_xmit(txr, &m_head)) {
987 			if (m_head == NULL)
988 				break;
989 			if_sendq_prepend(ifp, m_head);
990 			break;
991 		}
992 
993 		/* Mark the queue as having work */
994 		if (txr->busy == EM_TX_IDLE)
995 			txr->busy = EM_TX_BUSY;
996 
997 		/* Send a copy of the frame to the BPF listener */
998 		ETHER_BPF_MTAP(ifp, m_head);
999 
1000 	}
1001 
1002 	return;
1003 }
1004 
1005 static void
1006 em_start(if_t ifp)
1007 {
1008 	struct adapter	*adapter = if_getsoftc(ifp);
1009 	struct tx_ring	*txr = adapter->tx_rings;
1010 
1011 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1012 		EM_TX_LOCK(txr);
1013 		em_start_locked(ifp, txr);
1014 		EM_TX_UNLOCK(txr);
1015 	}
1016 	return;
1017 }
1018 #else /* EM_MULTIQUEUE */
1019 /*********************************************************************
1020  *  Multiqueue Transmit routines
1021  *
1022  *  em_mq_start is called by the stack to initiate a transmit.
1023  *  However, if the ring is busy the driver can queue the request
1024  *  rather than do an immediate send.  That deferral, as much as
1025  *  having multiple TX queues, is what gives this driver an edge.
1026  **********************************************************************/
1027 /*
1028 ** Multiqueue capable stack interface
1029 */
1030 static int
1031 em_mq_start(if_t ifp, struct mbuf *m)
1032 {
1033 	struct adapter	*adapter = if_getsoftc(ifp);
1034 	struct tx_ring	*txr = adapter->tx_rings;
1035 	unsigned int	i, error;
1036 
1037 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1038 		i = m->m_pkthdr.flowid % adapter->num_queues;
1039 	else
1040 		i = curcpu % adapter->num_queues;
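	/*
	 * Example: with num_queues = 2, a packet carrying flowid 7 maps to
	 * ring 1 (7 % 2), while packets without a flowid stay on the
	 * sending CPU's ring; either way a given flow sticks to one ring,
	 * preserving intra-flow ordering.
	 */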
1041 
1042 	txr = &adapter->tx_rings[i];
1043 
1044 	error = drbr_enqueue(ifp, txr->br, m);
1045 	if (error)
1046 		return (error);
1047 
1048 	if (EM_TX_TRYLOCK(txr)) {
1049 		em_mq_start_locked(ifp, txr);
1050 		EM_TX_UNLOCK(txr);
1051 	} else
1052 		taskqueue_enqueue(txr->tq, &txr->tx_task);
1053 
1054 	return (0);
1055 }
1056 
1057 static int
1058 em_mq_start_locked(if_t ifp, struct tx_ring *txr)
1059 {
1060 	struct adapter  *adapter = txr->adapter;
1061 	struct mbuf	*next;
1062 	int		err = 0, enq = 0;
1063 
1064 	EM_TX_LOCK_ASSERT(txr);
1065 
1066 	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
1067 	    adapter->link_active == 0) {
1068 		return (ENETDOWN);
1069 	}
1070 
1071 	/* Process the queue */
1072 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1073 		if ((err = em_xmit(txr, &next)) != 0) {
1074 			if (next == NULL) {
1075 				/* It was freed, move forward */
1076 				drbr_advance(ifp, txr->br);
1077 			} else {
1078 				/*
1079 				 * Still have one left, it may not be
1080 				 * the same since the transmit function
1081 				 * may have changed it.
1082 				 */
1083 				drbr_putback(ifp, txr->br, next);
1084 			}
1085 			break;
1086 		}
1087 		drbr_advance(ifp, txr->br);
1088 		enq++;
1089 		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
1090 		if (next->m_flags & M_MCAST)
1091 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1092 		ETHER_BPF_MTAP(ifp, next);
1093 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
1094 			break;
1095 	}
1096 
1097 	/* Mark the queue as having work */
1098 	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1099 		txr->busy = EM_TX_BUSY;
1100 
1101 	if (txr->tx_avail < EM_MAX_SCATTER)
1102 		em_txeof(txr);
1103 	if (txr->tx_avail < EM_MAX_SCATTER) {
1104 		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
1105 	}
1106 	return (err);
1107 }
1108 
1109 /*
1110 ** Flush all ring buffers
1111 */
1112 static void
1113 em_qflush(if_t ifp)
1114 {
1115 	struct adapter  *adapter = if_getsoftc(ifp);
1116 	struct tx_ring  *txr = adapter->tx_rings;
1117 	struct mbuf     *m;
1118 
1119 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1120 		EM_TX_LOCK(txr);
1121 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1122 			m_freem(m);
1123 		EM_TX_UNLOCK(txr);
1124 	}
1125 	if_qflush(ifp);
1126 }
1127 #endif /* EM_MULTIQUEUE */
1128 
1129 /*********************************************************************
1130  *  Ioctl entry point
1131  *
1132  *  em_ioctl is called when the user wants to configure the
1133  *  interface.
1134  *
1135  *  return 0 on success, positive on failure
1136  **********************************************************************/
1137 
1138 static int
1139 em_ioctl(if_t ifp, u_long command, caddr_t data)
1140 {
1141 	struct adapter	*adapter = if_getsoftc(ifp);
1142 	struct ifreq	*ifr = (struct ifreq *)data;
1143 #if defined(INET) || defined(INET6)
1144 	struct ifaddr	*ifa = (struct ifaddr *)data;
1145 #endif
1146 	bool		avoid_reset = FALSE;
1147 	int		error = 0;
1148 
1149 	if (adapter->in_detach)
1150 		return (error);
1151 
1152 	switch (command) {
1153 	case SIOCSIFADDR:
1154 #ifdef INET
1155 		if (ifa->ifa_addr->sa_family == AF_INET)
1156 			avoid_reset = TRUE;
1157 #endif
1158 #ifdef INET6
1159 		if (ifa->ifa_addr->sa_family == AF_INET6)
1160 			avoid_reset = TRUE;
1161 #endif
1162 		/*
1163 		** Calling init results in link renegotiation,
1164 		** so we avoid doing it when possible.
1165 		*/
1166 		if (avoid_reset) {
1167 			if_setflagbits(ifp,IFF_UP,0);
1168 			if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
1169 				em_init(adapter);
1170 #ifdef INET
1171 			if (!(if_getflags(ifp) & IFF_NOARP))
1172 				arp_ifinit(ifp, ifa);
1173 #endif
1174 		} else
1175 			error = ether_ioctl(ifp, command, data);
1176 		break;
1177 	case SIOCSIFMTU:
1178 	    {
1179 		int max_frame_size;
1180 
1181 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1182 
1183 		EM_CORE_LOCK(adapter);
1184 		switch (adapter->hw.mac.type) {
1185 		case e1000_82571:
1186 		case e1000_82572:
1187 		case e1000_ich9lan:
1188 		case e1000_ich10lan:
1189 		case e1000_pch2lan:
1190 		case e1000_pch_lpt:
1191 		case e1000_pch_spt:
1192 		case e1000_82574:
1193 		case e1000_82583:
1194 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1195 			max_frame_size = 9234;
1196 			break;
1197 		case e1000_pchlan:
1198 			max_frame_size = 4096;
1199 			break;
1200 			/* Adapters that do not support jumbo frames */
1201 		case e1000_ich8lan:
1202 			max_frame_size = ETHER_MAX_LEN;
1203 			break;
1204 		default:
1205 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1206 		}
1207 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1208 		    ETHER_CRC_LEN) {
1209 			EM_CORE_UNLOCK(adapter);
1210 			error = EINVAL;
1211 			break;
1212 		}
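		/*
		 * Example: on an 82571 max_frame_size is 9234, so the
		 * largest MTU accepted here is 9234 - 14 - 4 = 9216 bytes.
		 */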
1213 
1214 		if_setmtu(ifp, ifr->ifr_mtu);
1215 		adapter->hw.mac.max_frame_size =
1216 		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
1217 		em_init_locked(adapter);
1218 		EM_CORE_UNLOCK(adapter);
1219 		break;
1220 	    }
1221 	case SIOCSIFFLAGS:
1222 		IOCTL_DEBUGOUT("ioctl rcv'd: "
1223 		    "SIOCSIFFLAGS (Set Interface Flags)");
1224 		EM_CORE_LOCK(adapter);
1225 		if (if_getflags(ifp) & IFF_UP) {
1226 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1227 				if ((if_getflags(ifp) ^ adapter->if_flags) &
1228 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1229 					em_disable_promisc(adapter);
1230 					em_set_promisc(adapter);
1231 				}
1232 			} else
1233 				em_init_locked(adapter);
1234 		} else
1235 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1236 				em_stop(adapter);
1237 		adapter->if_flags = if_getflags(ifp);
1238 		EM_CORE_UNLOCK(adapter);
1239 		break;
1240 	case SIOCADDMULTI:
1241 	case SIOCDELMULTI:
1242 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1243 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1244 			EM_CORE_LOCK(adapter);
1245 			em_disable_intr(adapter);
1246 			em_set_multi(adapter);
1247 #ifdef DEVICE_POLLING
1248 			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
1249 #endif
1250 				em_enable_intr(adapter);
1251 			EM_CORE_UNLOCK(adapter);
1252 		}
1253 		break;
1254 	case SIOCSIFMEDIA:
1255 		/* Check SOL/IDER usage */
1256 		EM_CORE_LOCK(adapter);
1257 		if (e1000_check_reset_block(&adapter->hw)) {
1258 			EM_CORE_UNLOCK(adapter);
1259 			device_printf(adapter->dev, "Media change is"
1260 			    " blocked due to SOL/IDER session.\n");
1261 			break;
1262 		}
1263 		EM_CORE_UNLOCK(adapter);
1264 		/* falls thru */
1265 	case SIOCGIFMEDIA:
1266 		IOCTL_DEBUGOUT("ioctl rcv'd: "
1267 		    "SIOCxIFMEDIA (Get/Set Interface Media)");
1268 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1269 		break;
1270 	case SIOCSIFCAP:
1271 	    {
1272 		int mask, reinit;
1273 
1274 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1275 		reinit = 0;
1276 		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1277 #ifdef DEVICE_POLLING
1278 		if (mask & IFCAP_POLLING) {
1279 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1280 				error = ether_poll_register(em_poll, ifp);
1281 				if (error)
1282 					return (error);
1283 				EM_CORE_LOCK(adapter);
1284 				em_disable_intr(adapter);
1285 				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
1286 				EM_CORE_UNLOCK(adapter);
1287 			} else {
1288 				error = ether_poll_deregister(ifp);
1289 				/* Enable interrupt even in error case */
1290 				EM_CORE_LOCK(adapter);
1291 				em_enable_intr(adapter);
1292 				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
1293 				EM_CORE_UNLOCK(adapter);
1294 			}
1295 		}
1296 #endif
1297 		if (mask & IFCAP_HWCSUM) {
1298 			if_togglecapenable(ifp,IFCAP_HWCSUM);
1299 			reinit = 1;
1300 		}
1301 		if (mask & IFCAP_TSO4) {
1302 			if_togglecapenable(ifp,IFCAP_TSO4);
1303 			reinit = 1;
1304 		}
1305 		if (mask & IFCAP_VLAN_HWTAGGING) {
1306 			if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
1307 			reinit = 1;
1308 		}
1309 		if (mask & IFCAP_VLAN_HWFILTER) {
1310 			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1311 			reinit = 1;
1312 		}
1313 		if (mask & IFCAP_VLAN_HWTSO) {
1314 			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1315 			reinit = 1;
1316 		}
1317 		if ((mask & IFCAP_WOL) &&
1318 		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
1319 			if (mask & IFCAP_WOL_MCAST)
1320 				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
1321 			if (mask & IFCAP_WOL_MAGIC)
1322 				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
1323 		}
1324 		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1325 			em_init(adapter);
1326 		if_vlancap(ifp);
1327 		break;
1328 	    }
1329 
1330 	default:
1331 		error = ether_ioctl(ifp, command, data);
1332 		break;
1333 	}
1334 
1335 	return (error);
1336 }
1337 
1338 
1339 /*********************************************************************
1340  *  Init entry point
1341  *
1342  *  This routine is used in two ways. It is used by the stack as
1343  *  init entry point in network interface structure. It is also used
1344  *  by the driver as a hw/sw initialization routine to get to a
1345  *  consistent state.
1346  *
1347  *  return 0 on success, positive on failure
1348  **********************************************************************/
1349 
1350 static void
1351 em_init_locked(struct adapter *adapter)
1352 {
1353 	if_t ifp = adapter->ifp;
1354 	device_t	dev = adapter->dev;
1355 
1356 	INIT_DEBUGOUT("em_init: begin");
1357 
1358 	EM_CORE_LOCK_ASSERT(adapter);
1359 
1360 	em_disable_intr(adapter);
1361 	callout_stop(&adapter->timer);
1362 
1363 	/* Get the latest MAC address; the user may have set a LAA */
1364 	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1365 	    ETHER_ADDR_LEN);
1366 
1367 	/* Put the address into the Receive Address Array */
1368 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1369 
1370 	/*
1371 	 * With the 82571 adapter, RAR[0] may be overwritten
1372 	 * when the other port is reset.  We make a duplicate
1373 	 * in RAR[14] for that eventuality, which assures
1374 	 * the interface continues to function.
1375 	 */
1376 	if (adapter->hw.mac.type == e1000_82571) {
1377 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1378 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1379 		    E1000_RAR_ENTRIES - 1);
1380 	}
1381 
1382 	/* Initialize the hardware */
1383 	em_reset(adapter);
1384 	em_update_link_status(adapter);
1385 
1386 	/* Setup VLAN support, basic and offload if available */
1387 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1388 
1389 	/* Set hardware offload abilities */
1390 	if_clearhwassist(ifp);
1391 	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1392 		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1393 	/*
1394 	** There have proven to be problems with TSO when not
1395 	** at full gigabit speed, so disable the assist automatically
1396 	** when at lower speeds.  -jfv
1397 	*/
1398 	if (if_getcapenable(ifp) & IFCAP_TSO4) {
1399 		if (adapter->link_speed == SPEED_1000)
1400 			if_sethwassistbits(ifp, CSUM_TSO, 0);
1401 	}
1402 
1403 	/* Configure for OS presence */
1404 	em_init_manageability(adapter);
1405 
1406 	/* Prepare transmit descriptors and buffers */
1407 	em_setup_transmit_structures(adapter);
1408 	em_initialize_transmit_unit(adapter);
1409 
1410 	/* Setup Multicast table */
1411 	em_set_multi(adapter);
1412 
1413 	/*
1414 	** Figure out the desired mbuf
1415 	** pool for doing jumbos
1416 	*/
1417 	if (adapter->hw.mac.max_frame_size <= 2048)
1418 		adapter->rx_mbuf_sz = MCLBYTES;
1419 	else if (adapter->hw.mac.max_frame_size <= 4096)
1420 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1421 	else
1422 		adapter->rx_mbuf_sz = MJUM9BYTES;
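	/*
	 * For reference: MCLBYTES is 2048, MJUMPAGESIZE is one page (4096
	 * bytes on most platforms), and MJUM9BYTES is 9216, large enough
	 * for the biggest frame the hardware accepts.
	 */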
1423 
1424 	/* Prepare receive descriptors and buffers */
1425 	if (em_setup_receive_structures(adapter)) {
1426 		device_printf(dev, "Could not setup receive structures\n");
1427 		em_stop(adapter);
1428 		return;
1429 	}
1430 	em_initialize_receive_unit(adapter);
1431 
1432 	/* Use real VLAN Filter support? */
1433 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1434 		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1435 			/* Use real VLAN Filter support */
1436 			em_setup_vlan_hw_support(adapter);
1437 		else {
1438 			u32 ctrl;
1439 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1440 			ctrl |= E1000_CTRL_VME;
1441 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1442 		}
1443 	}
1444 
1445 	/* Don't lose promiscuous settings */
1446 	em_set_promisc(adapter);
1447 
1448 	/* Set the interface as ACTIVE */
1449 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1450 
1451 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1452 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1453 
1454 	/* MSI/X configuration for 82574 */
1455 	if (adapter->hw.mac.type == e1000_82574) {
1456 		int tmp;
1457 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1458 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1459 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1460 		/* Set the IVAR - interrupt vector routing. */
1461 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1462 	}
1463 
1464 #ifdef DEVICE_POLLING
1465 	/*
1466 	 * Only enable interrupts if we are not polling; make sure
1467 	 * they are off otherwise.
1468 	 */
1469 	if (if_getcapenable(ifp) & IFCAP_POLLING)
1470 		em_disable_intr(adapter);
1471 	else
1472 #endif /* DEVICE_POLLING */
1473 		em_enable_intr(adapter);
1474 
1475 	/* AMT based hardware can now take control from firmware */
1476 	if (adapter->has_manage && adapter->has_amt)
1477 		em_get_hw_control(adapter);
1478 }
1479 
1480 static void
1481 em_init(void *arg)
1482 {
1483 	struct adapter *adapter = arg;
1484 
1485 	EM_CORE_LOCK(adapter);
1486 	em_init_locked(adapter);
1487 	EM_CORE_UNLOCK(adapter);
1488 }
1489 
1490 
1491 #ifdef DEVICE_POLLING
1492 /*********************************************************************
1493  *
1494  *  Legacy polling routine: note this only works with a single queue
1495  *
1496  *********************************************************************/
1497 static int
1498 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1499 {
1500 	struct adapter *adapter = if_getsoftc(ifp);
1501 	struct tx_ring	*txr = adapter->tx_rings;
1502 	struct rx_ring	*rxr = adapter->rx_rings;
1503 	u32		reg_icr;
1504 	int		rx_done;
1505 
1506 	EM_CORE_LOCK(adapter);
1507 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1508 		EM_CORE_UNLOCK(adapter);
1509 		return (0);
1510 	}
1511 
1512 	if (cmd == POLL_AND_CHECK_STATUS) {
1513 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1514 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1515 			callout_stop(&adapter->timer);
1516 			adapter->hw.mac.get_link_status = 1;
1517 			em_update_link_status(adapter);
1518 			callout_reset(&adapter->timer, hz,
1519 			    em_local_timer, adapter);
1520 		}
1521 	}
1522 	EM_CORE_UNLOCK(adapter);
1523 
1524 	em_rxeof(rxr, count, &rx_done);
1525 
1526 	EM_TX_LOCK(txr);
1527 	em_txeof(txr);
1528 #ifdef EM_MULTIQUEUE
1529 	if (!drbr_empty(ifp, txr->br))
1530 		em_mq_start_locked(ifp, txr);
1531 #else
1532 	if (!if_sendq_empty(ifp))
1533 		em_start_locked(ifp, txr);
1534 #endif
1535 	EM_TX_UNLOCK(txr);
1536 
1537 	return (rx_done);
1538 }
1539 #endif /* DEVICE_POLLING */
1540 
1541 
1542 /*********************************************************************
1543  *
1544  *  Fast Legacy/MSI Combined Interrupt Service routine
1545  *
1546  *********************************************************************/
1547 static int
1548 em_irq_fast(void *arg)
1549 {
1550 	struct adapter	*adapter = arg;
1551 	if_t ifp;
1552 	u32		reg_icr;
1553 
1554 	ifp = adapter->ifp;
1555 
1556 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1557 
1558 	/* Hot eject?  */
1559 	if (reg_icr == 0xffffffff)
1560 		return FILTER_STRAY;
1561 
1562 	/* Definitely not our interrupt.  */
1563 	if (reg_icr == 0x0)
1564 		return FILTER_STRAY;
1565 
1566 	/*
1567 	 * Starting with the 82571 chip, bit 31 should be used to
1568 	 * determine whether the interrupt belongs to us.
1569 	 */
1570 	if (adapter->hw.mac.type >= e1000_82571 &&
1571 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1572 		return FILTER_STRAY;
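	/*
	 * (E1000_ICR_INT_ASSERTED is bit 31, 0x80000000; when it is clear
	 * on these parts the line was raised by another device sharing
	 * the interrupt.)
	 */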
1573 
1574 	em_disable_intr(adapter);
1575 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1576 
1577 	/* Link status change */
1578 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1579 		adapter->hw.mac.get_link_status = 1;
1580 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1581 	}
1582 
1583 	if (reg_icr & E1000_ICR_RXO)
1584 		adapter->rx_overruns++;
1585 	return FILTER_HANDLED;
1586 }
1587 
1588 /* Combined RX/TX handler, used by Legacy and MSI */
1589 static void
1590 em_handle_que(void *context, int pending)
1591 {
1592 	struct adapter	*adapter = context;
1593 	if_t ifp = adapter->ifp;
1594 	struct tx_ring	*txr = adapter->tx_rings;
1595 	struct rx_ring	*rxr = adapter->rx_rings;
1596 
1597 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1598 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1599 
1600 		EM_TX_LOCK(txr);
1601 		em_txeof(txr);
1602 #ifdef EM_MULTIQUEUE
1603 		if (!drbr_empty(ifp, txr->br))
1604 			em_mq_start_locked(ifp, txr);
1605 #else
1606 		if (!if_sendq_empty(ifp))
1607 			em_start_locked(ifp, txr);
1608 #endif
1609 		EM_TX_UNLOCK(txr);
1610 		if (more) {
1611 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1612 			return;
1613 		}
1614 	}
1615 
1616 	em_enable_intr(adapter);
1617 	return;
1618 }
1619 
1620 
1621 /*********************************************************************
1622  *
1623  *  MSIX Interrupt Service Routines
1624  *
1625  **********************************************************************/
1626 static void
1627 em_msix_tx(void *arg)
1628 {
1629 	struct tx_ring *txr = arg;
1630 	struct adapter *adapter = txr->adapter;
1631 	if_t ifp = adapter->ifp;
1632 
1633 	++txr->tx_irq;
1634 	EM_TX_LOCK(txr);
1635 	em_txeof(txr);
1636 #ifdef EM_MULTIQUEUE
1637 	if (!drbr_empty(ifp, txr->br))
1638 		em_mq_start_locked(ifp, txr);
1639 #else
1640 	if (!if_sendq_empty(ifp))
1641 		em_start_locked(ifp, txr);
1642 #endif
1643 
1644 	/* Reenable this interrupt */
1645 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1646 	EM_TX_UNLOCK(txr);
1647 	return;
1648 }
1649 
1650 /*********************************************************************
1651  *
1652  *  MSIX RX Interrupt Service routine
1653  *
1654  **********************************************************************/
1655 
1656 static void
1657 em_msix_rx(void *arg)
1658 {
1659 	struct rx_ring	*rxr = arg;
1660 	struct adapter	*adapter = rxr->adapter;
1661 	bool		more;
1662 
1663 	++rxr->rx_irq;
1664 	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1665 		return;
1666 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1667 	if (more)
1668 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1669 	else {
1670 		/* Reenable this interrupt */
1671 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1672 	}
1673 	return;
1674 }
1675 
1676 /*********************************************************************
1677  *
1678  *  MSIX Link Fast Interrupt Service routine
1679  *
1680  **********************************************************************/
1681 static void
1682 em_msix_link(void *arg)
1683 {
1684 	struct adapter	*adapter = arg;
1685 	u32		reg_icr;
1686 
1687 	++adapter->link_irq;
1688 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1689 
1690 	if (reg_icr & E1000_ICR_RXO)
1691 		adapter->rx_overruns++;
1692 
1693 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1694 		adapter->hw.mac.get_link_status = 1;
1695 		em_handle_link(adapter, 0);
1696 	} else
1697 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1698 		    EM_MSIX_LINK | E1000_IMS_LSC);
	/*
	** Because we must read the ICR for this interrupt,
	** it may clear other causes using autoclear; for
	** this reason we simply create a soft interrupt
	** for all these vectors.
	*/
1705 	if (reg_icr) {
1706 		E1000_WRITE_REG(&adapter->hw,
1707 			E1000_ICS, adapter->ims);
1708 	}
1709 	return;
1710 }
1711 
1712 static void
1713 em_handle_rx(void *context, int pending)
1714 {
1715 	struct rx_ring	*rxr = context;
1716 	struct adapter	*adapter = rxr->adapter;
	bool		more;
1718 
1719 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1720 	if (more)
1721 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1722 	else {
1723 		/* Reenable this interrupt */
1724 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1725 	}
1726 }
1727 
1728 static void
1729 em_handle_tx(void *context, int pending)
1730 {
1731 	struct tx_ring	*txr = context;
1732 	struct adapter	*adapter = txr->adapter;
1733 	if_t ifp = adapter->ifp;
1734 
1735 	EM_TX_LOCK(txr);
1736 	em_txeof(txr);
1737 #ifdef EM_MULTIQUEUE
1738 	if (!drbr_empty(ifp, txr->br))
1739 		em_mq_start_locked(ifp, txr);
1740 #else
1741 	if (!if_sendq_empty(ifp))
1742 		em_start_locked(ifp, txr);
1743 #endif
1744 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1745 	EM_TX_UNLOCK(txr);
1746 }
1747 
1748 static void
1749 em_handle_link(void *context, int pending)
1750 {
1751 	struct adapter	*adapter = context;
1752 	struct tx_ring	*txr = adapter->tx_rings;
1753 	if_t ifp = adapter->ifp;
1754 
1755 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1756 		return;
1757 
1758 	EM_CORE_LOCK(adapter);
1759 	callout_stop(&adapter->timer);
1760 	em_update_link_status(adapter);
1761 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1762 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1763 	    EM_MSIX_LINK | E1000_IMS_LSC);
1764 	if (adapter->link_active) {
1765 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1766 			EM_TX_LOCK(txr);
1767 #ifdef EM_MULTIQUEUE
1768 			if (!drbr_empty(ifp, txr->br))
1769 				em_mq_start_locked(ifp, txr);
1770 #else
			if (!if_sendq_empty(ifp))
1772 				em_start_locked(ifp, txr);
1773 #endif
1774 			EM_TX_UNLOCK(txr);
1775 		}
1776 	}
1777 	EM_CORE_UNLOCK(adapter);
1778 }
1779 
1780 
1781 /*********************************************************************
1782  *
1783  *  Media Ioctl callback
1784  *
1785  *  This routine is called whenever the user queries the status of
1786  *  the interface using ifconfig.
1787  *
1788  **********************************************************************/
1789 static void
1790 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1791 {
1792 	struct adapter *adapter = if_getsoftc(ifp);
1793 	u_char fiber_type = IFM_1000_SX;
1794 
1795 	INIT_DEBUGOUT("em_media_status: begin");
1796 
1797 	EM_CORE_LOCK(adapter);
1798 	em_update_link_status(adapter);
1799 
1800 	ifmr->ifm_status = IFM_AVALID;
1801 	ifmr->ifm_active = IFM_ETHER;
1802 
1803 	if (!adapter->link_active) {
1804 		EM_CORE_UNLOCK(adapter);
1805 		return;
1806 	}
1807 
1808 	ifmr->ifm_status |= IFM_ACTIVE;
1809 
1810 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1811 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1812 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1813 	} else {
1814 		switch (adapter->link_speed) {
1815 		case 10:
1816 			ifmr->ifm_active |= IFM_10_T;
1817 			break;
1818 		case 100:
1819 			ifmr->ifm_active |= IFM_100_TX;
1820 			break;
1821 		case 1000:
1822 			ifmr->ifm_active |= IFM_1000_T;
1823 			break;
1824 		}
1825 		if (adapter->link_duplex == FULL_DUPLEX)
1826 			ifmr->ifm_active |= IFM_FDX;
1827 		else
1828 			ifmr->ifm_active |= IFM_HDX;
1829 	}
1830 	EM_CORE_UNLOCK(adapter);
1831 }
1832 
1833 /*********************************************************************
1834  *
1835  *  Media Ioctl callback
1836  *
1837  *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
1839  *
1840  **********************************************************************/
1841 static int
1842 em_media_change(if_t ifp)
1843 {
1844 	struct adapter *adapter = if_getsoftc(ifp);
1845 	struct ifmedia  *ifm = &adapter->media;
1846 
1847 	INIT_DEBUGOUT("em_media_change: begin");
1848 
1849 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1850 		return (EINVAL);
1851 
1852 	EM_CORE_LOCK(adapter);
1853 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1854 	case IFM_AUTO:
1855 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1856 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1857 		break;
1858 	case IFM_1000_LX:
1859 	case IFM_1000_SX:
1860 	case IFM_1000_T:
1861 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1862 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1863 		break;
1864 	case IFM_100_TX:
1865 		adapter->hw.mac.autoneg = FALSE;
1866 		adapter->hw.phy.autoneg_advertised = 0;
1867 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1868 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1869 		else
1870 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1871 		break;
1872 	case IFM_10_T:
1873 		adapter->hw.mac.autoneg = FALSE;
1874 		adapter->hw.phy.autoneg_advertised = 0;
1875 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1876 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1877 		else
1878 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1879 		break;
1880 	default:
1881 		device_printf(adapter->dev, "Unsupported media type\n");
1882 	}
1883 
1884 	em_init_locked(adapter);
1885 	EM_CORE_UNLOCK(adapter);
1886 
1887 	return (0);
1888 }
1889 
1890 /*********************************************************************
1891  *
1892  *  This routine maps the mbufs to tx descriptors.
1893  *
1894  *  return 0 on success, positive on failure
1895  **********************************************************************/
1896 
1897 static int
1898 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1899 {
1900 	struct adapter		*adapter = txr->adapter;
1901 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1902 	bus_dmamap_t		map;
1903 	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
1904 	struct e1000_tx_desc	*ctxd = NULL;
1905 	struct mbuf		*m_head;
1906 	struct ether_header	*eh;
1907 	struct ip		*ip = NULL;
1908 	struct tcphdr		*tp = NULL;
1909 	u32			txd_upper = 0, txd_lower = 0;
1910 	int			ip_off, poff;
1911 	int			nsegs, i, j, first, last = 0;
1912 	int			error;
1913 	bool			do_tso, tso_desc, remap = TRUE;
1914 
1915 	m_head = *m_headp;
1916 	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1917 	tso_desc = FALSE;
1918 	ip_off = poff = 0;
1919 
1920 	/*
1921 	 * Intel recommends entire IP/TCP header length reside in a single
1922 	 * buffer. If multiple descriptors are used to describe the IP and
1923 	 * TCP header, each descriptor should describe one or more
1924 	 * complete headers; descriptors referencing only parts of headers
1925 	 * are not supported. If all layer headers are not coalesced into
1926 	 * a single buffer, each buffer should not cross a 4KB boundary,
1927 	 * or be larger than the maximum read request size.
	 * The controller also requires the IP/TCP header to be modified
	 * to make TSO work, so we first get a writable mbuf chain, then
	 * coalesce the ethernet/IP/TCP header into a single buffer to
	 * meet the requirement of the controller. This also simplifies
	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1933 	 */
1934 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1935 		if (do_tso || (m_head->m_next != NULL &&
1936 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1937 			if (M_WRITABLE(*m_headp) == 0) {
1938 				m_head = m_dup(*m_headp, M_NOWAIT);
1939 				m_freem(*m_headp);
1940 				if (m_head == NULL) {
1941 					*m_headp = NULL;
1942 					return (ENOBUFS);
1943 				}
1944 				*m_headp = m_head;
1945 			}
1946 		}
1947 		/*
1948 		 * XXX
1949 		 * Assume IPv4, we don't have TSO/checksum offload support
1950 		 * for IPv6 yet.
1951 		 */
1952 		ip_off = sizeof(struct ether_header);
1953 		if (m_head->m_len < ip_off) {
1954 			m_head = m_pullup(m_head, ip_off);
1955 			if (m_head == NULL) {
1956 				*m_headp = NULL;
1957 				return (ENOBUFS);
1958 			}
1959 		}
1960 		eh = mtod(m_head, struct ether_header *);
1961 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1962 			ip_off = sizeof(struct ether_vlan_header);
1963 			if (m_head->m_len < ip_off) {
1964 				m_head = m_pullup(m_head, ip_off);
1965 				if (m_head == NULL) {
1966 					*m_headp = NULL;
1967 					return (ENOBUFS);
1968 				}
1969 			}
1970 		}
1971 		if (m_head->m_len < ip_off + sizeof(struct ip)) {
1972 			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1973 			if (m_head == NULL) {
1974 				*m_headp = NULL;
1975 				return (ENOBUFS);
1976 			}
1977 		}
1978 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1979 		poff = ip_off + (ip->ip_hl << 2);
1980 
1981 		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1982 			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1983 				m_head = m_pullup(m_head, poff +
1984 				    sizeof(struct tcphdr));
1985 				if (m_head == NULL) {
1986 					*m_headp = NULL;
1987 					return (ENOBUFS);
1988 				}
1989 			}
1990 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1991 			/*
1992 			 * TSO workaround:
			 *   pull TSO_WORKAROUND extra bytes into the first mbuf.
1994 			 */
1995 			if (m_head->m_len < poff + (tp->th_off << 2)) {
1996 				m_head = m_pullup(m_head, poff +
1997 				                 (tp->th_off << 2) +
1998 				                 TSO_WORKAROUND);
1999 				if (m_head == NULL) {
2000 					*m_headp = NULL;
2001 					return (ENOBUFS);
2002 				}
2003 			}
2004 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2005 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2006 			if (do_tso) {
2007 				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2008 				                  (ip->ip_hl << 2) +
2009 				                  (tp->th_off << 2));
2010 				ip->ip_sum = 0;
2011 				/*
				 * The TCP pseudo-header checksum must not
				 * include the TCP payload length, so the
				 * driver recomputes the checksum here as
				 * the hardware expects to see it, per
				 * Microsoft's Large Send specification.
				 */
2018 				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2019 				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
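				/*
				 * Illustrative note: in_pseudo() yields the
				 * ones-complement sum of just these fields
				 * (addresses and protocol, no length); the
				 * hardware folds the per-segment TCP length
				 * into this seed for every frame it carves
				 * out of the TSO payload.
				 */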
2020 			}
2021 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2022 			if (m_head->m_len < poff + sizeof(struct udphdr)) {
2023 				m_head = m_pullup(m_head, poff +
2024 				    sizeof(struct udphdr));
2025 				if (m_head == NULL) {
2026 					*m_headp = NULL;
2027 					return (ENOBUFS);
2028 				}
2029 			}
2030 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2031 		}
2032 		*m_headp = m_head;
2033 	}
2034 
2035 	/*
2036 	 * Map the packet for DMA
2037 	 *
	 * Capture the first descriptor index;
	 * this descriptor will have the index
	 * of the EOP, which is the only one that
	 * now gets a DONE bit writeback.
2042 	 */
2043 	first = txr->next_avail_desc;
2044 	tx_buffer = &txr->tx_buffers[first];
2045 	tx_buffer_mapped = tx_buffer;
2046 	map = tx_buffer->map;
2047 
2048 retry:
2049 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2050 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2051 
2052 	/*
2053 	 * There are two types of errors we can (try) to handle:
2054 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2055 	 *   out of segments.  Defragment the mbuf chain and try again.
2056 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2057 	 *   at this point in time.  Defer sending and try again later.
2058 	 * All other errors, in particular EINVAL, are fatal and prevent the
2059 	 * mbuf chain from ever going through.  Drop it and report error.
2060 	 */
2061 	if (error == EFBIG && remap) {
2062 		struct mbuf *m;
2063 
2064 		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2065 		if (m == NULL) {
2066 			adapter->mbuf_defrag_failed++;
2067 			m_freem(*m_headp);
2068 			*m_headp = NULL;
2069 			return (ENOBUFS);
2070 		}
2071 		*m_headp = m;
2072 
2073 		/* Try it again, but only once */
2074 		remap = FALSE;
2075 		goto retry;
2076 	} else if (error != 0) {
2077 		adapter->no_tx_dma_setup++;
2078 		m_freem(*m_headp);
2079 		*m_headp = NULL;
2080 		return (error);
2081 	}
2082 
2083 	/*
2084 	 * TSO Hardware workaround, if this packet is not
2085 	 * TSO, and is only a single descriptor long, and
2086 	 * it follows a TSO burst, then we need to add a
2087 	 * sentinel descriptor to prevent premature writeback.
2088 	 */
2089 	if ((!do_tso) && (txr->tx_tso == TRUE)) {
2090 		if (nsegs == 1)
2091 			tso_desc = TRUE;
2092 		txr->tx_tso = FALSE;
2093 	}
2094 
	if (nsegs > (txr->tx_avail - EM_MAX_SCATTER)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
2100 	m_head = *m_headp;
2101 
2102 	/* Do hardware assists */
2103 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2104 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2105 		    &txd_upper, &txd_lower);
2106 		/* we need to make a final sentinel transmit desc */
2107 		tso_desc = TRUE;
2108 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2109 		em_transmit_checksum_setup(txr, m_head,
2110 		    ip_off, ip, &txd_upper, &txd_lower);
2111 
2112 	if (m_head->m_flags & M_VLANTAG) {
2113 		/* Set the vlan id. */
2114 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
		/* Tell hardware to add tag */
		txd_lower |= htole32(E1000_TXD_CMD_VLE);
	}
2118 
2119 	i = txr->next_avail_desc;
2120 
2121 	/* Set up our transmit descriptors */
2122 	for (j = 0; j < nsegs; j++) {
2123 		bus_size_t seg_len;
2124 		bus_addr_t seg_addr;
2125 
2126 		tx_buffer = &txr->tx_buffers[i];
2127 		ctxd = &txr->tx_base[i];
2128 		seg_addr = segs[j].ds_addr;
2129 		seg_len  = segs[j].ds_len;
2130 		/*
2131 		** TSO Workaround:
2132 		** If this is the last descriptor, we want to
2133 		** split it so we have a small final sentinel
2134 		*/
2135 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2136 			seg_len -= TSO_WORKAROUND;
2137 			ctxd->buffer_addr = htole64(seg_addr);
2138 			ctxd->lower.data = htole32(
2139 				adapter->txd_cmd | txd_lower | seg_len);
2140 			ctxd->upper.data = htole32(txd_upper);
2141 			if (++i == adapter->num_tx_desc)
2142 				i = 0;
2143 
2144 			/* Now make the sentinel */
2145 			txr->tx_avail--;
2146 			ctxd = &txr->tx_base[i];
2147 			tx_buffer = &txr->tx_buffers[i];
2148 			ctxd->buffer_addr =
2149 			    htole64(seg_addr + seg_len);
2150 			ctxd->lower.data = htole32(
				adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2152 			ctxd->upper.data =
2153 			    htole32(txd_upper);
2154 			last = i;
2155 			if (++i == adapter->num_tx_desc)
2156 				i = 0;
2157 		} else {
2158 			ctxd->buffer_addr = htole64(seg_addr);
2159 			ctxd->lower.data = htole32(
				adapter->txd_cmd | txd_lower | seg_len);
2161 			ctxd->upper.data = htole32(txd_upper);
2162 			last = i;
2163 			if (++i == adapter->num_tx_desc)
2164 				i = 0;
2165 		}
2166 		tx_buffer->m_head = NULL;
2167 		tx_buffer->next_eop = -1;
2168 	}
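	/*
	 * Worked example of the sentinel split above (illustrative only,
	 * assuming TSO_WORKAROUND == 4 to match the 4-byte pull-up done
	 * earlier in this function): a final 1514-byte segment becomes
	 *
	 *	desc[n]:   addr = seg_addr,        len = 1514 - 4 = 1510
	 *	desc[n+1]: addr = seg_addr + 1510, len = 4  (sentinel)
	 *
	 * so the tiny trailing descriptor, not the large one, carries
	 * EOP/RS and is the one the hardware writes back, avoiding a
	 * premature writeback after a TSO burst.
	 */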
2169 
2170 	txr->next_avail_desc = i;
2171 	txr->tx_avail -= nsegs;
2172 
	tx_buffer->m_head = m_head;
2174 	/*
2175 	** Here we swap the map so the last descriptor,
	** which gets the completion interrupt, has the
2177 	** real map, and the first descriptor gets the
2178 	** unused map from this descriptor.
2179 	*/
2180 	tx_buffer_mapped->map = tx_buffer->map;
2181 	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	ctxd->lower.data |=
2190 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2191 	/*
2192 	 * Keep track in the first buffer which
2193 	 * descriptor will be written back
2194 	 */
2195 	tx_buffer = &txr->tx_buffers[first];
2196 	tx_buffer->next_eop = last;
2197 
2198 	/*
2199 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2200 	 * that this frame is available to transmit.
2201 	 */
2202 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2203 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2204 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2205 
2206 	return (0);
2207 }
2208 
2209 static void
2210 em_set_promisc(struct adapter *adapter)
2211 {
2212 	if_t ifp = adapter->ifp;
2213 	u32		reg_rctl;
2214 
2215 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2216 
2217 	if (if_getflags(ifp) & IFF_PROMISC) {
2218 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2219 		/* Turn this on if you want to see bad packets */
2220 		if (em_debug_sbp)
2221 			reg_rctl |= E1000_RCTL_SBP;
2222 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2223 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2224 		reg_rctl |= E1000_RCTL_MPE;
2225 		reg_rctl &= ~E1000_RCTL_UPE;
2226 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2227 	}
2228 }
2229 
2230 static void
2231 em_disable_promisc(struct adapter *adapter)
2232 {
2233 	if_t		ifp = adapter->ifp;
2234 	u32		reg_rctl;
2235 	int		mcnt = 0;
2236 
2237 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2238 	reg_rctl &=  (~E1000_RCTL_UPE);
2239 	if (if_getflags(ifp) & IFF_ALLMULTI)
2240 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2241 	else
2242 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2243 	/* Don't disable if in MAX groups */
2244 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2245 		reg_rctl &=  (~E1000_RCTL_MPE);
2246 	reg_rctl &=  (~E1000_RCTL_SBP);
2247 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2248 }
2249 
2250 
2251 /*********************************************************************
2252  *  Multicast Update
2253  *
 *  This routine is called whenever the multicast address list is updated.
2255  *
2256  **********************************************************************/
2257 
2258 static void
2259 em_set_multi(struct adapter *adapter)
2260 {
2261 	if_t ifp = adapter->ifp;
2262 	u32 reg_rctl = 0;
2263 	u8  *mta; /* Multicast array memory */
2264 	int mcnt = 0;
2265 
2266 	IOCTL_DEBUGOUT("em_set_multi: begin");
2267 
2268 	mta = adapter->mta;
2269 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2270 
2271 	if (adapter->hw.mac.type == e1000_82542 &&
2272 	    adapter->hw.revision_id == E1000_REVISION_2) {
2273 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2274 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2275 			e1000_pci_clear_mwi(&adapter->hw);
2276 		reg_rctl |= E1000_RCTL_RST;
2277 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2278 		msec_delay(5);
2279 	}
2280 
2281 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2282 
2283 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2284 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2285 		reg_rctl |= E1000_RCTL_MPE;
2286 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2287 	} else
2288 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2289 
2290 	if (adapter->hw.mac.type == e1000_82542 &&
2291 	    adapter->hw.revision_id == E1000_REVISION_2) {
2292 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2293 		reg_rctl &= ~E1000_RCTL_RST;
2294 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2295 		msec_delay(5);
2296 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2297 			e1000_pci_set_mwi(&adapter->hw);
2298 	}
2299 }
2300 
2301 
2302 /*********************************************************************
2303  *  Timer routine
2304  *
2305  *  This routine checks for link status and updates statistics.
2306  *
2307  **********************************************************************/
2308 
2309 static void
2310 em_local_timer(void *arg)
2311 {
2312 	struct adapter	*adapter = arg;
2313 	if_t ifp = adapter->ifp;
2314 	struct tx_ring	*txr = adapter->tx_rings;
2315 	struct rx_ring	*rxr = adapter->rx_rings;
2316 	u32		trigger = 0;
2317 
2318 	EM_CORE_LOCK_ASSERT(adapter);
2319 
2320 	em_update_link_status(adapter);
2321 	em_update_stats_counters(adapter);
2322 
2323 	/* Reset LAA into RAR[0] on 82571 */
2324 	if ((adapter->hw.mac.type == e1000_82571) &&
2325 	    e1000_get_laa_state_82571(&adapter->hw))
2326 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2327 
2328 	/* Mask to use in the irq trigger */
2329 	if (adapter->msix_mem) {
2330 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2331 			trigger |= rxr->ims;
2332 		rxr = adapter->rx_rings;
2333 	} else
2334 		trigger = E1000_ICS_RXDMT0;
2335 
2336 	/*
	** Check on the state of the TX queue(s); this
	** can be done without the lock because it is
	** read-only and the HUNG state is static once set.
2340 	*/
2341 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2342 		if (txr->busy == EM_TX_HUNG)
2343 			goto hung;
2344 		if (txr->busy >= EM_TX_MAXTRIES)
2345 			txr->busy = EM_TX_HUNG;
2346 		/* Schedule a TX tasklet if needed */
2347 		if (txr->tx_avail <= EM_MAX_SCATTER)
2348 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2349 	}
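	/*
	 * Illustrative lifecycle of the hang detector driven above (the
	 * busy counter itself is maintained by the transmit/cleanup paths,
	 * which are not shown here): a ring sits at EM_TX_IDLE while quiet,
	 * the counter climbs on ticks with outstanding work, it is promoted
	 * to EM_TX_HUNG once it reaches EM_TX_MAXTRIES, and the tick after
	 * that takes the watchdog-reset path below.
	 */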
2350 
2351 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2352 #ifndef DEVICE_POLLING
2353 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2354 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2355 #endif
2356 	return;
2357 hung:
2358 	/* Looks like we're hung */
2359 	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2360 			txr->me);
2361 	em_print_debug_info(adapter);
2362 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2363 	adapter->watchdog_events++;
2364 	em_init_locked(adapter);
2365 }
2366 
2367 
2368 static void
2369 em_update_link_status(struct adapter *adapter)
2370 {
2371 	struct e1000_hw *hw = &adapter->hw;
2372 	if_t ifp = adapter->ifp;
2373 	device_t dev = adapter->dev;
2374 	struct tx_ring *txr = adapter->tx_rings;
2375 	u32 link_check = 0;
2376 
2377 	/* Get the cached link value or read phy for real */
2378 	switch (hw->phy.media_type) {
2379 	case e1000_media_type_copper:
2380 		if (hw->mac.get_link_status) {
2381 			if (hw->mac.type == e1000_pch_spt)
2382 				msec_delay(50);
2383 			/* Do the work to read phy */
2384 			e1000_check_for_link(hw);
2385 			link_check = !hw->mac.get_link_status;
2386 			if (link_check) /* ESB2 fix */
2387 				e1000_cfg_on_link_up(hw);
2388 		} else
2389 			link_check = TRUE;
2390 		break;
2391 	case e1000_media_type_fiber:
2392 		e1000_check_for_link(hw);
2393 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2394                                  E1000_STATUS_LU);
2395 		break;
2396 	case e1000_media_type_internal_serdes:
2397 		e1000_check_for_link(hw);
2398 		link_check = adapter->hw.mac.serdes_has_link;
2399 		break;
2400 	default:
2401 	case e1000_media_type_unknown:
2402 		break;
2403 	}
2404 
2405 	/* Now check for a transition */
2406 	if (link_check && (adapter->link_active == 0)) {
2407 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2408 		    &adapter->link_duplex);
2409 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2410 		if ((adapter->link_speed != SPEED_1000) &&
2411 		    ((hw->mac.type == e1000_82571) ||
2412 		    (hw->mac.type == e1000_82572))) {
2413 			int tarc0;
2414 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2415 			tarc0 &= ~TARC_SPEED_MODE_BIT;
2416 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2417 		}
2418 		if (bootverbose)
2419 			device_printf(dev, "Link is up %d Mbps %s\n",
2420 			    adapter->link_speed,
2421 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2422 			    "Full Duplex" : "Half Duplex"));
2423 		adapter->link_active = 1;
2424 		adapter->smartspeed = 0;
2425 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2426 		if_link_state_change(ifp, LINK_STATE_UP);
2427 	} else if (!link_check && (adapter->link_active == 1)) {
2428 		if_setbaudrate(ifp, 0);
2429 		adapter->link_speed = 0;
2430 		adapter->link_duplex = 0;
2431 		if (bootverbose)
2432 			device_printf(dev, "Link is Down\n");
2433 		adapter->link_active = 0;
2434 		/* Link down, disable hang detection */
2435 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2436 			txr->busy = EM_TX_IDLE;
2437 		if_link_state_change(ifp, LINK_STATE_DOWN);
2438 	}
2439 }
2440 
2441 /*********************************************************************
2442  *
2443  *  This routine disables all traffic on the adapter by issuing a
2444  *  global reset on the MAC and deallocates TX/RX buffers.
2445  *
2446  *  This routine should always be called with BOTH the CORE
2447  *  and TX locks.
2448  **********************************************************************/
2449 
2450 static void
2451 em_stop(void *arg)
2452 {
2453 	struct adapter	*adapter = arg;
2454 	if_t ifp = adapter->ifp;
2455 	struct tx_ring	*txr = adapter->tx_rings;
2456 
2457 	EM_CORE_LOCK_ASSERT(adapter);
2458 
2459 	INIT_DEBUGOUT("em_stop: begin");
2460 
2461 	em_disable_intr(adapter);
2462 	callout_stop(&adapter->timer);
2463 
2464 	/* Tell the stack that the interface is no longer active */
2465 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2466 
	/* Disarm Hang Detection. */
2468 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2469 		EM_TX_LOCK(txr);
2470 		txr->busy = EM_TX_IDLE;
2471 		EM_TX_UNLOCK(txr);
2472 	}
2473 
2474 	/* I219 needs some special flushing to avoid hangs */
2475 	if (adapter->hw.mac.type == e1000_pch_spt)
2476 		em_flush_desc_rings(adapter);
2477 
2478 	e1000_reset_hw(&adapter->hw);
2479 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2480 
2481 	e1000_led_off(&adapter->hw);
2482 	e1000_cleanup_led(&adapter->hw);
2483 }
2484 
2485 
2486 /*********************************************************************
2487  *
2488  *  Determine hardware revision.
2489  *
2490  **********************************************************************/
2491 static void
2492 em_identify_hardware(struct adapter *adapter)
2493 {
2494 	device_t dev = adapter->dev;
2495 
2496 	/* Make sure our PCI config space has the necessary stuff set */
2497 	pci_enable_busmaster(dev);
2498 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2499 
2500 	/* Save off the information about this board */
2501 	adapter->hw.vendor_id = pci_get_vendor(dev);
2502 	adapter->hw.device_id = pci_get_device(dev);
2503 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2504 	adapter->hw.subsystem_vendor_id =
2505 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2506 	adapter->hw.subsystem_device_id =
2507 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2508 
2509 	/* Do Shared Code Init and Setup */
2510 	if (e1000_set_mac_type(&adapter->hw)) {
2511 		device_printf(dev, "Setup init failure\n");
2512 		return;
2513 	}
2514 }
2515 
2516 static int
2517 em_allocate_pci_resources(struct adapter *adapter)
2518 {
2519 	device_t	dev = adapter->dev;
2520 	int		rid;
2521 
2522 	rid = PCIR_BAR(0);
2523 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2524 	    &rid, RF_ACTIVE);
2525 	if (adapter->memory == NULL) {
2526 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2527 		return (ENXIO);
2528 	}
2529 	adapter->osdep.mem_bus_space_tag =
2530 	    rman_get_bustag(adapter->memory);
2531 	adapter->osdep.mem_bus_space_handle =
2532 	    rman_get_bushandle(adapter->memory);
2533 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2534 
2535 	adapter->hw.back = &adapter->osdep;
2536 
2537 	return (0);
2538 }
2539 
2540 /*********************************************************************
2541  *
2542  *  Setup the Legacy or MSI Interrupt handler
2543  *
2544  **********************************************************************/
2545 int
2546 em_allocate_legacy(struct adapter *adapter)
2547 {
2548 	device_t dev = adapter->dev;
2549 	struct tx_ring	*txr = adapter->tx_rings;
2550 	int error, rid = 0;
2551 
2552 	/* Manually turn off all interrupts */
2553 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2554 
2555 	if (adapter->msix == 1) /* using MSI */
2556 		rid = 1;
2557 	/* We allocate a single interrupt resource */
2558 	adapter->res = bus_alloc_resource_any(dev,
2559 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2560 	if (adapter->res == NULL) {
2561 		device_printf(dev, "Unable to allocate bus resource: "
2562 		    "interrupt\n");
2563 		return (ENXIO);
2564 	}
2565 
2566 	/*
2567 	 * Allocate a fast interrupt and the associated
2568 	 * deferred processing contexts.
2569 	 */
2570 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2571 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2572 	    taskqueue_thread_enqueue, &adapter->tq);
2573 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2574 	    device_get_nameunit(adapter->dev));
2575 	/* Use a TX only tasklet for local timer */
2576 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2577 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2578 	    taskqueue_thread_enqueue, &txr->tq);
2579 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2580 	    device_get_nameunit(adapter->dev));
2581 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2582 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2583 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2584 		device_printf(dev, "Failed to register fast interrupt "
2585 			    "handler: %d\n", error);
2586 		taskqueue_free(adapter->tq);
2587 		adapter->tq = NULL;
2588 		return (error);
2589 	}
2590 
2591 	return (0);
2592 }
2593 
2594 /*********************************************************************
2595  *
2596  *  Setup the MSIX Interrupt handlers
 *   This is not really multiqueue; rather,
 *   it is just separate interrupt vectors
 *   for TX, RX, and Link.
2600  *
2601  **********************************************************************/
2602 int
2603 em_allocate_msix(struct adapter *adapter)
2604 {
2605 	device_t	dev = adapter->dev;
2606 	struct		tx_ring *txr = adapter->tx_rings;
2607 	struct		rx_ring *rxr = adapter->rx_rings;
2608 	int		error, rid, vector = 0;
2609 	int		cpu_id = 0;
2610 
2611 
2612 	/* Make sure all interrupts are disabled */
2613 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2614 
2615 	/* First set up ring resources */
2616 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2617 
2618 		/* RX ring */
2619 		rid = vector + 1;
2620 
2621 		rxr->res = bus_alloc_resource_any(dev,
2622 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2623 		if (rxr->res == NULL) {
2624 			device_printf(dev,
2625 			    "Unable to allocate bus resource: "
2626 			    "RX MSIX Interrupt %d\n", i);
2627 			return (ENXIO);
2628 		}
2629 		if ((error = bus_setup_intr(dev, rxr->res,
2630 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2631 		    rxr, &rxr->tag)) != 0) {
			device_printf(dev, "Failed to register RX handler\n");
2633 			return (error);
2634 		}
2635 #if __FreeBSD_version >= 800504
2636 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2637 #endif
2638 		rxr->msix = vector;
2639 
2640 		if (em_last_bind_cpu < 0)
2641 			em_last_bind_cpu = CPU_FIRST();
2642 		cpu_id = em_last_bind_cpu;
2643 		bus_bind_intr(dev, rxr->res, cpu_id);
2644 
2645 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2646 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2647 		    taskqueue_thread_enqueue, &rxr->tq);
2648 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2649 		    device_get_nameunit(adapter->dev), cpu_id);
2650 		/*
2651 		** Set the bit to enable interrupt
2652 		** in E1000_IMS -- bits 20 and 21
2653 		** are for RX0 and RX1, note this has
2654 		** NOTHING to do with the MSIX vector
2655 		*/
2656 		rxr->ims = 1 << (20 + i);
2657 		adapter->ims |= rxr->ims;
2658 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2659 
2660 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2661 	}
2662 
2663 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2664 		/* TX ring */
2665 		rid = vector + 1;
2666 		txr->res = bus_alloc_resource_any(dev,
2667 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2668 		if (txr->res == NULL) {
2669 			device_printf(dev,
2670 			    "Unable to allocate bus resource: "
2671 			    "TX MSIX Interrupt %d\n", i);
2672 			return (ENXIO);
2673 		}
2674 		if ((error = bus_setup_intr(dev, txr->res,
2675 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2676 		    txr, &txr->tag)) != 0) {
			device_printf(dev, "Failed to register TX handler\n");
2678 			return (error);
2679 		}
2680 #if __FreeBSD_version >= 800504
2681 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2682 #endif
2683 		txr->msix = vector;
2684 
		if (em_last_bind_cpu < 0)
			em_last_bind_cpu = CPU_FIRST();
		cpu_id = em_last_bind_cpu;
		bus_bind_intr(dev, txr->res, cpu_id);
2689 
2690 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2691 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2692 		    taskqueue_thread_enqueue, &txr->tq);
2693 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2694 		    device_get_nameunit(adapter->dev), cpu_id);
2695 		/*
2696 		** Set the bit to enable interrupt
2697 		** in E1000_IMS -- bits 22 and 23
2698 		** are for TX0 and TX1, note this has
2699 		** NOTHING to do with the MSIX vector
2700 		*/
2701 		txr->ims = 1 << (22 + i);
2702 		adapter->ims |= txr->ims;
2703 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2704 
2705 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2706 	}
2707 
2708 	/* Link interrupt */
2709 	rid = vector + 1;
2710 	adapter->res = bus_alloc_resource_any(dev,
2711 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->res == NULL) {
		device_printf(dev, "Unable to allocate "
		    "bus resource: Link interrupt [%d]\n", rid);
		return (ENXIO);
	}
2717 	/* Set the link handler function */
2718 	error = bus_setup_intr(dev, adapter->res,
2719 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2720 	    em_msix_link, adapter, &adapter->tag);
2721 	if (error) {
2722 		adapter->res = NULL;
		device_printf(dev, "Failed to register LINK handler\n");
2724 		return (error);
2725 	}
2726 #if __FreeBSD_version >= 800504
2727 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2728 #endif
2729 	adapter->linkvec = vector;
2730 	adapter->ivars |=  (8 | vector) << 16;
2731 	adapter->ivars |= 0x80000000;
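	/*
	 * Illustrative IVAR contents for the common 3-vector layout built
	 * above (vector 0 = RX0, vector 1 = TX0, vector 2 = link); each
	 * 4-bit field holds (8 | vector), the 8 being the per-entry valid
	 * bit, so with bit 31 also set:
	 *
	 *	ivars = 0x80000000
	 *	      | (8 | 0) << 0	RX0  -> vector 0
	 *	      | (8 | 1) << 8	TX0  -> vector 1
	 *	      | (8 | 2) << 16	link -> vector 2
	 *	      = 0x800a0908
	 */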
2732 
2733 	return (0);
2734 }
2735 
2736 
2737 static void
2738 em_free_pci_resources(struct adapter *adapter)
2739 {
2740 	device_t	dev = adapter->dev;
2741 	struct tx_ring	*txr;
2742 	struct rx_ring	*rxr;
2743 	int		rid;
2744 
2745 
2746 	/*
2747 	** Release all the queue interrupt resources:
2748 	*/
2749 	for (int i = 0; i < adapter->num_queues; i++) {
2750 		txr = &adapter->tx_rings[i];
2751 		/* an early abort? */
2752 		if (txr == NULL)
2753 			break;
		rid = txr->msix + 1;
2755 		if (txr->tag != NULL) {
2756 			bus_teardown_intr(dev, txr->res, txr->tag);
2757 			txr->tag = NULL;
2758 		}
2759 		if (txr->res != NULL)
2760 			bus_release_resource(dev, SYS_RES_IRQ,
2761 			    rid, txr->res);
2762 
2763 		rxr = &adapter->rx_rings[i];
2764 		/* an early abort? */
2765 		if (rxr == NULL)
2766 			break;
		rid = rxr->msix + 1;
2768 		if (rxr->tag != NULL) {
2769 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2770 			rxr->tag = NULL;
2771 		}
2772 		if (rxr->res != NULL)
2773 			bus_release_resource(dev, SYS_RES_IRQ,
2774 			    rid, rxr->res);
2775 	}
2776 
	if (adapter->linkvec) /* we are doing MSIX */
		rid = adapter->linkvec + 1;
	else
		rid = (adapter->msix != 0) ? 1 : 0;
2781 
2782 	if (adapter->tag != NULL) {
2783 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2784 		adapter->tag = NULL;
2785 	}
2786 
2787 	if (adapter->res != NULL)
2788 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2789 
2790 
2791 	if (adapter->msix)
2792 		pci_release_msi(dev);
2793 
2794 	if (adapter->msix_mem != NULL)
2795 		bus_release_resource(dev, SYS_RES_MEMORY,
2796 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2797 
2798 	if (adapter->memory != NULL)
2799 		bus_release_resource(dev, SYS_RES_MEMORY,
2800 		    PCIR_BAR(0), adapter->memory);
2801 
2802 	if (adapter->flash != NULL)
2803 		bus_release_resource(dev, SYS_RES_MEMORY,
2804 		    EM_FLASH, adapter->flash);
2805 }
2806 
2807 /*
2808  * Setup MSI or MSI/X
2809  */
2810 static int
2811 em_setup_msix(struct adapter *adapter)
2812 {
2813 	device_t dev = adapter->dev;
2814 	int val;
2815 
2816 	/* Nearly always going to use one queue */
2817 	adapter->num_queues = 1;
2818 
2819 	/*
2820 	** Try using MSI-X for Hartwell adapters
2821 	*/
2822 	if ((adapter->hw.mac.type == e1000_82574) &&
2823 	    (em_enable_msix == TRUE)) {
2824 #ifdef EM_MULTIQUEUE
2825 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2826 		if (adapter->num_queues > 1)
2827 			em_enable_vectors_82574(adapter);
2828 #endif
2829 		/* Map the MSIX BAR */
2830 		int rid = PCIR_BAR(EM_MSIX_BAR);
2831 		adapter->msix_mem = bus_alloc_resource_any(dev,
2832 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->msix_mem == NULL) {
			/* May not be enabled */
			device_printf(adapter->dev,
			    "Unable to map MSIX table\n");
			goto msi;
		}
2839 		val = pci_msix_count(dev);
2840 
2841 #ifdef EM_MULTIQUEUE
		/* We need 5 vectors in the multiqueue case: 2 RX, 2 TX, 1 link */
		if (adapter->num_queues > 1) {
2844 			if (val >= 5)
2845 				val = 5;
2846 			else {
2847 				adapter->num_queues = 1;
2848 				device_printf(adapter->dev,
2849 				    "Insufficient MSIX vectors for >1 queue, "
2850 				    "using single queue...\n");
2851 				goto msix_one;
2852 			}
2853 		} else {
2854 msix_one:
2855 #endif
2856 			if (val >= 3)
2857 				val = 3;
2858 			else {
2859 				device_printf(adapter->dev,
				    "Insufficient MSIX vectors, using MSI\n");
2861 				goto msi;
2862 			}
2863 #ifdef EM_MULTIQUEUE
2864 		}
2865 #endif
2866 
		if (pci_alloc_msix(dev, &val) == 0) {
2868 			device_printf(adapter->dev,
2869 			    "Using MSIX interrupts "
2870 			    "with %d vectors\n", val);
2871 			return (val);
2872 		}
2873 
2874 		/*
2875 		** If MSIX alloc failed or provided us with
2876 		** less than needed, free and fall through to MSI
2877 		*/
2878 		pci_release_msi(dev);
2879 	}
2880 msi:
2881 	if (adapter->msix_mem != NULL) {
2882 		bus_release_resource(dev, SYS_RES_MEMORY,
2883 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2884 		adapter->msix_mem = NULL;
	val = 1;
	if (pci_alloc_msi(dev, &val) == 0) {
		device_printf(adapter->dev, "Using an MSI interrupt\n");
		return (val);
2889 		return (val);
2890 	}
2891 	/* Should only happen due to manual configuration */
	device_printf(adapter->dev, "No MSI/MSI-X, using a Legacy IRQ\n");
2893 	return (0);
2894 }
2895 
2896 
2897 /*
** The following three flush routines are used as a workaround for the
** I219 client parts, and only for them.
**
** em_flush_tx_ring - remove all descriptors from the tx_ring
**
** We want to clear all pending descriptors from the TX ring;
** zeroing happens when the HW reads the regs. We assign the ring itself
** as the dummy data of the next descriptor. The data does not matter,
** since we are about to reset the HW anyway.
2907 */
2908 static void
2909 em_flush_tx_ring(struct adapter *adapter)
2910 {
2911 	struct e1000_hw		*hw = &adapter->hw;
2912 	struct tx_ring		*txr = adapter->tx_rings;
2913 	struct e1000_tx_desc	*txd;
2914 	u32			tctl, txd_lower = E1000_TXD_CMD_IFCS;
2915 	u16			size = 512;
2916 
2917 	tctl = E1000_READ_REG(hw, E1000_TCTL);
2918 	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2919 
2920 	txd = &txr->tx_base[txr->next_avail_desc++];
2921 	if (txr->next_avail_desc == adapter->num_tx_desc)
2922 		txr->next_avail_desc = 0;
2923 
2924 	/* Just use the ring as a dummy buffer addr */
2925 	txd->buffer_addr = txr->txdma.dma_paddr;
2926 	txd->lower.data = htole32(txd_lower | size);
2927 	txd->upper.data = 0;
2928 
2929 	/* flush descriptors to memory before notifying the HW */
2930 	wmb();
2931 
2932 	E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2933 	mb();
2934 	usec_delay(250);
2935 }
2936 
2937 /*
2938 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2939 **
2940 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
2941 */
2942 static void
2943 em_flush_rx_ring(struct adapter *adapter)
2944 {
2945 	struct e1000_hw	*hw = &adapter->hw;
2946 	u32		rctl, rxdctl;
2947 
2948 	rctl = E1000_READ_REG(hw, E1000_RCTL);
2949 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2950 	E1000_WRITE_FLUSH(hw);
2951 	usec_delay(150);
2952 
2953 	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2954 	/* zero the lower 14 bits (prefetch and host thresholds) */
2955 	rxdctl &= 0xffffc000;
2956 	/*
2957 	 * update thresholds: prefetch threshold to 31, host threshold to 1
2958 	 * and make sure the granularity is "descriptors" and not "cache lines"
2959 	 */
2960 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2961 	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
2962 
2963 	/* momentarily enable the RX ring for the changes to take effect */
2964 	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2965 	E1000_WRITE_FLUSH(hw);
2966 	usec_delay(150);
2967 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2968 }
2969 
2970 /*
2971 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
2972 **
2973 ** In i219, the descriptor rings must be emptied before resetting the HW
2974 ** or before changing the device state to D3 during runtime (runtime PM).
2975 **
** Failure to do this will cause the HW to enter a unit hang state which
** can only be released by a PCI reset on the device.
2978 **
2979 */
2980 static void
2981 em_flush_desc_rings(struct adapter *adapter)
2982 {
2983 	struct e1000_hw	*hw = &adapter->hw;
2984 	device_t	dev = adapter->dev;
2985 	u16		hang_state;
2986 	u32		fext_nvm11, tdlen;
2987 
2988 	/* First, disable MULR fix in FEXTNVM11 */
2989 	fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
2990 	fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
2991 	E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
2992 
2993 	/* do nothing if we're not in faulty state, or if the queue is empty */
2994 	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
2995 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
2996 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
2997 		return;
2998 	em_flush_tx_ring(adapter);
2999 
3000 	/* recheck, maybe the fault is caused by the rx ring */
3001 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3002 	if (hang_state & FLUSH_DESC_REQUIRED)
3003 		em_flush_rx_ring(adapter);
3004 }
3005 
3006 
3007 /*********************************************************************
3008  *
3009  *  Initialize the hardware to a configuration
3010  *  as specified by the adapter structure.
3011  *
3012  **********************************************************************/
3013 static void
3014 em_reset(struct adapter *adapter)
3015 {
3016 	device_t	dev = adapter->dev;
3017 	if_t ifp = adapter->ifp;
3018 	struct e1000_hw	*hw = &adapter->hw;
3019 	u16		rx_buffer_size;
3020 	u32		pba;
3021 
3022 	INIT_DEBUGOUT("em_reset: begin");
3023 
3024 	/* Set up smart power down as default off on newer adapters. */
3025 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3026 	    hw->mac.type == e1000_82572)) {
3027 		u16 phy_tmp = 0;
3028 
3029 		/* Speed up time to link by disabling smart power down. */
3030 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3031 		phy_tmp &= ~IGP02E1000_PM_SPD;
3032 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3033 	}
3034 
3035 	/*
3036 	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
3039 	 */
3040 	switch (hw->mac.type) {
3041 	/* Total Packet Buffer on these is 48K */
3042 	case e1000_82571:
3043 	case e1000_82572:
3044 	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3053 		break;
3054 	case e1000_ich8lan:
3055 		pba = E1000_PBA_8K;
3056 		break;
3057 	case e1000_ich9lan:
3058 	case e1000_ich10lan:
3059 		/* Boost Receive side for jumbo frames */
3060 		if (adapter->hw.mac.max_frame_size > 4096)
3061 			pba = E1000_PBA_14K;
3062 		else
3063 			pba = E1000_PBA_10K;
3064 		break;
3065 	case e1000_pchlan:
3066 	case e1000_pch2lan:
3067 	case e1000_pch_lpt:
3068 	case e1000_pch_spt:
3069 		pba = E1000_PBA_26K;
3070 		break;
3071 	default:
3072 		if (adapter->hw.mac.max_frame_size > 8192)
3073 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3074 		else
3075 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3076 	}
3077 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3078 
3079 	/*
3080 	 * These parameters control the automatic generation (Tx) and
3081 	 * response (Rx) to Ethernet PAUSE frames.
3082 	 * - High water mark should allow for at least two frames to be
3083 	 *   received after sending an XOFF.
3084 	 * - Low water mark works best when it is very near the high water mark.
3085 	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit. Here we use an arbitrary value of 1500 which will
3087 	 *   restart after one full frame is pulled from the buffer. There
3088 	 *   could be several smaller frames in the buffer and if so they will
3089 	 *   not trigger the XON until their total number reduces the buffer
3090 	 *   by 1500.
3091 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3092 	 */
	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3094 	hw->fc.high_water = rx_buffer_size -
3095 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
3096 	hw->fc.low_water = hw->fc.high_water - 1500;
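	/*
	 * Worked example (illustrative): with a 26KB Rx PBA, as programmed
	 * for the PCH parts above, and a standard 1518-byte max frame,
	 *
	 *	rx_buffer_size = 26 << 10                    = 26624
	 *	high_water     = 26624 - roundup2(1518, 1024)
	 *	               = 26624 - 2048                = 24576
	 *	low_water      = 24576 - 1500                = 23076
	 *
	 * leaving room for at least two full frames after XOFF is sent,
	 * with XON resuming once roughly one frame has drained.
	 */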
3097 
3098 	if (adapter->fc) /* locally set flow control value? */
3099 		hw->fc.requested_mode = adapter->fc;
3100 	else
3101 		hw->fc.requested_mode = e1000_fc_full;
3102 
3103 	if (hw->mac.type == e1000_80003es2lan)
3104 		hw->fc.pause_time = 0xFFFF;
3105 	else
3106 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
3107 
3108 	hw->fc.send_xon = TRUE;
3109 
3110 	/* Device specific overrides/settings */
3111 	switch (hw->mac.type) {
3112 	case e1000_pchlan:
3113 		/* Workaround: no TX flow ctrl for PCH */
		hw->fc.requested_mode = e1000_fc_rx_pause;
3115 		hw->fc.pause_time = 0xFFFF; /* override */
3116 		if (if_getmtu(ifp) > ETHERMTU) {
3117 			hw->fc.high_water = 0x3500;
3118 			hw->fc.low_water = 0x1500;
3119 		} else {
3120 			hw->fc.high_water = 0x5000;
3121 			hw->fc.low_water = 0x3000;
3122 		}
3123 		hw->fc.refresh_time = 0x1000;
3124 		break;
3125 	case e1000_pch2lan:
3126 	case e1000_pch_lpt:
3127 	case e1000_pch_spt:
3128 		hw->fc.high_water = 0x5C20;
3129 		hw->fc.low_water = 0x5048;
3130 		hw->fc.pause_time = 0x0650;
3131 		hw->fc.refresh_time = 0x0400;
3132 		/* Jumbos need adjusted PBA */
3133 		if (if_getmtu(ifp) > ETHERMTU)
3134 			E1000_WRITE_REG(hw, E1000_PBA, 12);
3135 		else
3136 			E1000_WRITE_REG(hw, E1000_PBA, 26);
3137 		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
3140 		if (if_getmtu(ifp) > ETHERMTU) {
3141 			hw->fc.high_water = 0x2800;
3142 			hw->fc.low_water = hw->fc.high_water - 8;
3143 			break;
3144 		}
3145 		/* else fall thru */
3146 	default:
3147 		if (hw->mac.type == e1000_80003es2lan)
3148 			hw->fc.pause_time = 0xFFFF;
3149 		break;
3150 	}
3151 
3152 	/* I219 needs some special flushing to avoid hangs */
3153 	if (hw->mac.type == e1000_pch_spt)
3154 		em_flush_desc_rings(adapter);
3155 
3156 	/* Issue a global reset */
3157 	e1000_reset_hw(hw);
3158 	E1000_WRITE_REG(hw, E1000_WUC, 0);
3159 	em_disable_aspm(adapter);
3160 	/* and a re-init */
3161 	if (e1000_init_hw(hw) < 0) {
3162 		device_printf(dev, "Hardware Initialization Failed\n");
3163 		return;
3164 	}
3165 
3166 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3167 	e1000_get_phy_info(hw);
3168 	e1000_check_for_link(hw);
3169 	return;
3170 }
3171 
3172 /*********************************************************************
3173  *
3174  *  Setup networking device structure and register an interface.
3175  *
3176  **********************************************************************/
3177 static int
3178 em_setup_interface(device_t dev, struct adapter *adapter)
3179 {
3180 	if_t ifp;
3181 
3182 	INIT_DEBUGOUT("em_setup_interface: begin");
3183 
3184 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "cannot allocate ifnet structure\n");
3187 		return (-1);
3188 	}
3189 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3190 	if_setdev(ifp, dev);
3191 	if_setinitfn(ifp, em_init);
3192 	if_setsoftc(ifp, adapter);
3193 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3194 	if_setioctlfn(ifp, em_ioctl);
3195 	if_setgetcounterfn(ifp, em_get_counter);
3196 
3197 	/* TSO parameters */
3198 	ifp->if_hw_tsomax = IP_MAXPACKET;
	/* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3200 	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3201 	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3202 
3203 #ifdef EM_MULTIQUEUE
3204 	/* Multiqueue stack interface */
3205 	if_settransmitfn(ifp, em_mq_start);
3206 	if_setqflushfn(ifp, em_qflush);
3207 #else
3208 	if_setstartfn(ifp, em_start);
3209 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3210 	if_setsendqready(ifp);
3211 #endif
3212 
3213 	ether_ifattach(ifp, adapter->hw.mac.addr);
3214 
3215 	if_setcapabilities(ifp, 0);
3216 	if_setcapenable(ifp, 0);
3217 
3218 
3219 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3220 	    IFCAP_TSO4, 0);
3221 	/*
3222 	 * Tell the upper layer(s) we
3223 	 * support full VLAN capability
3224 	 */
3225 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3226 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3227 	    IFCAP_VLAN_MTU, 0);
3228 	if_setcapenable(ifp, if_getcapabilities(ifp));
3229 
3230 	/*
	** Don't turn this on by default: if vlans are
	** created on another pseudo device (e.g. lagg),
	** then vlan events are not passed thru, breaking
	** operation; with HW FILTER off it works. If
	** using vlans directly on the em driver you can
	** enable this and get full hardware tag filtering.
3237 	*/
	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);

#ifdef DEVICE_POLLING
	if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
3242 #endif
3243 
3244 	/* Enable only WOL MAGIC by default */
3245 	if (adapter->wol) {
3246 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3247 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3248 	}
3249 
3250 	/*
3251 	 * Specify the media types supported by this adapter and register
3252 	 * callbacks to update media and link information
3253 	 */
3254 	ifmedia_init(&adapter->media, IFM_IMASK,
3255 	    em_media_change, em_media_status);
3256 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3257 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3258 		u_char fiber_type = IFM_1000_SX;	/* default type */
3259 
3260 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3261 			    0, NULL);
3262 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3263 	} else {
3264 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3265 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3266 			    0, NULL);
3267 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3268 			    0, NULL);
3269 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3270 			    0, NULL);
3271 		if (adapter->hw.phy.type != e1000_phy_ife) {
3272 			ifmedia_add(&adapter->media,
3273 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3274 			ifmedia_add(&adapter->media,
3275 				IFM_ETHER | IFM_1000_T, 0, NULL);
3276 		}
3277 	}
3278 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3279 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3280 	return (0);
3281 }
3282 
3283 
3284 /*
3285  * Manage DMA'able memory.
3286  */
3287 static void
3288 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3289 {
3290 	if (error)
3291 		return;
3292 	*(bus_addr_t *) arg = segs[0].ds_addr;
3293 }
3294 
3295 static int
3296 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3297         struct em_dma_alloc *dma, int mapflags)
3298 {
3299 	int error;
3300 
3301 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3302 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3303 				BUS_SPACE_MAXADDR,	/* lowaddr */
3304 				BUS_SPACE_MAXADDR,	/* highaddr */
3305 				NULL, NULL,		/* filter, filterarg */
3306 				size,			/* maxsize */
3307 				1,			/* nsegments */
3308 				size,			/* maxsegsize */
3309 				0,			/* flags */
3310 				NULL,			/* lockfunc */
3311 				NULL,			/* lockarg */
3312 				&dma->dma_tag);
3313 	if (error) {
3314 		device_printf(adapter->dev,
3315 		    "%s: bus_dma_tag_create failed: %d\n",
3316 		    __func__, error);
3317 		goto fail_0;
3318 	}
3319 
3320 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3321 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3322 	if (error) {
3323 		device_printf(adapter->dev,
3324 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3325 		    __func__, (uintmax_t)size, error);
3326 		goto fail_2;
3327 	}
3328 
3329 	dma->dma_paddr = 0;
3330 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3331 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3332 	if (error || dma->dma_paddr == 0) {
3333 		device_printf(adapter->dev,
3334 		    "%s: bus_dmamap_load failed: %d\n",
3335 		    __func__, error);
3336 		goto fail_3;
3337 	}
3338 
3339 	return (0);
3340 
3341 fail_3:
3342 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3343 fail_2:
3344 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3345 	bus_dma_tag_destroy(dma->dma_tag);
3346 fail_0:
3347 	dma->dma_tag = NULL;
3348 
3349 	return (error);
3350 }
3351 
3352 static void
3353 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3354 {
3355 	if (dma->dma_tag == NULL)
3356 		return;
3357 	if (dma->dma_paddr != 0) {
3358 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3359 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3360 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3361 		dma->dma_paddr = 0;
3362 	}
3363 	if (dma->dma_vaddr != NULL) {
3364 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3365 		dma->dma_vaddr = NULL;
3366 	}
3367 	bus_dma_tag_destroy(dma->dma_tag);
3368 	dma->dma_tag = NULL;
3369 }
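/*
 * Illustrative pairing (a sketch, not new driver code): descriptor ring
 * memory is obtained with em_dma_malloc() at setup time and must be
 * returned with em_dma_free() on teardown or on any later setup failure,
 * as em_allocate_queues() below does:
 *
 *	if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		goto fail;
 *	...
 *	em_dma_free(adapter, &txr->txdma);
 */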
3370 
3371 
3372 /*********************************************************************
3373  *
3374  *  Allocate memory for the transmit and receive rings, and then
3375  *  the descriptors associated with each, called only once at attach.
3376  *
3377  **********************************************************************/
3378 static int
3379 em_allocate_queues(struct adapter *adapter)
3380 {
3381 	device_t		dev = adapter->dev;
3382 	struct tx_ring		*txr = NULL;
3383 	struct rx_ring		*rxr = NULL;
3384 	int rsize, tsize, error = E1000_SUCCESS;
3385 	int txconf = 0, rxconf = 0;
3386 
3387 
3388 	/* Allocate the TX ring struct memory */
3389 	if (!(adapter->tx_rings =
3390 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3391 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3392 		device_printf(dev, "Unable to allocate TX ring memory\n");
3393 		error = ENOMEM;
3394 		goto fail;
3395 	}
3396 
3397 	/* Now allocate the RX */
3398 	if (!(adapter->rx_rings =
3399 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3400 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3401 		device_printf(dev, "Unable to allocate RX ring memory\n");
3402 		error = ENOMEM;
3403 		goto rx_fail;
3404 	}
3405 
3406 	tsize = roundup2(adapter->num_tx_desc *
3407 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
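	/*
	 * e.g. with 1024 TX descriptors at 16 bytes each this is 16 KB,
	 * already a multiple of EM_DBA_ALIGN, so roundup2() is a no-op;
	 * the rounding only matters for odd descriptor counts.
	 */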
3408 	/*
3409 	 * Now set up the TX queues; txconf is needed to handle the
3410 	 * possibility that things fail midcourse, in which case we
3411 	 * need to undo the allocations gracefully.
3412 	 */
3413 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3414 		/* Set up some basics */
3415 		txr = &adapter->tx_rings[i];
3416 		txr->adapter = adapter;
3417 		txr->me = i;
3418 
3419 		/* Initialize the TX lock */
3420 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3421 		    device_get_nameunit(dev), txr->me);
3422 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3423 
3424 		if (em_dma_malloc(adapter, tsize,
3425 			&txr->txdma, BUS_DMA_NOWAIT)) {
3426 			device_printf(dev,
3427 			    "Unable to allocate TX Descriptor memory\n");
3428 			error = ENOMEM;
3429 			goto err_tx_desc;
3430 		}
3431 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3432 		bzero((void *)txr->tx_base, tsize);
3433 
3434 		if (em_allocate_transmit_buffers(txr)) {
3435 			device_printf(dev,
3436 			    "Critical Failure setting up transmit buffers\n");
3437 			error = ENOMEM;
3438 			goto err_tx_desc;
3439 		}
3440 #if __FreeBSD_version >= 800000
3441 		/* Allocate a buf ring */
3442 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3443 		    M_WAITOK, &txr->tx_mtx);
3444 #endif
3445 	}
3446 
3447 	/*
3448 	 * Next the RX queues...
3449 	 */
3450 	rsize = roundup2(adapter->num_rx_desc *
3451 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3452 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3453 		rxr = &adapter->rx_rings[i];
3454 		rxr->adapter = adapter;
3455 		rxr->me = i;
3456 
3457 		/* Initialize the RX lock */
3458 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3459 		    device_get_nameunit(dev), rxr->me);
3460 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3461 
3462 		if (em_dma_malloc(adapter, rsize,
3463 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3464 			device_printf(dev,
3465 			    "Unable to allocate RxDescriptor memory\n");
3466 			error = ENOMEM;
3467 			goto err_rx_desc;
3468 		}
3469 		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3470 		bzero((void *)rxr->rx_base, rsize);
3471 
3472 		/* Allocate receive buffers for the ring */
3473 		if (em_allocate_receive_buffers(rxr)) {
3474 			device_printf(dev,
3475 			    "Critical Failure setting up receive buffers\n");
3476 			error = ENOMEM;
3477 			goto err_rx_desc;
3478 		}
3479 	}
3480 
3481 	return (0);
3482 
3483 err_rx_desc:
3484 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3485 		em_dma_free(adapter, &rxr->rxdma);
3486 err_tx_desc:
3487 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
3488 		em_dma_free(adapter, &txr->txdma);
3489 #if __FreeBSD_version >= 800000
3490 		if (txr->br != NULL)
3491 			buf_ring_free(txr->br, M_DEVBUF);
3492 #endif
3493 	}
3494 	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
3495 fail:
3496 	return (error);
3497 }
3498 
3499 
3500 /*********************************************************************
3501  *
3502  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3503  *  the information needed to transmit a packet on the wire. This is
3504  *  called only once at attach, setup is done every reset.
3505  *
3506  **********************************************************************/
3507 static int
3508 em_allocate_transmit_buffers(struct tx_ring *txr)
3509 {
3510 	struct adapter *adapter = txr->adapter;
3511 	device_t dev = adapter->dev;
3512 	struct em_txbuffer *txbuf;
3513 	int error, i;
3514 
3515 	/*
3516 	 * Setup DMA descriptor areas.
3517 	 */
3518 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3519 			       1, 0,			/* alignment, bounds */
3520 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3521 			       BUS_SPACE_MAXADDR,	/* highaddr */
3522 			       NULL, NULL,		/* filter, filterarg */
3523 			       EM_TSO_SIZE,		/* maxsize */
3524 			       EM_MAX_SCATTER,		/* nsegments */
3525 			       PAGE_SIZE,		/* maxsegsize */
3526 			       0,			/* flags */
3527 			       NULL,			/* lockfunc */
3528 			       NULL,			/* lockfuncarg */
3529 			       &txr->txtag))) {
3530 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3531 		goto fail;
3532 	}
3533 
3534 	if (!(txr->tx_buffers =
3535 	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3536 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3537 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3538 		error = ENOMEM;
3539 		goto fail;
3540 	}
3541 
3542 	/* Create the descriptor buffer DMA maps */
3543 	txbuf = txr->tx_buffers;
3544 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3545 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3546 		if (error != 0) {
3547 			device_printf(dev, "Unable to create TX DMA map\n");
3548 			goto fail;
3549 		}
3550 	}
3551 
3552 	return (0);
3553 fail:
3554 	/* We free all, it handles case where we are in the middle */
3555 	em_free_transmit_structures(adapter);
3556 	return (error);
3557 }
3558 
3559 /*********************************************************************
3560  *
3561  *  Initialize a transmit ring.
3562  *
3563  **********************************************************************/
3564 static void
3565 em_setup_transmit_ring(struct tx_ring *txr)
3566 {
3567 	struct adapter *adapter = txr->adapter;
3568 	struct em_txbuffer *txbuf;
3569 	int i;
3570 #ifdef DEV_NETMAP
3571 	struct netmap_slot *slot;
3572 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3573 #endif /* DEV_NETMAP */
3574 
3575 	/* Clear the old descriptor contents */
3576 	EM_TX_LOCK(txr);
3577 #ifdef DEV_NETMAP
3578 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3579 #endif /* DEV_NETMAP */
3580 
3581 	bzero((void *)txr->tx_base,
3582 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3583 	/* Reset indices */
3584 	txr->next_avail_desc = 0;
3585 	txr->next_to_clean = 0;
3586 
3587 	/* Free any existing tx buffers. */
3588 	txbuf = txr->tx_buffers;
3589 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3590 		if (txbuf->m_head != NULL) {
3591 			bus_dmamap_sync(txr->txtag, txbuf->map,
3592 			    BUS_DMASYNC_POSTWRITE);
3593 			bus_dmamap_unload(txr->txtag, txbuf->map);
3594 			m_freem(txbuf->m_head);
3595 			txbuf->m_head = NULL;
3596 		}
3597 #ifdef DEV_NETMAP
3598 		if (slot) {
3599 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3600 			uint64_t paddr;
3601 			void *addr;
3602 
3603 			addr = PNMB(na, slot + si, &paddr);
3604 			txr->tx_base[i].buffer_addr = htole64(paddr);
3605 			/* reload the map for netmap mode */
3606 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3607 		}
3608 #endif /* DEV_NETMAP */
3609 
3610 		/* clear the watch index */
3611 		txbuf->next_eop = -1;
3612 	}
3613 
3614 	/* Set number of descriptors available */
3615 	txr->tx_avail = adapter->num_tx_desc;
3616 	txr->busy = EM_TX_IDLE;
3617 
3618 	/* Clear checksum offload context. */
3619 	txr->last_hw_offload = 0;
3620 	txr->last_hw_ipcss = 0;
3621 	txr->last_hw_ipcso = 0;
3622 	txr->last_hw_tucss = 0;
3623 	txr->last_hw_tucso = 0;
3624 
3625 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3626 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3627 	EM_TX_UNLOCK(txr);
3628 }
3629 
3630 /*********************************************************************
3631  *
3632  *  Initialize all transmit rings.
3633  *
3634  **********************************************************************/
3635 static void
3636 em_setup_transmit_structures(struct adapter *adapter)
3637 {
3638 	struct tx_ring *txr = adapter->tx_rings;
3639 
3640 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3641 		em_setup_transmit_ring(txr);
3642 
3643 	return;
3644 }
3645 
3646 /*********************************************************************
3647  *
3648  *  Enable transmit unit.
3649  *
3650  **********************************************************************/
3651 static void
3652 em_initialize_transmit_unit(struct adapter *adapter)
3653 {
3654 	struct tx_ring	*txr = adapter->tx_rings;
3655 	struct e1000_hw	*hw = &adapter->hw;
3656 	u32	tctl, txdctl = 0, tarc, tipg = 0;
3657 
3658 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3659 
3660 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3661 		u64 bus_addr = txr->txdma.dma_paddr;
3662 		/* Base and Len of TX Ring */
3663 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3664 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3665 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3666 	    	    (u32)(bus_addr >> 32));
3667 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3668 	    	    (u32)bus_addr);
3669 		/* Init the HEAD/TAIL indices */
3670 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3671 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3672 
3673 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3674 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3675 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3676 
3677 		txr->busy = EM_TX_IDLE;
3678 		txdctl = 0; /* clear txdctl */
3679 		txdctl |= 0x1f; /* PTHRESH */
3680 		txdctl |= 1 << 8; /* HTHRESH */
3681 		txdctl |= 1 << 16; /* WTHRESH */
3682 		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3683 		txdctl |= E1000_TXDCTL_GRAN;
3684 		txdctl |= 1 << 25; /* LWTHRESH */
3685 
3686 		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
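		/*
		 * With the thresholds above, and assuming the usual
		 * E1000_TXDCTL_GRAN definition (bit 24: count in
		 * descriptors), the value written above works out to
		 * 0x0341011f: PTHRESH=0x1f, HTHRESH=1, WTHRESH=1,
		 * GRAN=1, LWTHRESH=1.
		 */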
3687 	}
3688 
3689 	/* Set the default values for the Tx Inter Packet Gap timer */
3690 	switch (adapter->hw.mac.type) {
3691 	case e1000_80003es2lan:
3692 		tipg = DEFAULT_82543_TIPG_IPGR1;
3693 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3694 		    E1000_TIPG_IPGR2_SHIFT;
3695 		break;
3696 	default:
3697 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3698 		    (adapter->hw.phy.media_type ==
3699 		    e1000_media_type_internal_serdes))
3700 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3701 		else
3702 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3703 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3704 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3705 	}
3706 
3707 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3708 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3709 
3710 	if (adapter->hw.mac.type >= e1000_82540)
3711 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3712 		    adapter->tx_abs_int_delay.value);
3713 
3714 	if ((adapter->hw.mac.type == e1000_82571) ||
3715 	    (adapter->hw.mac.type == e1000_82572)) {
3716 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3717 		tarc |= TARC_SPEED_MODE_BIT;
3718 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3719 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3720 		/* errata: program both queues to unweighted RR */
3721 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3722 		tarc |= 1;
3723 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3724 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3725 		tarc |= 1;
3726 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3727 	} else if (adapter->hw.mac.type == e1000_82574) {
3728 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3729 		tarc |= TARC_ERRATA_BIT;
3730 		if (adapter->num_queues > 1) {
3731 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3732 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3733 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3734 		} else
3735 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3736 	}
3737 
3738 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3739 	if (adapter->tx_int_delay.value > 0)
3740 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3741 
3742 	/* Program the Transmit Control Register */
3743 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3744 	tctl &= ~E1000_TCTL_CT;
3745 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3746 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3747 
3748 	if (adapter->hw.mac.type >= e1000_82571)
3749 		tctl |= E1000_TCTL_MULR;
3750 
3751 	/* This write will effectively turn on the transmit unit. */
3752 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3753 
3754 	if (hw->mac.type == e1000_pch_spt) {
3755 		u32 reg;
3756 		reg = E1000_READ_REG(hw, E1000_IOSFPC);
3757 		reg |= E1000_RCTL_RDMTS_HEX;
3758 		E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3759 		reg = E1000_READ_REG(hw, E1000_TARC(0));
3760 		reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3761 		E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3762 	}
3763 }
3764 
3765 
3766 /*********************************************************************
3767  *
3768  *  Free all transmit rings.
3769  *
3770  **********************************************************************/
3771 static void
3772 em_free_transmit_structures(struct adapter *adapter)
3773 {
3774 	struct tx_ring *txr = adapter->tx_rings;
3775 
3776 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3777 		EM_TX_LOCK(txr);
3778 		em_free_transmit_buffers(txr);
3779 		em_dma_free(adapter, &txr->txdma);
3780 		EM_TX_UNLOCK(txr);
3781 		EM_TX_LOCK_DESTROY(txr);
3782 	}
3783 
3784 	free(adapter->tx_rings, M_DEVBUF);
3785 }
3786 
3787 /*********************************************************************
3788  *
3789  *  Free transmit ring related data structures.
3790  *
3791  **********************************************************************/
3792 static void
3793 em_free_transmit_buffers(struct tx_ring *txr)
3794 {
3795 	struct adapter		*adapter = txr->adapter;
3796 	struct em_txbuffer	*txbuf;
3797 
3798 	INIT_DEBUGOUT("free_transmit_ring: begin");
3799 
3800 	if (txr->tx_buffers == NULL)
3801 		return;
3802 
3803 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3804 		txbuf = &txr->tx_buffers[i];
3805 		if (txbuf->m_head != NULL) {
3806 			bus_dmamap_sync(txr->txtag, txbuf->map,
3807 			    BUS_DMASYNC_POSTWRITE);
3808 			bus_dmamap_unload(txr->txtag,
3809 			    txbuf->map);
3810 			m_freem(txbuf->m_head);
3811 			txbuf->m_head = NULL;
3812 			if (txbuf->map != NULL) {
3813 				bus_dmamap_destroy(txr->txtag,
3814 				    txbuf->map);
3815 				txbuf->map = NULL;
3816 			}
3817 		} else if (txbuf->map != NULL) {
3818 			bus_dmamap_unload(txr->txtag,
3819 			    txbuf->map);
3820 			bus_dmamap_destroy(txr->txtag,
3821 			    txbuf->map);
3822 			txbuf->map = NULL;
3823 		}
3824 	}
3825 #if __FreeBSD_version >= 800000
3826 	if (txr->br != NULL)
3827 		buf_ring_free(txr->br, M_DEVBUF);
3828 #endif
3829 	if (txr->tx_buffers != NULL) {
3830 		free(txr->tx_buffers, M_DEVBUF);
3831 		txr->tx_buffers = NULL;
3832 	}
3833 	if (txr->txtag != NULL) {
3834 		bus_dma_tag_destroy(txr->txtag);
3835 		txr->txtag = NULL;
3836 	}
3837 	return;
3838 }
3839 
3840 
3841 /*********************************************************************
3842  *  The offload context is protocol specific (TCP/UDP) and thus
3843  *  only needs to be set when the protocol changes. A context
3844  *  change can be a performance detriment, however, and it might
3845  *  be better just to disable offload. The reason arises in the
3846  *  way in which the controller supports pipelined requests from
3847  *  the Tx data DMA. Up to four requests can be pipelined, and
3848  *  they may belong to the same packet or to multiple packets.
3849  *  However, all requests for one packet are issued before a
3850  *  request is issued for a subsequent packet, and if a request
3851  *  for the next packet requires a context change, that request
3852  *  will be stalled until the previous request completes. This
3853  *  means setting up a new context effectively disables pipelined
3854  *  Tx data DMA, which in turn greatly slows down performance
3855  *  when sending small frames.
3856  **********************************************************************/
3857 static void
3858 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3859     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3860 {
3861 	struct adapter			*adapter = txr->adapter;
3862 	struct e1000_context_desc	*TXD = NULL;
3863 	struct em_txbuffer		*tx_buffer;
3864 	int				cur, hdr_len;
3865 	u32				cmd = 0;
3866 	u16				offload = 0;
3867 	u8				ipcso, ipcss, tucso, tucss;
3868 
3869 	ipcss = ipcso = tucss = tucso = 0;
3870 	hdr_len = ip_off + (ip->ip_hl << 2);
3871 	cur = txr->next_avail_desc;
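	/*
	 * Worked example for a standard untagged Ethernet + IPv4 frame:
	 * ip_off = 14 and ip_hl = 5, so hdr_len = 34.  Then ipcso =
	 * 14 + offsetof(struct ip, ip_sum) = 24, and for TCP tucso =
	 * 34 + offsetof(struct tcphdr, th_sum) = 50.
	 */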
3872 
3873 	/* Setup of IP header checksum. */
3874 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3875 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3876 		offload |= CSUM_IP;
3877 		ipcss = ip_off;
3878 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3879 		/*
3880 		 * Start offset for header checksum calculation.
3881 		 * End offset for header checksum calculation.
3882 		 * Offset of place to put the checksum.
3883 		 */
3884 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3885 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3886 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3887 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3888 		cmd |= E1000_TXD_CMD_IP;
3889 	}
3890 
3891 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3892 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3893 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3894 		offload |= CSUM_TCP;
3895 		tucss = hdr_len;
3896 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3897 		/*
3898 		 * The 82574L can only remember the *last* context used,
3899 		 * regardless of the queue it was used for.  We cannot reuse
3900 		 * contexts on this hardware platform and must generate a new
3901 		 * context every time.  82574L hardware spec, section 7.2.6,
3902 		 * second note.
3903 		 */
3904 		if (adapter->num_queues < 2) {
3905 			/*
3906 			 * Setting up a new checksum offload context for
3907 			 * every frame takes a lot of processing time for
3908 			 * the hardware, and hurts performance a lot for
3909 			 * small frames, so avoid it if the driver can
3910 			 * reuse the previously configured context.
3911 			 */
3912 			if (txr->last_hw_offload == offload) {
3913 				if (offload & CSUM_IP) {
3914 					if (txr->last_hw_ipcss == ipcss &&
3915 					    txr->last_hw_ipcso == ipcso &&
3916 					    txr->last_hw_tucss == tucss &&
3917 					    txr->last_hw_tucso == tucso)
3918 						return;
3919 				} else {
3920 					if (txr->last_hw_tucss == tucss &&
3921 					    txr->last_hw_tucso == tucso)
3922 						return;
3923 				}
3924 			}
3925 			txr->last_hw_offload = offload;
3926 			txr->last_hw_tucss = tucss;
3927 			txr->last_hw_tucso = tucso;
3928 		}
3929 		/*
3930 		 * Start offset for payload checksum calculation.
3931 		 * End offset for payload checksum calculation.
3932 		 * Offset of place to put the checksum.
3933 		 */
3934 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3935 		TXD->upper_setup.tcp_fields.tucss = tucss;
3936 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3937 		TXD->upper_setup.tcp_fields.tucso = tucso;
3938 		cmd |= E1000_TXD_CMD_TCP;
3939 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3940 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3941 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3942 		tucss = hdr_len;
3943 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3944 		/*
3945 		 * The 82574L can only remember the *last* context used,
3946 		 * regardless of the queue it was used for.  We cannot reuse
3947 		 * contexts on this hardware platform and must generate a new
3948 		 * context every time.  82574L hardware spec, section 7.2.6,
3949 		 * second note.
3950 		 */
3951 		if (adapter->num_queues < 2) {
3952 			/*
3953 			 * Setting up a new checksum offload context for
3954 			 * every frame takes a lot of processing time for
3955 			 * the hardware, and hurts performance a lot for
3956 			 * small frames, so avoid it if the driver can
3957 			 * reuse the previously configured context.
3958 			 */
3959 			if (txr->last_hw_offload == offload) {
3960 				if (offload & CSUM_IP) {
3961 					if (txr->last_hw_ipcss == ipcss &&
3962 					    txr->last_hw_ipcso == ipcso &&
3963 					    txr->last_hw_tucss == tucss &&
3964 					    txr->last_hw_tucso == tucso)
3965 						return;
3966 				} else {
3967 					if (txr->last_hw_tucss == tucss &&
3968 					    txr->last_hw_tucso == tucso)
3969 						return;
3970 				}
3971 			}
3972 			txr->last_hw_offload = offload;
3973 			txr->last_hw_tucss = tucss;
3974 			txr->last_hw_tucso = tucso;
3975 		}
3976 		/*
3977 		 * Start offset for payload checksum calculation.
3978 		 * End offset for payload checksum calculation.
3979 		 * Offset of place to put the checksum.
3980 		 */
3981 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3982 		TXD->upper_setup.tcp_fields.tucss = tucss;
3983 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3984 		TXD->upper_setup.tcp_fields.tucso = tucso;
3985 	}
3986 
3987 	if (offload & CSUM_IP) {
3988 		txr->last_hw_ipcss = ipcss;
3989 		txr->last_hw_ipcso = ipcso;
3990 	}
3991 
3992 	TXD->tcp_seg_setup.data = htole32(0);
3993 	TXD->cmd_and_length =
3994 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3995 	tx_buffer = &txr->tx_buffers[cur];
3996 	tx_buffer->m_head = NULL;
3997 	tx_buffer->next_eop = -1;
3998 
3999 	if (++cur == adapter->num_tx_desc)
4000 		cur = 0;
4001 
4002 	txr->tx_avail--;
4003 	txr->next_avail_desc = cur;
4004 }
4005 
4006 
4007 /**********************************************************************
4008  *
4009  *  Setup work for hardware segmentation offload (TSO)
4010  *
4011  **********************************************************************/
4012 static void
4013 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4014     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4015 {
4016 	struct adapter			*adapter = txr->adapter;
4017 	struct e1000_context_desc	*TXD;
4018 	struct em_txbuffer		*tx_buffer;
4019 	int cur, hdr_len;
4020 
4021 	/*
4022 	 * In theory we could reuse the same TSO context if and only if
4023 	 * the frame is the same type (IP/TCP) and has the same MSS.
4024 	 * However, checking whether a frame has the same IP/TCP header
4025 	 * structure is hard, so just ignore that and always re-establish
4026 	 * a new TSO context.
4027 	 */
4028 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
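	/* e.g. plain Ethernet + IPv4 + TCP with no options: 14 + 20 + 20 = 54 */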
4029 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
4030 		      E1000_TXD_DTYP_D |	/* Data descr type */
4031 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
4032 
4033 	/* IP and/or TCP header checksum calculation and insertion. */
4034 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4035 
4036 	cur = txr->next_avail_desc;
4037 	tx_buffer = &txr->tx_buffers[cur];
4038 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4039 
4040 	/*
4041 	 * Start offset for header checksum calculation.
4042 	 * End offset for header checksum calculation.
4043 	 * Offset of place to put the checksum.
4044 	 */
4045 	TXD->lower_setup.ip_fields.ipcss = ip_off;
4046 	TXD->lower_setup.ip_fields.ipcse =
4047 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
4048 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4049 	/*
4050 	 * Start offset for payload checksum calculation.
4051 	 * End offset for payload checksum calculation.
4052 	 * Offset of place to put the checksum.
4053 	 */
4054 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4055 	TXD->upper_setup.tcp_fields.tucse = 0;
4056 	TXD->upper_setup.tcp_fields.tucso =
4057 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4058 	/*
4059 	 * Payload size per packet w/o any headers.
4060 	 * Length of all headers up to payload.
4061 	 */
4062 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4063 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4064 
4065 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
4066 				E1000_TXD_CMD_DEXT |	/* Extended descr */
4067 				E1000_TXD_CMD_TSE |	/* TSE context */
4068 				E1000_TXD_CMD_IP |	/* Do IP csum */
4069 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
4070 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
4071 
4072 	tx_buffer->m_head = NULL;
4073 	tx_buffer->next_eop = -1;
4074 
4075 	if (++cur == adapter->num_tx_desc)
4076 		cur = 0;
4077 
4078 	txr->tx_avail--;
4079 	txr->next_avail_desc = cur;
4080 	txr->tx_tso = TRUE;
4081 }
4082 
4083 
4084 /**********************************************************************
4085  *
4086  *  Examine each tx_buffer in the used queue. If the hardware is done
4087  *  processing the packet then free associated resources. The
4088  *  tx_buffer is put back on the free queue.
4089  *
4090  **********************************************************************/
4091 static void
4092 em_txeof(struct tx_ring *txr)
4093 {
4094 	struct adapter	*adapter = txr->adapter;
4095 	int first, last, done, processed;
4096 	struct em_txbuffer *tx_buffer;
4097 	struct e1000_tx_desc *tx_desc, *eop_desc;
4098 	if_t ifp = adapter->ifp;
4099 
4100 	EM_TX_LOCK_ASSERT(txr);
4101 #ifdef DEV_NETMAP
4102 	if (netmap_tx_irq(ifp, txr->me))
4103 		return;
4104 #endif /* DEV_NETMAP */
4105 
4106 	/* No work, make sure hang detection is disabled */
4107 	if (txr->tx_avail == adapter->num_tx_desc) {
4108 		txr->busy = EM_TX_IDLE;
4109 		return;
4110 	}
4111 
4112 	processed = 0;
4113 	first = txr->next_to_clean;
4114 	tx_desc = &txr->tx_base[first];
4115 	tx_buffer = &txr->tx_buffers[first];
4116 	last = tx_buffer->next_eop;
4117 	eop_desc = &txr->tx_base[last];
4118 
4119 	/*
4120 	 * What this does is get the index of the
4121 	 * first descriptor AFTER the EOP of the
4122 	 * first packet, that way we can do the
4123 	 * simple comparison on the inner while loop.
4124 	 */
4125 	if (++last == adapter->num_tx_desc)
4126 		last = 0;
4127 	done = last;
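	/*
	 * e.g. first = 10 and next_eop = 12 gives done = 13, so the
	 * inner loop below cleans descriptors 10, 11 and 12.
	 */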
4128 
4129 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4130 	    BUS_DMASYNC_POSTREAD);
4131 
4132 	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4133 		/* We clean the range of the packet */
4134 		while (first != done) {
4135 			tx_desc->upper.data = 0;
4136 			tx_desc->lower.data = 0;
4137 			tx_desc->buffer_addr = 0;
4138 			++txr->tx_avail;
4139 			++processed;
4140 
4141 			if (tx_buffer->m_head) {
4142 				bus_dmamap_sync(txr->txtag,
4143 				    tx_buffer->map,
4144 				    BUS_DMASYNC_POSTWRITE);
4145 				bus_dmamap_unload(txr->txtag,
4146 				    tx_buffer->map);
4147 				m_freem(tx_buffer->m_head);
4148 				tx_buffer->m_head = NULL;
4149 			}
4150 			tx_buffer->next_eop = -1;
4151 
4152 			if (++first == adapter->num_tx_desc)
4153 				first = 0;
4154 
4155 			tx_buffer = &txr->tx_buffers[first];
4156 			tx_desc = &txr->tx_base[first];
4157 		}
4158 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4159 		/* See if we can continue to the next packet */
4160 		last = tx_buffer->next_eop;
4161 		if (last != -1) {
4162         		eop_desc = &txr->tx_base[last];
4163 			/* Get new done point */
4164 			if (++last == adapter->num_tx_desc)
				last = 0;
4165 			done = last;
4166 		} else
4167 			break;
4168 	}
4169 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4170 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4171 
4172 	txr->next_to_clean = first;
4173 
4174 	/*
4175 	** Hang detection: we know there's work outstanding
4176 	** or the entry return would have been taken, so no
4177 	** descriptor processed here indicates a potential hang.
4178 	** The local timer will examine this and do a reset if needed.
4179 	*/
4180 	if (processed == 0) {
4181 		if (txr->busy != EM_TX_HUNG)
4182 			++txr->busy;
4183 	} else /* At least one descriptor was cleaned */
4184 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4185 
4186 	/*
4187 	 * If we have a minimum free, clear IFF_DRV_OACTIVE
4188 	 * to tell the stack that it is OK to send packets.
4189 	 * Notice that all writes of OACTIVE happen under the
4190 	 * TX lock which, with a single queue, guarantees
4191 	 * sanity.
4192 	 */
4193 	if (txr->tx_avail >= EM_MAX_SCATTER) {
4194 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4195 	}
4196 
4197 	/* Disable hang detection if all clean */
4198 	if (txr->tx_avail == adapter->num_tx_desc)
4199 		txr->busy = EM_TX_IDLE;
4200 }
4201 
4202 /*********************************************************************
4203  *
4204  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4205  *
4206  **********************************************************************/
4207 static void
4208 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4209 {
4210 	struct adapter		*adapter = rxr->adapter;
4211 	struct mbuf		*m;
4212 	bus_dma_segment_t	segs;
4213 	struct em_rxbuffer	*rxbuf;
4214 	int			i, j, error, nsegs;
4215 	bool			cleaned = FALSE;
4216 
4217 	i = j = rxr->next_to_refresh;
4218 	/*
4219 	** Get one descriptor beyond
4220 	** our work mark to control
4221 	** the loop.
4222 	*/
4223 	if (++j == adapter->num_rx_desc)
4224 		j = 0;
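	/*
	 * e.g. with a 1024-entry ring and next_to_refresh == 1023,
	 * j wraps to 0; the while loop below then runs until j
	 * reaches 'limit'.
	 */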
4225 
4226 	while (j != limit) {
4227 		rxbuf = &rxr->rx_buffers[i];
4228 		if (rxbuf->m_head == NULL) {
4229 			m = m_getjcl(M_NOWAIT, MT_DATA,
4230 			    M_PKTHDR, adapter->rx_mbuf_sz);
4231 			/*
4232 			** If we have a temporary resource shortage
4233 			** that causes a failure, just abort the refresh
4234 			** for now; we will return to this point when
4235 			** reinvoked from em_rxeof.
4236 			*/
4237 			if (m == NULL)
4238 				goto update;
4239 		} else
4240 			m = rxbuf->m_head;
4241 
4242 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4243 		m->m_flags |= M_PKTHDR;
4244 		m->m_data = m->m_ext.ext_buf;
4245 
4246 		/* Use bus_dma machinery to setup the memory mapping  */
4247 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4248 		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
4249 		if (error != 0) {
4250 			printf("Refresh mbufs: hdr dmamap load"
4251 			    " failure - %d\n", error);
4252 			m_free(m);
4253 			rxbuf->m_head = NULL;
4254 			goto update;
4255 		}
4256 		rxbuf->m_head = m;
4257 		rxbuf->paddr = segs.ds_addr;
4258 		bus_dmamap_sync(rxr->rxtag,
4259 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4260 		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4261 		cleaned = TRUE;
4262 
4263 		i = j; /* Next is precalculated for us */
4264 		rxr->next_to_refresh = i;
4265 		/* Calculate next controlling index */
4266 		if (++j == adapter->num_rx_desc)
4267 			j = 0;
4268 	}
4269 update:
4270 	/*
4271 	** Update the tail pointer only if,
4272 	** and only as far as, we have refreshed.
4273 	*/
4274 	if (cleaned)
4275 		E1000_WRITE_REG(&adapter->hw,
4276 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4277 
4278 	return;
4279 }
4280 
4281 
4282 /*********************************************************************
4283  *
4284  *  Allocate memory for rx_buffer structures. Since we use one
4285  *  rx_buffer per received packet, the maximum number of rx_buffer's
4286  *  that we'll need is equal to the number of receive descriptors
4287  *  that we've allocated.
4288  *
4289  **********************************************************************/
4290 static int
4291 em_allocate_receive_buffers(struct rx_ring *rxr)
4292 {
4293 	struct adapter		*adapter = rxr->adapter;
4294 	device_t		dev = adapter->dev;
4295 	struct em_rxbuffer	*rxbuf;
4296 	int			error;
4297 
4298 	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4299 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4300 	if (rxr->rx_buffers == NULL) {
4301 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4302 		return (ENOMEM);
4303 	}
4304 
4305 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4306 				1, 0,			/* alignment, bounds */
4307 				BUS_SPACE_MAXADDR,	/* lowaddr */
4308 				BUS_SPACE_MAXADDR,	/* highaddr */
4309 				NULL, NULL,		/* filter, filterarg */
4310 				MJUM9BYTES,		/* maxsize */
4311 				1,			/* nsegments */
4312 				MJUM9BYTES,		/* maxsegsize */
4313 				0,			/* flags */
4314 				NULL,			/* lockfunc */
4315 				NULL,			/* lockarg */
4316 				&rxr->rxtag);
4317 	if (error) {
4318 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4319 		    __func__, error);
4320 		goto fail;
4321 	}
4322 
4323 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4324 		rxbuf = &rxr->rx_buffers[i];
4326 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4327 		if (error) {
4328 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4329 			    __func__, error);
4330 			goto fail;
4331 		}
4332 	}
4333 
4334 	return (0);
4335 
4336 fail:
4337 	em_free_receive_structures(adapter);
4338 	return (error);
4339 }
4340 
4341 
4342 /*********************************************************************
4343  *
4344  *  Initialize a receive ring and its buffers.
4345  *
4346  **********************************************************************/
4347 static int
4348 em_setup_receive_ring(struct rx_ring *rxr)
4349 {
4350 	struct	adapter 	*adapter = rxr->adapter;
4351 	struct em_rxbuffer	*rxbuf;
4352 	bus_dma_segment_t	seg[1];
4353 	int			rsize, nsegs, error = 0;
4354 #ifdef DEV_NETMAP
4355 	struct netmap_slot *slot;
4356 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4357 #endif
4358 
4360 	/* Clear the ring contents */
4361 	EM_RX_LOCK(rxr);
4362 	rsize = roundup2(adapter->num_rx_desc *
4363 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4364 	bzero((void *)rxr->rx_base, rsize);
4365 #ifdef DEV_NETMAP
4366 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4367 #endif
4368 
4369 	/*
4370 	** Free current RX buffer structs and their mbufs
4371 	*/
4372 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4373 		rxbuf = &rxr->rx_buffers[i];
4374 		if (rxbuf->m_head != NULL) {
4375 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4376 			    BUS_DMASYNC_POSTREAD);
4377 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4378 			m_freem(rxbuf->m_head);
4379 			rxbuf->m_head = NULL; /* mark as freed */
4380 		}
4381 	}
4382 
4383 	/* Now replenish the mbufs */
4384 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4385 		rxbuf = &rxr->rx_buffers[j];
4386 #ifdef DEV_NETMAP
4387 		if (slot) {
4388 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4389 			uint64_t paddr;
4390 			void *addr;
4391 
4392 			addr = PNMB(na, slot + si, &paddr);
4393 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4394 			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4395 			continue;
4396 		}
4397 #endif /* DEV_NETMAP */
4398 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4399 		    M_PKTHDR, adapter->rx_mbuf_sz);
4400 		if (rxbuf->m_head == NULL) {
4401 			error = ENOBUFS;
4402 			goto fail;
4403 		}
4404 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4405 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4406 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4407 
4408 		/* Get the memory mapping */
4409 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4410 		    rxbuf->map, rxbuf->m_head, seg,
4411 		    &nsegs, BUS_DMA_NOWAIT);
4412 		if (error != 0) {
4413 			m_freem(rxbuf->m_head);
4414 			rxbuf->m_head = NULL;
4415 			goto fail;
4416 		}
4417 		bus_dmamap_sync(rxr->rxtag,
4418 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4419 
4420 		rxbuf->paddr = seg[0].ds_addr;
4421 		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4422 	}
4423 	rxr->next_to_check = 0;
4424 	rxr->next_to_refresh = 0;
4425 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4426 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4427 
4428 fail:
4429 	EM_RX_UNLOCK(rxr);
4430 	return (error);
4431 }
4432 
4433 /*********************************************************************
4434  *
4435  *  Initialize all receive rings.
4436  *
4437  **********************************************************************/
4438 static int
4439 em_setup_receive_structures(struct adapter *adapter)
4440 {
4441 	struct rx_ring *rxr = adapter->rx_rings;
4442 	int q;
4443 
4444 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4445 		if (em_setup_receive_ring(rxr))
4446 			goto fail;
4447 
4448 	return (0);
4449 fail:
4450 	/*
4451 	 * Free the RX buffers allocated so far; we only handle
4452 	 * the rings that completed, since the failing case will
4453 	 * have cleaned up for itself. 'q' failed, so it's the terminus.
4454 	 */
4455 	for (int i = 0; i < q; ++i) {
4456 		rxr = &adapter->rx_rings[i];
4457 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4458 			struct em_rxbuffer *rxbuf;
4459 			rxbuf = &rxr->rx_buffers[n];
4460 			if (rxbuf->m_head != NULL) {
4461 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4462 			  	  BUS_DMASYNC_POSTREAD);
4463 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4464 				m_freem(rxbuf->m_head);
4465 				rxbuf->m_head = NULL;
4466 			}
4467 		}
4468 		rxr->next_to_check = 0;
4469 		rxr->next_to_refresh = 0;
4470 	}
4471 
4472 	return (ENOBUFS);
4473 }
4474 
4475 /*********************************************************************
4476  *
4477  *  Free all receive rings.
4478  *
4479  **********************************************************************/
4480 static void
4481 em_free_receive_structures(struct adapter *adapter)
4482 {
4483 	struct rx_ring *rxr = adapter->rx_rings;
4484 
4485 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4486 		em_free_receive_buffers(rxr);
4487 		/* Free the ring memory as well */
4488 		em_dma_free(adapter, &rxr->rxdma);
4489 		EM_RX_LOCK_DESTROY(rxr);
4490 	}
4491 
4492 	free(adapter->rx_rings, M_DEVBUF);
4493 }
4494 
4495 
4496 /*********************************************************************
4497  *
4498  *  Free receive ring data structures
4499  *
4500  **********************************************************************/
4501 static void
4502 em_free_receive_buffers(struct rx_ring *rxr)
4503 {
4504 	struct adapter		*adapter = rxr->adapter;
4505 	struct em_rxbuffer	*rxbuf = NULL;
4506 
4507 	INIT_DEBUGOUT("free_receive_buffers: begin");
4508 
4509 	if (rxr->rx_buffers != NULL) {
4510 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4511 			rxbuf = &rxr->rx_buffers[i];
4512 			if (rxbuf->map != NULL) {
4513 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4514 				    BUS_DMASYNC_POSTREAD);
4515 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4516 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4517 			}
4518 			if (rxbuf->m_head != NULL) {
4519 				m_freem(rxbuf->m_head);
4520 				rxbuf->m_head = NULL;
4521 			}
4522 		}
4523 		free(rxr->rx_buffers, M_DEVBUF);
4524 		rxr->rx_buffers = NULL;
4525 		rxr->next_to_check = 0;
4526 		rxr->next_to_refresh = 0;
4527 	}
4528 
4529 	if (rxr->rxtag != NULL) {
4530 		bus_dma_tag_destroy(rxr->rxtag);
4531 		rxr->rxtag = NULL;
4532 	}
4533 
4534 	return;
4535 }
4536 
4537 
4538 /*********************************************************************
4539  *
4540  *  Enable receive unit.
4541  *
4542  **********************************************************************/
4543 
4544 static void
4545 em_initialize_receive_unit(struct adapter *adapter)
4546 {
4547 	struct rx_ring *rxr = adapter->rx_rings;
4548 	if_t ifp = adapter->ifp;
4549 	struct e1000_hw	*hw = &adapter->hw;
4550 	u32	rctl, rxcsum, rfctl;
4551 
4552 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4553 
4554 	/*
4555 	 * Make sure receives are disabled while setting
4556 	 * up the descriptor ring
4557 	 */
4558 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4559 	/* Do not disable if ever enabled on this hardware */
4560 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4561 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4562 
4563 	/* Setup the Receive Control Register */
4564 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4565 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4566 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4567 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4568 
4569 	/* Do not store bad packets */
4570 	rctl &= ~E1000_RCTL_SBP;
4571 
4572 	/* Enable Long Packet receive */
4573 	if (if_getmtu(ifp) > ETHERMTU)
4574 		rctl |= E1000_RCTL_LPE;
4575 	else
4576 		rctl &= ~E1000_RCTL_LPE;
4577 
4578 	/* Strip the CRC */
4579 	if (!em_disable_crc_stripping)
4580 		rctl |= E1000_RCTL_SECRC;
4581 
4582 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4583 	    adapter->rx_abs_int_delay.value);
4584 
4585 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4586 	    adapter->rx_int_delay.value);
4587 	/*
4588 	 * Set the interrupt throttling rate. Value is calculated
4589 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4590 	 */
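	/*
	 * e.g. assuming the header's MAX_INTS_PER_SEC of 8000, the
	 * write below works out to an ITR of about 488; 488 * 256ns
	 * is roughly 125us between interrupts, i.e. ~8000/second.
	 */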
4591 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4592 
4593 	/* Use extended rx descriptor formats */
4594 	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4595 	rfctl |= E1000_RFCTL_EXTEN;
4596 	/*
4597 	** When using MSIX interrupts we need to throttle
4598 	** using the EITR register (82574 only)
4599 	*/
4600 	if (hw->mac.type == e1000_82574) {
4601 		for (int i = 0; i < 4; i++)
4602 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4603 			    DEFAULT_ITR);
4604 		/* Disable accelerated acknowledge */
4605 		rfctl |= E1000_RFCTL_ACK_DIS;
4606 	}
4607 	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4608 
4609 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4610 	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4611 #ifdef EM_MULTIQUEUE
4612 		rxcsum |= E1000_RXCSUM_TUOFL |
4613 			  E1000_RXCSUM_IPOFL |
4614 			  E1000_RXCSUM_PCSD;
4615 #else
4616 		rxcsum |= E1000_RXCSUM_TUOFL;
4617 #endif
4618 	} else
4619 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4620 
4621 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4622 
4623 #ifdef EM_MULTIQUEUE
4624 #define RSSKEYLEN 10
4625 	if (adapter->num_queues > 1) {
4626 		uint8_t  rss_key[4 * RSSKEYLEN];
4627 		uint32_t reta = 0;
4628 		int i;
4629 
4630 		/*
4631 		 * Configure RSS key
4632 		 */
4633 		arc4rand(rss_key, sizeof(rss_key), 0);
4634 		for (i = 0; i < RSSKEYLEN; ++i) {
4635 			uint32_t rssrk = 0;
4636 
4637 			rssrk = EM_RSSRK_VAL(rss_key, i);
4638 			E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk);
4639 		}
4640 
4641 		/*
4642 		 * Configure the RSS redirect table in the following fashion:
4643 		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4644 		 */
4645 		for (i = 0; i < sizeof(reta); ++i) {
4646 			uint32_t q;
4647 
4648 			q = (i % adapter->num_queues) << 7;
4649 			reta |= q << (8 * i);
4650 		}
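		/*
		 * e.g. with num_queues == 2 the four RETA bytes become
		 * 00 80 00 80 (the queue index sits in bit 7 of each
		 * byte), i.e. reta == 0x80008000, alternating packets
		 * between the two queues; the same 32-bit pattern is
		 * written to all 32 RETA registers below.
		 */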
4651 
4652 		for (i = 0; i < 32; ++i) {
4653 			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4654 		}
4655 
4656 		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4657 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4658 				E1000_MRQC_RSS_FIELD_IPV4 |
4659 				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4660 				E1000_MRQC_RSS_FIELD_IPV6_EX |
4661 				E1000_MRQC_RSS_FIELD_IPV6);
4662 	}
4663 #endif
4664 	/*
4665 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4666 	** long latencies are observed, like Lenovo X60. This
4667 	** change eliminates the problem, but since having positive
4668 	** values in RDTR is a known source of problems on other
4669 	** platforms another solution is being sought.
4670 	*/
4671 	if (hw->mac.type == e1000_82573)
4672 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4673 
4674 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4675 		/* Setup the Base and Length of the Rx Descriptor Ring */
4676 		u64 bus_addr = rxr->rxdma.dma_paddr;
4677 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4678 
4679 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4680 		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4681 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4682 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4683 		/* Setup the Head and Tail Descriptor Pointers */
4684 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4685 #ifdef DEV_NETMAP
4686 		/*
4687 		 * an init() while a netmap client is active must
4688 		 * preserve the rx buffers passed to userspace.
4689 		 */
4690 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4691 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4692 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4693 		}
4694 #endif /* DEV_NETMAP */
4695 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4696 	}
4697 
4698 	/*
4699 	 * Set PTHRESH for improved jumbo performance
4700 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4701 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4702 	 * Only write to RXDCTL(1) if there is a need for different
4703 	 * settings.
4704 	 */
4705 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4706 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4707 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4708 	    (if_getmtu(ifp) > ETHERMTU)) {
4709 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4710 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4711 	} else if (adapter->hw.mac.type == e1000_82574) {
4712 		for (int i = 0; i < adapter->num_queues; i++) {
4713 			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4714 
4715 			rxdctl |= 0x20; /* PTHRESH */
4716 			rxdctl |= 4 << 8; /* HTHRESH */
4717 			rxdctl |= 4 << 16;/* WTHRESH */
4718 			rxdctl |= 1 << 24; /* Switch to granularity */
4719 			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4720 		}
4721 	}
4722 
4723 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4724 		if (if_getmtu(ifp) > ETHERMTU)
4725 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4726 		else
4727 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4728 	}
4729 
4730 	/* Make sure VLAN Filters are off */
4731 	rctl &= ~E1000_RCTL_VFE;
4732 
4733 	if (adapter->rx_mbuf_sz == MCLBYTES)
4734 		rctl |= E1000_RCTL_SZ_2048;
4735 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4736 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4737 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4738 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4739 
4740 	/* Clear the descriptor type bits; we use a DTYPE of 00 here */
4741 	rctl &= ~0x00000C00;
4742 	/* Write out the settings */
4743 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4744 
4745 	return;
4746 }
4747 
4748 
4749 /*********************************************************************
4750  *
4751  *  This routine executes in interrupt context. It replenishes
4752  *  the mbufs in the descriptor ring and sends data which has
4753  *  been DMA'ed into host memory to the upper layer.
4754  *
4755  *  We loop at most count times if count is > 0, or until done if
4756  *  count < 0.
4757  *
4758  *  For polling we also return the number of cleaned packets.
4759  *********************************************************************/
4760 static bool
4761 em_rxeof(struct rx_ring *rxr, int count, int *done)
4762 {
4763 	struct adapter		*adapter = rxr->adapter;
4764 	if_t ifp = adapter->ifp;
4765 	struct mbuf		*mp, *sendmp;
4766 	u32			status = 0;
4767 	u16 			len;
4768 	int			i, processed, rxdone = 0;
4769 	bool			eop;
4770 	union e1000_rx_desc_extended	*cur;
4771 
4772 	EM_RX_LOCK(rxr);
4773 
4774 	/* Sync the ring */
4775 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4776 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4777 
4778 
4779 #ifdef DEV_NETMAP
4780 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4781 		EM_RX_UNLOCK(rxr);
4782 		return (FALSE);
4783 	}
4784 #endif /* DEV_NETMAP */
4785 
4786 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4787 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4788 			break;
4789 
4790 		cur = &rxr->rx_base[i];
4791 		status = le32toh(cur->wb.upper.status_error);
4792 		mp = sendmp = NULL;
4793 
4794 		if ((status & E1000_RXD_STAT_DD) == 0)
4795 			break;
4796 
4797 		len = le16toh(cur->wb.upper.length);
4798 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4799 
4800 		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4801 		    (rxr->discard == TRUE)) {
4802 			adapter->dropped_pkts++;
4803 			++rxr->rx_discarded;
4804 			if (!eop) /* Catch subsequent segs */
4805 				rxr->discard = TRUE;
4806 			else
4807 				rxr->discard = FALSE;
4808 			em_rx_discard(rxr, i);
4809 			goto next_desc;
4810 		}
4811 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4812 
4813 		/* Assign correct length to the current fragment */
4814 		mp = rxr->rx_buffers[i].m_head;
4815 		mp->m_len = len;
4816 
4817 		/* Trigger for refresh */
4818 		rxr->rx_buffers[i].m_head = NULL;
4819 
4820 		/* First segment? */
4821 		if (rxr->fmp == NULL) {
4822 			mp->m_pkthdr.len = len;
4823 			rxr->fmp = rxr->lmp = mp;
4824 		} else {
4825 			/* Chain mbuf's together */
4826 			mp->m_flags &= ~M_PKTHDR;
4827 			rxr->lmp->m_next = mp;
4828 			rxr->lmp = mp;
4829 			rxr->fmp->m_pkthdr.len += len;
4830 		}
4831 
4832 		if (eop) {
4833 			--count;
4834 			sendmp = rxr->fmp;
4835 			if_setrcvif(sendmp, ifp);
4836 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4837 			em_receive_checksum(status, sendmp);
4838 #ifndef __NO_STRICT_ALIGNMENT
4839 			if (adapter->hw.mac.max_frame_size >
4840 			    (MCLBYTES - ETHER_ALIGN) &&
4841 			    em_fixup_rx(rxr) != 0)
4842 				goto skip;
4843 #endif
4844 			if (status & E1000_RXD_STAT_VP) {
4845 				if_setvtag(sendmp,
4846 				    le16toh(cur->wb.upper.vlan));
4847 				sendmp->m_flags |= M_VLANTAG;
4848 			}
4849 #ifndef __NO_STRICT_ALIGNMENT
4850 skip:
4851 #endif
4852 			rxr->fmp = rxr->lmp = NULL;
4853 		}
4854 next_desc:
4855 		/* Sync the ring */
4856 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4857 	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4858 
4859 		/* Zero out the receive descriptors status. */
4860 		cur->wb.upper.status_error &= htole32(~0xFF);
4861 		++rxdone;	/* cumulative for POLL */
4862 		++processed;
4863 
4864 		/* Advance our pointers to the next descriptor. */
4865 		if (++i == adapter->num_rx_desc)
4866 			i = 0;
4867 
4868 		/* Send to the stack */
4869 		if (sendmp != NULL) {
4870 			rxr->next_to_check = i;
4871 			EM_RX_UNLOCK(rxr);
4872 			if_input(ifp, sendmp);
4873 			EM_RX_LOCK(rxr);
4874 			i = rxr->next_to_check;
4875 		}
4876 
4877 		/* Only refresh mbufs every 8 descriptors */
4878 		if (processed == 8) {
4879 			em_refresh_mbufs(rxr, i);
4880 			processed = 0;
4881 		}
4882 	}
4883 
4884 	/* Catch any remaining refresh work */
4885 	if (e1000_rx_unrefreshed(rxr))
4886 		em_refresh_mbufs(rxr, i);
4887 
4888 	rxr->next_to_check = i;
4889 	if (done != NULL)
4890 		*done = rxdone;
4891 	EM_RX_UNLOCK(rxr);
4892 
4893 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4894 }
4895 
4896 static __inline void
4897 em_rx_discard(struct rx_ring *rxr, int i)
4898 {
4899 	struct em_rxbuffer	*rbuf;
4900 
4901 	rbuf = &rxr->rx_buffers[i];
4902 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4903 
4904 	/* Free any previous pieces */
4905 	if (rxr->fmp != NULL) {
4906 		rxr->fmp->m_flags |= M_PKTHDR;
4907 		m_freem(rxr->fmp);
4908 		rxr->fmp = NULL;
4909 		rxr->lmp = NULL;
4910 	}
4911 	/*
4912 	** Free buffer and allow em_refresh_mbufs()
4913 	** to clean up and recharge the buffer.
4914 	*/
4915 	if (rbuf->m_head) {
4916 		m_free(rbuf->m_head);
4917 		rbuf->m_head = NULL;
4918 	}
4919 	return;
4920 }
4921 
4922 #ifndef __NO_STRICT_ALIGNMENT
4923 /*
4924  * When jumbo frames are enabled we should realign the entire payload on
4925  * architectures with strict alignment. This is a serious design mistake of
4926  * the 8254x as it nullifies DMA operations. The 8254x only allows the RX
4927  * buffer size to be 2048/4096/8192/16384. What we really want is 2048 -
4928  * ETHER_ALIGN to align its payload. On architectures without strict
4929  * alignment restrictions the 8254x still performs unaligned memory accesses,
4930  * which reduce performance too. To avoid copying an entire frame just to
4931  * align it, we allocate a new mbuf and copy the ethernet header into it.
4932  * The new mbuf is then prepended to the existing mbuf chain.
4933  *
4934  * Be aware that the best performance of the 8254x is achieved only when
4935  * jumbo frames are not used at all on architectures with strict alignment.
4936  */
4937 static int
4938 em_fixup_rx(struct rx_ring *rxr)
4939 {
4940 	struct adapter *adapter = rxr->adapter;
4941 	struct mbuf *m, *n;
4942 	int error;
4943 
4944 	error = 0;
4945 	m = rxr->fmp;
4946 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4947 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4948 		m->m_data += ETHER_HDR_LEN;
4949 	} else {
4950 		MGETHDR(n, M_NOWAIT, MT_DATA);
4951 		if (n != NULL) {
4952 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4953 			m->m_data += ETHER_HDR_LEN;
4954 			m->m_len -= ETHER_HDR_LEN;
4955 			n->m_len = ETHER_HDR_LEN;
4956 			M_MOVE_PKTHDR(n, m);
4957 			n->m_next = m;
4958 			rxr->fmp = n;
4959 		} else {
4960 			adapter->dropped_pkts++;
4961 			m_freem(rxr->fmp);
4962 			rxr->fmp = NULL;
4963 			error = ENOMEM;
4964 		}
4965 	}
4966 
4967 	return (error);
4968 }
4969 #endif
4970 
4971 static void
4972 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4973 {
4974 	rxd->read.buffer_addr = htole64(rxbuf->paddr);
4975 	/* DD bits must be cleared */
4976 	rxd->wb.upper.status_error = 0;
4977 }
4978 
4979 /*********************************************************************
4980  *
4981  *  Verify that the hardware indicated that the checksum is valid.
4982  *  Inform the stack about the status of checksum so that stack
4983  *  doesn't spend time verifying the checksum.
4984  *
4985  *********************************************************************/
4986 static void
4987 em_receive_checksum(uint32_t status, struct mbuf *mp)
4988 {
4989 	mp->m_pkthdr.csum_flags = 0;
4990 
4991 	/* Ignore Checksum bit is set */
4992 	if (status & E1000_RXD_STAT_IXSM)
4993 		return;
4994 
4995 	/* If the IP checksum exists and there is no IP checksum error */
4996 	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
4997 		E1000_RXD_STAT_IPCS) {
4998 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4999 	}
5000 
5001 	/* TCP or UDP checksum */
5002 	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5003 	    E1000_RXD_STAT_TCPCS) {
5004 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5005 		mp->m_pkthdr.csum_data = htons(0xffff);
5006 	}
5007 	if (status & E1000_RXD_STAT_UDPCS) {
5008 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5009 		mp->m_pkthdr.csum_data = htons(0xffff);
5010 	}
5011 }
5012 
5013 /*
5014  * This routine is run via a vlan
5015  * config EVENT
5016  */
5017 static void
5018 em_register_vlan(void *arg, if_t ifp, u16 vtag)
5019 {
5020 	struct adapter	*adapter = if_getsoftc(ifp);
5021 	u32		index, bit;
5022 
5023 	if ((void *)adapter != arg)	/* Not our event */
5024 		return;
5025 
5026 	if ((vtag == 0) || (vtag > 4095))	/* Invalid ID */
5027 		return;
5028 
5029 	EM_CORE_LOCK(adapter);
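	/*
	 * e.g. vtag 100: index = 100 >> 5 = 3 and bit = 100 & 0x1f = 4,
	 * so bit 4 of shadow_vfta[3] marks VLAN 100 as active.
	 */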
5030 	index = (vtag >> 5) & 0x7F;
5031 	bit = vtag & 0x1F;
5032 	adapter->shadow_vfta[index] |= (1 << bit);
5033 	++adapter->num_vlans;
5034 	/* Re-init to load the changes */
5035 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5036 		em_init_locked(adapter);
5037 	EM_CORE_UNLOCK(adapter);
5038 }
5039 
5040 /*
5041  * This routine is run via a vlan
5042  * unconfig EVENT
5043  */
5044 static void
5045 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5046 {
5047 	struct adapter	*adapter = if_getsoftc(ifp);
5048 	u32		index, bit;
5049 
5050 	if (adapter != arg)
5051 		return;
5052 
5053 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
5054 		return;
5055 
5056 	EM_CORE_LOCK(adapter);
5057 	index = (vtag >> 5) & 0x7F;
5058 	bit = vtag & 0x1F;
5059 	adapter->shadow_vfta[index] &= ~(1 << bit);
5060 	--adapter->num_vlans;
5061 	/* Re-init to load the changes */
5062 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5063 		em_init_locked(adapter);
5064 	EM_CORE_UNLOCK(adapter);
5065 }
5066 
5067 static void
5068 em_setup_vlan_hw_support(struct adapter *adapter)
5069 {
5070 	struct e1000_hw *hw = &adapter->hw;
5071 	u32             reg;
5072 
5073 	/*
5074 	** We get here through init_locked, meaning
5075 	** a soft reset; this has already cleared
5076 	** the VFTA and other state, so if no
5077 	** vlans have been registered, do nothing.
5078 	*/
5079 	if (adapter->num_vlans == 0)
5080 		return;
5081 
5082 	/*
5083 	** A soft reset zeroes out the VFTA, so
5084 	** we need to repopulate it now.
5085 	*/
5086 	for (int i = 0; i < EM_VFTA_SIZE; i++)
5087 		if (adapter->shadow_vfta[i] != 0)
5088 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5089 			    i, adapter->shadow_vfta[i]);
5090 
5091 	reg = E1000_READ_REG(hw, E1000_CTRL);
5092 	reg |= E1000_CTRL_VME;
5093 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5094 
5095 	/* Enable the Filter Table */
5096 	reg = E1000_READ_REG(hw, E1000_RCTL);
5097 	reg &= ~E1000_RCTL_CFIEN;
5098 	reg |= E1000_RCTL_VFE;
5099 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
5100 }
5101 
5102 static void
5103 em_enable_intr(struct adapter *adapter)
5104 {
5105 	struct e1000_hw *hw = &adapter->hw;
5106 	u32 ims_mask = IMS_ENABLE_MASK;
5107 
5108 	if (hw->mac.type == e1000_82574) {
5109 		E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
5110 		ims_mask |= adapter->ims;
5111 	}
5112 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5113 }
5114 
5115 static void
5116 em_disable_intr(struct adapter *adapter)
5117 {
5118 	struct e1000_hw *hw = &adapter->hw;
5119 
5120 	if (hw->mac.type == e1000_82574)
5121 		E1000_WRITE_REG(hw, EM_EIAC, 0);
5122 	E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
5123 }
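
/*
 * IMS and IMC are set/clear companion registers: ones written to IMS
 * unmask those interrupt causes, ones written to IMC mask them, and
 * writing all-ones to IMC (as above) masks everything.  A standalone
 * sketch (not driver code) of that set/clear-register pattern, with a
 * plain variable standing in for the hardware mask state:
 */
#if 0
#include <assert.h>
#include <stdint.h>

static uint32_t int_mask;				/* stand-in for hardware state */

static void write_ims(uint32_t v) { int_mask |= v; }	/* set bits */
static void write_imc(uint32_t v) { int_mask &= ~v; }	/* clear bits */

int
main(void)
{
	write_ims(0x5);			/* unmask causes 0 and 2 */
	assert(int_mask == 0x5);
	write_imc(0xffffffff);		/* mask everything */
	assert(int_mask == 0);
	return (0);
}
#endif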
5124 
5125 /*
5126  * Bit of a misnomer: what this really means is
5127  * to enable OS management of the system, i.e.,
5128  * to disable certain hardware management features.
5129  */
5130 static void
5131 em_init_manageability(struct adapter *adapter)
5132 {
5133 	/* A shared code workaround */
5134 #define E1000_82542_MANC2H E1000_MANC2H
5135 	if (adapter->has_manage) {
5136 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5137 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5138 
5139 		/* disable hardware interception of ARP */
5140 		manc &= ~(E1000_MANC_ARP_EN);
5141 
5142 		/* enable receiving management packets to the host */
5143 		manc |= E1000_MANC_EN_MNG2HOST;
5144 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5145 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5146 		manc2h |= E1000_MNG2HOST_PORT_623;
5147 		manc2h |= E1000_MNG2HOST_PORT_664;
5148 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5149 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5150 	}
5151 }
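
/*
 * Bits 5 and 6 of MANC2H steer management traffic for UDP ports 623 and
 * 664 (the RMCP and secure-RMCP ports used by ASF/IPMI) up to the host.
 * A standalone sketch (not driver code) of composing that mask, mirroring
 * the #defines above:
 */
#if 0
#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint32_t manc2h = 0;

	manc2h |= (1U << 5);	/* deliver port 623 traffic to the host */
	manc2h |= (1U << 6);	/* deliver port 664 traffic to the host */
	assert(manc2h == 0x60);
	return (0);
}
#endif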
5152 
5153 /*
5154  * Give control back to hardware management
5155  * controller if there is one.
5156  */
5157 static void
5158 em_release_manageability(struct adapter *adapter)
5159 {
5160 	if (adapter->has_manage) {
5161 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5162 
5163 		/* re-enable hardware interception of ARP */
5164 		manc |= E1000_MANC_ARP_EN;
5165 		manc &= ~E1000_MANC_EN_MNG2HOST;
5166 
5167 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5168 	}
5169 }
5170 
5171 /*
5172  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5173  * For ASF and Pass Through versions of f/w this means
5174  * that the driver is loaded. For AMT versions of the f/w
5175  * this means that the network i/f is open.
5176  */
5177 static void
5178 em_get_hw_control(struct adapter *adapter)
5179 {
5180 	u32 ctrl_ext, swsm;
5181 
5182 	if (adapter->hw.mac.type == e1000_82573) {
5183 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5184 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5185 		    swsm | E1000_SWSM_DRV_LOAD);
5186 		return;
5187 	}
5188 	/* else */
5189 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5190 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5191 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5192 	return;
5193 }
5194 
5195 /*
5196  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5197  * For ASF and Pass Through versions of f/w this means that
5198  * the driver is no longer loaded. For AMT versions of the
5199  * f/w this means that the network i/f is closed.
5200  */
5201 static void
5202 em_release_hw_control(struct adapter *adapter)
5203 {
5204 	u32 ctrl_ext, swsm;
5205 
5206 	if (!adapter->has_manage)
5207 		return;
5208 
5209 	if (adapter->hw.mac.type == e1000_82573) {
5210 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5211 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5212 		    swsm & ~E1000_SWSM_DRV_LOAD);
5213 		return;
5214 	}
5215 	/* else */
5216 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5217 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5218 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5219 	return;
5220 }
5221 
5222 static int
5223 em_is_valid_ether_addr(u8 *addr)
5224 {
5225 	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
5226 
5227 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5228 		return (FALSE);
5229 	}
5230 
5231 	return (TRUE);
5232 }
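
/*
 * A valid unicast MAC address must not have the group (multicast) bit --
 * bit 0 of the first octet -- set, and must not be all zeros.  A
 * standalone sketch (not driver code) of the same two tests performed
 * above:
 */
#if 0
#include <assert.h>
#include <stdint.h>
#include <string.h>

static int
valid_unicast(const uint8_t addr[6])
{
	static const uint8_t zero[6];

	return (!(addr[0] & 1) && memcmp(addr, zero, 6) != 0);
}

int
main(void)
{
	uint8_t good[6] = { 0x00, 0x1b, 0x21, 0x0a, 0x0b, 0x0c };
	uint8_t mcast[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
	uint8_t zeros[6] = { 0 };

	assert(valid_unicast(good));
	assert(!valid_unicast(mcast));	/* group bit set */
	assert(!valid_unicast(zeros));	/* all-zero address */
	return (0);
}
#endif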
5233 
5234 /*
5235 ** Parse the interface capabilities with regard
5236 ** to both system management and wake-on-lan for
5237 ** later use.
5238 */
5239 static void
5240 em_get_wakeup(device_t dev)
5241 {
5242 	struct adapter	*adapter = device_get_softc(dev);
5243 	u16		eeprom_data = 0, device_id, apme_mask;
5244 
5245 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5246 	apme_mask = EM_EEPROM_APME;
5247 
5248 	switch (adapter->hw.mac.type) {
5249 	case e1000_82573:
5250 	case e1000_82583:
5251 		adapter->has_amt = TRUE;
5252 		/* FALLTHROUGH */
5253 	case e1000_82571:
5254 	case e1000_82572:
5255 	case e1000_80003es2lan:
5256 		if (adapter->hw.bus.func == 1) {
5257 			e1000_read_nvm(&adapter->hw,
5258 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5259 			break;
5260 		} else
5261 			e1000_read_nvm(&adapter->hw,
5262 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5263 		break;
5264 	case e1000_ich8lan:
5265 	case e1000_ich9lan:
5266 	case e1000_ich10lan:
5267 	case e1000_pchlan:
5268 	case e1000_pch2lan:
5269 		apme_mask = E1000_WUC_APME;
5270 		adapter->has_amt = TRUE;
5271 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5272 		break;
5273 	default:
5274 		e1000_read_nvm(&adapter->hw,
5275 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5276 		break;
5277 	}
5278 	if (eeprom_data & apme_mask)
5279 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5280 	/*
5281 	 * We have the EEPROM settings; now apply the special cases
5282 	 * where the EEPROM may be wrong or the board won't support
5283 	 * wake on lan on a particular port.
5284 	 */
5285 	device_id = pci_get_device(dev);
5286 	switch (device_id) {
5287 	case E1000_DEV_ID_82571EB_FIBER:
5288 		/* Wake events only supported on port A for dual fiber
5289 		 * regardless of eeprom setting */
5290 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5291 		    E1000_STATUS_FUNC_1)
5292 			adapter->wol = 0;
5293 		break;
5294 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5295 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5296 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5297 		/* If a quad port adapter, disable WoL on all but port A */
5298 		if (global_quad_port_a != 0)
5299 			adapter->wol = 0;
5300 		/* Reset for multiple quad port adapters */
5301 		if (++global_quad_port_a == 4)
5302 			global_quad_port_a = 0;
5303 		break;
5304 	}
5305 	return;
5306 }
5307 
5308 
5309 /*
5310  * Enable PCI Wake On Lan capability
5311  */
5312 static void
5313 em_enable_wakeup(device_t dev)
5314 {
5315 	struct adapter	*adapter = device_get_softc(dev);
5316 	if_t ifp = adapter->ifp;
5317 	u32		pmc, ctrl, ctrl_ext, rctl;
5318 	u16     	status;
5319 
5320 	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5321 		return;
5322 
5323 	/* Advertise the wakeup capability */
5324 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5325 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5326 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5327 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5328 
5329 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5330 	    (adapter->hw.mac.type == e1000_pchlan) ||
5331 	    (adapter->hw.mac.type == e1000_ich9lan) ||
5332 	    (adapter->hw.mac.type == e1000_ich10lan))
5333 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5334 
5335 	/* Keep the laser running on Fiber adapters */
5336 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5337 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5338 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5339 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5340 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5341 	}
5342 
5343 	/*
5344 	** Determine type of Wakeup: note that wol
5345 	** is set with all bits on by default.
5346 	*/
5347 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5348 		adapter->wol &= ~E1000_WUFC_MAG;
5349 
5350 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5351 		adapter->wol &= ~E1000_WUFC_MC;
5352 	else {
5353 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5354 		rctl |= E1000_RCTL_MPE;
5355 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5356 	}
5357 
5358 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5359 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5360 		if (em_enable_phy_wakeup(adapter))
5361 			return;
5362 	} else {
5363 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5364 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5365 	}
5366 
5367 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5368 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5369 
5370 	/* Request PME */
5371 	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5372 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5373 	if (if_getcapenable(ifp) & IFCAP_WOL)
5374 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5375 	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5376 
5377 	return;
5378 }
5379 
5380 /*
5381 ** WOL in the newer chipset interfaces (pchlan)
5382 ** requires things to be copied into the phy
5383 */
5384 static int
5385 em_enable_phy_wakeup(struct adapter *adapter)
5386 {
5387 	struct e1000_hw *hw = &adapter->hw;
5388 	u32 mreg, ret = 0;
5389 	u16 preg;
5390 
5391 	/* copy MAC RARs to PHY RARs */
5392 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5393 
5394 	/* copy MAC MTA to PHY MTA */
5395 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5396 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5397 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5398 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5399 		    (u16)((mreg >> 16) & 0xFFFF));
5400 	}
5401 
5402 	/* configure PHY Rx Control register */
5403 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5404 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5405 	if (mreg & E1000_RCTL_UPE)
5406 		preg |= BM_RCTL_UPE;
5407 	if (mreg & E1000_RCTL_MPE)
5408 		preg |= BM_RCTL_MPE;
5409 	preg &= ~(BM_RCTL_MO_MASK);
5410 	if (mreg & E1000_RCTL_MO_3)
5411 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5412 				<< BM_RCTL_MO_SHIFT);
5413 	if (mreg & E1000_RCTL_BAM)
5414 		preg |= BM_RCTL_BAM;
5415 	if (mreg & E1000_RCTL_PMCF)
5416 		preg |= BM_RCTL_PMCF;
5417 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5418 	if (mreg & E1000_CTRL_RFCE)
5419 		preg |= BM_RCTL_RFCE;
5420 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5421 
5422 	/* enable PHY wakeup in MAC register */
5423 	E1000_WRITE_REG(hw, E1000_WUC,
5424 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5425 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5426 
5427 	/* configure and enable PHY wakeup in PHY registers */
5428 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5429 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5430 
5431 	/* activate PHY wakeup */
5432 	ret = hw->phy.ops.acquire(hw);
5433 	if (ret) {
5434 		printf("Could not acquire PHY\n");
5435 		return ret;
5436 	}
5437 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5438 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5439 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5440 	if (ret) {
5441 		printf("Could not read PHY page 769\n");
5442 		goto out;
5443 	}
5444 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5445 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5446 	if (ret)
5447 		printf("Could not set PHY Host Wakeup bit\n");
5448 out:
5449 	hw->phy.ops.release(hw);
5450 
5451 	return ret;
5452 }
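
/*
 * The BM PHY exposes only 16-bit registers, so each 32-bit MTA entry is
 * written as a low half-word and a high half-word, as in the loop above.
 * A standalone sketch (not driver code) of the split:
 */
#if 0
#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint32_t mreg = 0x12345678;	/* example 32-bit MTA entry */
	uint16_t lo = (uint16_t)(mreg & 0xFFFF);
	uint16_t hi = (uint16_t)((mreg >> 16) & 0xFFFF);

	assert(lo == 0x5678 && hi == 0x1234);
	return (0);
}
#endif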
5453 
5454 static void
5455 em_led_func(void *arg, int onoff)
5456 {
5457 	struct adapter	*adapter = arg;
5458 
5459 	EM_CORE_LOCK(adapter);
5460 	if (onoff) {
5461 		e1000_setup_led(&adapter->hw);
5462 		e1000_led_on(&adapter->hw);
5463 	} else {
5464 		e1000_led_off(&adapter->hw);
5465 		e1000_cleanup_led(&adapter->hw);
5466 	}
5467 	EM_CORE_UNLOCK(adapter);
5468 }
5469 
5470 /*
5471 ** Disable the L0s and L1 link states
5472 */
5473 static void
5474 em_disable_aspm(struct adapter *adapter)
5475 {
5476 	int		base, reg;
5477 	u16		link_cap, link_ctrl;
5478 	device_t	dev = adapter->dev;
5479 
5480 	switch (adapter->hw.mac.type) {
5481 	case e1000_82573:
5482 	case e1000_82574:
5483 	case e1000_82583:
5484 		break;
5485 	default:
5486 		return;
5487 	}
5488 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5489 		return;
5490 	reg = base + PCIER_LINK_CAP;
5491 	link_cap = pci_read_config(dev, reg, 2);
5492 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5493 		return;
5494 	reg = base + PCIER_LINK_CTL;
5495 	link_ctrl = pci_read_config(dev, reg, 2);
5496 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5497 	pci_write_config(dev, reg, link_ctrl, 2);
5498 	return;
5499 }
5500 
5501 /**********************************************************************
5502  *
5503  *  Update the board statistics counters.
5504  *
5505  **********************************************************************/
5506 static void
5507 em_update_stats_counters(struct adapter *adapter)
5508 {
5509 
5510 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5511 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5512 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5513 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5514 	}
5515 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5516 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5517 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5518 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5519 
5520 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5521 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5522 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5523 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5524 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5525 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5526 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5527 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5528 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5529 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5530 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5531 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5532 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5533 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5534 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5535 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5536 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5537 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5538 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5539 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5540 
5541 	/* For the 64-bit byte counters the low dword must be read first. */
5542 	/* Both registers clear on the read of the high dword */
5543 
5544 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5545 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5546 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5547 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5548 
5549 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5550 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5551 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5552 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5553 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5554 
5555 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5556 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5557 
5558 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5559 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5560 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5561 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5562 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5563 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5564 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5565 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5566 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5567 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5568 
5569 	/* Interrupt Counts */
5570 
5571 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5572 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5573 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5574 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5575 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5576 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5577 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5578 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5579 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5580 
5581 	if (adapter->hw.mac.type >= e1000_82543) {
5582 		adapter->stats.algnerrc +=
5583 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5584 		adapter->stats.rxerrc +=
5585 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5586 		adapter->stats.tncrs +=
5587 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5588 		adapter->stats.cexterr +=
5589 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5590 		adapter->stats.tsctc +=
5591 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5592 		adapter->stats.tsctfc +=
5593 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5594 	}
5595 }
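
/*
 * A standalone sketch (not driver code) of the 64-bit counter assembly
 * used above for GORC/GOTC: read the low dword first, then the high dword
 * (the high read is what clears the pair on real hardware), and combine:
 */
#if 0
#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint32_t lo = 0xdeadbeef;	/* stand-in for the GORCL read */
	uint32_t hi = 0x1;		/* stand-in for the GORCH read */
	uint64_t octets = (uint64_t)lo + ((uint64_t)hi << 32);

	assert(octets == 0x1deadbeefULL);
	return (0);
}
#endif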
5596 
5597 static uint64_t
5598 em_get_counter(if_t ifp, ift_counter cnt)
5599 {
5600 	struct adapter *adapter;
5601 
5602 	adapter = if_getsoftc(ifp);
5603 
5604 	switch (cnt) {
5605 	case IFCOUNTER_COLLISIONS:
5606 		return (adapter->stats.colc);
5607 	case IFCOUNTER_IERRORS:
5608 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5609 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5610 		    adapter->stats.ruc + adapter->stats.roc +
5611 		    adapter->stats.mpc + adapter->stats.cexterr);
5612 	case IFCOUNTER_OERRORS:
5613 		return (adapter->stats.ecol + adapter->stats.latecol +
5614 		    adapter->watchdog_events);
5615 	default:
5616 		return (if_get_counter_default(ifp, cnt));
5617 	}
5618 }
5619 
5620 /* Export a single 32-bit register via a read-only sysctl. */
5621 static int
5622 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5623 {
5624 	struct adapter *adapter;
5625 	u_int val;
5626 
5627 	adapter = oidp->oid_arg1;
5628 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5629 	return (sysctl_handle_int(oidp, &val, 0, req));
5630 }
5631 
5632 /*
5633  * Add sysctl variables, one per statistic, to the system.
5634  */
5635 static void
5636 em_add_hw_stats(struct adapter *adapter)
5637 {
5638 	device_t dev = adapter->dev;
5639 
5640 	struct tx_ring *txr = adapter->tx_rings;
5641 	struct rx_ring *rxr = adapter->rx_rings;
5642 
5643 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5644 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5645 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5646 	struct e1000_hw_stats *stats = &adapter->stats;
5647 
5648 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5649 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5650 
5651 #define QUEUE_NAME_LEN 32
5652 	char namebuf[QUEUE_NAME_LEN];
5653 
5654 	/* Driver Statistics */
5655 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5656 			CTLFLAG_RD, &adapter->dropped_pkts,
5657 			"Driver dropped packets");
5658 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5659 			CTLFLAG_RD, &adapter->link_irq,
5660 			"Link MSIX IRQ Handled");
5661 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5662 			 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5663 			 "Defragmenting mbuf chain failed");
5664 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5665 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5666 			"Driver tx dma failure in xmit");
5667 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5668 			CTLFLAG_RD, &adapter->rx_overruns,
5669 			"RX overruns");
5670 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5671 			CTLFLAG_RD, &adapter->watchdog_events,
5672 			"Watchdog timeouts");
5673 
5674 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5675 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5676 			em_sysctl_reg_handler, "IU",
5677 			"Device Control Register");
5678 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5679 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5680 			em_sysctl_reg_handler, "IU",
5681 			"Receiver Control Register");
5682 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5683 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5684 			"Flow Control High Watermark");
5685 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5686 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5687 			"Flow Control Low Watermark");
5688 
5689 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5690 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5691 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5692 					    CTLFLAG_RD, NULL, "TX Queue Name");
5693 		queue_list = SYSCTL_CHILDREN(queue_node);
5694 
5695 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5696 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5697 				E1000_TDH(txr->me),
5698 				em_sysctl_reg_handler, "IU",
5699  				"Transmit Descriptor Head");
5700 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5701 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5702 				E1000_TDT(txr->me),
5703 				em_sysctl_reg_handler, "IU",
5704  				"Transmit Descriptor Tail");
5705 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5706 				CTLFLAG_RD, &txr->tx_irq,
5707 				"Queue MSI-X Transmit Interrupts");
5708 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5709 				CTLFLAG_RD, &txr->no_desc_avail,
5710 				"Queue No Descriptor Available");
5711 
5712 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5713 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5714 					    CTLFLAG_RD, NULL, "RX Queue Name");
5715 		queue_list = SYSCTL_CHILDREN(queue_node);
5716 
5717 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5718 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5719 				E1000_RDH(rxr->me),
5720 				em_sysctl_reg_handler, "IU",
5721 				"Receive Descriptor Head");
5722 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5723 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5724 				E1000_RDT(rxr->me),
5725 				em_sysctl_reg_handler, "IU",
5726 				"Receive Descriptor Tail");
5727 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5728 				CTLFLAG_RD, &rxr->rx_irq,
5729 				"Queue MSI-X Receive Interrupts");
5730 	}
5731 
5732 	/* MAC stats get their own sub node */
5733 
5734 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5735 				    CTLFLAG_RD, NULL, "Statistics");
5736 	stat_list = SYSCTL_CHILDREN(stat_node);
5737 
5738 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5739 			CTLFLAG_RD, &stats->ecol,
5740 			"Excessive collisions");
5741 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5742 			CTLFLAG_RD, &stats->scc,
5743 			"Single collisions");
5744 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5745 			CTLFLAG_RD, &stats->mcc,
5746 			"Multiple collisions");
5747 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5748 			CTLFLAG_RD, &stats->latecol,
5749 			"Late collisions");
5750 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5751 			CTLFLAG_RD, &stats->colc,
5752 			"Collision Count");
5753 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5754 			CTLFLAG_RD, &adapter->stats.symerrs,
5755 			"Symbol Errors");
5756 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5757 			CTLFLAG_RD, &adapter->stats.sec,
5758 			"Sequence Errors");
5759 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5760 			CTLFLAG_RD, &adapter->stats.dc,
5761 			"Defer Count");
5762 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5763 			CTLFLAG_RD, &adapter->stats.mpc,
5764 			"Missed Packets");
5765 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5766 			CTLFLAG_RD, &adapter->stats.rnbc,
5767 			"Receive No Buffers");
5768 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5769 			CTLFLAG_RD, &adapter->stats.ruc,
5770 			"Receive Undersize");
5771 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5772 			CTLFLAG_RD, &adapter->stats.rfc,
5773 			"Fragmented Packets Received");
5774 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5775 			CTLFLAG_RD, &adapter->stats.roc,
5776 			"Oversized Packets Received");
5777 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5778 			CTLFLAG_RD, &adapter->stats.rjc,
5779 			"Received Jabber");
5780 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5781 			CTLFLAG_RD, &adapter->stats.rxerrc,
5782 			"Receive Errors");
5783 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5784 			CTLFLAG_RD, &adapter->stats.crcerrs,
5785 			"CRC errors");
5786 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5787 			CTLFLAG_RD, &adapter->stats.algnerrc,
5788 			"Alignment Errors");
5789 	/* On 82575 these are collision counts */
5790 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5791 			CTLFLAG_RD, &adapter->stats.cexterr,
5792 			"Collision/Carrier extension errors");
5793 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5794 			CTLFLAG_RD, &adapter->stats.xonrxc,
5795 			"XON Received");
5796 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5797 			CTLFLAG_RD, &adapter->stats.xontxc,
5798 			"XON Transmitted");
5799 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5800 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5801 			"XOFF Received");
5802 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5803 			CTLFLAG_RD, &adapter->stats.xofftxc,
5804 			"XOFF Transmitted");
5805 
5806 	/* Packet Reception Stats */
5807 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5808 			CTLFLAG_RD, &adapter->stats.tpr,
5809 			"Total Packets Received");
5810 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5811 			CTLFLAG_RD, &adapter->stats.gprc,
5812 			"Good Packets Received");
5813 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5814 			CTLFLAG_RD, &adapter->stats.bprc,
5815 			"Broadcast Packets Received");
5816 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5817 			CTLFLAG_RD, &adapter->stats.mprc,
5818 			"Multicast Packets Received");
5819 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5820 			CTLFLAG_RD, &adapter->stats.prc64,
5821 			"64 byte frames received");
5822 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5823 			CTLFLAG_RD, &adapter->stats.prc127,
5824 			"65-127 byte frames received");
5825 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5826 			CTLFLAG_RD, &adapter->stats.prc255,
5827 			"128-255 byte frames received");
5828 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5829 			CTLFLAG_RD, &adapter->stats.prc511,
5830 			"256-511 byte frames received");
5831 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5832 			CTLFLAG_RD, &adapter->stats.prc1023,
5833 			"512-1023 byte frames received");
5834 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5835 			CTLFLAG_RD, &adapter->stats.prc1522,
5836 			"1024-1522 byte frames received");
5837  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5838  			CTLFLAG_RD, &adapter->stats.gorc,
5839  			"Good Octets Received");
5840 
5841 	/* Packet Transmission Stats */
5842  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5843  			CTLFLAG_RD, &adapter->stats.gotc,
5844  			"Good Octets Transmitted");
5845 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5846 			CTLFLAG_RD, &adapter->stats.tpt,
5847 			"Total Packets Transmitted");
5848 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5849 			CTLFLAG_RD, &adapter->stats.gptc,
5850 			"Good Packets Transmitted");
5851 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5852 			CTLFLAG_RD, &adapter->stats.bptc,
5853 			"Broadcast Packets Transmitted");
5854 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5855 			CTLFLAG_RD, &adapter->stats.mptc,
5856 			"Multicast Packets Transmitted");
5857 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5858 			CTLFLAG_RD, &adapter->stats.ptc64,
5859 			"64 byte frames transmitted");
5860 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5861 			CTLFLAG_RD, &adapter->stats.ptc127,
5862 			"65-127 byte frames transmitted");
5863 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5864 			CTLFLAG_RD, &adapter->stats.ptc255,
5865 			"128-255 byte frames transmitted");
5866 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5867 			CTLFLAG_RD, &adapter->stats.ptc511,
5868 			"256-511 byte frames transmitted");
5869 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5870 			CTLFLAG_RD, &adapter->stats.ptc1023,
5871 			"512-1023 byte frames transmitted");
5872 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5873 			CTLFLAG_RD, &adapter->stats.ptc1522,
5874 			"1024-1522 byte frames transmitted");
5875 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5876 			CTLFLAG_RD, &adapter->stats.tsctc,
5877 			"TSO Contexts Transmitted");
5878 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5879 			CTLFLAG_RD, &adapter->stats.tsctfc,
5880 			"TSO Contexts Failed");
5881 
5882 
5883 	/* Interrupt Stats */
5884 
5885 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5886 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5887 	int_list = SYSCTL_CHILDREN(int_node);
5888 
5889 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5890 			CTLFLAG_RD, &adapter->stats.iac,
5891 			"Interrupt Assertion Count");
5892 
5893 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5894 			CTLFLAG_RD, &adapter->stats.icrxptc,
5895 			"Interrupt Cause Rx Pkt Timer Expire Count");
5896 
5897 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5898 			CTLFLAG_RD, &adapter->stats.icrxatc,
5899 			"Interrupt Cause Rx Abs Timer Expire Count");
5900 
5901 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5902 			CTLFLAG_RD, &adapter->stats.ictxptc,
5903 			"Interrupt Cause Tx Pkt Timer Expire Count");
5904 
5905 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5906 			CTLFLAG_RD, &adapter->stats.ictxatc,
5907 			"Interrupt Cause Tx Abs Timer Expire Count");
5908 
5909 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5910 			CTLFLAG_RD, &adapter->stats.ictxqec,
5911 			"Interrupt Cause Tx Queue Empty Count");
5912 
5913 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5914 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5915 			"Interrupt Cause Tx Queue Min Thresh Count");
5916 
5917 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5918 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5919 			"Interrupt Cause Rx Desc Min Thresh Count");
5920 
5921 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5922 			CTLFLAG_RD, &adapter->stats.icrxoc,
5923 			"Interrupt Cause Receiver Overrun Count");
5924 }
5925 
5926 /**********************************************************************
5927  *
5928  *  This routine provides a way to dump out the adapter eeprom,
5929  *  often a useful debug/service tool. This only dumps the first
5930  *  32 words; the settings that matter live in that range.
5931  *
5932  **********************************************************************/
5933 static int
5934 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5935 {
5936 	struct adapter *adapter = (struct adapter *)arg1;
5937 	int error;
5938 	int result;
5939 
5940 	result = -1;
5941 	error = sysctl_handle_int(oidp, &result, 0, req);
5942 
5943 	if (error || !req->newptr)
5944 		return (error);
5945 
5946 	/*
5947 	 * This value will cause a hex dump of the
5948 	 * first 32 16-bit words of the EEPROM to
5949 	 * the screen.
5950 	 */
5951 	if (result == 1)
5952 		em_print_nvm_info(adapter);
5953 
5954 	return (error);
5955 }
5956 
5957 static void
5958 em_print_nvm_info(struct adapter *adapter)
5959 {
5960 	u16	eeprom_data;
5961 	int	i, j, row = 0;
5962 
5963 	/* It's a bit crude, but it gets the job done */
5964 	printf("\nInterface EEPROM Dump:\n");
5965 	printf("Offset\n0x0000  ");
5966 	for (i = 0, j = 0; i < 32; i++, j++) {
5967 		if (j == 8) { /* Make the offset block */
5968 			j = 0; ++row;
5969 			printf("\n0x00%x0  ", row);
5970 		}
5971 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5972 		printf("%04x ", eeprom_data);
5973 	}
5974 	printf("\n");
5975 }
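
/*
 * Each dump row above holds 8 16-bit words, so row r starts at byte
 * offset r * 16 -- which is what the "0x00%x0" format prints for rows
 * 1..3.  A standalone sketch (not driver code) of the layout arithmetic:
 */
#if 0
#include <assert.h>

int
main(void)
{
	int words_per_row = 8;
	int row = 2;
	int byte_off = row * words_per_row * 2;	/* 2 bytes per word */

	assert(byte_off == 32);			/* printed as 0x0020 */
	return (0);
}
#endif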
5976 
5977 static int
5978 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5979 {
5980 	struct em_int_delay_info *info;
5981 	struct adapter *adapter;
5982 	u32 regval;
5983 	int error, usecs, ticks;
5984 
5985 	info = (struct em_int_delay_info *)arg1;
5986 	usecs = info->value;
5987 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5988 	if (error != 0 || req->newptr == NULL)
5989 		return (error);
5990 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5991 		return (EINVAL);
5992 	info->value = usecs;
5993 	ticks = EM_USECS_TO_TICKS(usecs);
5994 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5995 		ticks *= 4;
5996 
5997 	adapter = info->adapter;
5998 
5999 	EM_CORE_LOCK(adapter);
6000 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6001 	regval = (regval & ~0xffff) | (ticks & 0xffff);
6002 	/* Handle a few special cases. */
6003 	switch (info->offset) {
6004 	case E1000_RDTR:
6005 		break;
6006 	case E1000_TIDV:
6007 		if (ticks == 0) {
6008 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6009 			/* Don't write 0 into the TIDV register. */
6010 			regval++;
6011 		} else
6012 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6013 		break;
6014 	}
6015 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6016 	EM_CORE_UNLOCK(adapter);
6017 	return (0);
6018 }
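
/*
 * The delay registers count in 1.024 us ticks, while ITR counts in 256 ns
 * units -- hence the "ticks *= 4" above (1024 ns / 4 = 256 ns).  A
 * standalone sketch (not driver code) of the conversion; the local macro
 * is a stand-in assuming the round-to-nearest 1.024 us conversion of
 * EM_USECS_TO_TICKS in if_em.h:
 */
#if 0
#include <assert.h>

#define USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)

int
main(void)
{
	int usecs = 100;
	int ticks = USECS_TO_TICKS(usecs);	/* 100 us -> 98 ticks */
	int itr_ticks = ticks * 4;		/* same delay in 256 ns units */

	assert(ticks == 98);
	assert(itr_ticks == 392);
	return (0);
}
#endif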
6019 
6020 static void
6021 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6022 	const char *description, struct em_int_delay_info *info,
6023 	int offset, int value)
6024 {
6025 	info->adapter = adapter;
6026 	info->offset = offset;
6027 	info->value = value;
6028 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6029 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6030 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6031 	    info, 0, em_sysctl_int_delay, "I", description);
6032 }
6033 
6034 static void
6035 em_set_sysctl_value(struct adapter *adapter, const char *name,
6036 	const char *description, int *limit, int value)
6037 {
6038 	*limit = value;
6039 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6040 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6041 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6042 }
6043 
6044 
6045 /*
6046 ** Set flow control using sysctl:
6047 ** Flow control values:
6048 **      0 - off
6049 **      1 - rx pause
6050 **      2 - tx pause
6051 **      3 - full
6052 */
6053 static int
6054 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6055 {
6056 	struct adapter	*adapter = (struct adapter *)arg1;
6057 	int		error, input;
6058 
6059 	input = adapter->fc;	/* report the currently set mode */
6060 	error = sysctl_handle_int(oidp, &input, 0, req);
6061 
6062 	if ((error != 0) || (req->newptr == NULL))
6063 		return (error);
6064 
6065 	if (input == adapter->fc) /* no change? */
6066 		return (error);
6067 
6068 	switch (input) {
6069 	case e1000_fc_rx_pause:
6070 	case e1000_fc_tx_pause:
6071 	case e1000_fc_full:
6072 	case e1000_fc_none:
6073 		adapter->hw.fc.requested_mode = input;
6074 		adapter->fc = input;
6075 		break;
6076 	default:
6077 		/* Do nothing */
6078 		return (error);
6079 	}
6080 
6081 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6082 	e1000_force_mac_fc(&adapter->hw);
6083 	return (error);
6084 }
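
/*
 * Usage sketch: assuming this handler is attached as the "fc" sysctl node
 * elsewhere in this file, the mode can be changed from userland with, e.g.:
 *
 *	sysctl dev.em.0.fc=3	(full pause)
 *	sysctl dev.em.0.fc=0	(flow control off)
 */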
6085 
6086 /*
6087 ** Manage Energy Efficient Ethernet:
6088 ** Control values:
6089 **     0 - EEE enabled, 1 - EEE disabled
6090 */
6091 static int
6092 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6093 {
6094 	struct adapter *adapter = (struct adapter *)arg1;
6095 	int		error, value;
6096 
6097 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
6098 	error = sysctl_handle_int(oidp, &value, 0, req);
6099 	if (error || req->newptr == NULL)
6100 		return (error);
6101 	EM_CORE_LOCK(adapter);
6102 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6103 	em_init_locked(adapter);
6104 	EM_CORE_UNLOCK(adapter);
6105 	return (0);
6106 }
6107 
6108 static int
6109 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6110 {
6111 	struct adapter *adapter;
6112 	int error;
6113 	int result;
6114 
6115 	result = -1;
6116 	error = sysctl_handle_int(oidp, &result, 0, req);
6117 
6118 	if (error || !req->newptr)
6119 		return (error);
6120 
6121 	if (result == 1) {
6122 		adapter = (struct adapter *)arg1;
6123 		em_print_debug_info(adapter);
6124 	}
6125 
6126 	return (error);
6127 }
6128 
6129 /*
6130 ** This routine is meant to be fluid, add whatever is
6131 ** needed for debugging a problem.  -jfv
6132 */
6133 static void
6134 em_print_debug_info(struct adapter *adapter)
6135 {
6136 	device_t dev = adapter->dev;
6137 	struct tx_ring *txr = adapter->tx_rings;
6138 	struct rx_ring *rxr = adapter->rx_rings;
6139 
6140 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
6141 		printf("Interface is RUNNING ");
6142 	else
6143 		printf("Interface is NOT RUNNING ");
6144 
6145 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
6146 		printf("and INACTIVE\n");
6147 	else
6148 		printf("and ACTIVE\n");
6149 
6150 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6151 		device_printf(dev, "TX Queue %d ------\n", i);
6152 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6153 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6154 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6155 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6156 		device_printf(dev, "TX descriptors avail = %d\n",
6157 	    		txr->tx_avail);
6158 		device_printf(dev, "Tx Descriptors avail failure = %lu\n",
6159 	    		txr->no_desc_avail);
6160 		device_printf(dev, "RX Queue %d ------\n", i);
6161 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6162 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6163 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6164 		device_printf(dev, "RX discarded packets = %lu\n",
6165 	    		rxr->rx_discarded);
6166 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6167 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6168 	}
6169 }
6170 
6171 #ifdef EM_MULTIQUEUE
6172 /*
6173  * 82574 only:
6174  * Write a new value to the EEPROM increasing the number of MSIX
6175  * vectors from 3 to 5, for proper multiqueue support.
6176  */
6177 static void
6178 em_enable_vectors_82574(struct adapter *adapter)
6179 {
6180 	struct e1000_hw *hw = &adapter->hw;
6181 	device_t dev = adapter->dev;
6182 	u16 edata;
6183 
6184 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6185 	device_printf(dev, "Current cap: %#06x\n", edata);
6186 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6187 		device_printf(dev, "Writing to eeprom: increasing "
6188 		    "reported MSIX vectors from 3 to 5...\n");
6189 		edata &= ~(EM_NVM_MSIX_N_MASK);
6190 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6191 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6192 		e1000_update_nvm_checksum(hw);
6193 		device_printf(dev, "Writing to eeprom: done\n");
6194 	}
6195 }
6196 #endif
6197 
6198 #ifdef DDB
6199 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6200 {
6201 	devclass_t	dc;
6202 	int max_em;
6203 
6204 	dc = devclass_find("em");
6205 	max_em = devclass_get_maxunit(dc);
6206 
6207 	for (int index = 0; index < max_em; index++) {
6208 		device_t dev;
6209 		dev = devclass_get_device(dc, index);
6210 		if (dev != NULL && device_get_driver(dev) == &em_driver) {
6211 			struct adapter *adapter = device_get_softc(dev);
6212 			EM_CORE_LOCK(adapter);
6213 			em_init_locked(adapter);
6214 			EM_CORE_UNLOCK(adapter);
6215 		}
6216 	}
6217 }
6218 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6219 {
6220 	devclass_t	dc;
6221 	int max_em;
6222 
6223 	dc = devclass_find("em");
6224 	max_em = devclass_get_maxunit(dc);
6225 
6226 	for (int index = 0; index < max_em; index++) {
6227 		device_t dev;
6228 		dev = devclass_get_device(dc, index);
6229 		if (dev != NULL && device_get_driver(dev) == &em_driver)
6230 			em_print_debug_info(device_get_softc(dev));
6231 	}
6232 }
6234 #endif
6235