xref: /freebsd/sys/dev/e1000/if_em.c (revision 1c324569538099af34419e6818fbfde4779efd33)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69 
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77 
78 #include <net/if_types.h>
79 #include <net/if_vlan_var.h>
80 
81 #include <netinet/in_systm.h>
82 #include <netinet/in.h>
83 #include <netinet/if_ether.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip6.h>
86 #include <netinet/tcp.h>
87 #include <netinet/udp.h>
88 
89 #include <machine/in_cksum.h>
90 #include <dev/led/led.h>
91 #include <dev/pci/pcivar.h>
92 #include <dev/pci/pcireg.h>
93 
94 #include "e1000_api.h"
95 #include "e1000_82571.h"
96 #include "if_em.h"
97 
98 /*********************************************************************
99  *  Driver version:
100  *********************************************************************/
101 char em_driver_version[] = "7.6.1-k";
102 
103 /*********************************************************************
104  *  PCI Device ID Table
105  *
106  *  Used by probe to select which devices to attach to.
107  *  The last field stores an index into e1000_strings.
108  *  The last entry must be all 0s.
109  *
110  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
111  *********************************************************************/
112 
113 static em_vendor_info_t em_vendor_info_array[] =
114 {
115 	/* Intel(R) PRO/1000 Network Connection */
116 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
117 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
118 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
119 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
120 						PCI_ANY_ID, PCI_ANY_ID, 0},
121 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
122 						PCI_ANY_ID, PCI_ANY_ID, 0},
123 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
124 						PCI_ANY_ID, PCI_ANY_ID, 0},
125 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
126 						PCI_ANY_ID, PCI_ANY_ID, 0},
127 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
128 						PCI_ANY_ID, PCI_ANY_ID, 0},
129 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
130 						PCI_ANY_ID, PCI_ANY_ID, 0},
131 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
132 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
133 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
135 
136 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
137 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
138 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
139 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
140 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
141 						PCI_ANY_ID, PCI_ANY_ID, 0},
142 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
143 						PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
145 						PCI_ANY_ID, PCI_ANY_ID, 0},
146 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
147 						PCI_ANY_ID, PCI_ANY_ID, 0},
148 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
150 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
152 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
176 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
177 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
178 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
179 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
180 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
181 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
182 						PCI_ANY_ID, PCI_ANY_ID, 0},
183 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
184 						PCI_ANY_ID, PCI_ANY_ID, 0},
185 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
186 	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
187 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
188 	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
189 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
190 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
191 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
192 						PCI_ANY_ID, PCI_ANY_ID, 0},
193 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
194 	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
195 						PCI_ANY_ID, PCI_ANY_ID, 0},
196 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
197 						PCI_ANY_ID, PCI_ANY_ID, 0},
198 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
199 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
200 						PCI_ANY_ID, PCI_ANY_ID, 0},
201 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
202 	/* required last entry */
203 	{ 0, 0, 0, 0, 0}
204 };
205 
206 /*********************************************************************
207  *  Table of branding strings for all supported NICs.
208  *********************************************************************/
209 
210 static char *em_strings[] = {
211 	"Intel(R) PRO/1000 Network Connection"
212 };
213 
214 /*********************************************************************
215  *  Function prototypes
216  *********************************************************************/
217 static int	em_probe(device_t);
218 static int	em_attach(device_t);
219 static int	em_detach(device_t);
220 static int	em_shutdown(device_t);
221 static int	em_suspend(device_t);
222 static int	em_resume(device_t);
223 #ifdef EM_MULTIQUEUE
224 static int	em_mq_start(if_t, struct mbuf *);
225 static int	em_mq_start_locked(if_t,
226 		    struct tx_ring *);
227 static void	em_qflush(if_t);
228 #else
229 static void	em_start(if_t);
230 static void	em_start_locked(if_t, struct tx_ring *);
231 #endif
232 static int	em_ioctl(if_t, u_long, caddr_t);
233 static uint64_t	em_get_counter(if_t, ift_counter);
234 static void	em_init(void *);
235 static void	em_init_locked(struct adapter *);
236 static void	em_stop(void *);
237 static void	em_media_status(if_t, struct ifmediareq *);
238 static int	em_media_change(if_t);
239 static void	em_identify_hardware(struct adapter *);
240 static int	em_allocate_pci_resources(struct adapter *);
241 static int	em_allocate_legacy(struct adapter *);
242 static int	em_allocate_msix(struct adapter *);
243 static int	em_allocate_queues(struct adapter *);
244 static int	em_setup_msix(struct adapter *);
245 static void	em_free_pci_resources(struct adapter *);
246 static void	em_local_timer(void *);
247 static void	em_reset(struct adapter *);
248 static int	em_setup_interface(device_t, struct adapter *);
249 static void	em_flush_desc_rings(struct adapter *);
250 
251 static void	em_setup_transmit_structures(struct adapter *);
252 static void	em_initialize_transmit_unit(struct adapter *);
253 static int	em_allocate_transmit_buffers(struct tx_ring *);
254 static void	em_free_transmit_structures(struct adapter *);
255 static void	em_free_transmit_buffers(struct tx_ring *);
256 
257 static int	em_setup_receive_structures(struct adapter *);
258 static int	em_allocate_receive_buffers(struct rx_ring *);
259 static void	em_initialize_receive_unit(struct adapter *);
260 static void	em_free_receive_structures(struct adapter *);
261 static void	em_free_receive_buffers(struct rx_ring *);
262 
263 static void	em_enable_intr(struct adapter *);
264 static void	em_disable_intr(struct adapter *);
265 static void	em_update_stats_counters(struct adapter *);
266 static void	em_add_hw_stats(struct adapter *adapter);
267 static void	em_txeof(struct tx_ring *);
268 static bool	em_rxeof(struct rx_ring *, int, int *);
269 #ifndef __NO_STRICT_ALIGNMENT
270 static int	em_fixup_rx(struct rx_ring *);
271 #endif
272 static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
273 		    const struct em_rxbuffer *rxbuf);
274 static void	em_receive_checksum(uint32_t status, struct mbuf *);
275 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
276 		    struct ip *, u32 *, u32 *);
277 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
278 		    struct tcphdr *, u32 *, u32 *);
279 static void	em_set_promisc(struct adapter *);
280 static void	em_disable_promisc(struct adapter *);
281 static void	em_set_multi(struct adapter *);
282 static void	em_update_link_status(struct adapter *);
283 static void	em_refresh_mbufs(struct rx_ring *, int);
284 static void	em_register_vlan(void *, if_t, u16);
285 static void	em_unregister_vlan(void *, if_t, u16);
286 static void	em_setup_vlan_hw_support(struct adapter *);
287 static int	em_xmit(struct tx_ring *, struct mbuf **);
288 static int	em_dma_malloc(struct adapter *, bus_size_t,
289 		    struct em_dma_alloc *, int);
290 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
291 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
292 static void	em_print_nvm_info(struct adapter *);
293 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
294 static void	em_print_debug_info(struct adapter *);
295 static int 	em_is_valid_ether_addr(u8 *);
296 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
297 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
298 		    const char *, struct em_int_delay_info *, int, int);
299 /* Management and WOL Support */
300 static void	em_init_manageability(struct adapter *);
301 static void	em_release_manageability(struct adapter *);
302 static void     em_get_hw_control(struct adapter *);
303 static void     em_release_hw_control(struct adapter *);
304 static void	em_get_wakeup(device_t);
305 static void     em_enable_wakeup(device_t);
306 static int	em_enable_phy_wakeup(struct adapter *);
307 static void	em_led_func(void *, int);
308 static void	em_disable_aspm(struct adapter *);
309 
310 static int	em_irq_fast(void *);
311 
312 /* MSIX handlers */
313 static void	em_msix_tx(void *);
314 static void	em_msix_rx(void *);
315 static void	em_msix_link(void *);
316 static void	em_handle_tx(void *context, int pending);
317 static void	em_handle_rx(void *context, int pending);
318 static void	em_handle_link(void *context, int pending);
static void	em_handle_que(void *context, int pending);
319 
320 #ifdef EM_MULTIQUEUE
321 static void	em_enable_vectors_82574(struct adapter *);
322 #endif
323 
324 static void	em_set_sysctl_value(struct adapter *, const char *,
325 		    const char *, int *, int);
326 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
327 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
328 
329 static __inline void em_rx_discard(struct rx_ring *, int);
330 
331 #ifdef DEVICE_POLLING
332 static poll_handler_t em_poll;
333 #endif /* DEVICE_POLLING */
334 
335 /*********************************************************************
336  *  FreeBSD Device Interface Entry Points
337  *********************************************************************/
338 
339 static device_method_t em_methods[] = {
340 	/* Device interface */
341 	DEVMETHOD(device_probe, em_probe),
342 	DEVMETHOD(device_attach, em_attach),
343 	DEVMETHOD(device_detach, em_detach),
344 	DEVMETHOD(device_shutdown, em_shutdown),
345 	DEVMETHOD(device_suspend, em_suspend),
346 	DEVMETHOD(device_resume, em_resume),
347 	DEVMETHOD_END
348 };
349 
350 static driver_t em_driver = {
351 	"em", em_methods, sizeof(struct adapter),
352 };
353 
354 devclass_t em_devclass;
355 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
356 MODULE_DEPEND(em, pci, 1, 1, 1);
357 MODULE_DEPEND(em, ether, 1, 1, 1);
358 #ifdef DEV_NETMAP
359 MODULE_DEPEND(em, netmap, 1, 1, 1);
360 #endif /* DEV_NETMAP */
361 
362 /*********************************************************************
363  *  Tunable default values.
364  *********************************************************************/
365 
366 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
367 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
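/*
 * For reference: the interrupt-delay registers (TIDV/TADV, RDTR/RADV)
 * count in 1.024 us hardware ticks, so these macros convert between
 * ticks and microseconds, rounding to the nearest unit.  For example,
 * EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98 ticks, and
 * EM_TICKS_TO_USECS(98) = (1024 * 98 + 500) / 1000 = 100 us again.
 */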
368 
369 #define MAX_INTS_PER_SEC	8000
370 #define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
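/*
 * Worked example: DEFAULT_ITR = 10^9 / (8000 * 256) = 488.  The ITR
 * register is programmed in 256 ns units, so 488 * 256 ns is roughly
 * 125 us between interrupts, i.e. the MAX_INTS_PER_SEC cap of 8000
 * interrupts per second.
 */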
371 
372 #define TSO_WORKAROUND	4
373 
374 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
375 
376 static int em_disable_crc_stripping = 0;
377 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
378     &em_disable_crc_stripping, 0, "Disable CRC Stripping");
379 
380 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
381 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
382 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
383     0, "Default transmit interrupt delay in usecs");
384 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
385     0, "Default receive interrupt delay in usecs");
386 
387 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
388 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
389 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
390     &em_tx_abs_int_delay_dflt, 0,
391     "Default transmit interrupt delay limit in usecs");
392 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
393     &em_rx_abs_int_delay_dflt, 0,
394     "Default receive interrupt delay limit in usecs");
395 
396 static int em_rxd = EM_DEFAULT_RXD;
397 static int em_txd = EM_DEFAULT_TXD;
398 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
399     "Number of receive descriptors per queue");
400 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
401     "Number of transmit descriptors per queue");
402 
403 static int em_smart_pwr_down = FALSE;
404 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
405     0, "Set to true to leave smart power down enabled on newer adapters");
406 
407 /* Controls whether promiscuous also shows bad packets */
408 static int em_debug_sbp = FALSE;
409 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
410     "Show bad packets in promiscuous mode");
411 
412 static int em_enable_msix = TRUE;
413 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
414     "Enable MSI-X interrupts");
415 
416 #ifdef EM_MULTIQUEUE
417 static int em_num_queues = 1;
418 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
419     "82574 only: Number of queues to configure, 0 indicates autoconfigure");
420 #endif
421 
422 /*
423 ** Global variable to store the last used CPU when binding queues
424 ** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments
425 ** when a queue is bound to a CPU.
426 */
427 static int em_last_bind_cpu = -1;
428 
429 /* How many packets rxeof tries to clean at a time */
430 static int em_rx_process_limit = 100;
431 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
432     &em_rx_process_limit, 0,
433     "Maximum number of received packets to process "
434     "at a time, -1 means unlimited");
435 
436 /* Energy Efficient Ethernet - default to OFF (a value of 1 disables EEE) */
437 static int eee_setting = 1;
438 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
439     "Disable Energy Efficient Ethernet");
440 
441 /* Global used in WOL setup with multiport cards */
442 static int global_quad_port_a = 0;
443 
444 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
445 #include <dev/netmap/if_em_netmap.h>
446 #endif /* DEV_NETMAP */
447 
448 /*********************************************************************
449  *  Device identification routine
450  *
451  *  em_probe determines whether the driver should be loaded on an
452  *  adapter, based on the adapter's PCI vendor/device ID.
453  *
454  *  return BUS_PROBE_DEFAULT on success, positive on failure
455  *********************************************************************/
456 
457 static int
458 em_probe(device_t dev)
459 {
460 	char		adapter_name[60];
461 	uint16_t	pci_vendor_id = 0;
462 	uint16_t	pci_device_id = 0;
463 	uint16_t	pci_subvendor_id = 0;
464 	uint16_t	pci_subdevice_id = 0;
465 	em_vendor_info_t *ent;
466 
467 	INIT_DEBUGOUT("em_probe: begin");
468 
469 	pci_vendor_id = pci_get_vendor(dev);
470 	if (pci_vendor_id != EM_VENDOR_ID)
471 		return (ENXIO);
472 
473 	pci_device_id = pci_get_device(dev);
474 	pci_subvendor_id = pci_get_subvendor(dev);
475 	pci_subdevice_id = pci_get_subdevice(dev);
476 
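	/*
	 * Walk the device table; PCI_ANY_ID in the subvendor or
	 * subdevice field acts as a wildcard that matches any
	 * subsystem ID.
	 */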
477 	ent = em_vendor_info_array;
478 	while (ent->vendor_id != 0) {
479 		if ((pci_vendor_id == ent->vendor_id) &&
480 		    (pci_device_id == ent->device_id) &&
481 
482 		    ((pci_subvendor_id == ent->subvendor_id) ||
483 		    (ent->subvendor_id == PCI_ANY_ID)) &&
484 
485 		    ((pci_subdevice_id == ent->subdevice_id) ||
486 		    (ent->subdevice_id == PCI_ANY_ID))) {
487 			snprintf(adapter_name, sizeof(adapter_name),
488 			    "%s %s", em_strings[ent->index],
489 			    em_driver_version);
490 			device_set_desc_copy(dev, adapter_name);
491 			return (BUS_PROBE_DEFAULT);
492 		}
493 		ent++;
494 	}
495 
496 	return (ENXIO);
497 }
498 
499 /*********************************************************************
500  *  Device initialization routine
501  *
502  *  The attach entry point is called when the driver is being loaded.
503  *  This routine identifies the type of hardware, allocates all resources
504  *  and initializes the hardware.
505  *
506  *  return 0 on success, positive on failure
507  *********************************************************************/
508 
509 static int
510 em_attach(device_t dev)
511 {
512 	struct adapter	*adapter;
513 	struct e1000_hw	*hw;
514 	int		error = 0;
515 
516 	INIT_DEBUGOUT("em_attach: begin");
517 
518 	if (resource_disabled("em", device_get_unit(dev))) {
519 		device_printf(dev, "Disabled by device hint\n");
520 		return (ENXIO);
521 	}
522 
523 	adapter = device_get_softc(dev);
524 	adapter->dev = adapter->osdep.dev = dev;
525 	hw = &adapter->hw;
526 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
527 
528 	/* SYSCTL stuff */
529 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
530 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
531 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
532 	    em_sysctl_nvm_info, "I", "NVM Information");
533 
534 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
535 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
536 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
537 	    em_sysctl_debug_info, "I", "Debug Information");
538 
539 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
540 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
541 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
542 	    em_set_flowcntl, "I", "Flow Control");
543 
544 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
545 
546 	/* Determine hardware and mac info */
547 	em_identify_hardware(adapter);
548 
549 	/* Setup PCI resources */
550 	if (em_allocate_pci_resources(adapter)) {
551 		device_printf(dev, "Allocation of PCI resources failed\n");
552 		error = ENXIO;
553 		goto err_pci;
554 	}
555 
556 	/*
557 	** For ICH8 and family we need to
558 	** map the flash memory, and this
559 	** must happen after the MAC is
560 	** identified
561 	*/
562 	if ((hw->mac.type == e1000_ich8lan) ||
563 	    (hw->mac.type == e1000_ich9lan) ||
564 	    (hw->mac.type == e1000_ich10lan) ||
565 	    (hw->mac.type == e1000_pchlan) ||
566 	    (hw->mac.type == e1000_pch2lan) ||
567 	    (hw->mac.type == e1000_pch_lpt)) {
568 		int rid = EM_BAR_TYPE_FLASH;
569 		adapter->flash = bus_alloc_resource_any(dev,
570 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
571 		if (adapter->flash == NULL) {
572 			device_printf(dev, "Mapping of Flash failed\n");
573 			error = ENXIO;
574 			goto err_pci;
575 		}
576 		/* This is used in the shared code */
577 		hw->flash_address = (u8 *)adapter->flash;
578 		adapter->osdep.flash_bus_space_tag =
579 		    rman_get_bustag(adapter->flash);
580 		adapter->osdep.flash_bus_space_handle =
581 		    rman_get_bushandle(adapter->flash);
582 	}
583 	/*
584 	** On the newer SPT devices the flash is not a
585 	** separate BAR; rather, it also lives in BAR0,
586 	** so use the same tag and an offset handle for the
587 	** FLASH read/write macros in the shared code.
588 	*/
589 	else if (hw->mac.type == e1000_pch_spt) {
590 		adapter->osdep.flash_bus_space_tag =
591 		    adapter->osdep.mem_bus_space_tag;
592 		adapter->osdep.flash_bus_space_handle =
593 		    adapter->osdep.mem_bus_space_handle
594 		    + E1000_FLASH_BASE_ADDR;
595 	}
596 
597 	/* Do Shared Code initialization */
598 	error = e1000_setup_init_funcs(hw, TRUE);
599 	if (error) {
600 		device_printf(dev, "Setup of Shared code failed, error %d\n",
601 		    error);
602 		error = ENXIO;
603 		goto err_pci;
604 	}
605 
606 	/*
607 	 * Setup MSI/X or MSI if PCI Express
608 	 */
609 	adapter->msix = em_setup_msix(adapter);
610 
611 	e1000_get_bus_info(hw);
612 
613 	/* Set up some sysctls for the tunable interrupt delays */
614 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
615 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
616 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
617 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
618 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
619 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
620 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
621 	    "receive interrupt delay limit in usecs",
622 	    &adapter->rx_abs_int_delay,
623 	    E1000_REGISTER(hw, E1000_RADV),
624 	    em_rx_abs_int_delay_dflt);
625 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
626 	    "transmit interrupt delay limit in usecs",
627 	    &adapter->tx_abs_int_delay,
628 	    E1000_REGISTER(hw, E1000_TADV),
629 	    em_tx_abs_int_delay_dflt);
630 	em_add_int_delay_sysctl(adapter, "itr",
631 	    "interrupt delay limit in usecs/4",
632 	    &adapter->tx_itr,
633 	    E1000_REGISTER(hw, E1000_ITR),
634 	    DEFAULT_ITR);
635 
636 	/* Sysctl for limiting the amount of work done in the taskqueue */
637 	em_set_sysctl_value(adapter, "rx_processing_limit",
638 	    "max number of rx packets to process", &adapter->rx_process_limit,
639 	    em_rx_process_limit);
640 
641 	/*
642 	 * Validate the number of transmit and receive descriptors.  They
643 	 * must not exceed the hardware maximum and must be a multiple
644 	 * of EM_DBA_ALIGN.
645 	 */
646 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
647 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
648 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
649 		    EM_DEFAULT_TXD, em_txd);
650 		adapter->num_tx_desc = EM_DEFAULT_TXD;
651 	} else
652 		adapter->num_tx_desc = em_txd;
653 
654 	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
655 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
656 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
657 		    EM_DEFAULT_RXD, em_rxd);
658 		adapter->num_rx_desc = EM_DEFAULT_RXD;
659 	} else
660 		adapter->num_rx_desc = em_rxd;
661 
662 	hw->mac.autoneg = DO_AUTO_NEG;
663 	hw->phy.autoneg_wait_to_complete = FALSE;
664 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
665 
666 	/* Copper options */
667 	if (hw->phy.media_type == e1000_media_type_copper) {
668 		hw->phy.mdix = AUTO_ALL_MODES;
669 		hw->phy.disable_polarity_correction = FALSE;
670 		hw->phy.ms_type = EM_MASTER_SLAVE;
671 	}
672 
673 	/*
674 	 * Set the frame limits assuming
675 	 * standard Ethernet-sized frames.
676 	 */
677 	adapter->hw.mac.max_frame_size =
678 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
679 
680 	/*
681 	 * This controls when hardware reports transmit completion
682 	 * status.
683 	 */
684 	hw->mac.report_tx_early = 1;
685 
686 	/*
687 	** Get queue/ring memory
688 	*/
689 	if (em_allocate_queues(adapter)) {
690 		error = ENOMEM;
691 		goto err_pci;
692 	}
693 
694 	/* Allocate multicast array memory. */
695 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
696 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
697 	if (adapter->mta == NULL) {
698 		device_printf(dev, "Cannot allocate multicast setup array\n");
699 		error = ENOMEM;
700 		goto err_late;
701 	}
702 
703 	/* Check SOL/IDER usage */
704 	if (e1000_check_reset_block(hw))
705 		device_printf(dev, "PHY reset is blocked"
706 		    " due to SOL/IDER session.\n");
707 
708 	/* Sysctl for setting Energy Efficient Ethernet */
709 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
710 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
711 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
712 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
713 	    adapter, 0, em_sysctl_eee, "I",
714 	    "Disable Energy Efficient Ethernet");
715 
716 	/*
717 	** Start from a known state; this is
718 	** important for reading the NVM and
719 	** MAC address afterwards.
720 	*/
721 	e1000_reset_hw(hw);
722 
723 
724 	/* Make sure we have a good EEPROM before we read from it */
725 	if (e1000_validate_nvm_checksum(hw) < 0) {
726 		/*
727 		** Some PCI-E parts fail the first check due to
728 		** the link being in a sleep state; call it again,
729 		** and if it fails a second time it's a real issue.
730 		*/
731 		if (e1000_validate_nvm_checksum(hw) < 0) {
732 			device_printf(dev,
733 			    "The EEPROM Checksum Is Not Valid\n");
734 			error = EIO;
735 			goto err_late;
736 		}
737 	}
738 
739 	/* Copy the permanent MAC address out of the EEPROM */
740 	if (e1000_read_mac_addr(hw) < 0) {
741 		device_printf(dev, "EEPROM read error while reading MAC"
742 		    " address\n");
743 		error = EIO;
744 		goto err_late;
745 	}
746 
747 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
748 		device_printf(dev, "Invalid MAC address\n");
749 		error = EIO;
750 		goto err_late;
751 	}
752 
753 	/* Disable ULP support */
754 	e1000_disable_ulp_lpt_lp(hw, TRUE);
755 
756 	/*
757 	**  Do interrupt configuration
758 	*/
759 	if (adapter->msix > 1) /* Do MSIX */
760 		error = em_allocate_msix(adapter);
761 	else  /* MSI or Legacy */
762 		error = em_allocate_legacy(adapter);
763 	if (error)
764 		goto err_late;
765 
766 	/*
767 	 * Get Wake-on-Lan and Management info for later use
768 	 */
769 	em_get_wakeup(dev);
770 
771 	/* Setup OS specific network interface */
772 	if (em_setup_interface(dev, adapter) != 0)
773 		goto err_late;
774 
775 	em_reset(adapter);
776 
777 	/* Initialize statistics */
778 	em_update_stats_counters(adapter);
779 
780 	hw->mac.get_link_status = 1;
781 	em_update_link_status(adapter);
782 
783 	/* Register for VLAN events */
784 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
785 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
786 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
787 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
788 
789 	em_add_hw_stats(adapter);
790 
791 	/* Non-AMT based hardware can now take control from firmware */
792 	if (adapter->has_manage && !adapter->has_amt)
793 		em_get_hw_control(adapter);
794 
795 	/* Tell the stack that the interface is not active */
796 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
797 
798 	adapter->led_dev = led_create(em_led_func, adapter,
799 	    device_get_nameunit(dev));
800 #ifdef DEV_NETMAP
801 	em_netmap_attach(adapter);
802 #endif /* DEV_NETMAP */
803 
804 	INIT_DEBUGOUT("em_attach: end");
805 
806 	return (0);
807 
808 err_late:
809 	em_free_transmit_structures(adapter);
810 	em_free_receive_structures(adapter);
811 	em_release_hw_control(adapter);
812 	if (adapter->ifp != (void *)NULL)
813 		if_free(adapter->ifp);
814 err_pci:
815 	em_free_pci_resources(adapter);
816 	free(adapter->mta, M_DEVBUF);
817 	EM_CORE_LOCK_DESTROY(adapter);
818 
819 	return (error);
820 }
821 
822 /*********************************************************************
823  *  Device removal routine
824  *
825  *  The detach entry point is called when the driver is being removed.
826  *  This routine stops the adapter and deallocates all the resources
827  *  that were allocated for driver operation.
828  *
829  *  return 0 on success, positive on failure
830  *********************************************************************/
831 
832 static int
833 em_detach(device_t dev)
834 {
835 	struct adapter	*adapter = device_get_softc(dev);
836 	if_t ifp = adapter->ifp;
837 
838 	INIT_DEBUGOUT("em_detach: begin");
839 
840 	/* Make sure VLANs are not using the driver */
841 	if (if_vlantrunkinuse(ifp)) {
842 		device_printf(dev, "VLAN in use, detach first\n");
843 		return (EBUSY);
844 	}
845 
846 #ifdef DEVICE_POLLING
847 	if (if_getcapenable(ifp) & IFCAP_POLLING)
848 		ether_poll_deregister(ifp);
849 #endif
850 
851 	if (adapter->led_dev != NULL)
852 		led_destroy(adapter->led_dev);
853 
854 	EM_CORE_LOCK(adapter);
855 	adapter->in_detach = 1;
856 	em_stop(adapter);
857 	EM_CORE_UNLOCK(adapter);
858 	EM_CORE_LOCK_DESTROY(adapter);
859 
860 	e1000_phy_hw_reset(&adapter->hw);
861 
862 	em_release_manageability(adapter);
863 	em_release_hw_control(adapter);
864 
865 	/* Unregister VLAN events */
866 	if (adapter->vlan_attach != NULL)
867 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
868 	if (adapter->vlan_detach != NULL)
869 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
870 
871 	ether_ifdetach(adapter->ifp);
872 	callout_drain(&adapter->timer);
873 
874 #ifdef DEV_NETMAP
875 	netmap_detach(ifp);
876 #endif /* DEV_NETMAP */
877 
878 	em_free_pci_resources(adapter);
879 	bus_generic_detach(dev);
880 	if_free(ifp);
881 
882 	em_free_transmit_structures(adapter);
883 	em_free_receive_structures(adapter);
884 
885 	em_release_hw_control(adapter);
886 	free(adapter->mta, M_DEVBUF);
887 
888 	return (0);
889 }
890 
891 /*********************************************************************
892  *
893  *  Shutdown entry point
894  *
895  **********************************************************************/
896 
897 static int
898 em_shutdown(device_t dev)
899 {
900 	return em_suspend(dev);
901 }
902 
903 /*
904  * Suspend/resume device methods.
905  */
906 static int
907 em_suspend(device_t dev)
908 {
909 	struct adapter *adapter = device_get_softc(dev);
910 
911 	EM_CORE_LOCK(adapter);
912 
913 	em_release_manageability(adapter);
914 	em_release_hw_control(adapter);
915 	em_enable_wakeup(dev);
916 
917 	EM_CORE_UNLOCK(adapter);
918 
919 	return bus_generic_suspend(dev);
920 }
921 
922 static int
923 em_resume(device_t dev)
924 {
925 	struct adapter *adapter = device_get_softc(dev);
926 	struct tx_ring	*txr = adapter->tx_rings;
927 	if_t ifp = adapter->ifp;
928 
929 	EM_CORE_LOCK(adapter);
930 	if (adapter->hw.mac.type == e1000_pch2lan)
931 		e1000_resume_workarounds_pchlan(&adapter->hw);
932 	em_init_locked(adapter);
933 	em_init_manageability(adapter);
934 
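	/*
	 * With the hardware re-initialized, kick the transmit path on
	 * each ring in case frames were queued while the device was
	 * suspended.
	 */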
935 	if ((if_getflags(ifp) & IFF_UP) &&
936 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
937 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
938 			EM_TX_LOCK(txr);
939 #ifdef EM_MULTIQUEUE
940 			if (!drbr_empty(ifp, txr->br))
941 				em_mq_start_locked(ifp, txr);
942 #else
943 			if (!if_sendq_empty(ifp))
944 				em_start_locked(ifp, txr);
945 #endif
946 			EM_TX_UNLOCK(txr);
947 		}
948 	}
949 	EM_CORE_UNLOCK(adapter);
950 
951 	return bus_generic_resume(dev);
952 }
953 
954 
955 #ifndef EM_MULTIQUEUE
956 static void
957 em_start_locked(if_t ifp, struct tx_ring *txr)
958 {
959 	struct adapter	*adapter = if_getsoftc(ifp);
960 	struct mbuf	*m_head;
961 
962 	EM_TX_LOCK_ASSERT(txr);
963 
964 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
965 	    IFF_DRV_RUNNING)
966 		return;
967 
968 	if (!adapter->link_active)
969 		return;
970 
971 	while (!if_sendq_empty(ifp)) {
972 		/* Call cleanup if the number of free TX descriptors is low */
973 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
974 			em_txeof(txr);
975 		if (txr->tx_avail < EM_MAX_SCATTER) {
976 			if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
977 			break;
978 		}
979 		m_head = if_dequeue(ifp);
980 		if (m_head == NULL)
981 			break;
982 		/*
983 		 *  Encapsulation can modify our pointer, and/or make it
984 		 *  NULL on failure.  In that event, we can't requeue.
985 		 */
986 		if (em_xmit(txr, &m_head)) {
987 			if (m_head == NULL)
988 				break;
989 			if_sendq_prepend(ifp, m_head);
990 			break;
991 		}
992 
993 		/* Mark the queue as having work */
994 		if (txr->busy == EM_TX_IDLE)
995 			txr->busy = EM_TX_BUSY;
996 
997 		/* Send a copy of the frame to the BPF listener */
998 		ETHER_BPF_MTAP(ifp, m_head);
999 
1000 	}
1001 
1002 	return;
1003 }
1004 
1005 static void
1006 em_start(if_t ifp)
1007 {
1008 	struct adapter	*adapter = if_getsoftc(ifp);
1009 	struct tx_ring	*txr = adapter->tx_rings;
1010 
1011 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1012 		EM_TX_LOCK(txr);
1013 		em_start_locked(ifp, txr);
1014 		EM_TX_UNLOCK(txr);
1015 	}
1016 	return;
1017 }
1018 #else /* EM_MULTIQUEUE */
1019 /*********************************************************************
1020  *  Multiqueue Transmit routines
1021  *
1022  *  em_mq_start is called by the stack to initiate a transmit.
1023  *  However, if the ring is busy the driver can queue the request
1024  *  rather than doing an immediate send.  It is this queueing, rather
1025  *  than the multiple tx queues themselves, that is the advantage here.
1026  **********************************************************************/
1027 /*
1028 ** Multiqueue capable stack interface
1029 */
1030 static int
1031 em_mq_start(if_t ifp, struct mbuf *m)
1032 {
1033 	struct adapter	*adapter = if_getsoftc(ifp);
1034 	struct tx_ring	*txr = adapter->tx_rings;
1035 	unsigned int	i, error;
1036 
1037 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
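	/*
	 * Select a transmit ring: if the stack supplied a flow id
	 * (e.g. from RSS hashing), use it so all packets of a flow
	 * map to the same ring and stay ordered; otherwise spread
	 * the load by the current CPU.
	 */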
1038 		i = m->m_pkthdr.flowid % adapter->num_queues;
1039 	else
1040 		i = curcpu % adapter->num_queues;
1041 
1042 	txr = &adapter->tx_rings[i];
1043 
1044 	error = drbr_enqueue(ifp, txr->br, m);
1045 	if (error)
1046 		return (error);
1047 
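	/*
	 * Send immediately if the TX lock can be taken without
	 * contention; otherwise defer to the ring's taskqueue so
	 * the caller never blocks here.
	 */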
1048 	if (EM_TX_TRYLOCK(txr)) {
1049 		em_mq_start_locked(ifp, txr);
1050 		EM_TX_UNLOCK(txr);
1051 	} else
1052 		taskqueue_enqueue(txr->tq, &txr->tx_task);
1053 
1054 	return (0);
1055 }
1056 
1057 static int
1058 em_mq_start_locked(if_t ifp, struct tx_ring *txr)
1059 {
1060 	struct adapter  *adapter = txr->adapter;
1061 	struct mbuf	*next;
1062 	int		err = 0, enq = 0;
1063 
1064 	EM_TX_LOCK_ASSERT(txr);
1065 
1066 	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
1067 	    adapter->link_active == 0) {
1068 		return (ENETDOWN);
1069 	}
1070 
1071 	/* Process the queue */
1072 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1073 		if ((err = em_xmit(txr, &next)) != 0) {
1074 			if (next == NULL) {
1075 				/* It was freed, move forward */
1076 				drbr_advance(ifp, txr->br);
1077 			} else {
1078 				/*
1079 				 * Still have one left, it may not be
1080 				 * the same since the transmit function
1081 				 * may have changed it.
1082 				 */
1083 				drbr_putback(ifp, txr->br, next);
1084 			}
1085 			break;
1086 		}
1087 		drbr_advance(ifp, txr->br);
1088 		enq++;
1089 		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
1090 		if (next->m_flags & M_MCAST)
1091 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1092 		ETHER_BPF_MTAP(ifp, next);
1093 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
1094 			break;
1095 	}
1096 
1097 	/* Mark the queue as having work */
1098 	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1099 		txr->busy = EM_TX_BUSY;
1100 
1101 	if (txr->tx_avail < EM_MAX_SCATTER)
1102 		em_txeof(txr);
1103 	if (txr->tx_avail < EM_MAX_SCATTER) {
1104 		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
1105 	}
1106 	return (err);
1107 }
1108 
1109 /*
1110 ** Flush all ring buffers
1111 */
1112 static void
1113 em_qflush(if_t ifp)
1114 {
1115 	struct adapter  *adapter = if_getsoftc(ifp);
1116 	struct tx_ring  *txr = adapter->tx_rings;
1117 	struct mbuf     *m;
1118 
1119 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1120 		EM_TX_LOCK(txr);
1121 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1122 			m_freem(m);
1123 		EM_TX_UNLOCK(txr);
1124 	}
1125 	if_qflush(ifp);
1126 }
1127 #endif /* EM_MULTIQUEUE */
1128 
1129 /*********************************************************************
1130  *  Ioctl entry point
1131  *
1132  *  em_ioctl is called when the user wants to configure the
1133  *  interface.
1134  *
1135  *  return 0 on success, positive on failure
1136  **********************************************************************/
1137 
1138 static int
1139 em_ioctl(if_t ifp, u_long command, caddr_t data)
1140 {
1141 	struct adapter	*adapter = if_getsoftc(ifp);
1142 	struct ifreq	*ifr = (struct ifreq *)data;
1143 #if defined(INET) || defined(INET6)
1144 	struct ifaddr	*ifa = (struct ifaddr *)data;
1145 #endif
1146 	bool		avoid_reset = FALSE;
1147 	int		error = 0;
1148 
1149 	if (adapter->in_detach)
1150 		return (error);
1151 
1152 	switch (command) {
1153 	case SIOCSIFADDR:
1154 #ifdef INET
1155 		if (ifa->ifa_addr->sa_family == AF_INET)
1156 			avoid_reset = TRUE;
1157 #endif
1158 #ifdef INET6
1159 		if (ifa->ifa_addr->sa_family == AF_INET6)
1160 			avoid_reset = TRUE;
1161 #endif
1162 		/*
1163 		** Calling init results in link renegotiation,
1164 		** so we avoid doing it when possible.
1165 		*/
1166 		if (avoid_reset) {
1167 			if_setflagbits(ifp,IFF_UP,0);
1168 			if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
1169 				em_init(adapter);
1170 #ifdef INET
1171 			if (!(if_getflags(ifp) & IFF_NOARP))
1172 				arp_ifinit(ifp, ifa);
1173 #endif
1174 		} else
1175 			error = ether_ioctl(ifp, command, data);
1176 		break;
1177 	case SIOCSIFMTU:
1178 	    {
1179 		int max_frame_size;
1180 
1181 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1182 
1183 		EM_CORE_LOCK(adapter);
1184 		switch (adapter->hw.mac.type) {
1185 		case e1000_82571:
1186 		case e1000_82572:
1187 		case e1000_ich9lan:
1188 		case e1000_ich10lan:
1189 		case e1000_pch2lan:
1190 		case e1000_pch_lpt:
1191 		case e1000_pch_spt:
1192 		case e1000_82574:
1193 		case e1000_82583:
1194 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1195 			max_frame_size = 9234;
1196 			break;
1197 		case e1000_pchlan:
1198 			max_frame_size = 4096;
1199 			break;
1200 			/* Adapters that do not support jumbo frames */
1201 		case e1000_ich8lan:
1202 			max_frame_size = ETHER_MAX_LEN;
1203 			break;
1204 		default:
1205 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1206 		}
1207 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1208 		    ETHER_CRC_LEN) {
1209 			EM_CORE_UNLOCK(adapter);
1210 			error = EINVAL;
1211 			break;
1212 		}
1213 
1214 		if_setmtu(ifp, ifr->ifr_mtu);
1215 		adapter->hw.mac.max_frame_size =
1216 		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
1217 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1218 			em_init_locked(adapter);
1219 		EM_CORE_UNLOCK(adapter);
1220 		break;
1221 	    }
1222 	case SIOCSIFFLAGS:
1223 		IOCTL_DEBUGOUT("ioctl rcv'd: "
1224 		    "SIOCSIFFLAGS (Set Interface Flags)");
1225 		EM_CORE_LOCK(adapter);
1226 		if (if_getflags(ifp) & IFF_UP) {
1227 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1228 				if ((if_getflags(ifp) ^ adapter->if_flags) &
1229 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1230 					em_disable_promisc(adapter);
1231 					em_set_promisc(adapter);
1232 				}
1233 			} else
1234 				em_init_locked(adapter);
1235 		} else
1236 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1237 				em_stop(adapter);
1238 		adapter->if_flags = if_getflags(ifp);
1239 		EM_CORE_UNLOCK(adapter);
1240 		break;
1241 	case SIOCADDMULTI:
1242 	case SIOCDELMULTI:
1243 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1244 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1245 			EM_CORE_LOCK(adapter);
1246 			em_disable_intr(adapter);
1247 			em_set_multi(adapter);
1248 #ifdef DEVICE_POLLING
1249 			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
1250 #endif
1251 				em_enable_intr(adapter);
1252 			EM_CORE_UNLOCK(adapter);
1253 		}
1254 		break;
1255 	case SIOCSIFMEDIA:
1256 		/* Check SOL/IDER usage */
1257 		EM_CORE_LOCK(adapter);
1258 		if (e1000_check_reset_block(&adapter->hw)) {
1259 			EM_CORE_UNLOCK(adapter);
1260 			device_printf(adapter->dev, "Media change is"
1261 			    " blocked due to SOL/IDER session.\n");
1262 			break;
1263 		}
1264 		EM_CORE_UNLOCK(adapter);
1265 		/* FALLTHROUGH */
1266 	case SIOCGIFMEDIA:
1267 		IOCTL_DEBUGOUT("ioctl rcv'd: "
1268 		    "SIOCxIFMEDIA (Get/Set Interface Media)");
1269 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1270 		break;
1271 	case SIOCSIFCAP:
1272 	    {
1273 		int mask, reinit;
1274 
1275 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1276 		reinit = 0;
1277 		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1278 #ifdef DEVICE_POLLING
1279 		if (mask & IFCAP_POLLING) {
1280 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1281 				error = ether_poll_register(em_poll, ifp);
1282 				if (error)
1283 					return (error);
1284 				EM_CORE_LOCK(adapter);
1285 				em_disable_intr(adapter);
1286 				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
1287 				EM_CORE_UNLOCK(adapter);
1288 			} else {
1289 				error = ether_poll_deregister(ifp);
1290 				/* Enable interrupt even in error case */
1291 				EM_CORE_LOCK(adapter);
1292 				em_enable_intr(adapter);
1293 				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
1294 				EM_CORE_UNLOCK(adapter);
1295 			}
1296 		}
1297 #endif
1298 		if (mask & IFCAP_HWCSUM) {
1299 			if_togglecapenable(ifp,IFCAP_HWCSUM);
1300 			reinit = 1;
1301 		}
1302 		if (mask & IFCAP_TSO4) {
1303 			if_togglecapenable(ifp,IFCAP_TSO4);
1304 			reinit = 1;
1305 		}
1306 		if (mask & IFCAP_VLAN_HWTAGGING) {
1307 			if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
1308 			reinit = 1;
1309 		}
1310 		if (mask & IFCAP_VLAN_HWFILTER) {
1311 			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1312 			reinit = 1;
1313 		}
1314 		if (mask & IFCAP_VLAN_HWTSO) {
1315 			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1316 			reinit = 1;
1317 		}
1318 		if ((mask & IFCAP_WOL) &&
1319 		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
1320 			if (mask & IFCAP_WOL_MCAST)
1321 				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
1322 			if (mask & IFCAP_WOL_MAGIC)
1323 				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
1324 		}
1325 		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1326 			em_init(adapter);
1327 		if_vlancap(ifp);
1328 		break;
1329 	    }
1330 
1331 	default:
1332 		error = ether_ioctl(ifp, command, data);
1333 		break;
1334 	}
1335 
1336 	return (error);
1337 }
1338 
1339 
1340 /*********************************************************************
1341  *  Init entry point
1342  *
1343  *  This routine is used in two ways.  It is used by the stack as
1344  *  the init entry point in the network interface structure.  It is
1345  *  also used by the driver as a hw/sw initialization routine to
1346  *  bring the adapter to a consistent state.  Note that it returns
1347  *  void; failures are reported with device_printf() and em_stop().
1348  *
1349  **********************************************************************/
1350 
1351 static void
1352 em_init_locked(struct adapter *adapter)
1353 {
1354 	if_t ifp = adapter->ifp;
1355 	device_t	dev = adapter->dev;
1356 
1357 	INIT_DEBUGOUT("em_init: begin");
1358 
1359 	EM_CORE_LOCK_ASSERT(adapter);
1360 
1361 	em_disable_intr(adapter);
1362 	callout_stop(&adapter->timer);
1363 
1364 	/* Get the latest mac address, User can use a LAA */
1365 	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1366 	    ETHER_ADDR_LEN);
1367 
1368 	/* Put the address into the Receive Address Array */
1369 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1370 
1371 	/*
1372 	 * With the 82571 adapter, RAR[0] may be overwritten
1373 	 * when the other port is reset, so we keep a duplicate
1374 	 * in the last RAR entry for that eventuality; this
1375 	 * assures the interface continues to function.
1376 	 */
1377 	if (adapter->hw.mac.type == e1000_82571) {
1378 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1379 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1380 		    E1000_RAR_ENTRIES - 1);
1381 	}
1382 
1383 	/* Initialize the hardware */
1384 	em_reset(adapter);
1385 	em_update_link_status(adapter);
1386 
1387 	/* Setup VLAN support, basic and offload if available */
1388 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1389 
1390 	/* Set hardware offload abilities */
1391 	if_clearhwassist(ifp);
1392 	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1393 		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1394 
1395 	if (if_getcapenable(ifp) & IFCAP_TSO4)
1396 		if_sethwassistbits(ifp, CSUM_TSO, 0);
1397 
1398 	/* Configure for OS presence */
1399 	em_init_manageability(adapter);
1400 
1401 	/* Prepare transmit descriptors and buffers */
1402 	em_setup_transmit_structures(adapter);
1403 	em_initialize_transmit_unit(adapter);
1404 
1405 	/* Setup Multicast table */
1406 	em_set_multi(adapter);
1407 
1408 	/*
1409 	** Figure out the desired mbuf cluster
1410 	** pool for jumbo frames
1411 	*/
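	/* MCLBYTES is a 2K cluster, MJUMPAGESIZE one page, MJUM9BYTES 9K. */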
1412 	if (adapter->hw.mac.max_frame_size <= 2048)
1413 		adapter->rx_mbuf_sz = MCLBYTES;
1414 	else if (adapter->hw.mac.max_frame_size <= 4096)
1415 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1416 	else
1417 		adapter->rx_mbuf_sz = MJUM9BYTES;
1418 
1419 	/* Prepare receive descriptors and buffers */
1420 	if (em_setup_receive_structures(adapter)) {
1421 		device_printf(dev, "Could not setup receive structures\n");
1422 		em_stop(adapter);
1423 		return;
1424 	}
1425 	em_initialize_receive_unit(adapter);
1426 
1427 	/* Use real VLAN Filter support? */
1428 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1429 		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1430 			/* Use real VLAN Filter support */
1431 			em_setup_vlan_hw_support(adapter);
1432 		else {
1433 			u32 ctrl;
1434 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1435 			ctrl |= E1000_CTRL_VME;
1436 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1437 		}
1438 	}
1439 
1440 	/* Don't lose promiscuous settings */
1441 	em_set_promisc(adapter);
1442 
1443 	/* Set the interface as ACTIVE */
1444 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1445 
1446 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1447 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1448 
1449 	/* MSI/X configuration for 82574 */
1450 	if (adapter->hw.mac.type == e1000_82574) {
1451 		int tmp;
1452 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1453 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1454 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1455 		/* Set the IVAR - interrupt vector routing. */
1456 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1457 	}
1458 
1459 #ifdef DEVICE_POLLING
1460 	/*
1461 	 * Only enable interrupts if we are not polling; make sure
1462 	 * they are off otherwise.
1463 	 */
1464 	if (if_getcapenable(ifp) & IFCAP_POLLING)
1465 		em_disable_intr(adapter);
1466 	else
1467 #endif /* DEVICE_POLLING */
1468 		em_enable_intr(adapter);
1469 
1470 	/* AMT based hardware can now take control from firmware */
1471 	if (adapter->has_manage && adapter->has_amt)
1472 		em_get_hw_control(adapter);
1473 }
1474 
1475 static void
1476 em_init(void *arg)
1477 {
1478 	struct adapter *adapter = arg;
1479 
1480 	EM_CORE_LOCK(adapter);
1481 	em_init_locked(adapter);
1482 	EM_CORE_UNLOCK(adapter);
1483 }
1484 
1485 
1486 #ifdef DEVICE_POLLING
1487 /*********************************************************************
1488  *
1489  *  Legacy polling routine: note this only works with a single queue
1490  *
1491  *********************************************************************/
1492 static int
1493 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1494 {
1495 	struct adapter *adapter = if_getsoftc(ifp);
1496 	struct tx_ring	*txr = adapter->tx_rings;
1497 	struct rx_ring	*rxr = adapter->rx_rings;
1498 	u32		reg_icr;
1499 	int		rx_done;
1500 
1501 	EM_CORE_LOCK(adapter);
1502 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1503 		EM_CORE_UNLOCK(adapter);
1504 		return (0);
1505 	}
1506 
1507 	if (cmd == POLL_AND_CHECK_STATUS) {
1508 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1509 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1510 			callout_stop(&adapter->timer);
1511 			adapter->hw.mac.get_link_status = 1;
1512 			em_update_link_status(adapter);
1513 			callout_reset(&adapter->timer, hz,
1514 			    em_local_timer, adapter);
1515 		}
1516 	}
1517 	EM_CORE_UNLOCK(adapter);
1518 
1519 	em_rxeof(rxr, count, &rx_done);
1520 
1521 	EM_TX_LOCK(txr);
1522 	em_txeof(txr);
1523 #ifdef EM_MULTIQUEUE
1524 	if (!drbr_empty(ifp, txr->br))
1525 		em_mq_start_locked(ifp, txr);
1526 #else
1527 	if (!if_sendq_empty(ifp))
1528 		em_start_locked(ifp, txr);
1529 #endif
1530 	EM_TX_UNLOCK(txr);
1531 
1532 	return (rx_done);
1533 }
1534 #endif /* DEVICE_POLLING */
1535 
1536 
1537 /*********************************************************************
1538  *
1539  *  Fast Legacy/MSI Combined Interrupt Service routine
1540  *
1541  *********************************************************************/
1542 static int
1543 em_irq_fast(void *arg)
1544 {
1545 	struct adapter	*adapter = arg;
1546 	if_t ifp;
1547 	u32		reg_icr;
1548 
1549 	ifp = adapter->ifp;
1550 
1551 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1552 
1553 	/* Hot eject?  */
1554 	if (reg_icr == 0xffffffff)
1555 		return FILTER_STRAY;
1556 
1557 	/* Definitely not our interrupt.  */
1558 	if (reg_icr == 0x0)
1559 		return FILTER_STRAY;
1560 
1561 	/*
1562 	 * Starting with the 82571 chip, bit 31 should be used to
1563 	 * determine whether the interrupt belongs to us.
1564 	 */
1565 	if (adapter->hw.mac.type >= e1000_82571 &&
1566 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1567 		return FILTER_STRAY;
1568 
1569 	em_disable_intr(adapter);
1570 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1571 
1572 	/* Link status change */
1573 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1574 		adapter->hw.mac.get_link_status = 1;
1575 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1576 	}
1577 
1578 	if (reg_icr & E1000_ICR_RXO)
1579 		adapter->rx_overruns++;
1580 	return FILTER_HANDLED;
1581 }
1582 
1583 /* Combined RX/TX handler, used by Legacy and MSI */
1584 static void
1585 em_handle_que(void *context, int pending)
1586 {
1587 	struct adapter	*adapter = context;
1588 	if_t ifp = adapter->ifp;
1589 	struct tx_ring	*txr = adapter->tx_rings;
1590 	struct rx_ring	*rxr = adapter->rx_rings;
1591 
1592 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1593 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1594 
1595 		EM_TX_LOCK(txr);
1596 		em_txeof(txr);
1597 #ifdef EM_MULTIQUEUE
1598 		if (!drbr_empty(ifp, txr->br))
1599 			em_mq_start_locked(ifp, txr);
1600 #else
1601 		if (!if_sendq_empty(ifp))
1602 			em_start_locked(ifp, txr);
1603 #endif
1604 		EM_TX_UNLOCK(txr);
1605 		if (more) {
1606 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1607 			return;
1608 		}
1609 	}
1610 
1611 	em_enable_intr(adapter);
1612 	return;
1613 }
1614 
1615 
1616 /*********************************************************************
1617  *
1618  *  MSIX Interrupt Service Routines
1619  *
1620  **********************************************************************/
1621 static void
1622 em_msix_tx(void *arg)
1623 {
1624 	struct tx_ring *txr = arg;
1625 	struct adapter *adapter = txr->adapter;
1626 	if_t ifp = adapter->ifp;
1627 
1628 	++txr->tx_irq;
1629 	EM_TX_LOCK(txr);
1630 	em_txeof(txr);
1631 #ifdef EM_MULTIQUEUE
1632 	if (!drbr_empty(ifp, txr->br))
1633 		em_mq_start_locked(ifp, txr);
1634 #else
1635 	if (!if_sendq_empty(ifp))
1636 		em_start_locked(ifp, txr);
1637 #endif
1638 
1639 	/* Reenable this interrupt */
1640 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1641 	EM_TX_UNLOCK(txr);
1642 	return;
1643 }
1644 
1645 /*********************************************************************
1646  *
1647  *  MSIX RX Interrupt Service routine
1648  *
1649  **********************************************************************/
1650 
1651 static void
1652 em_msix_rx(void *arg)
1653 {
1654 	struct rx_ring	*rxr = arg;
1655 	struct adapter	*adapter = rxr->adapter;
1656 	bool		more;
1657 
1658 	++rxr->rx_irq;
1659 	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1660 		return;
1661 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1662 	if (more)
1663 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1664 	else {
1665 		/* Reenable this interrupt */
1666 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1667 	}
1668 	return;
1669 }
1670 
1671 /*********************************************************************
1672  *
1673  *  MSIX Link Fast Interrupt Service routine
1674  *
1675  **********************************************************************/
1676 static void
1677 em_msix_link(void *arg)
1678 {
1679 	struct adapter	*adapter = arg;
1680 	u32		reg_icr;
1681 
1682 	++adapter->link_irq;
1683 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1684 
1685 	if (reg_icr & E1000_ICR_RXO)
1686 		adapter->rx_overruns++;
1687 
1688 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1689 		adapter->hw.mac.get_link_status = 1;
1690 		em_handle_link(adapter, 0);
1691 	} else
1692 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1693 		    EM_MSIX_LINK | E1000_IMS_LSC);
1694 	/*
1695 	** Because we must read the ICR for this interrupt,
1696 	** the autoclear feature may clear other pending causes
1697 	** as a side effect; for this reason we simply post a
1698 	** soft interrupt (via ICS below) for all of our vectors.
1699 	*/
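	/*
	 * Writing adapter->ims (every cause we own) to ICS re-raises
	 * those causes, so anything the ICR read auto-cleared is
	 * redelivered on its proper MSIX vector.
	 */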
1700 	if (reg_icr) {
1701 		E1000_WRITE_REG(&adapter->hw,
1702 			E1000_ICS, adapter->ims);
1703 	}
1704 	return;
1705 }
1706 
1707 static void
1708 em_handle_rx(void *context, int pending)
1709 {
1710 	struct rx_ring	*rxr = context;
1711 	struct adapter	*adapter = rxr->adapter;
1712 	bool		more;
1713 
1714 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1715 	if (more)
1716 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1717 	else {
1718 		/* Reenable this interrupt */
1719 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1720 	}
1721 }
1722 
1723 static void
1724 em_handle_tx(void *context, int pending)
1725 {
1726 	struct tx_ring	*txr = context;
1727 	struct adapter	*adapter = txr->adapter;
1728 	if_t ifp = adapter->ifp;
1729 
1730 	EM_TX_LOCK(txr);
1731 	em_txeof(txr);
1732 #ifdef EM_MULTIQUEUE
1733 	if (!drbr_empty(ifp, txr->br))
1734 		em_mq_start_locked(ifp, txr);
1735 #else
1736 	if (!if_sendq_empty(ifp))
1737 		em_start_locked(ifp, txr);
1738 #endif
1739 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1740 	EM_TX_UNLOCK(txr);
1741 }
1742 
1743 static void
1744 em_handle_link(void *context, int pending)
1745 {
1746 	struct adapter	*adapter = context;
1747 	struct tx_ring	*txr = adapter->tx_rings;
1748 	if_t ifp = adapter->ifp;
1749 
1750 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1751 		return;
1752 
1753 	EM_CORE_LOCK(adapter);
1754 	callout_stop(&adapter->timer);
1755 	em_update_link_status(adapter);
1756 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1757 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1758 	    EM_MSIX_LINK | E1000_IMS_LSC);
1759 	if (adapter->link_active) {
1760 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1761 			EM_TX_LOCK(txr);
1762 #ifdef EM_MULTIQUEUE
1763 			if (!drbr_empty(ifp, txr->br))
1764 				em_mq_start_locked(ifp, txr);
1765 #else
1766 			if (!if_sendq_empty(ifp))
1767 				em_start_locked(ifp, txr);
1768 #endif
1769 			EM_TX_UNLOCK(txr);
1770 		}
1771 	}
1772 	EM_CORE_UNLOCK(adapter);
1773 }
1774 
1775 
1776 /*********************************************************************
1777  *
1778  *  Media Ioctl callback
1779  *
1780  *  This routine is called whenever the user queries the status of
1781  *  the interface using ifconfig.
1782  *
1783  **********************************************************************/
1784 static void
1785 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1786 {
1787 	struct adapter *adapter = if_getsoftc(ifp);
1788 	u_char fiber_type = IFM_1000_SX;
1789 
1790 	INIT_DEBUGOUT("em_media_status: begin");
1791 
1792 	EM_CORE_LOCK(adapter);
1793 	em_update_link_status(adapter);
1794 
1795 	ifmr->ifm_status = IFM_AVALID;
1796 	ifmr->ifm_active = IFM_ETHER;
1797 
1798 	if (!adapter->link_active) {
1799 		EM_CORE_UNLOCK(adapter);
1800 		return;
1801 	}
1802 
1803 	ifmr->ifm_status |= IFM_ACTIVE;
1804 
1805 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1806 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1807 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1808 	} else {
1809 		switch (adapter->link_speed) {
1810 		case 10:
1811 			ifmr->ifm_active |= IFM_10_T;
1812 			break;
1813 		case 100:
1814 			ifmr->ifm_active |= IFM_100_TX;
1815 			break;
1816 		case 1000:
1817 			ifmr->ifm_active |= IFM_1000_T;
1818 			break;
1819 		}
1820 		if (adapter->link_duplex == FULL_DUPLEX)
1821 			ifmr->ifm_active |= IFM_FDX;
1822 		else
1823 			ifmr->ifm_active |= IFM_HDX;
1824 	}
1825 	EM_CORE_UNLOCK(adapter);
1826 }
1827 
1828 /*********************************************************************
1829  *
1830  *  Media Ioctl callback
1831  *
1832  *  This routine is called when the user changes speed/duplex using
1833  *  the media/mediaopt options with ifconfig.
1834  *
1835  **********************************************************************/
1836 static int
1837 em_media_change(if_t ifp)
1838 {
1839 	struct adapter *adapter = if_getsoftc(ifp);
1840 	struct ifmedia  *ifm = &adapter->media;
1841 
1842 	INIT_DEBUGOUT("em_media_change: begin");
1843 
1844 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1845 		return (EINVAL);
1846 
1847 	EM_CORE_LOCK(adapter);
1848 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1849 	case IFM_AUTO:
1850 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1851 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1852 		break;
1853 	case IFM_1000_LX:
1854 	case IFM_1000_SX:
1855 	case IFM_1000_T:
1856 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1857 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1858 		break;
1859 	case IFM_100_TX:
1860 		adapter->hw.mac.autoneg = FALSE;
1861 		adapter->hw.phy.autoneg_advertised = 0;
1862 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1863 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1864 		else
1865 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1866 		break;
1867 	case IFM_10_T:
1868 		adapter->hw.mac.autoneg = FALSE;
1869 		adapter->hw.phy.autoneg_advertised = 0;
1870 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1871 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1872 		else
1873 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1874 		break;
1875 	default:
1876 		device_printf(adapter->dev, "Unsupported media type\n");
1877 	}
1878 
1879 	em_init_locked(adapter);
1880 	EM_CORE_UNLOCK(adapter);
1881 
1882 	return (0);
1883 }
1884 
1885 /*********************************************************************
1886  *
1887  *  This routine maps the mbufs to tx descriptors.
1888  *
1889  *  return 0 on success, positive on failure
1890  **********************************************************************/
1891 
1892 static int
1893 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1894 {
1895 	struct adapter		*adapter = txr->adapter;
1896 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1897 	bus_dmamap_t		map;
1898 	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
1899 	struct e1000_tx_desc	*ctxd = NULL;
1900 	struct mbuf		*m_head;
1901 	struct ether_header	*eh;
1902 	struct ip		*ip = NULL;
1903 	struct tcphdr		*tp = NULL;
1904 	u32			txd_upper = 0, txd_lower = 0;
1905 	int			ip_off, poff;
1906 	int			nsegs, i, j, first, last = 0;
1907 	int			error;
1908 	bool			do_tso, tso_desc, remap = TRUE;
1909 
1910 	m_head = *m_headp;
1911 	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1912 	tso_desc = FALSE;
1913 	ip_off = poff = 0;
1914 
1915 	/*
1916 	 * Intel recommends entire IP/TCP header length reside in a single
1917 	 * buffer. If multiple descriptors are used to describe the IP and
1918 	 * TCP header, each descriptor should describe one or more
1919 	 * complete headers; descriptors referencing only parts of headers
1920 	 * are not supported. If all layer headers are not coalesced into
1921 	 * a single buffer, each buffer should not cross a 4KB boundary,
1922 	 * or be larger than the maximum read request size.
1923 	 * The controller also requires modifying the IP/TCP header to
1924 	 * make TSO work, so we first obtain a writable mbuf chain, then
1925 	 * coalesce the ethernet/IP/TCP headers into a single buffer to
1926 	 * meet the controller's requirement. This also simplifies
1927 	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1928 	 */
1929 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1930 		if (do_tso || (m_head->m_next != NULL &&
1931 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1932 			if (M_WRITABLE(*m_headp) == 0) {
1933 				m_head = m_dup(*m_headp, M_NOWAIT);
1934 				m_freem(*m_headp);
1935 				if (m_head == NULL) {
1936 					*m_headp = NULL;
1937 					return (ENOBUFS);
1938 				}
1939 				*m_headp = m_head;
1940 			}
1941 		}
1942 		/*
1943 		 * XXX
1944 		 * Assume IPv4, we don't have TSO/checksum offload support
1945 		 * for IPv6 yet.
1946 		 */
1947 		ip_off = sizeof(struct ether_header);
1948 		if (m_head->m_len < ip_off) {
1949 			m_head = m_pullup(m_head, ip_off);
1950 			if (m_head == NULL) {
1951 				*m_headp = NULL;
1952 				return (ENOBUFS);
1953 			}
1954 		}
1955 		eh = mtod(m_head, struct ether_header *);
1956 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1957 			ip_off = sizeof(struct ether_vlan_header);
1958 			if (m_head->m_len < ip_off) {
1959 				m_head = m_pullup(m_head, ip_off);
1960 				if (m_head == NULL) {
1961 					*m_headp = NULL;
1962 					return (ENOBUFS);
1963 				}
1964 			}
1965 		}
1966 		if (m_head->m_len < ip_off + sizeof(struct ip)) {
1967 			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1968 			if (m_head == NULL) {
1969 				*m_headp = NULL;
1970 				return (ENOBUFS);
1971 			}
1972 		}
1973 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1974 		poff = ip_off + (ip->ip_hl << 2);
1975 
1976 		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1977 			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1978 				m_head = m_pullup(m_head, poff +
1979 				    sizeof(struct tcphdr));
1980 				if (m_head == NULL) {
1981 					*m_headp = NULL;
1982 					return (ENOBUFS);
1983 				}
1984 			}
1985 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1986 			/*
1987 			 * TSO workaround:
1988 			 *   pull 4 more bytes of data into the first mbuf.
1989 			 */
1990 			if (m_head->m_len < poff + (tp->th_off << 2)) {
1991 				m_head = m_pullup(m_head, poff +
1992 				                 (tp->th_off << 2) +
1993 				                 TSO_WORKAROUND);
1994 				if (m_head == NULL) {
1995 					*m_headp = NULL;
1996 					return (ENOBUFS);
1997 				}
1998 			}
1999 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2000 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2001 			if (do_tso) {
2002 				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2003 				                  (ip->ip_hl << 2) +
2004 				                  (tp->th_off << 2));
2005 				ip->ip_sum = 0;
2006 				/*
2007 				 * The pseudo TCP checksum does not include the
2008 				 * TCP payload length, so the driver must
2009 				 * recompute it here in the form the hardware
2010 				 * expects to see. This is in adherence to
2011 				 * Microsoft's Large Send specification.
2012 				 */
2013 				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2014 				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2015 			}
2016 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2017 			if (m_head->m_len < poff + sizeof(struct udphdr)) {
2018 				m_head = m_pullup(m_head, poff +
2019 				    sizeof(struct udphdr));
2020 				if (m_head == NULL) {
2021 					*m_headp = NULL;
2022 					return (ENOBUFS);
2023 				}
2024 			}
2025 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2026 		}
2027 		*m_headp = m_head;
2028 	}
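
	/*
	 * Worked example of the offsets computed above, assuming a plain
	 * IPv4/TCP frame with no VLAN tag and no IP options: ip_off = 14
	 * (the Ethernet header) and ip_hl = 5 words, so poff = 14 + 20 =
	 * 34, the start of the TCP header.  The m_pullup() calls merely
	 * guarantee that those headers are contiguous in the first mbuf.
	 */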
2029 
2030 	/*
2031 	 * Map the packet for DMA
2032 	 *
2033 	 * Capture the first descriptor index; this
2034 	 * descriptor will later record the index of
2035 	 * the EOP descriptor, which is the only one
2036 	 * that now gets a DONE bit writeback.
2037 	 */
2038 	first = txr->next_avail_desc;
2039 	tx_buffer = &txr->tx_buffers[first];
2040 	tx_buffer_mapped = tx_buffer;
2041 	map = tx_buffer->map;
2042 
2043 retry:
2044 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2045 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2046 
2047 	/*
2048 	 * There are two types of errors we can (try) to handle:
2049 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2050 	 *   out of segments.  Defragment the mbuf chain and try again.
2051 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2052 	 *   at this point in time.  Defer sending and try again later.
2053 	 * All other errors, in particular EINVAL, are fatal and prevent the
2054 	 * mbuf chain from ever going through.  Drop it and report error.
2055 	 */
2056 	if (error == EFBIG && remap) {
2057 		struct mbuf *m;
2058 
2059 		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2060 		if (m == NULL) {
2061 			adapter->mbuf_defrag_failed++;
2062 			m_freem(*m_headp);
2063 			*m_headp = NULL;
2064 			return (ENOBUFS);
2065 		}
2066 		*m_headp = m;
2067 
2068 		/* Try it again, but only once */
2069 		remap = FALSE;
2070 		goto retry;
2071 	} else if (error != 0) {
2072 		adapter->no_tx_dma_setup++;
2073 		m_freem(*m_headp);
2074 		*m_headp = NULL;
2075 		return (error);
2076 	}
2077 
2078 	/*
2079 	 * TSO Hardware workaround, if this packet is not
2080 	 * TSO, and is only a single descriptor long, and
2081 	 * it follows a TSO burst, then we need to add a
2082 	 * sentinel descriptor to prevent premature writeback.
2083 	 */
2084 	if ((!do_tso) && (txr->tx_tso == TRUE)) {
2085 		if (nsegs == 1)
2086 			tso_desc = TRUE;
2087 		txr->tx_tso = FALSE;
2088 	}
2089 
2090 	if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2091 		txr->no_desc_avail++;
2092 		bus_dmamap_unload(txr->txtag, map);
2093 		return (ENOBUFS);
2094 	}
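
	/*
	 * Note: the check above insists on nsegs + EM_MAX_SCATTER free
	 * descriptors rather than just nsegs; the extra headroom
	 * presumably covers the worst-case TSO sentinel split below and
	 * keeps the ring from being run completely dry before the next
	 * em_txeof() pass.
	 */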
2095 	m_head = *m_headp;
2096 
2097 	/* Do hardware assists */
2098 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2099 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2100 		    &txd_upper, &txd_lower);
2101 		/* we need to make a final sentinel transmit desc */
2102 		tso_desc = TRUE;
2103 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2104 		em_transmit_checksum_setup(txr, m_head,
2105 		    ip_off, ip, &txd_upper, &txd_lower);
2106 
2107 	if (m_head->m_flags & M_VLANTAG) {
2108 		/* Set the vlan id. */
2109 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2110 		/* Tell hardware to add tag */
2111 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2112 	}
2113 
2114 	i = txr->next_avail_desc;
2115 
2116 	/* Set up our transmit descriptors */
2117 	for (j = 0; j < nsegs; j++) {
2118 		bus_size_t seg_len;
2119 		bus_addr_t seg_addr;
2120 
2121 		tx_buffer = &txr->tx_buffers[i];
2122 		ctxd = &txr->tx_base[i];
2123 		seg_addr = segs[j].ds_addr;
2124 		seg_len  = segs[j].ds_len;
2125 		/*
2126 		** TSO Workaround:
2127 		** If this is the last descriptor, we want to
2128 		** split it so we have a small final sentinel
2129 		*/
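		/*
		 * Example: a final 1000-byte segment at bus address A is
		 * emitted as one 996-byte descriptor at A followed by a
		 * 4-byte (TSO_WORKAROUND) sentinel descriptor at A + 996;
		 * the sentinel is the descriptor that later carries the
		 * EOP/RS bits.
		 */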
2130 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2131 			seg_len -= TSO_WORKAROUND;
2132 			ctxd->buffer_addr = htole64(seg_addr);
2133 			ctxd->lower.data = htole32(
2134 				adapter->txd_cmd | txd_lower | seg_len);
2135 			ctxd->upper.data = htole32(txd_upper);
2136 			if (++i == adapter->num_tx_desc)
2137 				i = 0;
2138 
2139 			/* Now make the sentinel */
2140 			txr->tx_avail--;
2141 			ctxd = &txr->tx_base[i];
2142 			tx_buffer = &txr->tx_buffers[i];
2143 			ctxd->buffer_addr =
2144 			    htole64(seg_addr + seg_len);
2145 			ctxd->lower.data = htole32(
2146 			adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2147 			ctxd->upper.data =
2148 			    htole32(txd_upper);
2149 			last = i;
2150 			if (++i == adapter->num_tx_desc)
2151 				i = 0;
2152 		} else {
2153 			ctxd->buffer_addr = htole64(seg_addr);
2154 			ctxd->lower.data = htole32(
2155 			adapter->txd_cmd | txd_lower | seg_len);
2156 			ctxd->upper.data = htole32(txd_upper);
2157 			last = i;
2158 			if (++i == adapter->num_tx_desc)
2159 				i = 0;
2160 		}
2161 		tx_buffer->m_head = NULL;
2162 		tx_buffer->next_eop = -1;
2163 	}
2164 
2165 	txr->next_avail_desc = i;
2166 	txr->tx_avail -= nsegs;
2167 
2168 	tx_buffer->m_head = m_head;
2169 	/*
2170 	** Here we swap the map so the last descriptor,
2171 	** which gets the completion interrupt has the
2172 	** real map, and the first descriptor gets the
2173 	** unused map from this descriptor.
2174 	*/
2175 	tx_buffer_mapped->map = tx_buffer->map;
2176 	tx_buffer->map = map;
2177 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2178 
2179 	/*
2180 	 * The last descriptor of the packet
2181 	 * needs End Of Packet (EOP)
2182 	 * and Report Status (RS).
2183 	 */
2184 	ctxd->lower.data |=
2185 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2186 	/*
2187 	 * Keep track in the first buffer which
2188 	 * descriptor will be written back
2189 	 */
2190 	tx_buffer = &txr->tx_buffers[first];
2191 	tx_buffer->next_eop = last;
2192 
2193 	/*
2194 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2195 	 * that this frame is available to transmit.
2196 	 */
2197 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2198 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2199 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2200 
2201 	return (0);
2202 }
2203 
2204 static void
2205 em_set_promisc(struct adapter *adapter)
2206 {
2207 	if_t ifp = adapter->ifp;
2208 	u32		reg_rctl;
2209 
2210 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2211 
2212 	if (if_getflags(ifp) & IFF_PROMISC) {
2213 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2214 		/* Turn this on if you want to see bad packets */
2215 		if (em_debug_sbp)
2216 			reg_rctl |= E1000_RCTL_SBP;
2217 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2218 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2219 		reg_rctl |= E1000_RCTL_MPE;
2220 		reg_rctl &= ~E1000_RCTL_UPE;
2221 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2222 	}
2223 }
2224 
2225 static void
2226 em_disable_promisc(struct adapter *adapter)
2227 {
2228 	if_t		ifp = adapter->ifp;
2229 	u32		reg_rctl;
2230 	int		mcnt = 0;
2231 
2232 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2233 	reg_rctl &=  (~E1000_RCTL_UPE);
2234 	if (if_getflags(ifp) & IFF_ALLMULTI)
2235 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2236 	else
2237 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2238 	/* Don't disable if in MAX groups */
2239 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2240 		reg_rctl &=  (~E1000_RCTL_MPE);
2241 	reg_rctl &=  (~E1000_RCTL_SBP);
2242 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2243 }
2244 
2245 
2246 /*********************************************************************
2247  *  Multicast Update
2248  *
2249  *  This routine is called whenever multicast address list is updated.
2250  *
2251  **********************************************************************/
2252 
2253 static void
2254 em_set_multi(struct adapter *adapter)
2255 {
2256 	if_t ifp = adapter->ifp;
2257 	u32 reg_rctl = 0;
2258 	u8  *mta; /* Multicast array memory */
2259 	int mcnt = 0;
2260 
2261 	IOCTL_DEBUGOUT("em_set_multi: begin");
2262 
2263 	mta = adapter->mta;
2264 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2265 
2266 	if (adapter->hw.mac.type == e1000_82542 &&
2267 	    adapter->hw.revision_id == E1000_REVISION_2) {
2268 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2269 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2270 			e1000_pci_clear_mwi(&adapter->hw);
2271 		reg_rctl |= E1000_RCTL_RST;
2272 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2273 		msec_delay(5);
2274 	}
2275 
2276 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2277 
2278 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2279 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2280 		reg_rctl |= E1000_RCTL_MPE;
2281 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2282 	} else
2283 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2284 
2285 	if (adapter->hw.mac.type == e1000_82542 &&
2286 	    adapter->hw.revision_id == E1000_REVISION_2) {
2287 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2288 		reg_rctl &= ~E1000_RCTL_RST;
2289 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2290 		msec_delay(5);
2291 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2292 			e1000_pci_set_mwi(&adapter->hw);
2293 	}
2294 }
2295 
2296 
2297 /*********************************************************************
2298  *  Timer routine
2299  *
2300  *  This routine checks for link status and updates statistics.
2301  *
2302  **********************************************************************/
2303 
2304 static void
2305 em_local_timer(void *arg)
2306 {
2307 	struct adapter	*adapter = arg;
2308 	if_t ifp = adapter->ifp;
2309 	struct tx_ring	*txr = adapter->tx_rings;
2310 	struct rx_ring	*rxr = adapter->rx_rings;
2311 	u32		trigger = 0;
2312 
2313 	EM_CORE_LOCK_ASSERT(adapter);
2314 
2315 	em_update_link_status(adapter);
2316 	em_update_stats_counters(adapter);
2317 
2318 	/* Reset LAA into RAR[0] on 82571 */
2319 	if ((adapter->hw.mac.type == e1000_82571) &&
2320 	    e1000_get_laa_state_82571(&adapter->hw))
2321 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2322 
2323 	/* Mask to use in the irq trigger */
2324 	if (adapter->msix_mem) {
2325 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2326 			trigger |= rxr->ims;
2327 		rxr = adapter->rx_rings;
2328 	} else
2329 		trigger = E1000_ICS_RXDMT0;
2330 
2331 	/*
2332 	** Check on the state of the TX queue(s); this
2333 	** can be done without the lock because it's RO
2334 	** and the HUNG state will be static if set.
2335 	*/
2336 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2337 		if (txr->busy == EM_TX_HUNG)
2338 			goto hung;
2339 		if (txr->busy >= EM_TX_MAXTRIES)
2340 			txr->busy = EM_TX_HUNG;
2341 		/* Schedule a TX tasklet if needed */
2342 		if (txr->tx_avail <= EM_MAX_SCATTER)
2343 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2344 	}
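
	/*
	 * The txr->busy counter checked above is presumably advanced by
	 * the TX cleanup path whenever a queue makes no forward
	 * progress; once it reaches EM_TX_MAXTRIES it is latched to
	 * EM_TX_HUNG, and the next timer tick takes the watchdog path
	 * below.
	 */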
2345 
2346 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2347 #ifndef DEVICE_POLLING
2348 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2349 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2350 #endif
2351 	return;
2352 hung:
2353 	/* Looks like we're hung */
2354 	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2355 			txr->me);
2356 	em_print_debug_info(adapter);
2357 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2358 	adapter->watchdog_events++;
2359 	em_init_locked(adapter);
2360 }
2361 
2362 
2363 static void
2364 em_update_link_status(struct adapter *adapter)
2365 {
2366 	struct e1000_hw *hw = &adapter->hw;
2367 	if_t ifp = adapter->ifp;
2368 	device_t dev = adapter->dev;
2369 	struct tx_ring *txr = adapter->tx_rings;
2370 	u32 link_check = 0;
2371 
2372 	/* Get the cached link value or read phy for real */
2373 	switch (hw->phy.media_type) {
2374 	case e1000_media_type_copper:
2375 		if (hw->mac.get_link_status) {
2376 			if (hw->mac.type == e1000_pch_spt)
2377 				msec_delay(50);
2378 			/* Do the work to read phy */
2379 			e1000_check_for_link(hw);
2380 			link_check = !hw->mac.get_link_status;
2381 			if (link_check) /* ESB2 fix */
2382 				e1000_cfg_on_link_up(hw);
2383 		} else
2384 			link_check = TRUE;
2385 		break;
2386 	case e1000_media_type_fiber:
2387 		e1000_check_for_link(hw);
2388 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2389                                  E1000_STATUS_LU);
2390 		break;
2391 	case e1000_media_type_internal_serdes:
2392 		e1000_check_for_link(hw);
2393 		link_check = adapter->hw.mac.serdes_has_link;
2394 		break;
2395 	default:
2396 	case e1000_media_type_unknown:
2397 		break;
2398 	}
2399 
2400 	/* Now check for a transition */
2401 	if (link_check && (adapter->link_active == 0)) {
2402 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2403 		    &adapter->link_duplex);
2404 		/*
2405 		** There have proven to be problems with TSO when not
2406 		** at full gigabit speed, so disable the assist automatically
2407 		** when at lower speeds.  -jfv
2408 		*/
2409 		if (adapter->link_speed != SPEED_1000) {
2410 			if_sethwassistbits(ifp, 0, CSUM_TSO);
2411 			if_setcapenablebit(ifp, 0, IFCAP_TSO4);
2412 			if_setcapabilitiesbit(ifp, 0, IFCAP_TSO4);
2413 
2414 		}
2415 
2416 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2417 		if ((adapter->link_speed != SPEED_1000) &&
2418 		    ((hw->mac.type == e1000_82571) ||
2419 		    (hw->mac.type == e1000_82572))) {
2420 			int tarc0;
2421 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2422 			tarc0 &= ~TARC_SPEED_MODE_BIT;
2423 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2424 		}
2425 		if (bootverbose)
2426 			device_printf(dev, "Link is up %d Mbps %s\n",
2427 			    adapter->link_speed,
2428 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2429 			    "Full Duplex" : "Half Duplex"));
2430 		adapter->link_active = 1;
2431 		adapter->smartspeed = 0;
2432 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2433 		if_link_state_change(ifp, LINK_STATE_UP);
2434 	} else if (!link_check && (adapter->link_active == 1)) {
2435 		if_setbaudrate(ifp, 0);
2436 		adapter->link_speed = 0;
2437 		adapter->link_duplex = 0;
2438 		if (bootverbose)
2439 			device_printf(dev, "Link is Down\n");
2440 		adapter->link_active = 0;
2441 		/* Link down, disable hang detection */
2442 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2443 			txr->busy = EM_TX_IDLE;
2444 		if_link_state_change(ifp, LINK_STATE_DOWN);
2445 	}
2446 }
2447 
2448 /*********************************************************************
2449  *
2450  *  This routine disables all traffic on the adapter by issuing a
2451  *  global reset on the MAC and deallocates TX/RX buffers.
2452  *
2453  *  This routine should always be called with BOTH the CORE
2454  *  and TX locks.
2455  **********************************************************************/
2456 
2457 static void
2458 em_stop(void *arg)
2459 {
2460 	struct adapter	*adapter = arg;
2461 	if_t ifp = adapter->ifp;
2462 	struct tx_ring	*txr = adapter->tx_rings;
2463 
2464 	EM_CORE_LOCK_ASSERT(adapter);
2465 
2466 	INIT_DEBUGOUT("em_stop: begin");
2467 
2468 	em_disable_intr(adapter);
2469 	callout_stop(&adapter->timer);
2470 
2471 	/* Tell the stack that the interface is no longer active */
2472 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2473 
2474 	/* Disarm Hang Detection. */
2475 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2476 		EM_TX_LOCK(txr);
2477 		txr->busy = EM_TX_IDLE;
2478 		EM_TX_UNLOCK(txr);
2479 	}
2480 
2481 	/* I219 needs some special flushing to avoid hangs */
2482 	if (adapter->hw.mac.type == e1000_pch_spt)
2483 		em_flush_desc_rings(adapter);
2484 
2485 	e1000_reset_hw(&adapter->hw);
2486 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2487 
2488 	e1000_led_off(&adapter->hw);
2489 	e1000_cleanup_led(&adapter->hw);
2490 }
2491 
2492 
2493 /*********************************************************************
2494  *
2495  *  Determine hardware revision.
2496  *
2497  **********************************************************************/
2498 static void
2499 em_identify_hardware(struct adapter *adapter)
2500 {
2501 	device_t dev = adapter->dev;
2502 
2503 	/* Make sure our PCI config space has the necessary stuff set */
2504 	pci_enable_busmaster(dev);
2505 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2506 
2507 	/* Save off the information about this board */
2508 	adapter->hw.vendor_id = pci_get_vendor(dev);
2509 	adapter->hw.device_id = pci_get_device(dev);
2510 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2511 	adapter->hw.subsystem_vendor_id =
2512 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2513 	adapter->hw.subsystem_device_id =
2514 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2515 
2516 	/* Do Shared Code Init and Setup */
2517 	if (e1000_set_mac_type(&adapter->hw)) {
2518 		device_printf(dev, "Setup init failure\n");
2519 		return;
2520 	}
2521 }
2522 
2523 static int
2524 em_allocate_pci_resources(struct adapter *adapter)
2525 {
2526 	device_t	dev = adapter->dev;
2527 	int		rid;
2528 
2529 	rid = PCIR_BAR(0);
2530 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2531 	    &rid, RF_ACTIVE);
2532 	if (adapter->memory == NULL) {
2533 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2534 		return (ENXIO);
2535 	}
2536 	adapter->osdep.mem_bus_space_tag =
2537 	    rman_get_bustag(adapter->memory);
2538 	adapter->osdep.mem_bus_space_handle =
2539 	    rman_get_bushandle(adapter->memory);
2540 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2541 
2542 	adapter->hw.back = &adapter->osdep;
2543 
2544 	return (0);
2545 }
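
/*
 * Note the hw_addr assignment above: it points at the bus space handle
 * rather than at a mapped virtual address, which is presumably fine
 * because this driver's E1000_READ_REG/E1000_WRITE_REG macros go
 * through the bus space tag/handle stored in osdep instead of
 * dereferencing hw_addr directly.
 */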
2546 
2547 /*********************************************************************
2548  *
2549  *  Setup the Legacy or MSI Interrupt handler
2550  *
2551  **********************************************************************/
2552 int
2553 em_allocate_legacy(struct adapter *adapter)
2554 {
2555 	device_t dev = adapter->dev;
2556 	struct tx_ring	*txr = adapter->tx_rings;
2557 	int error, rid = 0;
2558 
2559 	/* Manually turn off all interrupts */
2560 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2561 
2562 	if (adapter->msix == 1) /* using MSI */
2563 		rid = 1;
2564 	/* We allocate a single interrupt resource */
2565 	adapter->res = bus_alloc_resource_any(dev,
2566 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2567 	if (adapter->res == NULL) {
2568 		device_printf(dev, "Unable to allocate bus resource: "
2569 		    "interrupt\n");
2570 		return (ENXIO);
2571 	}
2572 
2573 	/*
2574 	 * Allocate a fast interrupt and the associated
2575 	 * deferred processing contexts.
2576 	 */
2577 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2578 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2579 	    taskqueue_thread_enqueue, &adapter->tq);
2580 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2581 	    device_get_nameunit(adapter->dev));
2582 	/* Use a TX only tasklet for local timer */
2583 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2584 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2585 	    taskqueue_thread_enqueue, &txr->tq);
2586 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2587 	    device_get_nameunit(adapter->dev));
2588 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2589 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2590 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2591 		device_printf(dev, "Failed to register fast interrupt "
2592 			    "handler: %d\n", error);
2593 		taskqueue_free(adapter->tq);
2594 		adapter->tq = NULL;
2595 		return (error);
2596 	}
2597 
2598 	return (0);
2599 }
2600 
2601 /*********************************************************************
2602  *
2603  *  Setup the MSIX Interrupt handlers
2604  *   This is not really Multiqueue, rather
2605  *   it's just separate interrupt vectors
2606  *   for TX, RX, and Link.
2607  *
2608  **********************************************************************/
2609 int
2610 em_allocate_msix(struct adapter *adapter)
2611 {
2612 	device_t	dev = adapter->dev;
2613 	struct		tx_ring *txr = adapter->tx_rings;
2614 	struct		rx_ring *rxr = adapter->rx_rings;
2615 	int		error, rid, vector = 0;
2616 	int		cpu_id = 0;
2617 
2618 
2619 	/* Make sure all interrupts are disabled */
2620 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2621 
2622 	/* First set up ring resources */
2623 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2624 
2625 		/* RX ring */
2626 		rid = vector + 1;
2627 
2628 		rxr->res = bus_alloc_resource_any(dev,
2629 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2630 		if (rxr->res == NULL) {
2631 			device_printf(dev,
2632 			    "Unable to allocate bus resource: "
2633 			    "RX MSIX Interrupt %d\n", i);
2634 			return (ENXIO);
2635 		}
2636 		if ((error = bus_setup_intr(dev, rxr->res,
2637 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2638 		    rxr, &rxr->tag)) != 0) {
2639 			device_printf(dev, "Failed to register RX handler\n");
2640 			return (error);
2641 		}
2642 #if __FreeBSD_version >= 800504
2643 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2644 #endif
2645 		rxr->msix = vector;
2646 
2647 		if (em_last_bind_cpu < 0)
2648 			em_last_bind_cpu = CPU_FIRST();
2649 		cpu_id = em_last_bind_cpu;
2650 		bus_bind_intr(dev, rxr->res, cpu_id);
2651 
2652 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2653 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2654 		    taskqueue_thread_enqueue, &rxr->tq);
2655 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2656 		    device_get_nameunit(adapter->dev), cpu_id);
2657 		/*
2658 		** Set the bit to enable interrupt
2659 		** in E1000_IMS -- bits 20 and 21
2660 		** are for RX0 and RX1; note this has
2661 		** NOTHING to do with the MSIX vector number.
2662 		*/
2663 		rxr->ims = 1 << (20 + i);
2664 		adapter->ims |= rxr->ims;
2665 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
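		/*
		 * Sketch of the 82574 IVAR encoding assumed above: each
		 * interrupt cause gets a 4-bit field whose low 3 bits
		 * select the MSIX vector and whose high bit (the 8
		 * above) marks the entry valid, so RX queue i occupies
		 * bits [i*4 .. i*4+3] of adapter->ivars.
		 */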
2666 
2667 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2668 	}
2669 
2670 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2671 		/* TX ring */
2672 		rid = vector + 1;
2673 		txr->res = bus_alloc_resource_any(dev,
2674 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2675 		if (txr->res == NULL) {
2676 			device_printf(dev,
2677 			    "Unable to allocate bus resource: "
2678 			    "TX MSIX Interrupt %d\n", i);
2679 			return (ENXIO);
2680 		}
2681 		if ((error = bus_setup_intr(dev, txr->res,
2682 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2683 		    txr, &txr->tag)) != 0) {
2684 			device_printf(dev, "Failed to register TX handler\n");
2685 			return (error);
2686 		}
2687 #if __FreeBSD_version >= 800504
2688 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2689 #endif
2690 		txr->msix = vector;
2691 
2692 		if (em_last_bind_cpu < 0)
2693 			em_last_bind_cpu = CPU_FIRST();
2694 		cpu_id = em_last_bind_cpu;
2695 		bus_bind_intr(dev, txr->res, cpu_id);
2696 
2697 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2698 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2699 		    taskqueue_thread_enqueue, &txr->tq);
2700 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2701 		    device_get_nameunit(adapter->dev), cpu_id);
2702 		/*
2703 		** Set the bit to enable interrupt
2704 		** in E1000_IMS -- bits 22 and 23
2705 		** are for TX0 and TX1; note this has
2706 		** NOTHING to do with the MSIX vector number.
2707 		*/
2708 		txr->ims = 1 << (22 + i);
2709 		adapter->ims |= txr->ims;
2710 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2711 
2712 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2713 	}
2714 
2715 	/* Link interrupt */
2716 	rid = vector + 1;
2717 	adapter->res = bus_alloc_resource_any(dev,
2718 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2719 	if (!adapter->res) {
2720 		device_printf(dev, "Unable to allocate "
2721 		    "bus resource: Link interrupt [%d]\n", rid);
2722 		return (ENXIO);
2723 	}
2724 	/* Set the link handler function */
2725 	error = bus_setup_intr(dev, adapter->res,
2726 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2727 	    em_msix_link, adapter, &adapter->tag);
2728 	if (error) {
2729 		adapter->res = NULL;
2730 		device_printf(dev, "Failed to register LINK handler\n");
2731 		return (error);
2732 	}
2733 #if __FreeBSD_version >= 800504
2734 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2735 #endif
2736 	adapter->linkvec = vector;
2737 	adapter->ivars |=  (8 | vector) << 16;
2738 	adapter->ivars |= 0x80000000;
2739 
2740 	return (0);
2741 }
2742 
2743 
2744 static void
2745 em_free_pci_resources(struct adapter *adapter)
2746 {
2747 	device_t	dev = adapter->dev;
2748 	struct tx_ring	*txr;
2749 	struct rx_ring	*rxr;
2750 	int		rid;
2751 
2752 
2753 	/*
2754 	** Release all the queue interrupt resources:
2755 	*/
2756 	for (int i = 0; i < adapter->num_queues; i++) {
2757 		txr = &adapter->tx_rings[i];
2758 		/* an early abort? */
2759 		if (txr == NULL)
2760 			break;
2761 		rid = txr->msix + 1;
2762 		if (txr->tag != NULL) {
2763 			bus_teardown_intr(dev, txr->res, txr->tag);
2764 			txr->tag = NULL;
2765 		}
2766 		if (txr->res != NULL)
2767 			bus_release_resource(dev, SYS_RES_IRQ,
2768 			    rid, txr->res);
2769 
2770 		rxr = &adapter->rx_rings[i];
2771 		/* an early abort? */
2772 		if (rxr == NULL)
2773 			break;
2774 		rid = rxr->msix + 1;
2775 		if (rxr->tag != NULL) {
2776 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2777 			rxr->tag = NULL;
2778 		}
2779 		if (rxr->res != NULL)
2780 			bus_release_resource(dev, SYS_RES_IRQ,
2781 			    rid, rxr->res);
2782 	}
2783 
2784 	if (adapter->linkvec) /* we are doing MSIX */
2785 		rid = adapter->linkvec + 1;
2786 	else
2787 		rid = (adapter->msix != 0) ? 1 : 0;
2788 
2789 	if (adapter->tag != NULL) {
2790 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2791 		adapter->tag = NULL;
2792 	}
2793 
2794 	if (adapter->res != NULL)
2795 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2796 
2797 
2798 	if (adapter->msix)
2799 		pci_release_msi(dev);
2800 
2801 	if (adapter->msix_mem != NULL)
2802 		bus_release_resource(dev, SYS_RES_MEMORY,
2803 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2804 
2805 	if (adapter->memory != NULL)
2806 		bus_release_resource(dev, SYS_RES_MEMORY,
2807 		    PCIR_BAR(0), adapter->memory);
2808 
2809 	if (adapter->flash != NULL)
2810 		bus_release_resource(dev, SYS_RES_MEMORY,
2811 		    EM_FLASH, adapter->flash);
2812 }
2813 
2814 /*
2815  * Setup MSI or MSI/X
2816  */
2817 static int
2818 em_setup_msix(struct adapter *adapter)
2819 {
2820 	device_t dev = adapter->dev;
2821 	int val;
2822 
2823 	/* Nearly always going to use one queue */
2824 	adapter->num_queues = 1;
2825 
2826 	/*
2827 	** Try using MSI-X for Hartwell adapters
2828 	*/
2829 	if ((adapter->hw.mac.type == e1000_82574) &&
2830 	    (em_enable_msix == TRUE)) {
2831 #ifdef EM_MULTIQUEUE
2832 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2833 		if (adapter->num_queues > 1)
2834 			em_enable_vectors_82574(adapter);
2835 #endif
2836 		/* Map the MSIX BAR */
2837 		int rid = PCIR_BAR(EM_MSIX_BAR);
2838 		adapter->msix_mem = bus_alloc_resource_any(dev,
2839 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2840 		if (adapter->msix_mem == NULL) {
2841 			/* May not be enabled */
2842 			device_printf(adapter->dev,
2843 			    "Unable to map MSIX table\n");
2844 			goto msi;
2845 		}
2846 		val = pci_msix_count(dev);
2847 
2848 #ifdef EM_MULTIQUEUE
2849 		/* We need 5 vectors in the multiqueue case */
2850 		if (adapter->num_queues > 1 ) {
2851 			if (val >= 5)
2852 				val = 5;
2853 			else {
2854 				adapter->num_queues = 1;
2855 				device_printf(adapter->dev,
2856 				    "Insufficient MSIX vectors for >1 queue, "
2857 				    "using single queue...\n");
2858 				goto msix_one;
2859 			}
2860 		} else {
2861 msix_one:
2862 #endif
2863 			if (val >= 3)
2864 				val = 3;
2865 			else {
2866 				device_printf(adapter->dev,
2867 			    	"Insufficient MSIX vectors, using MSI\n");
2868 				goto msi;
2869 			}
2870 #ifdef EM_MULTIQUEUE
2871 		}
2872 #endif
2873 
2874 		if ((pci_alloc_msix(dev, &val) == 0)) {
2875 			device_printf(adapter->dev,
2876 			    "Using MSIX interrupts "
2877 			    "with %d vectors\n", val);
2878 			return (val);
2879 		}
2880 
2881 		/*
2882 		** If MSIX alloc failed or provided us with
2883 		** less than needed, free and fall through to MSI
2884 		*/
2885 		pci_release_msi(dev);
2886 	}
2887 msi:
2888 	if (adapter->msix_mem != NULL) {
2889 		bus_release_resource(dev, SYS_RES_MEMORY,
2890 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2891 		adapter->msix_mem = NULL;
2892 	}
2893 	val = 1;
2894 	if (pci_alloc_msi(dev, &val) == 0) {
2895 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2896 		return (val);
2897 	}
2898 	/* Should only happen due to manual configuration */
2899 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2900 	return (0);
2901 }
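
/*
 * Note: the vector budget checked above works out to one RX and one TX
 * vector per queue plus one link vector -- 3 for a single queue, 5 for
 * the two-queue EM_MULTIQUEUE configuration -- matching the allocation
 * loops in em_allocate_msix().
 */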
2902 
2903 
2904 /*
2905 ** The 3 following flush routines are used as a workaround in the
2906 ** I219 client parts and only for them.
2907 **
2908 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2909 **
2910 ** We want to clear all pending descriptors from the TX ring.
2911 ** Zeroing happens when the HW reads the regs. We assign the ring itself as
2912 ** the data buffer of the next descriptor; we don't care about the data since
2913 ** we are about to reset the HW.
2914 */
2915 static void
2916 em_flush_tx_ring(struct adapter *adapter)
2917 {
2918 	struct e1000_hw		*hw = &adapter->hw;
2919 	struct tx_ring		*txr = adapter->tx_rings;
2920 	struct e1000_tx_desc	*txd;
2921 	u32			tctl, txd_lower = E1000_TXD_CMD_IFCS;
2922 	u16			size = 512;
2923 
2924 	tctl = E1000_READ_REG(hw, E1000_TCTL);
2925 	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2926 
2927 	txd = &txr->tx_base[txr->next_avail_desc++];
2928 	if (txr->next_avail_desc == adapter->num_tx_desc)
2929 		txr->next_avail_desc = 0;
2930 
2931 	/* Just use the ring as a dummy buffer addr */
2932 	txd->buffer_addr = txr->txdma.dma_paddr;
2933 	txd->lower.data = htole32(txd_lower | size);
2934 	txd->upper.data = 0;
2935 
2936 	/* flush descriptors to memory before notifying the HW */
2937 	wmb();
2938 
2939 	E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2940 	mb();
2941 	usec_delay(250);
2942 }
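
/*
 * Note: the wmb() above orders the descriptor store ahead of the TDT
 * doorbell write, and the mb() plus the 250us delay presumably give the
 * hardware time to fetch and retire the dummy descriptor before the
 * caller proceeds to reset it.
 */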
2943 
2944 /*
2945 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2946 **
2947 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
2948 */
2949 static void
2950 em_flush_rx_ring(struct adapter *adapter)
2951 {
2952 	struct e1000_hw	*hw = &adapter->hw;
2953 	u32		rctl, rxdctl;
2954 
2955 	rctl = E1000_READ_REG(hw, E1000_RCTL);
2956 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2957 	E1000_WRITE_FLUSH(hw);
2958 	usec_delay(150);
2959 
2960 	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2961 	/* zero the lower 14 bits (prefetch and host thresholds) */
2962 	rxdctl &= 0xffffc000;
2963 	/*
2964 	 * update thresholds: prefetch threshold to 31, host threshold to 1
2965 	 * and make sure the granularity is "descriptors" and not "cache lines"
2966 	 */
2967 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2968 	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
2969 
2970 	/* momentarily enable the RX ring for the changes to take effect */
2971 	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2972 	E1000_WRITE_FLUSH(hw);
2973 	usec_delay(150);
2974 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2975 }
2976 
2977 /*
2978 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
2979 **
2980 ** In i219, the descriptor rings must be emptied before resetting the HW
2981 ** or before changing the device state to D3 during runtime (runtime PM).
2982 **
2983 ** Failure to do this will cause the HW to enter a unit hang state which can
2984 ** only be released by a PCI reset of the device.
2985 **
2986 */
2987 static void
2988 em_flush_desc_rings(struct adapter *adapter)
2989 {
2990 	struct e1000_hw	*hw = &adapter->hw;
2991 	device_t	dev = adapter->dev;
2992 	u16		hang_state;
2993 	u32		fext_nvm11, tdlen;
2994 
2995 	/* First, disable MULR fix in FEXTNVM11 */
2996 	fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
2997 	fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
2998 	E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
2999 
3000 	/* do nothing if we're not in a faulty state, or if the queue is empty */
3001 	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3002 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3003 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3004 		return;
3005 	em_flush_tx_ring(adapter);
3006 
3007 	/* recheck, maybe the fault is caused by the rx ring */
3008 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3009 	if (hang_state & FLUSH_DESC_REQUIRED)
3010 		em_flush_rx_ring(adapter);
3011 }
3012 
3013 
3014 /*********************************************************************
3015  *
3016  *  Initialize the hardware to a configuration
3017  *  as specified by the adapter structure.
3018  *
3019  **********************************************************************/
3020 static void
3021 em_reset(struct adapter *adapter)
3022 {
3023 	device_t	dev = adapter->dev;
3024 	if_t ifp = adapter->ifp;
3025 	struct e1000_hw	*hw = &adapter->hw;
3026 	u16		rx_buffer_size;
3027 	u32		pba;
3028 
3029 	INIT_DEBUGOUT("em_reset: begin");
3030 
3031 	/* Set up smart power down as default off on newer adapters. */
3032 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3033 	    hw->mac.type == e1000_82572)) {
3034 		u16 phy_tmp = 0;
3035 
3036 		/* Speed up time to link by disabling smart power down. */
3037 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3038 		phy_tmp &= ~IGP02E1000_PM_SPD;
3039 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3040 	}
3041 
3042 	/*
3043 	 * Packet Buffer Allocation (PBA)
3044 	 * Writing PBA sets the receive portion of the buffer;
3045 	 * the remainder is used for the transmit buffer.
3046 	 */
3047 	switch (hw->mac.type) {
3048 	/* Total Packet Buffer on these is 48K */
3049 	case e1000_82571:
3050 	case e1000_82572:
3051 	case e1000_80003es2lan:
3052 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3053 		break;
3054 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3055 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3056 		break;
3057 	case e1000_82574:
3058 	case e1000_82583:
3059 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3060 		break;
3061 	case e1000_ich8lan:
3062 		pba = E1000_PBA_8K;
3063 		break;
3064 	case e1000_ich9lan:
3065 	case e1000_ich10lan:
3066 		/* Boost Receive side for jumbo frames */
3067 		if (adapter->hw.mac.max_frame_size > 4096)
3068 			pba = E1000_PBA_14K;
3069 		else
3070 			pba = E1000_PBA_10K;
3071 		break;
3072 	case e1000_pchlan:
3073 	case e1000_pch2lan:
3074 	case e1000_pch_lpt:
3075 	case e1000_pch_spt:
3076 		pba = E1000_PBA_26K;
3077 		break;
3078 	default:
3079 		if (adapter->hw.mac.max_frame_size > 8192)
3080 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3081 		else
3082 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3083 	}
3084 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3085 
3086 	/*
3087 	 * These parameters control the automatic generation (Tx) and
3088 	 * response (Rx) to Ethernet PAUSE frames.
3089 	 * - High water mark should allow for at least two frames to be
3090 	 *   received after sending an XOFF.
3091 	 * - Low water mark works best when it is very near the high water mark.
3092 	 *   This allows the receiver to restart by sending XON when it has
3093 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
3094 	 *   restart after one full frame is pulled from the buffer. There
3095 	 *   could be several smaller frames in the buffer and if so they will
3096 	 *   not trigger the XON until their total number reduces the buffer
3097 	 *   by 1500.
3098 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3099 	 */
3100 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
3101 	hw->fc.high_water = rx_buffer_size -
3102 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
3103 	hw->fc.low_water = hw->fc.high_water - 1500;
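
	/*
	 * Worked example, assuming E1000_PBA_48K encodes 48 1KB units:
	 * rx_buffer_size = 48 << 10 = 49152 bytes; with a standard
	 * 1518-byte max frame, roundup2(1518, 1024) = 2048, so
	 * high_water = 49152 - 2048 = 47104 and low_water = 45604.
	 */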
3104 
3105 	if (adapter->fc) /* locally set flow control value? */
3106 		hw->fc.requested_mode = adapter->fc;
3107 	else
3108 		hw->fc.requested_mode = e1000_fc_full;
3109 
3110 	if (hw->mac.type == e1000_80003es2lan)
3111 		hw->fc.pause_time = 0xFFFF;
3112 	else
3113 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
3114 
3115 	hw->fc.send_xon = TRUE;
3116 
3117 	/* Device specific overrides/settings */
3118 	switch (hw->mac.type) {
3119 	case e1000_pchlan:
3120 		/* Workaround: no TX flow ctrl for PCH */
3121 		hw->fc.requested_mode = e1000_fc_rx_pause;
3122 		hw->fc.pause_time = 0xFFFF; /* override */
3123 		if (if_getmtu(ifp) > ETHERMTU) {
3124 			hw->fc.high_water = 0x3500;
3125 			hw->fc.low_water = 0x1500;
3126 		} else {
3127 			hw->fc.high_water = 0x5000;
3128 			hw->fc.low_water = 0x3000;
3129 		}
3130 		hw->fc.refresh_time = 0x1000;
3131 		break;
3132 	case e1000_pch2lan:
3133 	case e1000_pch_lpt:
3134 	case e1000_pch_spt:
3135 		hw->fc.high_water = 0x5C20;
3136 		hw->fc.low_water = 0x5048;
3137 		hw->fc.pause_time = 0x0650;
3138 		hw->fc.refresh_time = 0x0400;
3139 		/* Jumbos need adjusted PBA */
3140 		if (if_getmtu(ifp) > ETHERMTU)
3141 			E1000_WRITE_REG(hw, E1000_PBA, 12);
3142 		else
3143 			E1000_WRITE_REG(hw, E1000_PBA, 26);
3144 		break;
3145 	case e1000_ich9lan:
3146 	case e1000_ich10lan:
3147 		if (if_getmtu(ifp) > ETHERMTU) {
3148 			hw->fc.high_water = 0x2800;
3149 			hw->fc.low_water = hw->fc.high_water - 8;
3150 			break;
3151 		}
3152 		/* else fall thru */
3153 	default:
3154 		if (hw->mac.type == e1000_80003es2lan)
3155 			hw->fc.pause_time = 0xFFFF;
3156 		break;
3157 	}
3158 
3159 	/* I219 needs some special flushing to avoid hangs */
3160 	if (hw->mac.type == e1000_pch_spt)
3161 		em_flush_desc_rings(adapter);
3162 
3163 	/* Issue a global reset */
3164 	e1000_reset_hw(hw);
3165 	E1000_WRITE_REG(hw, E1000_WUC, 0);
3166 	em_disable_aspm(adapter);
3167 	/* and a re-init */
3168 	if (e1000_init_hw(hw) < 0) {
3169 		device_printf(dev, "Hardware Initialization Failed\n");
3170 		return;
3171 	}
3172 
3173 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3174 	e1000_get_phy_info(hw);
3175 	e1000_check_for_link(hw);
3176 	return;
3177 }
3178 
3179 /*********************************************************************
3180  *
3181  *  Setup networking device structure and register an interface.
3182  *
3183  **********************************************************************/
3184 static int
3185 em_setup_interface(device_t dev, struct adapter *adapter)
3186 {
3187 	if_t ifp;
3188 
3189 	INIT_DEBUGOUT("em_setup_interface: begin");
3190 
3191 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3192 	if (ifp == NULL) {
3193 		device_printf(dev, "can not allocate ifnet structure\n");
3194 		return (-1);
3195 	}
3196 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3197 	if_setdev(ifp, dev);
3198 	if_setinitfn(ifp, em_init);
3199 	if_setsoftc(ifp, adapter);
3200 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3201 	if_setioctlfn(ifp, em_ioctl);
3202 	if_setgetcounterfn(ifp, em_get_counter);
3203 
3204 	/* TSO parameters */
3205 	ifp->if_hw_tsomax = IP_MAXPACKET;
3206 	/* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3207 	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3208 	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3209 
3210 #ifdef EM_MULTIQUEUE
3211 	/* Multiqueue stack interface */
3212 	if_settransmitfn(ifp, em_mq_start);
3213 	if_setqflushfn(ifp, em_qflush);
3214 #else
3215 	if_setstartfn(ifp, em_start);
3216 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3217 	if_setsendqready(ifp);
3218 #endif
3219 
3220 	ether_ifattach(ifp, adapter->hw.mac.addr);
3221 
3222 	if_setcapabilities(ifp, 0);
3223 	if_setcapenable(ifp, 0);
3224 
3225 
3226 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3227 	    IFCAP_TSO4, 0);
3228 	/*
3229 	 * Tell the upper layer(s) we
3230 	 * support full VLAN capability
3231 	 */
3232 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3233 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3234 	    IFCAP_VLAN_MTU, 0);
3235 	if_setcapenable(ifp, if_getcapabilities(ifp));
3236 
3237 	/*
3238 	** Don't turn this on by default: if vlans are
3239 	** created on another pseudo device (e.g. lagg),
3240 	** then vlan events are not passed thru, breaking
3241 	** operation, but with HW FILTER off it works. If
3242 	** using vlans directly on the em driver you can
3243 	** enable this and get full hardware tag filtering.
3244 	*/
3245 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
3246 
3247 #ifdef DEVICE_POLLING
3248 	if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
3249 #endif
3250 
3251 	/* Enable only WOL MAGIC by default */
3252 	if (adapter->wol) {
3253 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3254 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3255 	}
3256 
3257 	/*
3258 	 * Specify the media types supported by this adapter and register
3259 	 * callbacks to update media and link information
3260 	 */
3261 	ifmedia_init(&adapter->media, IFM_IMASK,
3262 	    em_media_change, em_media_status);
3263 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3264 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3265 		u_char fiber_type = IFM_1000_SX;	/* default type */
3266 
3267 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3268 			    0, NULL);
3269 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3270 	} else {
3271 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3272 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3273 			    0, NULL);
3274 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3275 			    0, NULL);
3276 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3277 			    0, NULL);
3278 		if (adapter->hw.phy.type != e1000_phy_ife) {
3279 			ifmedia_add(&adapter->media,
3280 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3281 			ifmedia_add(&adapter->media,
3282 				IFM_ETHER | IFM_1000_T, 0, NULL);
3283 		}
3284 	}
3285 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3286 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3287 	return (0);
3288 }
3289 
3290 
3291 /*
3292  * Manage DMA'able memory.
3293  */
3294 static void
3295 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3296 {
3297 	if (error)
3298 		return;
3299 	*(bus_addr_t *) arg = segs[0].ds_addr;
3300 }
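
/*
 * em_dmamap_cb is the usual bus_dmamap_load() callback pattern: the tag
 * created in em_dma_malloc() uses nsegments = 1, so the load yields a
 * single contiguous segment and the callback simply hands
 * segs[0].ds_addr (the bus address) back through the opaque arg
 * pointer.
 */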
3301 
3302 static int
3303 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3304         struct em_dma_alloc *dma, int mapflags)
3305 {
3306 	int error;
3307 
3308 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3309 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3310 				BUS_SPACE_MAXADDR,	/* lowaddr */
3311 				BUS_SPACE_MAXADDR,	/* highaddr */
3312 				NULL, NULL,		/* filter, filterarg */
3313 				size,			/* maxsize */
3314 				1,			/* nsegments */
3315 				size,			/* maxsegsize */
3316 				0,			/* flags */
3317 				NULL,			/* lockfunc */
3318 				NULL,			/* lockarg */
3319 				&dma->dma_tag);
3320 	if (error) {
3321 		device_printf(adapter->dev,
3322 		    "%s: bus_dma_tag_create failed: %d\n",
3323 		    __func__, error);
3324 		goto fail_0;
3325 	}
3326 
3327 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3328 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3329 	if (error) {
3330 		device_printf(adapter->dev,
3331 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3332 		    __func__, (uintmax_t)size, error);
3333 		goto fail_2;
3334 	}
3335 
3336 	dma->dma_paddr = 0;
3337 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3338 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3339 	if (error || dma->dma_paddr == 0) {
3340 		device_printf(adapter->dev,
3341 		    "%s: bus_dmamap_load failed: %d\n",
3342 		    __func__, error);
3343 		goto fail_3;
3344 	}
3345 
3346 	return (0);
3347 
3348 fail_3:
3349 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3350 fail_2:
3351 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3352 	bus_dma_tag_destroy(dma->dma_tag);
3353 fail_0:
3354 	dma->dma_tag = NULL;
3355 
3356 	return (error);
3357 }
3358 
3359 static void
3360 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3361 {
3362 	if (dma->dma_tag == NULL)
3363 		return;
3364 	if (dma->dma_paddr != 0) {
3365 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3366 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3367 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3368 		dma->dma_paddr = 0;
3369 	}
3370 	if (dma->dma_vaddr != NULL) {
3371 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3372 		dma->dma_vaddr = NULL;
3373 	}
3374 	bus_dma_tag_destroy(dma->dma_tag);
3375 	dma->dma_tag = NULL;
3376 }
3377 
3378 
3379 /*********************************************************************
3380  *
3381  *  Allocate memory for the transmit and receive rings, and then
3382  *  the descriptors associated with each, called only once at attach.
3383  *
3384  **********************************************************************/
3385 static int
3386 em_allocate_queues(struct adapter *adapter)
3387 {
3388 	device_t		dev = adapter->dev;
3389 	struct tx_ring		*txr = NULL;
3390 	struct rx_ring		*rxr = NULL;
3391 	int rsize, tsize, error = E1000_SUCCESS;
3392 	int txconf = 0, rxconf = 0;
3393 
3394 
3395 	/* Allocate the TX ring struct memory */
3396 	if (!(adapter->tx_rings =
3397 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3398 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3399 		device_printf(dev, "Unable to allocate TX ring memory\n");
3400 		error = ENOMEM;
3401 		goto fail;
3402 	}
3403 
3404 	/* Now allocate the RX */
3405 	if (!(adapter->rx_rings =
3406 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3407 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3408 		device_printf(dev, "Unable to allocate RX ring memory\n");
3409 		error = ENOMEM;
3410 		goto rx_fail;
3411 	}
3412 
3413 	tsize = roundup2(adapter->num_tx_desc *
3414 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
	/*
	 * Now set up the TX queues; txconf is needed to handle the
	 * possibility that things fail midcourse, in which case we
	 * need to unwind the allocations gracefully.
	 */
3420 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3421 		/* Set up some basics */
3422 		txr = &adapter->tx_rings[i];
3423 		txr->adapter = adapter;
3424 		txr->me = i;
3425 
3426 		/* Initialize the TX lock */
3427 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3428 		    device_get_nameunit(dev), txr->me);
3429 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3430 
3431 		if (em_dma_malloc(adapter, tsize,
3432 			&txr->txdma, BUS_DMA_NOWAIT)) {
3433 			device_printf(dev,
3434 			    "Unable to allocate TX Descriptor memory\n");
3435 			error = ENOMEM;
3436 			goto err_tx_desc;
3437 		}
3438 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3439 		bzero((void *)txr->tx_base, tsize);
3440 
		if (em_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
3447 #if __FreeBSD_version >= 800000
3448 		/* Allocate a buf ring */
3449 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3450 		    M_WAITOK, &txr->tx_mtx);
3451 #endif
3452 	}
3453 
3454 	/*
3455 	 * Next the RX queues...
3456 	 */
3457 	rsize = roundup2(adapter->num_rx_desc *
3458 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3459 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3460 		rxr = &adapter->rx_rings[i];
3461 		rxr->adapter = adapter;
3462 		rxr->me = i;
3463 
3464 		/* Initialize the RX lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
3467 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3468 
3469 		if (em_dma_malloc(adapter, rsize,
3470 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3471 			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
3473 			error = ENOMEM;
3474 			goto err_rx_desc;
3475 		}
3476 		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3477 		bzero((void *)rxr->rx_base, rsize);
3478 
		/* Allocate receive buffers for the ring */
3480 		if (em_allocate_receive_buffers(rxr)) {
3481 			device_printf(dev,
3482 			    "Critical Failure setting up receive buffers\n");
3483 			error = ENOMEM;
3484 			goto err_rx_desc;
3485 		}
3486 	}
3487 
3488 	return (0);
3489 
err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		em_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
		em_dma_free(adapter, &txr->txdma);
#if __FreeBSD_version >= 800000
		/* Only fully set up rings have a buf ring to release */
		if (txr->br != NULL)
			buf_ring_free(txr->br, M_DEVBUF);
#endif
	}
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
fail:
	return (error);
3504 }
3505 
3506 
3507 /*********************************************************************
3508  *
3509  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3510  *  the information needed to transmit a packet on the wire. This is
3511  *  called only once at attach, setup is done every reset.
3512  *
3513  **********************************************************************/
3514 static int
3515 em_allocate_transmit_buffers(struct tx_ring *txr)
3516 {
3517 	struct adapter *adapter = txr->adapter;
3518 	device_t dev = adapter->dev;
3519 	struct em_txbuffer *txbuf;
3520 	int error, i;
3521 
3522 	/*
3523 	 * Setup DMA descriptor areas.
3524 	 */
3525 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3526 			       1, 0,			/* alignment, bounds */
3527 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3528 			       BUS_SPACE_MAXADDR,	/* highaddr */
3529 			       NULL, NULL,		/* filter, filterarg */
3530 			       EM_TSO_SIZE,		/* maxsize */
3531 			       EM_MAX_SCATTER,		/* nsegments */
3532 			       PAGE_SIZE,		/* maxsegsize */
3533 			       0,			/* flags */
3534 			       NULL,			/* lockfunc */
3535 			       NULL,			/* lockfuncarg */
3536 			       &txr->txtag))) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
3538 		goto fail;
3539 	}
3540 
3541 	if (!(txr->tx_buffers =
3542 	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3543 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3544 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3545 		error = ENOMEM;
3546 		goto fail;
3547 	}
3548 
	/* Create the descriptor buffer dma maps */
3550 	txbuf = txr->tx_buffers;
3551 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3552 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3553 		if (error != 0) {
3554 			device_printf(dev, "Unable to create TX DMA map\n");
3555 			goto fail;
3556 		}
3557 	}
3558 
	return (0);
3560 fail:
	/* We free it all; this handles the case where we failed partway through */
3562 	em_free_transmit_structures(adapter);
3563 	return (error);
3564 }
3565 
3566 /*********************************************************************
3567  *
3568  *  Initialize a transmit ring.
3569  *
3570  **********************************************************************/
3571 static void
3572 em_setup_transmit_ring(struct tx_ring *txr)
3573 {
3574 	struct adapter *adapter = txr->adapter;
3575 	struct em_txbuffer *txbuf;
3576 	int i;
3577 #ifdef DEV_NETMAP
3578 	struct netmap_slot *slot;
3579 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3580 #endif /* DEV_NETMAP */
3581 
3582 	/* Clear the old descriptor contents */
3583 	EM_TX_LOCK(txr);
3584 #ifdef DEV_NETMAP
3585 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3586 #endif /* DEV_NETMAP */
3587 
3588 	bzero((void *)txr->tx_base,
3589 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3590 	/* Reset indices */
3591 	txr->next_avail_desc = 0;
3592 	txr->next_to_clean = 0;
3593 
3594 	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
3596 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3597 		if (txbuf->m_head != NULL) {
3598 			bus_dmamap_sync(txr->txtag, txbuf->map,
3599 			    BUS_DMASYNC_POSTWRITE);
3600 			bus_dmamap_unload(txr->txtag, txbuf->map);
3601 			m_freem(txbuf->m_head);
3602 			txbuf->m_head = NULL;
3603 		}
3604 #ifdef DEV_NETMAP
3605 		if (slot) {
3606 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3607 			uint64_t paddr;
3608 			void *addr;
3609 
3610 			addr = PNMB(na, slot + si, &paddr);
3611 			txr->tx_base[i].buffer_addr = htole64(paddr);
3612 			/* reload the map for netmap mode */
3613 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3614 		}
3615 #endif /* DEV_NETMAP */
3616 
3617 		/* clear the watch index */
3618 		txbuf->next_eop = -1;
	}
3620 
3621 	/* Set number of descriptors available */
3622 	txr->tx_avail = adapter->num_tx_desc;
3623 	txr->busy = EM_TX_IDLE;
3624 
3625 	/* Clear checksum offload context. */
3626 	txr->last_hw_offload = 0;
3627 	txr->last_hw_ipcss = 0;
3628 	txr->last_hw_ipcso = 0;
3629 	txr->last_hw_tucss = 0;
3630 	txr->last_hw_tucso = 0;
3631 
3632 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3633 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3634 	EM_TX_UNLOCK(txr);
3635 }
3636 
3637 /*********************************************************************
3638  *
3639  *  Initialize all transmit rings.
3640  *
3641  **********************************************************************/
3642 static void
3643 em_setup_transmit_structures(struct adapter *adapter)
3644 {
3645 	struct tx_ring *txr = adapter->tx_rings;
3646 
3647 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3648 		em_setup_transmit_ring(txr);
3649 
3650 	return;
3651 }
3652 
3653 /*********************************************************************
3654  *
3655  *  Enable transmit unit.
3656  *
3657  **********************************************************************/
3658 static void
3659 em_initialize_transmit_unit(struct adapter *adapter)
3660 {
3661 	struct tx_ring	*txr = adapter->tx_rings;
3662 	struct e1000_hw	*hw = &adapter->hw;
3663 	u32	tctl, txdctl = 0, tarc, tipg = 0;
3664 
	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3666 
3667 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3668 		u64 bus_addr = txr->txdma.dma_paddr;
3669 		/* Base and Len of TX Ring */
3670 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3671 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3672 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3673 	    	    (u32)(bus_addr >> 32));
3674 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3675 	    	    (u32)bus_addr);
3676 		/* Init the HEAD/TAIL indices */
3677 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3678 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3679 
3680 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3681 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3682 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3683 
3684 		txr->busy = EM_TX_IDLE;
		txdctl = 0; /* clear txdctl */
		txdctl |= 0x1f; /* PTHRESH */
		txdctl |= 1 << 8; /* HTHRESH */
		txdctl |= 1 << 16; /* WTHRESH */
		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
		txdctl |= E1000_TXDCTL_GRAN;
		txdctl |= 1 << 25; /* LWTHRESH */
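		/*
		 * Worked example (assuming E1000_TXDCTL_GRAN == (1 << 24)):
		 * the value written below is
		 *   0x1f | (1 << 8) | (1 << 16) | (1 << 22) | (1 << 24) |
		 *   (1 << 25) == 0x0341011f
		 * i.e. prefetch threshold 31, host and writeback thresholds
		 * of 1, expressed in descriptor granularity.
		 */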
3692 
		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3694 	}
3695 
3696 	/* Set the default values for the Tx Inter Packet Gap timer */
3697 	switch (adapter->hw.mac.type) {
3698 	case e1000_80003es2lan:
3699 		tipg = DEFAULT_82543_TIPG_IPGR1;
3700 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3701 		    E1000_TIPG_IPGR2_SHIFT;
3702 		break;
3703 	default:
3704 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3705 		    (adapter->hw.phy.media_type ==
3706 		    e1000_media_type_internal_serdes))
3707 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3708 		else
3709 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3710 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3711 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3712 	}
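	/*
	 * TIPG layout sketch (assuming the standard e1000 shift values,
	 * IPGR1 at bit 10 and IPGR2 at bit 20): IPGT occupies bits [9:0],
	 * IPGR1 bits [19:10] and IPGR2 bits [29:20], which is why the
	 * fields above are OR'd in at those shifts.
	 */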
3713 
3714 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3715 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3716 
	if (adapter->hw.mac.type >= e1000_82540)
3718 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3719 		    adapter->tx_abs_int_delay.value);
3720 
3721 	if ((adapter->hw.mac.type == e1000_82571) ||
3722 	    (adapter->hw.mac.type == e1000_82572)) {
3723 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3724 		tarc |= TARC_SPEED_MODE_BIT;
3725 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3726 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3727 		/* errata: program both queues to unweighted RR */
3728 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3729 		tarc |= 1;
3730 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3731 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3732 		tarc |= 1;
3733 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3734 	} else if (adapter->hw.mac.type == e1000_82574) {
3735 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3736 		tarc |= TARC_ERRATA_BIT;
		if (adapter->num_queues > 1) {
3738 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3739 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3740 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3741 		} else
3742 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3743 	}
3744 
3745 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3746 	if (adapter->tx_int_delay.value > 0)
3747 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3748 
3749 	/* Program the Transmit Control Register */
3750 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3751 	tctl &= ~E1000_TCTL_CT;
3752 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3753 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3754 
3755 	if (adapter->hw.mac.type >= e1000_82571)
3756 		tctl |= E1000_TCTL_MULR;
3757 
3758 	/* This write will effectively turn on the transmit unit. */
3759 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3760 
3761 	if (hw->mac.type == e1000_pch_spt) {
3762 		u32 reg;
3763 		reg = E1000_READ_REG(hw, E1000_IOSFPC);
3764 		reg |= E1000_RCTL_RDMTS_HEX;
3765 		E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3766 		reg = E1000_READ_REG(hw, E1000_TARC(0));
3767 		reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3768 		E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3769 	}
3770 }
3771 
3772 
3773 /*********************************************************************
3774  *
3775  *  Free all transmit rings.
3776  *
3777  **********************************************************************/
3778 static void
3779 em_free_transmit_structures(struct adapter *adapter)
3780 {
3781 	struct tx_ring *txr = adapter->tx_rings;
3782 
3783 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3784 		EM_TX_LOCK(txr);
3785 		em_free_transmit_buffers(txr);
3786 		em_dma_free(adapter, &txr->txdma);
3787 		EM_TX_UNLOCK(txr);
3788 		EM_TX_LOCK_DESTROY(txr);
3789 	}
3790 
3791 	free(adapter->tx_rings, M_DEVBUF);
3792 }
3793 
3794 /*********************************************************************
3795  *
3796  *  Free transmit ring related data structures.
3797  *
3798  **********************************************************************/
3799 static void
3800 em_free_transmit_buffers(struct tx_ring *txr)
3801 {
3802 	struct adapter		*adapter = txr->adapter;
3803 	struct em_txbuffer	*txbuf;
3804 
3805 	INIT_DEBUGOUT("free_transmit_ring: begin");
3806 
3807 	if (txr->tx_buffers == NULL)
3808 		return;
3809 
3810 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3811 		txbuf = &txr->tx_buffers[i];
3812 		if (txbuf->m_head != NULL) {
3813 			bus_dmamap_sync(txr->txtag, txbuf->map,
3814 			    BUS_DMASYNC_POSTWRITE);
3815 			bus_dmamap_unload(txr->txtag,
3816 			    txbuf->map);
3817 			m_freem(txbuf->m_head);
3818 			txbuf->m_head = NULL;
3819 			if (txbuf->map != NULL) {
3820 				bus_dmamap_destroy(txr->txtag,
3821 				    txbuf->map);
3822 				txbuf->map = NULL;
3823 			}
3824 		} else if (txbuf->map != NULL) {
3825 			bus_dmamap_unload(txr->txtag,
3826 			    txbuf->map);
3827 			bus_dmamap_destroy(txr->txtag,
3828 			    txbuf->map);
3829 			txbuf->map = NULL;
3830 		}
3831 	}
3832 #if __FreeBSD_version >= 800000
3833 	if (txr->br != NULL)
3834 		buf_ring_free(txr->br, M_DEVBUF);
3835 #endif
3836 	if (txr->tx_buffers != NULL) {
3837 		free(txr->tx_buffers, M_DEVBUF);
3838 		txr->tx_buffers = NULL;
3839 	}
3840 	if (txr->txtag != NULL) {
3841 		bus_dma_tag_destroy(txr->txtag);
3842 		txr->txtag = NULL;
3843 	}
3844 	return;
3845 }
3846 
3847 
3848 /*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. The occasion
 *  of a context change can be a performance detriment, and
 *  might be better just disabled. The reason arises in the way
 *  in which the controller supports pipelined requests from the
 *  Tx data DMA. Up to four requests can be pipelined, and they may
 *  belong to the same packet or to multiple packets. However all
 *  requests for one packet are issued before a request is issued
 *  for a subsequent packet, and if a request for the next packet
 *  requires a context change that request will be stalled
 *  until the previous request completes. This means setting up
 *  a new context effectively disables pipelined Tx data DMA,
 *  which in turn greatly slows down performance when sending
 *  small frames.
3863  **********************************************************************/
3864 static void
3865 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3866     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3867 {
3868 	struct adapter			*adapter = txr->adapter;
3869 	struct e1000_context_desc	*TXD = NULL;
3870 	struct em_txbuffer		*tx_buffer;
3871 	int				cur, hdr_len;
3872 	u32				cmd = 0;
3873 	u16				offload = 0;
3874 	u8				ipcso, ipcss, tucso, tucss;
3875 
3876 	ipcss = ipcso = tucss = tucso = 0;
3877 	hdr_len = ip_off + (ip->ip_hl << 2);
3878 	cur = txr->next_avail_desc;
3879 
3880 	/* Setup of IP header checksum. */
3881 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3882 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3883 		offload |= CSUM_IP;
3884 		ipcss = ip_off;
3885 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3886 		/*
3887 		 * Start offset for header checksum calculation.
3888 		 * End offset for header checksum calculation.
3889 		 * Offset of place to put the checksum.
3890 		 */
3891 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3892 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3893 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3894 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3895 		cmd |= E1000_TXD_CMD_IP;
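		/*
		 * Worked example: for an untagged Ethernet frame
		 * (ip_off == 14) with a 20-byte IP header (ip_hl == 5),
		 * ipcss == 14, ipcse == 34 and, since ip_sum sits 10
		 * bytes into the IP header, ipcso == 24.
		 */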
3896 	}
3897 
3898 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		offload |= CSUM_TCP;
		tucss = hdr_len;
		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
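		/*
		 * Worked example: with hdr_len == 34 (14-byte Ethernet
		 * header plus 20-byte IP header) and th_sum sitting 16
		 * bytes into the TCP header, tucss == 34 and tucso == 50.
		 */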
		/*
		 * The 82574L can only remember the *last* context used,
		 * regardless of the queue it was used for.  We cannot reuse
		 * contexts on this hardware platform and must generate a new
		 * context every time.  82574L hardware spec, section 7.2.6,
		 * second note.
		 */
		if (adapter->num_queues < 2) {
			/*
			 * Setting up a new checksum offload context for
			 * every frame costs the hardware a lot of processing
			 * time and badly hurts performance for small frames,
			 * so avoid it if the driver can reuse the previously
			 * configured context.
			 */
			if (txr->last_hw_offload == offload) {
				if (offload & CSUM_IP) {
					if (txr->last_hw_ipcss == ipcss &&
					    txr->last_hw_ipcso == ipcso &&
					    txr->last_hw_tucss == tucss &&
					    txr->last_hw_tucso == tucso)
						return;
				} else {
					if (txr->last_hw_tucss == tucss &&
					    txr->last_hw_tucso == tucso)
						return;
				}
			}
			txr->last_hw_offload = offload;
			txr->last_hw_tucss = tucss;
			txr->last_hw_tucso = tucso;
3935 		}
		/*
		 * Start offset for payload checksum calculation.
		 * End offset for payload checksum calculation.
		 * Offset of place to put the checksum.
		 */
		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
		TXD->upper_setup.tcp_fields.tucss = tucss;
		TXD->upper_setup.tcp_fields.tucse = htole16(0);
		TXD->upper_setup.tcp_fields.tucso = tucso;
		cmd |= E1000_TXD_CMD_TCP;
	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		tucss = hdr_len;
		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
		/*
		 * The 82574L can only remember the *last* context used,
		 * regardless of the queue it was used for.  We cannot reuse
		 * contexts on this hardware platform and must generate a new
		 * context every time.  82574L hardware spec, section 7.2.6,
		 * second note.
		 */
		if (adapter->num_queues < 2) {
			/*
			 * Setting up a new checksum offload context for
			 * every frame costs the hardware a lot of processing
			 * time and badly hurts performance for small frames,
			 * so avoid it if the driver can reuse the previously
			 * configured context.
			 */
			if (txr->last_hw_offload == offload) {
				if (offload & CSUM_IP) {
					if (txr->last_hw_ipcss == ipcss &&
					    txr->last_hw_ipcso == ipcso &&
					    txr->last_hw_tucss == tucss &&
					    txr->last_hw_tucso == tucso)
						return;
				} else {
					if (txr->last_hw_tucss == tucss &&
					    txr->last_hw_tucso == tucso)
						return;
				}
			}
			txr->last_hw_offload = offload;
			txr->last_hw_tucss = tucss;
			txr->last_hw_tucso = tucso;
3982 		}
		/*
		 * Start offset for payload checksum calculation.
		 * End offset for payload checksum calculation.
		 * Offset of place to put the checksum.
		 */
		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
		TXD->upper_setup.tcp_fields.tucss = tucss;
		TXD->upper_setup.tcp_fields.tucse = htole16(0);
		TXD->upper_setup.tcp_fields.tucso = tucso;
	}
3993 
	if (offload & CSUM_IP) {
		txr->last_hw_ipcss = ipcss;
		txr->last_hw_ipcso = ipcso;
	}
3998 
3999 	TXD->tcp_seg_setup.data = htole32(0);
4000 	TXD->cmd_and_length =
4001 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4002 	tx_buffer = &txr->tx_buffers[cur];
4003 	tx_buffer->m_head = NULL;
4004 	tx_buffer->next_eop = -1;
4005 
4006 	if (++cur == adapter->num_tx_desc)
4007 		cur = 0;
4008 
4009 	txr->tx_avail--;
4010 	txr->next_avail_desc = cur;
4011 }
4012 
4013 
4014 /**********************************************************************
4015  *
4016  *  Setup work for hardware segmentation offload (TSO)
4017  *
4018  **********************************************************************/
4019 static void
4020 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4021     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4022 {
4023 	struct adapter			*adapter = txr->adapter;
4024 	struct e1000_context_desc	*TXD;
4025 	struct em_txbuffer		*tx_buffer;
4026 	int cur, hdr_len;
4027 
	/*
	 * In theory we could reuse an existing TSO context if and only
	 * if the frame is the same type (IP/TCP) and has the same MSS.
	 * However, checking whether a frame matches the previous IP/TCP
	 * structure is expensive, so just skip that and always establish
	 * a new TSO context.
	 */
4035 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4036 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
4037 		      E1000_TXD_DTYP_D |	/* Data descr type */
4038 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
4039 
4040 	/* IP and/or TCP header checksum calculation and insertion. */
4041 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4042 
4043 	cur = txr->next_avail_desc;
4044 	tx_buffer = &txr->tx_buffers[cur];
4045 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4046 
4047 	/*
4048 	 * Start offset for header checksum calculation.
4049 	 * End offset for header checksum calculation.
	 * Offset of place to put the checksum.
4051 	 */
4052 	TXD->lower_setup.ip_fields.ipcss = ip_off;
4053 	TXD->lower_setup.ip_fields.ipcse =
4054 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
4055 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
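	/*
	 * Worked example: an untagged frame (ip_off == 14) with a 20-byte
	 * IP header gives ipcss == 14, ipcse == 33 and ipcso == 24; note
	 * that this path programs an inclusive end offset, unlike the
	 * plain checksum path above.
	 */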
4056 	/*
4057 	 * Start offset for payload checksum calculation.
4058 	 * End offset for payload checksum calculation.
4059 	 * Offset of place to put the checksum.
4060 	 */
4061 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4062 	TXD->upper_setup.tcp_fields.tucse = 0;
4063 	TXD->upper_setup.tcp_fields.tucso =
4064 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4065 	/*
4066 	 * Payload size per packet w/o any headers.
4067 	 * Length of all headers up to payload.
4068 	 */
4069 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4070 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4071 
4072 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
4073 				E1000_TXD_CMD_DEXT |	/* Extended descr */
4074 				E1000_TXD_CMD_TSE |	/* TSE context */
4075 				E1000_TXD_CMD_IP |	/* Do IP csum */
4076 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
4077 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
4078 
4079 	tx_buffer->m_head = NULL;
4080 	tx_buffer->next_eop = -1;
4081 
4082 	if (++cur == adapter->num_tx_desc)
4083 		cur = 0;
4084 
4085 	txr->tx_avail--;
4086 	txr->next_avail_desc = cur;
4087 	txr->tx_tso = TRUE;
4088 }
4089 
4090 
4091 /**********************************************************************
4092  *
4093  *  Examine each tx_buffer in the used queue. If the hardware is done
4094  *  processing the packet then free associated resources. The
4095  *  tx_buffer is put back on the free queue.
4096  *
4097  **********************************************************************/
4098 static void
4099 em_txeof(struct tx_ring *txr)
4100 {
4101 	struct adapter	*adapter = txr->adapter;
	int first, last, done, processed;
	struct em_txbuffer *tx_buffer;
	struct e1000_tx_desc *tx_desc, *eop_desc;
4105 	if_t ifp = adapter->ifp;
4106 
4107 	EM_TX_LOCK_ASSERT(txr);
4108 #ifdef DEV_NETMAP
4109 	if (netmap_tx_irq(ifp, txr->me))
4110 		return;
4111 #endif /* DEV_NETMAP */
4112 
4113 	/* No work, make sure hang detection is disabled */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->busy = EM_TX_IDLE;
		return;
	}

	processed = 0;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];
4125 
	/*
	 * Advance 'last' to the index of the first descriptor AFTER
	 * the EOP of the first packet, so that the inner while loop
	 * can use a simple comparison as its termination test.
	 */
4132 	if (++last == adapter->num_tx_desc)
4133  		last = 0;
4134 	done = last;
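	/*
	 * Example: with 1024 descriptors, first == 1020 and an EOP at
	 * index 1023 give done == 0, so the cleanup below wraps through
	 * the end of the ring naturally.
	 */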
4135 
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;
			++processed;
4147 
4148 			if (tx_buffer->m_head) {
4149 				bus_dmamap_sync(txr->txtag,
4150 				    tx_buffer->map,
4151 				    BUS_DMASYNC_POSTWRITE);
4152 				bus_dmamap_unload(txr->txtag,
4153 				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
4157 			tx_buffer->next_eop = -1;
4158 
			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
4163 			tx_desc = &txr->tx_base[first];
4164 		}
4165 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4166 		/* See if we can continue to the next packet */
4167 		last = tx_buffer->next_eop;
4168 		if (last != -1) {
4169         		eop_desc = &txr->tx_base[last];
4170 			/* Get new done point */
			if (++last == adapter->num_tx_desc)
				last = 0;
4172 			done = last;
4173 		} else
4174 			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;
4180 
	/*
	** Hang detection: we know there's work outstanding
	** or the early return above would have been taken, so
	** no descriptor processed here indicates a potential hang.
	** The local timer will examine this and do a reset if needed.
	*/
4187 	if (processed == 0) {
4188 		if (txr->busy != EM_TX_HUNG)
4189 			++txr->busy;
4190 	} else /* At least one descriptor was cleaned */
4191 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4192 
	/*
	 * If we have a minimum free, clear IFF_DRV_OACTIVE
	 * to tell the stack that it is OK to send packets.
	 * Notice that all writes of OACTIVE happen under the
	 * TX lock which, with a single queue, guarantees
	 * sanity.
	 */
	if (txr->tx_avail >= EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
	}
4203 
4204 	/* Disable hang detection if all clean */
4205 	if (txr->tx_avail == adapter->num_tx_desc)
4206 		txr->busy = EM_TX_IDLE;
4207 }
4208 
4209 /*********************************************************************
4210  *
4211  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4212  *
4213  **********************************************************************/
4214 static void
4215 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4216 {
4217 	struct adapter		*adapter = rxr->adapter;
4218 	struct mbuf		*m;
4219 	bus_dma_segment_t	segs;
4220 	struct em_rxbuffer	*rxbuf;
4221 	int			i, j, error, nsegs;
4222 	bool			cleaned = FALSE;
4223 
4224 	i = j = rxr->next_to_refresh;
4225 	/*
4226 	** Get one descriptor beyond
4227 	** our work mark to control
4228 	** the loop.
4229 	*/
4230 	if (++j == adapter->num_rx_desc)
4231 		j = 0;
4232 
4233 	while (j != limit) {
4234 		rxbuf = &rxr->rx_buffers[i];
4235 		if (rxbuf->m_head == NULL) {
4236 			m = m_getjcl(M_NOWAIT, MT_DATA,
4237 			    M_PKTHDR, adapter->rx_mbuf_sz);
			/*
			** If we have a temporary resource shortage
			** that causes a failure, just abort the refresh
			** for now; we will return to this point when
			** reinvoked from em_rxeof.
			*/
4244 			if (m == NULL)
4245 				goto update;
4246 		} else
4247 			m = rxbuf->m_head;
4248 
4249 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4250 		m->m_flags |= M_PKTHDR;
4251 		m->m_data = m->m_ext.ext_buf;
4252 
4253 		/* Use bus_dma machinery to setup the memory mapping  */
4254 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4255 		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
4256 		if (error != 0) {
4257 			printf("Refresh mbufs: hdr dmamap load"
4258 			    " failure - %d\n", error);
4259 			m_free(m);
4260 			rxbuf->m_head = NULL;
4261 			goto update;
4262 		}
4263 		rxbuf->m_head = m;
4264 		rxbuf->paddr = segs.ds_addr;
4265 		bus_dmamap_sync(rxr->rxtag,
4266 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4267 		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4268 		cleaned = TRUE;
4269 
		i = j; /* Next is precalculated for us */
4271 		rxr->next_to_refresh = i;
4272 		/* Calculate next controlling index */
4273 		if (++j == adapter->num_rx_desc)
4274 			j = 0;
4275 	}
4276 update:
	/*
	** Update the tail pointer only if, and only
	** as far as, we have refreshed descriptors.
	*/
4281 	if (cleaned)
4282 		E1000_WRITE_REG(&adapter->hw,
4283 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4284 
4285 	return;
4286 }
4287 
4288 
4289 /*********************************************************************
4290  *
4291  *  Allocate memory for rx_buffer structures. Since we use one
4292  *  rx_buffer per received packet, the maximum number of rx_buffer's
4293  *  that we'll need is equal to the number of receive descriptors
4294  *  that we've allocated.
4295  *
4296  **********************************************************************/
4297 static int
4298 em_allocate_receive_buffers(struct rx_ring *rxr)
4299 {
4300 	struct adapter		*adapter = rxr->adapter;
4301 	device_t		dev = adapter->dev;
4302 	struct em_rxbuffer	*rxbuf;
4303 	int			error;
4304 
4305 	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4306 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4307 	if (rxr->rx_buffers == NULL) {
4308 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4309 		return (ENOMEM);
4310 	}
4311 
4312 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4313 				1, 0,			/* alignment, bounds */
4314 				BUS_SPACE_MAXADDR,	/* lowaddr */
4315 				BUS_SPACE_MAXADDR,	/* highaddr */
4316 				NULL, NULL,		/* filter, filterarg */
4317 				MJUM9BYTES,		/* maxsize */
4318 				1,			/* nsegments */
4319 				MJUM9BYTES,		/* maxsegsize */
4320 				0,			/* flags */
4321 				NULL,			/* lockfunc */
4322 				NULL,			/* lockarg */
4323 				&rxr->rxtag);
4324 	if (error) {
4325 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4326 		    __func__, error);
4327 		goto fail;
4328 	}
4329 
	rxbuf = rxr->rx_buffers;
	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4333 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4334 		if (error) {
4335 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4336 			    __func__, error);
4337 			goto fail;
4338 		}
4339 	}
4340 
4341 	return (0);
4342 
4343 fail:
4344 	em_free_receive_structures(adapter);
4345 	return (error);
4346 }
4347 
4348 
4349 /*********************************************************************
4350  *
4351  *  Initialize a receive ring and its buffers.
4352  *
4353  **********************************************************************/
4354 static int
4355 em_setup_receive_ring(struct rx_ring *rxr)
4356 {
4357 	struct	adapter 	*adapter = rxr->adapter;
4358 	struct em_rxbuffer	*rxbuf;
4359 	bus_dma_segment_t	seg[1];
4360 	int			rsize, nsegs, error = 0;
4361 #ifdef DEV_NETMAP
4362 	struct netmap_slot *slot;
4363 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4364 #endif
4365 
4366 
4367 	/* Clear the ring contents */
4368 	EM_RX_LOCK(rxr);
4369 	rsize = roundup2(adapter->num_rx_desc *
4370 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4371 	bzero((void *)rxr->rx_base, rsize);
4372 #ifdef DEV_NETMAP
4373 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4374 #endif
4375 
4376 	/*
4377 	** Free current RX buffer structs and their mbufs
4378 	*/
4379 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4380 		rxbuf = &rxr->rx_buffers[i];
4381 		if (rxbuf->m_head != NULL) {
4382 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4383 			    BUS_DMASYNC_POSTREAD);
4384 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4385 			m_freem(rxbuf->m_head);
4386 			rxbuf->m_head = NULL; /* mark as freed */
4387 		}
4388 	}
4389 
4390 	/* Now replenish the mbufs */
	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4392 		rxbuf = &rxr->rx_buffers[j];
4393 #ifdef DEV_NETMAP
4394 		if (slot) {
4395 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4396 			uint64_t paddr;
4397 			void *addr;
4398 
4399 			addr = PNMB(na, slot + si, &paddr);
4400 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4401 			rxbuf->paddr = paddr;
4402 			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4403 			continue;
4404 		}
4405 #endif /* DEV_NETMAP */
4406 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4407 		    M_PKTHDR, adapter->rx_mbuf_sz);
4408 		if (rxbuf->m_head == NULL) {
4409 			error = ENOBUFS;
4410 			goto fail;
4411 		}
4412 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4413 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4414 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4415 
4416 		/* Get the memory mapping */
4417 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4418 		    rxbuf->map, rxbuf->m_head, seg,
4419 		    &nsegs, BUS_DMA_NOWAIT);
4420 		if (error != 0) {
4421 			m_freem(rxbuf->m_head);
4422 			rxbuf->m_head = NULL;
4423 			goto fail;
4424 		}
4425 		bus_dmamap_sync(rxr->rxtag,
4426 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4427 
4428 		rxbuf->paddr = seg[0].ds_addr;
4429 		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4430 	}
4431 	rxr->next_to_check = 0;
4432 	rxr->next_to_refresh = 0;
4433 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4434 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4435 
4436 fail:
4437 	EM_RX_UNLOCK(rxr);
4438 	return (error);
4439 }
4440 
4441 /*********************************************************************
4442  *
4443  *  Initialize all receive rings.
4444  *
4445  **********************************************************************/
4446 static int
4447 em_setup_receive_structures(struct adapter *adapter)
4448 {
4449 	struct rx_ring *rxr = adapter->rx_rings;
4450 	int q;
4451 
4452 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4453 		if (em_setup_receive_ring(rxr))
4454 			goto fail;
4455 
4456 	return (0);
4457 fail:
	/*
	 * Free the RX buffers allocated so far; we only need to handle
	 * the rings that completed, since the failing ring will have
	 * cleaned up after itself. 'q' is where the failure occurred,
	 * so it is the terminus.
	 */
4463 	for (int i = 0; i < q; ++i) {
4464 		rxr = &adapter->rx_rings[i];
4465 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4466 			struct em_rxbuffer *rxbuf;
4467 			rxbuf = &rxr->rx_buffers[n];
4468 			if (rxbuf->m_head != NULL) {
4469 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4470 			  	  BUS_DMASYNC_POSTREAD);
4471 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4472 				m_freem(rxbuf->m_head);
4473 				rxbuf->m_head = NULL;
4474 			}
4475 		}
4476 		rxr->next_to_check = 0;
4477 		rxr->next_to_refresh = 0;
4478 	}
4479 
4480 	return (ENOBUFS);
4481 }
4482 
4483 /*********************************************************************
4484  *
4485  *  Free all receive rings.
4486  *
4487  **********************************************************************/
4488 static void
4489 em_free_receive_structures(struct adapter *adapter)
4490 {
4491 	struct rx_ring *rxr = adapter->rx_rings;
4492 
4493 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4494 		em_free_receive_buffers(rxr);
4495 		/* Free the ring memory as well */
4496 		em_dma_free(adapter, &rxr->rxdma);
4497 		EM_RX_LOCK_DESTROY(rxr);
4498 	}
4499 
4500 	free(adapter->rx_rings, M_DEVBUF);
4501 }
4502 
4503 
4504 /*********************************************************************
4505  *
4506  *  Free receive ring data structures
4507  *
4508  **********************************************************************/
4509 static void
4510 em_free_receive_buffers(struct rx_ring *rxr)
4511 {
4512 	struct adapter		*adapter = rxr->adapter;
4513 	struct em_rxbuffer	*rxbuf = NULL;
4514 
4515 	INIT_DEBUGOUT("free_receive_buffers: begin");
4516 
4517 	if (rxr->rx_buffers != NULL) {
4518 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4519 			rxbuf = &rxr->rx_buffers[i];
4520 			if (rxbuf->map != NULL) {
4521 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4522 				    BUS_DMASYNC_POSTREAD);
4523 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4524 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4525 			}
4526 			if (rxbuf->m_head != NULL) {
4527 				m_freem(rxbuf->m_head);
4528 				rxbuf->m_head = NULL;
4529 			}
4530 		}
4531 		free(rxr->rx_buffers, M_DEVBUF);
4532 		rxr->rx_buffers = NULL;
4533 		rxr->next_to_check = 0;
4534 		rxr->next_to_refresh = 0;
4535 	}
4536 
4537 	if (rxr->rxtag != NULL) {
4538 		bus_dma_tag_destroy(rxr->rxtag);
4539 		rxr->rxtag = NULL;
4540 	}
4541 
4542 	return;
4543 }
4544 
4545 
4546 /*********************************************************************
4547  *
4548  *  Enable receive unit.
4549  *
4550  **********************************************************************/
4551 
4552 static void
4553 em_initialize_receive_unit(struct adapter *adapter)
4554 {
4555 	struct rx_ring *rxr = adapter->rx_rings;
4556 	if_t ifp = adapter->ifp;
4557 	struct e1000_hw	*hw = &adapter->hw;
4558 	u32	rctl, rxcsum, rfctl;
4559 
4560 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4561 
4562 	/*
4563 	 * Make sure receives are disabled while setting
4564 	 * up the descriptor ring
4565 	 */
4566 	rctl = E1000_READ_REG(hw, E1000_RCTL);
	/* Never disable receives once enabled on 82574/82583 hardware */
4568 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4569 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4570 
4571 	/* Setup the Receive Control Register */
4572 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4573 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4574 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4575 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4576 
4577 	/* Do not store bad packets */
4578 	rctl &= ~E1000_RCTL_SBP;
4579 
4580 	/* Enable Long Packet receive */
4581 	if (if_getmtu(ifp) > ETHERMTU)
4582 		rctl |= E1000_RCTL_LPE;
4583 	else
4584 		rctl &= ~E1000_RCTL_LPE;
4585 
	/* Strip the CRC */
	if (!em_disable_crc_stripping)
		rctl |= E1000_RCTL_SECRC;
4589 
4590 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4591 	    adapter->rx_abs_int_delay.value);
4592 
4593 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4594 	    adapter->rx_int_delay.value);
4595 	/*
4596 	 * Set the interrupt throttling rate. Value is calculated
4597 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4598 	 */
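	/*
	 * Worked example (assuming MAX_INTS_PER_SEC of 8000, as defined
	 * in if_em.h): 1 / (8000 * 256ns) works out to an interval of
	 * roughly 488 (0x1e8) in the register's 256ns units.
	 */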
4599 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4600 
4601 	/* Use extended rx descriptor formats */
4602 	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4603 	rfctl |= E1000_RFCTL_EXTEN;
4604 	/*
4605 	** When using MSIX interrupts we need to throttle
4606 	** using the EITR register (82574 only)
4607 	*/
4608 	if (hw->mac.type == e1000_82574) {
4609 		for (int i = 0; i < 4; i++)
4610 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4611 			    DEFAULT_ITR);
4612 		/* Disable accelerated acknowledge */
4613 		rfctl |= E1000_RFCTL_ACK_DIS;
4614 	}
4615 	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4616 
4617 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4618 	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4619 #ifdef EM_MULTIQUEUE
4620 		rxcsum |= E1000_RXCSUM_TUOFL |
4621 			  E1000_RXCSUM_IPOFL |
4622 			  E1000_RXCSUM_PCSD;
4623 #else
4624 		rxcsum |= E1000_RXCSUM_TUOFL;
4625 #endif
4626 	} else
4627 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4628 
4629 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4630 
4631 #ifdef EM_MULTIQUEUE
4632 #define RSSKEYLEN 10
4633 	if (adapter->num_queues > 1) {
4634 		uint8_t  rss_key[4 * RSSKEYLEN];
4635 		uint32_t reta = 0;
4636 		int i;
4637 
4638 		/*
4639 		* Configure RSS key
4640 		*/
4641 		arc4rand(rss_key, sizeof(rss_key), 0);
4642 		for (i = 0; i < RSSKEYLEN; ++i) {
4643 			uint32_t rssrk = 0;
4644 
4645 			rssrk = EM_RSSRK_VAL(rss_key, i);
4646 			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4647 		}
4648 
4649 		/*
4650 		* Configure RSS redirect table in following fashion:
4651 		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4652 		*/
4653 		for (i = 0; i < sizeof(reta); ++i) {
4654 			uint32_t q;
4655 
4656 			q = (i % adapter->num_queues) << 7;
4657 			reta |= q << (8 * i);
4658 		}
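		/*
		 * Worked example: with num_queues == 2 the loop above
		 * alternates queue 0 and queue 1 in bit 7 of each byte,
		 * i.e. reta == 0x80008000, and that 4-entry pattern is
		 * replicated across all 32 RETA registers below.
		 */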
4659 
4660 		for (i = 0; i < 32; ++i) {
4661 			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4662 		}
4663 
4664 		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4665 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4666 				E1000_MRQC_RSS_FIELD_IPV4 |
4667 				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4668 				E1000_MRQC_RSS_FIELD_IPV6_EX |
4669 				E1000_MRQC_RSS_FIELD_IPV6);
4670 	}
4671 #endif
4672 	/*
4673 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4674 	** long latencies are observed, like Lenovo X60. This
4675 	** change eliminates the problem, but since having positive
4676 	** values in RDTR is a known source of problems on other
4677 	** platforms another solution is being sought.
4678 	*/
4679 	if (hw->mac.type == e1000_82573)
4680 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4681 
4682 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4683 		/* Setup the Base and Length of the Rx Descriptor Ring */
4684 		u64 bus_addr = rxr->rxdma.dma_paddr;
4685 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4686 
4687 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4688 		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4689 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4690 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4691 		/* Setup the Head and Tail Descriptor Pointers */
4692 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4693 #ifdef DEV_NETMAP
4694 		/*
4695 		 * an init() while a netmap client is active must
4696 		 * preserve the rx buffers passed to userspace.
4697 		 */
4698 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4699 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4700 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4701 		}
4702 #endif /* DEV_NETMAP */
4703 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4704 	}
4705 
4706 	/*
4707 	 * Set PTHRESH for improved jumbo performance
4708 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4709 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4710 	 * Only write to RXDCTL(1) if there is a need for different
4711 	 * settings.
4712 	 */
4713 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4714 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4715 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4716 	    (if_getmtu(ifp) > ETHERMTU)) {
4717 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4718 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4719 	} else if (adapter->hw.mac.type == e1000_82574) {
4720 		for (int i = 0; i < adapter->num_queues; i++) {
4721 			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4722 
4723 			rxdctl |= 0x20; /* PTHRESH */
4724 			rxdctl |= 4 << 8; /* HTHRESH */
4725 			rxdctl |= 4 << 16;/* WTHRESH */
4726 			rxdctl |= 1 << 24; /* Switch to granularity */
4727 			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4728 		}
4729 	}
4730 
4731 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4732 		if (if_getmtu(ifp) > ETHERMTU)
4733 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4734 		else
4735 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4736 	}
4737 
	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;
4740 
4741 	if (adapter->rx_mbuf_sz == MCLBYTES)
4742 		rctl |= E1000_RCTL_SZ_2048;
4743 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4744 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4745 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4746 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
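	/*
	 * A sketch of the mapping above: 2k clusters use the base buffer
	 * size encoding, while page-sized (4k) and 9k jumbo clusters need
	 * BSEX to select the extended 4096/8192 sizes; a 9k cluster is
	 * therefore limited to 8192 bytes of DMA per descriptor.
	 */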
4747 
	/* Ensure a DTYPE of 00 by clearing the descriptor type bits */
4749 	rctl &= ~0x00000C00;
4750 	/* Write out the settings */
4751 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4752 
4753 	return;
4754 }
4755 
4756 
4757 /*********************************************************************
4758  *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and passes data which has been
 *  dma'ed into host memory up to the upper layer.
4762  *
4763  *  We loop at most count times if count is > 0, or until done if
4764  *  count < 0.
4765  *
4766  *  For polling we also now return the number of cleaned packets
4767  *********************************************************************/
4768 static bool
4769 em_rxeof(struct rx_ring *rxr, int count, int *done)
4770 {
4771 	struct adapter		*adapter = rxr->adapter;
4772 	if_t ifp = adapter->ifp;
4773 	struct mbuf		*mp, *sendmp;
4774 	u32			status = 0;
4775 	u16 			len;
4776 	int			i, processed, rxdone = 0;
4777 	bool			eop;
4778 	union e1000_rx_desc_extended	*cur;
4779 
4780 	EM_RX_LOCK(rxr);
4781 
4782 	/* Sync the ring */
4783 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4784 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4785 
4786 
4787 #ifdef DEV_NETMAP
4788 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4789 		EM_RX_UNLOCK(rxr);
4790 		return (FALSE);
4791 	}
4792 #endif /* DEV_NETMAP */
4793 
4794 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4795 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4796 			break;
4797 
4798 		cur = &rxr->rx_base[i];
4799 		status = le32toh(cur->wb.upper.status_error);
4800 		mp = sendmp = NULL;
4801 
4802 		if ((status & E1000_RXD_STAT_DD) == 0)
4803 			break;
4804 
4805 		len = le16toh(cur->wb.upper.length);
4806 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4807 
4808 		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4809 		    (rxr->discard == TRUE)) {
4810 			adapter->dropped_pkts++;
4811 			++rxr->rx_discarded;
4812 			if (!eop) /* Catch subsequent segs */
4813 				rxr->discard = TRUE;
4814 			else
4815 				rxr->discard = FALSE;
4816 			em_rx_discard(rxr, i);
4817 			goto next_desc;
4818 		}
4819 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4820 
4821 		/* Assign correct length to the current fragment */
4822 		mp = rxr->rx_buffers[i].m_head;
4823 		mp->m_len = len;
4824 
4825 		/* Trigger for refresh */
4826 		rxr->rx_buffers[i].m_head = NULL;
4827 
4828 		/* First segment? */
4829 		if (rxr->fmp == NULL) {
4830 			mp->m_pkthdr.len = len;
4831 			rxr->fmp = rxr->lmp = mp;
4832 		} else {
4833 			/* Chain mbuf's together */
4834 			mp->m_flags &= ~M_PKTHDR;
4835 			rxr->lmp->m_next = mp;
4836 			rxr->lmp = mp;
4837 			rxr->fmp->m_pkthdr.len += len;
4838 		}
4839 
4840 		if (eop) {
4841 			--count;
4842 			sendmp = rxr->fmp;
4843 			if_setrcvif(sendmp, ifp);
4844 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4845 			em_receive_checksum(status, sendmp);
4846 #ifndef __NO_STRICT_ALIGNMENT
4847 			if (adapter->hw.mac.max_frame_size >
4848 			    (MCLBYTES - ETHER_ALIGN) &&
4849 			    em_fixup_rx(rxr) != 0)
4850 				goto skip;
4851 #endif
4852 			if (status & E1000_RXD_STAT_VP) {
4853 				if_setvtag(sendmp,
4854 				    le16toh(cur->wb.upper.vlan));
4855 				sendmp->m_flags |= M_VLANTAG;
4856 			}
4857 #ifndef __NO_STRICT_ALIGNMENT
4858 skip:
4859 #endif
4860 			rxr->fmp = rxr->lmp = NULL;
4861 		}
4862 next_desc:
4863 		/* Sync the ring */
4864 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4865 	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4866 
4867 		/* Zero out the receive descriptors status. */
4868 		cur->wb.upper.status_error &= htole32(~0xFF);
4869 		++rxdone;	/* cumulative for POLL */
4870 		++processed;
4871 
4872 		/* Advance our pointers to the next descriptor. */
4873 		if (++i == adapter->num_rx_desc)
4874 			i = 0;
4875 
4876 		/* Send to the stack */
4877 		if (sendmp != NULL) {
4878 			rxr->next_to_check = i;
4879 			EM_RX_UNLOCK(rxr);
4880 			if_input(ifp, sendmp);
4881 			EM_RX_LOCK(rxr);
4882 			i = rxr->next_to_check;
4883 		}
4884 
4885 		/* Only refresh mbufs every 8 descriptors */
4886 		if (processed == 8) {
4887 			em_refresh_mbufs(rxr, i);
4888 			processed = 0;
4889 		}
4890 	}
4891 
4892 	/* Catch any remaining refresh work */
4893 	if (e1000_rx_unrefreshed(rxr))
4894 		em_refresh_mbufs(rxr, i);
4895 
4896 	rxr->next_to_check = i;
4897 	if (done != NULL)
4898 		*done = rxdone;
4899 	EM_RX_UNLOCK(rxr);
4900 
4901 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4902 }
4903 
4904 static __inline void
4905 em_rx_discard(struct rx_ring *rxr, int i)
4906 {
4907 	struct em_rxbuffer	*rbuf;
4908 
4909 	rbuf = &rxr->rx_buffers[i];
4910 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4911 
4912 	/* Free any previous pieces */
4913 	if (rxr->fmp != NULL) {
4914 		rxr->fmp->m_flags |= M_PKTHDR;
4915 		m_freem(rxr->fmp);
4916 		rxr->fmp = NULL;
4917 		rxr->lmp = NULL;
4918 	}
	/*
	** Free the buffer and allow em_refresh_mbufs()
	** to clean up and recharge it.
	*/
4923 	if (rbuf->m_head) {
4924 		m_free(rbuf->m_head);
4925 		rbuf->m_head = NULL;
4926 	}
4927 	return;
4928 }
4929 
4930 #ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake
 * of the 8254x as it nullifies DMA operations: the 8254x just allows RX
 * buffer sizes of 2048/4096/8192/16384, while what we really want is
 * 2048 - ETHER_ALIGN to align the payload. On architectures without strict
 * alignment restrictions the 8254x still performs unaligned memory accesses,
 * which reduce performance there too.
 * To avoid copying over an entire frame to align it, we allocate a new mbuf
 * and copy the ethernet header to the new mbuf. The new mbuf is prepended
 * onto the existing mbuf chain.
 *
 * Be aware that the best performance of the 8254x is achieved only when
 * jumbo frames are not used at all on architectures with strict alignment.
 */
4945 static int
4946 em_fixup_rx(struct rx_ring *rxr)
4947 {
4948 	struct adapter *adapter = rxr->adapter;
4949 	struct mbuf *m, *n;
4950 	int error;
4951 
4952 	error = 0;
4953 	m = rxr->fmp;
4954 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4955 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4956 		m->m_data += ETHER_HDR_LEN;
4957 	} else {
4958 		MGETHDR(n, M_NOWAIT, MT_DATA);
4959 		if (n != NULL) {
4960 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4961 			m->m_data += ETHER_HDR_LEN;
4962 			m->m_len -= ETHER_HDR_LEN;
4963 			n->m_len = ETHER_HDR_LEN;
4964 			M_MOVE_PKTHDR(n, m);
4965 			n->m_next = m;
4966 			rxr->fmp = n;
4967 		} else {
4968 			adapter->dropped_pkts++;
4969 			m_freem(rxr->fmp);
4970 			rxr->fmp = NULL;
4971 			error = ENOMEM;
4972 		}
4973 	}
4974 
4975 	return (error);
4976 }
4977 #endif
4978 
4979 static void
4980 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4981 {
4982 	rxd->read.buffer_addr = htole64(rxbuf->paddr);
4983 	/* DD bits must be cleared */
	rxd->wb.upper.status_error = 0;
4985 }
4986 
4987 /*********************************************************************
4988  *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the checksum status so that the stack
 *  doesn't spend time re-verifying it.
4992  *
4993  *********************************************************************/
4994 static void
4995 em_receive_checksum(uint32_t status, struct mbuf *mp)
4996 {
4997 	mp->m_pkthdr.csum_flags = 0;
4998 
	/* The Ignore Checksum Indication bit is set; report nothing */
5000 	if (status & E1000_RXD_STAT_IXSM)
5001 		return;
5002 
5003 	/* If the IP checksum exists and there is no IP Checksum error */
5004 	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5005 		E1000_RXD_STAT_IPCS) {
5006 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5007 	}
5008 
5009 	/* TCP or UDP checksum */
5010 	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5011 	    E1000_RXD_STAT_TCPCS) {
5012 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5013 		mp->m_pkthdr.csum_data = htons(0xffff);
5014 	}
5015 	if (status & E1000_RXD_STAT_UDPCS) {
5016 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5017 		mp->m_pkthdr.csum_data = htons(0xffff);
5018 	}
5019 }
5020 
5021 /*
 * This routine is run via a VLAN
 * config EVENT
5024  */
5025 static void
5026 em_register_vlan(void *arg, if_t ifp, u16 vtag)
5027 {
5028 	struct adapter	*adapter = if_getsoftc(ifp);
5029 	u32		index, bit;
5030 
	if ((void *)adapter != arg)	/* Not our event */
5032 		return;
5033 
	if ((vtag == 0) || (vtag > 4095))	/* Invalid ID */
		return;
5036 
5037 	EM_CORE_LOCK(adapter);
5038 	index = (vtag >> 5) & 0x7F;
5039 	bit = vtag & 0x1F;
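	/*
	 * Worked example: vtag 1234 gives index (1234 >> 5) & 0x7F == 38
	 * and bit 1234 & 0x1F == 18, i.e. bit 18 of shadow_vfta[38].
	 */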
5040 	adapter->shadow_vfta[index] |= (1 << bit);
5041 	++adapter->num_vlans;
5042 	/* Re-init to load the changes */
5043 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5044 		em_init_locked(adapter);
5045 	EM_CORE_UNLOCK(adapter);
5046 }
5047 
5048 /*
 * This routine is run via a VLAN
 * unconfig EVENT
5051  */
5052 static void
5053 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5054 {
5055 	struct adapter	*adapter = if_getsoftc(ifp);
5056 	u32		index, bit;
5057 
5058 	if (adapter != arg)
5059 		return;
5060 
	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;
5063 
5064 	EM_CORE_LOCK(adapter);
5065 	index = (vtag >> 5) & 0x7F;
5066 	bit = vtag & 0x1F;
5067 	adapter->shadow_vfta[index] &= ~(1 << bit);
5068 	--adapter->num_vlans;
5069 	/* Re-init to load the changes */
5070 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5071 		em_init_locked(adapter);
5072 	EM_CORE_UNLOCK(adapter);
5073 }
5074 
5075 static void
5076 em_setup_vlan_hw_support(struct adapter *adapter)
5077 {
5078 	struct e1000_hw *hw = &adapter->hw;
5079 	u32             reg;
5080 
	/*
	** We get here through init_locked, meaning
	** a soft reset; that has already cleared
	** the VFTA and other state, so if no VLANs
	** have been registered there is nothing to do.
	*/
	if (adapter->num_vlans == 0)
		return;
5089 
5090 	/*
5091 	** A soft reset zeroes out the VFTA, so
5092 	** we need to repopulate it now.
5093 	*/
5094 	for (int i = 0; i < EM_VFTA_SIZE; i++)
5095 		if (adapter->shadow_vfta[i] != 0)
5096 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5097 			    i, adapter->shadow_vfta[i]);
5098 
5099 	reg = E1000_READ_REG(hw, E1000_CTRL);
5100 	reg |= E1000_CTRL_VME;
5101 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5102 
5103 	/* Enable the Filter Table */
5104 	reg = E1000_READ_REG(hw, E1000_RCTL);
5105 	reg &= ~E1000_RCTL_CFIEN;
5106 	reg |= E1000_RCTL_VFE;
5107 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
5108 }
5109 
5110 static void
5111 em_enable_intr(struct adapter *adapter)
5112 {
5113 	struct e1000_hw *hw = &adapter->hw;
5114 	u32 ims_mask = IMS_ENABLE_MASK;
5115 
5116 	if (hw->mac.type == e1000_82574) {
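		/*
		 * 82574 MSI-X: program EIAC to auto-clear the queue and
		 * link causes, and enable those same causes in IMS.
		 */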
5117 		E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
5118 		ims_mask |= adapter->ims;
5119 	}
5120 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5121 }
5122 
5123 static void
5124 em_disable_intr(struct adapter *adapter)
5125 {
5126 	struct e1000_hw *hw = &adapter->hw;
5127 
5128 	if (hw->mac.type == e1000_82574)
5129 		E1000_WRITE_REG(hw, EM_EIAC, 0);
5130 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5131 }
5132 
5133 /*
5134  * Bit of a misnomer: what this really means is to
5135  * enable OS management of the system, i.e. to disable
5136  * the special hardware management features.
5137  */
5138 static void
5139 em_init_manageability(struct adapter *adapter)
5140 {
5141 	/* A shared code workaround */
5142 #define E1000_82542_MANC2H E1000_MANC2H
5143 	if (adapter->has_manage) {
5144 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5145 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5146 
5147 		/* disable hardware interception of ARP */
5148 		manc &= ~(E1000_MANC_ARP_EN);
5149 
5150 		/* enable receiving management packets to the host */
5151 		manc |= E1000_MANC_EN_MNG2HOST;
5152 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5153 #define E1000_MNG2HOST_PORT_664 (1 << 6)
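		/* Pass management traffic on UDP ports 623/664 (RMCP) up to the host */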
5154 		manc2h |= E1000_MNG2HOST_PORT_623;
5155 		manc2h |= E1000_MNG2HOST_PORT_664;
5156 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5157 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5158 	}
5159 }
5160 
5161 /*
5162  * Give control back to hardware management
5163  * controller if there is one.
5164  */
5165 static void
5166 em_release_manageability(struct adapter *adapter)
5167 {
5168 	if (adapter->has_manage) {
5169 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5170 
5171 		/* re-enable hardware interception of ARP */
5172 		manc |= E1000_MANC_ARP_EN;
5173 		manc &= ~E1000_MANC_EN_MNG2HOST;
5174 
5175 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5176 	}
5177 }
5178 
5179 /*
5180  * em_get_hw_control sets the {CTRL_EXT|SWSM}:DRV_LOAD bit.
5181  * For ASF and Pass Through versions of f/w this means
5182  * that the driver is loaded. For AMT versions of the f/w
5183  * this means that the network i/f is open.
5184  */
5185 static void
5186 em_get_hw_control(struct adapter *adapter)
5187 {
5188 	u32 ctrl_ext, swsm;
5189 
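	/* 82573 signals driver load via SWSM; other MACs use CTRL_EXT */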
5190 	if (adapter->hw.mac.type == e1000_82573) {
5191 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5192 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5193 		    swsm | E1000_SWSM_DRV_LOAD);
5194 		return;
5195 	}
5196 	/* else */
5197 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5198 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5199 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5200 	return;
5201 }
5202 
5203 /*
5204  * em_release_hw_control resets the {CTRL_EXT|SWSM}:DRV_LOAD bit.
5205  * For ASF and Pass Through versions of f/w this means that
5206  * the driver is no longer loaded. For AMT versions of the
5207  * f/w this means that the network i/f is closed.
5208  */
5209 static void
5210 em_release_hw_control(struct adapter *adapter)
5211 {
5212 	u32 ctrl_ext, swsm;
5213 
5214 	if (!adapter->has_manage)
5215 		return;
5216 
5217 	if (adapter->hw.mac.type == e1000_82573) {
5218 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5219 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5220 		    swsm & ~E1000_SWSM_DRV_LOAD);
5221 		return;
5222 	}
5223 	/* else */
5224 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5225 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5226 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5227 	return;
5228 }
5229 
5230 static int
5231 em_is_valid_ether_addr(u8 *addr)
5232 {
5233 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5234 
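	/* Reject multicast (I/G bit set) and all-zeroes addresses */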
5235 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5236 		return (FALSE);
5237 	}
5238 
5239 	return (TRUE);
5240 }
5241 
5242 /*
5243 ** Parse the interface capabilities with regard
5244 ** to both system management and wake-on-lan for
5245 ** later use.
5246 */
5247 static void
5248 em_get_wakeup(device_t dev)
5249 {
5250 	struct adapter	*adapter = device_get_softc(dev);
5251 	u16		eeprom_data = 0, device_id, apme_mask;
5252 
5253 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5254 	apme_mask = EM_EEPROM_APME;
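	/* The APME wake-enable bit lives in the EEPROM on older MACs
	 * but in the WUC register on ICH/PCH parts. */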
5255 
5256 	switch (adapter->hw.mac.type) {
5257 	case e1000_82573:
5258 	case e1000_82583:
5259 		adapter->has_amt = TRUE;
5260 		/* Falls thru */
5261 	case e1000_82571:
5262 	case e1000_82572:
5263 	case e1000_80003es2lan:
5264 		if (adapter->hw.bus.func == 1)
5265 			e1000_read_nvm(&adapter->hw,
5266 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5268 		else
5269 			e1000_read_nvm(&adapter->hw,
5270 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5271 		break;
5272 	case e1000_ich8lan:
5273 	case e1000_ich9lan:
5274 	case e1000_ich10lan:
5275 	case e1000_pchlan:
5276 	case e1000_pch2lan:
5277 	case e1000_pch_lpt:
5278 	case e1000_pch_spt:
5279 		apme_mask = E1000_WUC_APME;
5280 		adapter->has_amt = TRUE;
5281 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5282 		break;
5283 	default:
5284 		e1000_read_nvm(&adapter->hw,
5285 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5286 		break;
5287 	}
5288 	if (eeprom_data & apme_mask)
5289 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5290 	/*
5291 	 * We have the eeprom settings, now apply the special cases
5292 	 * where the eeprom may be wrong or the board won't support
5293 	 * wake on lan on a particular port.
5294 	 */
5295 	device_id = pci_get_device(dev);
5296 	switch (device_id) {
5297 	case E1000_DEV_ID_82571EB_FIBER:
5298 		/* Wake events only supported on port A for dual fiber
5299 		 * regardless of eeprom setting */
5300 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5301 		    E1000_STATUS_FUNC_1)
5302 			adapter->wol = 0;
5303 		break;
5304 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5305 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5306 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5307 		/* if quad port adapter, disable WoL on all but port A */
5308 		if (global_quad_port_a != 0)
5309 			adapter->wol = 0;
5310 		/* Reset for multiple quad port adapters */
5311 		if (++global_quad_port_a == 4)
5312 			global_quad_port_a = 0;
5313 		break;
5314 	}
5315 	return;
5316 }
5317 
5318 
5319 /*
5320  * Enable PCI Wake On Lan capability
5321  */
5322 static void
5323 em_enable_wakeup(device_t dev)
5324 {
5325 	struct adapter	*adapter = device_get_softc(dev);
5326 	if_t ifp = adapter->ifp;
5327 	u32		pmc, ctrl, ctrl_ext, rctl, wuc;
5328 	u16     	status;
5329 
5330 	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5331 		return;
5332 
5333 	/* Advertise the wakeup capability */
5334 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5335 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5336 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5337 	wuc = E1000_READ_REG(&adapter->hw, E1000_WUC);
5338 	wuc |= E1000_WUC_PME_EN;
5339 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, wuc);
5340 
5341 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5342 	    (adapter->hw.mac.type == e1000_pchlan) ||
5343 	    (adapter->hw.mac.type == e1000_ich9lan) ||
5344 	    (adapter->hw.mac.type == e1000_ich10lan))
5345 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5346 
5347 	/* Keep the laser running on Fiber adapters */
5348 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5349 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5350 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5351 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5352 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5353 	}
5354 
5355 	/*
5356 	** Determine the type of wakeup: wol was initialized with
5357 	** the supported bits set; clear any not enabled on the interface.
5358 	*/
5359 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5360 		adapter->wol &= ~E1000_WUFC_MAG;
5361 
5362 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5363 		adapter->wol &= ~E1000_WUFC_MC;
5364 	else {
5365 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5366 		rctl |= E1000_RCTL_MPE;
5367 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5368 	}
5369 
5370 	if ((adapter->hw.mac.type == e1000_pchlan)  ||
5371 	    (adapter->hw.mac.type == e1000_pch2lan) ||
5372 	    (adapter->hw.mac.type == e1000_pch_lpt) ||
5373 	    (adapter->hw.mac.type == e1000_pch_spt)) {
5374 		if (em_enable_phy_wakeup(adapter))
5375 			return;
5376 	} else {
5377 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5378 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5379 	}
5380 
5381 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5382 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5383 
5384 	/* Request PME */
5385 	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5386 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5387 	if (if_getcapenable(ifp) & IFCAP_WOL)
5388 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5389 	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5390 
5391 	return;
5392 }
5393 
5394 /*
5395 ** WOL in the newer chipset interfaces (pchlan)
5396 ** requires things to be copied into the phy
5397 */
5398 static int
5399 em_enable_phy_wakeup(struct adapter *adapter)
5400 {
5401 	struct e1000_hw *hw = &adapter->hw;
5402 	u32 mreg, ret = 0;
5403 	u16 preg;
5404 
5405 	/* copy MAC RARs to PHY RARs */
5406 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5407 
5408 	/* copy MAC MTA to PHY MTA */
5409 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5410 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5411 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5412 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5413 		    (u16)((mreg >> 16) & 0xFFFF));
5414 	}
5415 
5416 	/* configure PHY Rx Control register */
5417 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5418 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5419 	if (mreg & E1000_RCTL_UPE)
5420 		preg |= BM_RCTL_UPE;
5421 	if (mreg & E1000_RCTL_MPE)
5422 		preg |= BM_RCTL_MPE;
5423 	preg &= ~(BM_RCTL_MO_MASK);
5424 	if (mreg & E1000_RCTL_MO_3)
5425 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5426 				<< BM_RCTL_MO_SHIFT);
5427 	if (mreg & E1000_RCTL_BAM)
5428 		preg |= BM_RCTL_BAM;
5429 	if (mreg & E1000_RCTL_PMCF)
5430 		preg |= BM_RCTL_PMCF;
5431 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5432 	if (mreg & E1000_CTRL_RFCE)
5433 		preg |= BM_RCTL_RFCE;
5434 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5435 
5436 	/* enable PHY wakeup in MAC register */
5437 	E1000_WRITE_REG(hw, E1000_WUC,
5438 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5439 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5440 
5441 	/* configure and enable PHY wakeup in PHY registers */
5442 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5443 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5444 
5445 	/* activate PHY wakeup */
5446 	ret = hw->phy.ops.acquire(hw);
5447 	if (ret) {
5448 		printf("Could not acquire PHY\n");
5449 		return ret;
5450 	}
5451 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5452 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
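	/* BM_WUC_ENABLE_REG lives on PHY page 769, selected above */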
5453 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5454 	if (ret) {
5455 		printf("Could not read PHY page 769\n");
5456 		goto out;
5457 	}
5458 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5459 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5460 	if (ret)
5461 		printf("Could not set PHY Host Wakeup bit\n");
5462 out:
5463 	hw->phy.ops.release(hw);
5464 
5465 	return ret;
5466 }
5467 
5468 static void
5469 em_led_func(void *arg, int onoff)
5470 {
5471 	struct adapter	*adapter = arg;
5472 
5473 	EM_CORE_LOCK(adapter);
5474 	if (onoff) {
5475 		e1000_setup_led(&adapter->hw);
5476 		e1000_led_on(&adapter->hw);
5477 	} else {
5478 		e1000_led_off(&adapter->hw);
5479 		e1000_cleanup_led(&adapter->hw);
5480 	}
5481 	EM_CORE_UNLOCK(adapter);
5482 }
5483 
5484 /*
5485 ** Disable the L0S and L1 LINK states
5486 */
5487 static void
5488 em_disable_aspm(struct adapter *adapter)
5489 {
5490 	int		base, reg;
5491 	u16		link_cap, link_ctrl;
5492 	device_t	dev = adapter->dev;
5493 
5494 	switch (adapter->hw.mac.type) {
5495 		case e1000_82573:
5496 		case e1000_82574:
5497 		case e1000_82583:
5498 			break;
5499 		default:
5500 			return;
5501 	}
5502 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5503 		return;
5504 	reg = base + PCIER_LINK_CAP;
5505 	link_cap = pci_read_config(dev, reg, 2);
5506 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5507 		return;
5508 	reg = base + PCIER_LINK_CTL;
5509 	link_ctrl = pci_read_config(dev, reg, 2);
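	/* Clearing the two ASPM control bits disables both L0s and L1 */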
5510 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5511 	pci_write_config(dev, reg, link_ctrl, 2);
5512 	return;
5513 }
5514 
5515 /**********************************************************************
5516  *
5517  *  Update the board statistics counters.
5518  *
5519  **********************************************************************/
5520 static void
5521 em_update_stats_counters(struct adapter *adapter)
5522 {
5523 
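	/* Only fold in symbol/sequence errors on copper links or when
	 * a fiber/serdes link is up; they count noise while it is down. */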
5524 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5525 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5526 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5527 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5528 	}
5529 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5530 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5531 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5532 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5533 
5534 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5535 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5536 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5537 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5538 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5539 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5540 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5541 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5542 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5543 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5544 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5545 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5546 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5547 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5548 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5549 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5550 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5551 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5552 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5553 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5554 
5555 	/* For the 64-bit byte counters the low dword must be read first. */
5556 	/* Both registers clear on the read of the high dword */
5557 
5558 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5559 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5560 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5561 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5562 
5563 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5564 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5565 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5566 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5567 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5568 
5569 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5570 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5571 
5572 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5573 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5574 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5575 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5576 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5577 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5578 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5579 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5580 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5581 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5582 
5583 	/* Interrupt Counts */
5584 
5585 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5586 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5587 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5588 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5589 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5590 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5591 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5592 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5593 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5594 
5595 	if (adapter->hw.mac.type >= e1000_82543) {
5596 		adapter->stats.algnerrc +=
5597 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5598 		adapter->stats.rxerrc +=
5599 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5600 		adapter->stats.tncrs +=
5601 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5602 		adapter->stats.cexterr +=
5603 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5604 		adapter->stats.tsctc +=
5605 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5606 		adapter->stats.tsctfc +=
5607 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5608 	}
5609 }
5610 
5611 static uint64_t
5612 em_get_counter(if_t ifp, ift_counter cnt)
5613 {
5614 	struct adapter *adapter;
5615 
5616 	adapter = if_getsoftc(ifp);
5617 
5618 	switch (cnt) {
5619 	case IFCOUNTER_COLLISIONS:
5620 		return (adapter->stats.colc);
5621 	case IFCOUNTER_IERRORS:
5622 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5623 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5624 		    adapter->stats.ruc + adapter->stats.roc +
5625 		    adapter->stats.mpc + adapter->stats.cexterr);
5626 	case IFCOUNTER_OERRORS:
5627 		return (adapter->stats.ecol + adapter->stats.latecol +
5628 		    adapter->watchdog_events);
5629 	default:
5630 		return (if_get_counter_default(ifp, cnt));
5631 	}
5632 }
5633 
5634 /* Export a single 32-bit register via a read-only sysctl. */
5635 static int
5636 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5637 {
5638 	struct adapter *adapter;
5639 	u_int val;
5640 
5641 	adapter = oidp->oid_arg1;
5642 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5643 	return (sysctl_handle_int(oidp, &val, 0, req));
5644 }
5645 
5646 /*
5647  * Add sysctl variables, one per statistic, to the system.
5648  */
5649 static void
5650 em_add_hw_stats(struct adapter *adapter)
5651 {
5652 	device_t dev = adapter->dev;
5653 
5654 	struct tx_ring *txr = adapter->tx_rings;
5655 	struct rx_ring *rxr = adapter->rx_rings;
5656 
5657 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5658 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5659 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5660 	struct e1000_hw_stats *stats = &adapter->stats;
5661 
5662 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5663 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5664 
5665 #define QUEUE_NAME_LEN 32
5666 	char namebuf[QUEUE_NAME_LEN];
5667 
5668 	/* Driver Statistics */
5669 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5670 			CTLFLAG_RD, &adapter->dropped_pkts,
5671 			"Driver dropped packets");
5672 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5673 			CTLFLAG_RD, &adapter->link_irq,
5674 			"Link MSIX IRQ Handled");
5675 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5676 			 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5677 			 "Defragmenting mbuf chain failed");
5678 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5679 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5680 			"Driver tx dma failure in xmit");
5681 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5682 			CTLFLAG_RD, &adapter->rx_overruns,
5683 			"RX overruns");
5684 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5685 			CTLFLAG_RD, &adapter->watchdog_events,
5686 			"Watchdog timeouts");
5687 
5688 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5689 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5690 			em_sysctl_reg_handler, "IU",
5691 			"Device Control Register");
5692 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5693 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5694 			em_sysctl_reg_handler, "IU",
5695 			"Receiver Control Register");
5696 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5697 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5698 			"Flow Control High Watermark");
5699 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5700 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5701 			"Flow Control Low Watermark");
5702 
5703 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5704 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5705 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5706 					    CTLFLAG_RD, NULL, "TX Queue Name");
5707 		queue_list = SYSCTL_CHILDREN(queue_node);
5708 
5709 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5710 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5711 				E1000_TDH(txr->me),
5712 				em_sysctl_reg_handler, "IU",
5713  				"Transmit Descriptor Head");
5714 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5715 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5716 				E1000_TDT(txr->me),
5717 				em_sysctl_reg_handler, "IU",
5718  				"Transmit Descriptor Tail");
5719 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5720 				CTLFLAG_RD, &txr->tx_irq,
5721 				"Queue MSI-X Transmit Interrupts");
5722 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5723 				CTLFLAG_RD, &txr->no_desc_avail,
5724 				"Queue No Descriptor Available");
5725 
5726 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5727 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5728 					    CTLFLAG_RD, NULL, "RX Queue Name");
5729 		queue_list = SYSCTL_CHILDREN(queue_node);
5730 
5731 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5732 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5733 				E1000_RDH(rxr->me),
5734 				em_sysctl_reg_handler, "IU",
5735 				"Receive Descriptor Head");
5736 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5737 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5738 				E1000_RDT(rxr->me),
5739 				em_sysctl_reg_handler, "IU",
5740 				"Receive Descriptor Tail");
5741 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5742 				CTLFLAG_RD, &rxr->rx_irq,
5743 				"Queue MSI-X Receive Interrupts");
5744 	}
5745 
5746 	/* MAC stats get their own sub node */
5747 
5748 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5749 				    CTLFLAG_RD, NULL, "Statistics");
5750 	stat_list = SYSCTL_CHILDREN(stat_node);
5751 
5752 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5753 			CTLFLAG_RD, &stats->ecol,
5754 			"Excessive collisions");
5755 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5756 			CTLFLAG_RD, &stats->scc,
5757 			"Single collisions");
5758 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5759 			CTLFLAG_RD, &stats->mcc,
5760 			"Multiple collisions");
5761 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5762 			CTLFLAG_RD, &stats->latecol,
5763 			"Late collisions");
5764 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5765 			CTLFLAG_RD, &stats->colc,
5766 			"Collision Count");
5767 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5768 			CTLFLAG_RD, &adapter->stats.symerrs,
5769 			"Symbol Errors");
5770 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5771 			CTLFLAG_RD, &adapter->stats.sec,
5772 			"Sequence Errors");
5773 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5774 			CTLFLAG_RD, &adapter->stats.dc,
5775 			"Defer Count");
5776 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5777 			CTLFLAG_RD, &adapter->stats.mpc,
5778 			"Missed Packets");
5779 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5780 			CTLFLAG_RD, &adapter->stats.rnbc,
5781 			"Receive No Buffers");
5782 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5783 			CTLFLAG_RD, &adapter->stats.ruc,
5784 			"Receive Undersize");
5785 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5786 			CTLFLAG_RD, &adapter->stats.rfc,
5787 			"Fragmented Packets Received");
5788 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5789 			CTLFLAG_RD, &adapter->stats.roc,
5790 			"Oversized Packets Received");
5791 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5792 			CTLFLAG_RD, &adapter->stats.rjc,
5793 			"Received Jabber");
5794 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5795 			CTLFLAG_RD, &adapter->stats.rxerrc,
5796 			"Receive Errors");
5797 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5798 			CTLFLAG_RD, &adapter->stats.crcerrs,
5799 			"CRC errors");
5800 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5801 			CTLFLAG_RD, &adapter->stats.algnerrc,
5802 			"Alignment Errors");
5803 	/* On 82575 these are collision counts */
5804 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5805 			CTLFLAG_RD, &adapter->stats.cexterr,
5806 			"Collision/Carrier extension errors");
5807 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5808 			CTLFLAG_RD, &adapter->stats.xonrxc,
5809 			"XON Received");
5810 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5811 			CTLFLAG_RD, &adapter->stats.xontxc,
5812 			"XON Transmitted");
5813 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5814 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5815 			"XOFF Received");
5816 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5817 			CTLFLAG_RD, &adapter->stats.xofftxc,
5818 			"XOFF Transmitted");
5819 
5820 	/* Packet Reception Stats */
5821 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5822 			CTLFLAG_RD, &adapter->stats.tpr,
5823 			"Total Packets Received");
5824 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5825 			CTLFLAG_RD, &adapter->stats.gprc,
5826 			"Good Packets Received");
5827 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5828 			CTLFLAG_RD, &adapter->stats.bprc,
5829 			"Broadcast Packets Received");
5830 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5831 			CTLFLAG_RD, &adapter->stats.mprc,
5832 			"Multicast Packets Received");
5833 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5834 			CTLFLAG_RD, &adapter->stats.prc64,
5835 			"64 byte frames received");
5836 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5837 			CTLFLAG_RD, &adapter->stats.prc127,
5838 			"65-127 byte frames received");
5839 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5840 			CTLFLAG_RD, &adapter->stats.prc255,
5841 			"128-255 byte frames received");
5842 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5843 			CTLFLAG_RD, &adapter->stats.prc511,
5844 			"256-511 byte frames received");
5845 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5846 			CTLFLAG_RD, &adapter->stats.prc1023,
5847 			"512-1023 byte frames received");
5848 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5849 			CTLFLAG_RD, &adapter->stats.prc1522,
5850 			"1024-1522 byte frames received");
5851  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5852  			CTLFLAG_RD, &adapter->stats.gorc,
5853  			"Good Octets Received");
5854 
5855 	/* Packet Transmission Stats */
5856  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5857  			CTLFLAG_RD, &adapter->stats.gotc,
5858  			"Good Octets Transmitted");
5859 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5860 			CTLFLAG_RD, &adapter->stats.tpt,
5861 			"Total Packets Transmitted");
5862 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5863 			CTLFLAG_RD, &adapter->stats.gptc,
5864 			"Good Packets Transmitted");
5865 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5866 			CTLFLAG_RD, &adapter->stats.bptc,
5867 			"Broadcast Packets Transmitted");
5868 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5869 			CTLFLAG_RD, &adapter->stats.mptc,
5870 			"Multicast Packets Transmitted");
5871 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5872 			CTLFLAG_RD, &adapter->stats.ptc64,
5873 			"64 byte frames transmitted");
5874 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5875 			CTLFLAG_RD, &adapter->stats.ptc127,
5876 			"65-127 byte frames transmitted");
5877 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5878 			CTLFLAG_RD, &adapter->stats.ptc255,
5879 			"128-255 byte frames transmitted");
5880 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5881 			CTLFLAG_RD, &adapter->stats.ptc511,
5882 			"256-511 byte frames transmitted");
5883 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5884 			CTLFLAG_RD, &adapter->stats.ptc1023,
5885 			"512-1023 byte frames transmitted");
5886 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5887 			CTLFLAG_RD, &adapter->stats.ptc1522,
5888 			"1024-1522 byte frames transmitted");
5889 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5890 			CTLFLAG_RD, &adapter->stats.tsctc,
5891 			"TSO Contexts Transmitted");
5892 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5893 			CTLFLAG_RD, &adapter->stats.tsctfc,
5894 			"TSO Contexts Failed");
5895 
5896 
5897 	/* Interrupt Stats */
5898 
5899 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5900 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5901 	int_list = SYSCTL_CHILDREN(int_node);
5902 
5903 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5904 			CTLFLAG_RD, &adapter->stats.iac,
5905 			"Interrupt Assertion Count");
5906 
5907 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5908 			CTLFLAG_RD, &adapter->stats.icrxptc,
5909 			"Interrupt Cause Rx Pkt Timer Expire Count");
5910 
5911 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5912 			CTLFLAG_RD, &adapter->stats.icrxatc,
5913 			"Interrupt Cause Rx Abs Timer Expire Count");
5914 
5915 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5916 			CTLFLAG_RD, &adapter->stats.ictxptc,
5917 			"Interrupt Cause Tx Pkt Timer Expire Count");
5918 
5919 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5920 			CTLFLAG_RD, &adapter->stats.ictxatc,
5921 			"Interrupt Cause Tx Abs Timer Expire Count");
5922 
5923 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5924 			CTLFLAG_RD, &adapter->stats.ictxqec,
5925 			"Interrupt Cause Tx Queue Empty Count");
5926 
5927 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5928 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5929 			"Interrupt Cause Tx Queue Min Thresh Count");
5930 
5931 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5932 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5933 			"Interrupt Cause Rx Desc Min Thresh Count");
5934 
5935 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5936 			CTLFLAG_RD, &adapter->stats.icrxoc,
5937 			"Interrupt Cause Receiver Overrun Count");
5938 }
5939 
5940 /**********************************************************************
5941  *
5942  *  This routine provides a way to dump out the adapter eeprom,
5943  *  often a useful debug/service tool. This only dumps the first
5944  *  often a useful debug/service tool. Only the first 32 words
5945  *  are dumped; everything that matters lies in that extent.
5946  **********************************************************************/
5947 static int
5948 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5949 {
5950 	struct adapter *adapter = (struct adapter *)arg1;
5951 	int error;
5952 	int result;
5953 
5954 	result = -1;
5955 	error = sysctl_handle_int(oidp, &result, 0, req);
5956 
5957 	if (error || !req->newptr)
5958 		return (error);
5959 
5960 	/*
5961 	 * This value will cause a hex dump of the
5962 	 * first 32 16-bit words of the EEPROM to
5963 	 * the screen.
5964 	 */
5965 	if (result == 1)
5966 		em_print_nvm_info(adapter);
5967 
5968 	return (error);
5969 }
5970 
5971 static void
5972 em_print_nvm_info(struct adapter *adapter)
5973 {
5974 	u16	eeprom_data;
5975 	int	i, j, row = 0;
5976 
5977 	/* It's a bit crude, but it gets the job done */
5978 	printf("\nInterface EEPROM Dump:\n");
5979 	printf("Offset\n0x0000  ");
5980 	for (i = 0, j = 0; i < 32; i++, j++) {
5981 		if (j == 8) { /* Make the offset block */
5982 			j = 0; ++row;
5983 			printf("\n0x00%x0  ", row);
5984 		}
5985 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5986 		printf("%04x ", eeprom_data);
5987 	}
5988 	printf("\n");
5989 }
5990 
5991 static int
5992 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5993 {
5994 	struct em_int_delay_info *info;
5995 	struct adapter *adapter;
5996 	u32 regval;
5997 	int error, usecs, ticks;
5998 
5999 	info = (struct em_int_delay_info *)arg1;
6000 	usecs = info->value;
6001 	error = sysctl_handle_int(oidp, &usecs, 0, req);
6002 	if (error != 0 || req->newptr == NULL)
6003 		return (error);
6004 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6005 		return (EINVAL);
6006 	info->value = usecs;
6007 	ticks = EM_USECS_TO_TICKS(usecs);
6008 	if (info->offset == E1000_ITR)	/* ITR units are 256ns, not 1024ns */
6009 		ticks *= 4;
6010 
6011 	adapter = info->adapter;
6012 
6013 	EM_CORE_LOCK(adapter);
6014 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
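	/* The delay timer value occupies the low 16 bits of the register */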
6015 	regval = (regval & ~0xffff) | (ticks & 0xffff);
6016 	/* Handle a few special cases. */
6017 	switch (info->offset) {
6018 	case E1000_RDTR:
6019 		break;
6020 	case E1000_TIDV:
6021 		if (ticks == 0) {
6022 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6023 			/* Don't write 0 into the TIDV register. */
6024 			regval++;
6025 		} else
6026 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6027 		break;
6028 	}
6029 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6030 	EM_CORE_UNLOCK(adapter);
6031 	return (0);
6032 }
6033 
6034 static void
6035 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6036 	const char *description, struct em_int_delay_info *info,
6037 	int offset, int value)
6038 {
6039 	info->adapter = adapter;
6040 	info->offset = offset;
6041 	info->value = value;
6042 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6043 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6044 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6045 	    info, 0, em_sysctl_int_delay, "I", description);
6046 }
6047 
6048 static void
6049 em_set_sysctl_value(struct adapter *adapter, const char *name,
6050 	const char *description, int *limit, int value)
6051 {
6052 	*limit = value;
6053 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6054 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6055 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6056 }
6057 
6058 
6059 /*
6060 ** Set flow control using sysctl:
6061 ** Flow control values:
6062 **      0 - off
6063 **      1 - rx pause
6064 **      2 - tx pause
6065 **      3 - full
6066 */
6067 static int
6068 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6069 {
6070 	struct adapter	*adapter = (struct adapter *) arg1;
6071 	int		error;
6072 	int		input = adapter->fc; /* report the current setting */
6073 
6074 	error = sysctl_handle_int(oidp, &input, 0, req);
6075 
6076 	if ((error) || (req->newptr == NULL))
6077 		return (error);
6078 
6079 	if (input == adapter->fc) /* no change? */
6080 		return (error);
6081 
6082 	switch (input) {
6083 	case e1000_fc_rx_pause:
6084 	case e1000_fc_tx_pause:
6085 	case e1000_fc_full:
6086 	case e1000_fc_none:
6087 		adapter->hw.fc.requested_mode = input;
6088 		adapter->fc = input;
6089 		break;
6090 	default:
6091 		/* Do nothing */
6092 		return (error);
6093 	}
6094 
6095 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6096 	e1000_force_mac_fc(&adapter->hw);
6097 	return (error);
6098 }
6099 
6100 /*
6101 ** Manage Energy Efficient Ethernet:
6102 ** Control values:
6103 **     0 - enabled, 1 - disabled (the value sets eee_disable)
6104 */
6105 static int
6106 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6107 {
6108 	struct adapter *adapter = (struct adapter *) arg1;
6109 	int		error, value;
6110 
6111 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
6112 	error = sysctl_handle_int(oidp, &value, 0, req);
6113 	if (error || req->newptr == NULL)
6114 		return (error);
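	/* A change to EEE takes effect only after reinitializing the adapter */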
6115 	EM_CORE_LOCK(adapter);
6116 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6117 	em_init_locked(adapter);
6118 	EM_CORE_UNLOCK(adapter);
6119 	return (0);
6120 }
6121 
6122 static int
6123 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6124 {
6125 	struct adapter *adapter;
6126 	int error;
6127 	int result;
6128 
6129 	result = -1;
6130 	error = sysctl_handle_int(oidp, &result, 0, req);
6131 
6132 	if (error || !req->newptr)
6133 		return (error);
6134 
6135 	if (result == 1) {
6136 		adapter = (struct adapter *)arg1;
6137 		em_print_debug_info(adapter);
6138 	}
6139 
6140 	return (error);
6141 }
6142 
6143 /*
6144 ** This routine is meant to be fluid; add whatever is
6145 ** needed for debugging a problem.  -jfv
6146 */
6147 static void
6148 em_print_debug_info(struct adapter *adapter)
6149 {
6150 	device_t dev = adapter->dev;
6151 	struct tx_ring *txr = adapter->tx_rings;
6152 	struct rx_ring *rxr = adapter->rx_rings;
6153 
6154 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
6155 		printf("Interface is RUNNING ");
6156 	else
6157 		printf("Interface is NOT RUNNING\n");
6158 
6159 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
6160 		printf("and INACTIVE\n");
6161 	else
6162 		printf("and ACTIVE\n");
6163 
6164 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6165 		device_printf(dev, "TX Queue %d ------\n", i);
6166 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6167 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6168 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6169 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6170 		device_printf(dev, "TX descriptors avail = %d\n",
6171 	    		txr->tx_avail);
6172 		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6173 	    		txr->no_desc_avail);
6174 		device_printf(dev, "RX Queue %d ------\n", i);
6175 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6176 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6177 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6178 		device_printf(dev, "RX discarded packets = %ld\n",
6179 	    		rxr->rx_discarded);
6180 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6181 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6182 	}
6183 }
6184 
6185 #ifdef EM_MULTIQUEUE
6186 /*
6187  * 82574 only:
6188  * Write a new value to the EEPROM increasing the number of MSIX
6189  * vectors from 3 to 5, for proper multiqueue support.
6190  */
6191 static void
6192 em_enable_vectors_82574(struct adapter *adapter)
6193 {
6194 	struct e1000_hw *hw = &adapter->hw;
6195 	device_t dev = adapter->dev;
6196 	u16 edata;
6197 
6198 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6199 	device_printf(dev, "Current MSIX cap word: %#06x\n", edata);
6200 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
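		/* The NVM field encodes (vectors - 1), so 4 means 5 vectors */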
6201 		device_printf(dev, "Writing to eeprom: increasing "
6202 		    "reported MSIX vectors from 3 to 5...\n");
6203 		edata &= ~(EM_NVM_MSIX_N_MASK);
6204 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6205 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6206 		e1000_update_nvm_checksum(hw);
6207 		device_printf(dev, "Writing to eeprom: done\n");
6208 	}
6209 }
6210 #endif
6211 
6212 #ifdef DDB
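/*
 * ddb(4) helpers: "em_reset_dev" reinitializes every attached em(4)
 * device, "em_dump_queue" prints per-queue debug state.
 */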
6213 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6214 {
6215 	devclass_t	dc;
6216 	int max_em;
6217 
6218 	dc = devclass_find("em");
6219 	max_em = devclass_get_maxunit(dc);
6220 
6221 	for (int index = 0; index < max_em; index++) {
6222 		device_t dev;
6223 		dev = devclass_get_device(dc, index);
6224 		if (dev != NULL && device_get_driver(dev) == &em_driver) {
6225 			struct adapter *adapter = device_get_softc(dev);
6226 			EM_CORE_LOCK(adapter);
6227 			em_init_locked(adapter);
6228 			EM_CORE_UNLOCK(adapter);
6229 		}
6230 	}
6231 }
6232 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6233 {
6234 	devclass_t	dc;
6235 	int max_em;
6236 
6237 	dc = devclass_find("em");
6238 	max_em = devclass_get_maxunit(dc);
6239 
6240 	for (int index = 0; index < max_em; index++) {
6241 		device_t dev;
6242 		dev = devclass_get_device(dc, index);
6243 		if (dev != NULL && device_get_driver(dev) == &em_driver)
6244 			em_print_debug_info(device_get_softc(dev));
6245 	}
6247 }
6248 #endif
6249