xref: /freebsd/sys/dev/e1000/if_em.c (revision eb9da1ada8b6b2c74378a5c17029ec5a7fb199e6)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69 
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77 
78 #include <net/if_types.h>
79 #include <net/if_vlan_var.h>
80 
81 #include <netinet/in_systm.h>
82 #include <netinet/in.h>
83 #include <netinet/if_ether.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip6.h>
86 #include <netinet/tcp.h>
87 #include <netinet/udp.h>
88 
89 #include <machine/in_cksum.h>
90 #include <dev/led/led.h>
91 #include <dev/pci/pcivar.h>
92 #include <dev/pci/pcireg.h>
93 
94 #include "e1000_api.h"
95 #include "e1000_82571.h"
96 #include "if_em.h"
97 
98 /*********************************************************************
99  *  Driver version:
100  *********************************************************************/
101 char em_driver_version[] = "7.6.1-k";
102 
103 /*********************************************************************
104  *  PCI Device ID Table
105  *
106  *  Used by em_probe to select which devices to attach to.
107  *  The last field stores an index into em_strings.
108  *  Last entry must be all 0s
109  *
110  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
111  *********************************************************************/
112 
113 static em_vendor_info_t em_vendor_info_array[] =
114 {
115 	/* Intel(R) PRO/1000 Network Connection */
116 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
117 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
118 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
119 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
120 						PCI_ANY_ID, PCI_ANY_ID, 0},
121 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
122 						PCI_ANY_ID, PCI_ANY_ID, 0},
123 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
124 						PCI_ANY_ID, PCI_ANY_ID, 0},
125 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
126 						PCI_ANY_ID, PCI_ANY_ID, 0},
127 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
128 						PCI_ANY_ID, PCI_ANY_ID, 0},
129 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
130 						PCI_ANY_ID, PCI_ANY_ID, 0},
131 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
132 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
133 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
135 
136 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
137 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
138 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
139 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
140 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
141 						PCI_ANY_ID, PCI_ANY_ID, 0},
142 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
143 						PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
145 						PCI_ANY_ID, PCI_ANY_ID, 0},
146 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
147 						PCI_ANY_ID, PCI_ANY_ID, 0},
148 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
150 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
152 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
176 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
177 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
178 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
179 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
180 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
181 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
182 						PCI_ANY_ID, PCI_ANY_ID, 0},
183 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
184 						PCI_ANY_ID, PCI_ANY_ID, 0},
185 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
186 	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
187 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
188 	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
189 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
190 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
191 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
192                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
193 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
194 	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
195 						PCI_ANY_ID, PCI_ANY_ID, 0},
196 	/* required last entry */
197 	{ 0, 0, 0, 0, 0}
198 };
199 
200 /*********************************************************************
201  *  Table of branding strings for all supported NICs.
202  *********************************************************************/
203 
204 static char *em_strings[] = {
205 	"Intel(R) PRO/1000 Network Connection"
206 };
207 
208 /*********************************************************************
209  *  Function prototypes
210  *********************************************************************/
211 static int	em_probe(device_t);
212 static int	em_attach(device_t);
213 static int	em_detach(device_t);
214 static int	em_shutdown(device_t);
215 static int	em_suspend(device_t);
216 static int	em_resume(device_t);
217 #ifdef EM_MULTIQUEUE
218 static int	em_mq_start(if_t, struct mbuf *);
219 static int	em_mq_start_locked(if_t,
220 		    struct tx_ring *);
221 static void	em_qflush(if_t);
222 #else
223 static void	em_start(if_t);
224 static void	em_start_locked(if_t, struct tx_ring *);
225 #endif
226 static int	em_ioctl(if_t, u_long, caddr_t);
227 static uint64_t	em_get_counter(if_t, ift_counter);
228 static void	em_init(void *);
229 static void	em_init_locked(struct adapter *);
230 static void	em_stop(void *);
231 static void	em_media_status(if_t, struct ifmediareq *);
232 static int	em_media_change(if_t);
233 static void	em_identify_hardware(struct adapter *);
234 static int	em_allocate_pci_resources(struct adapter *);
235 static int	em_allocate_legacy(struct adapter *);
236 static int	em_allocate_msix(struct adapter *);
237 static int	em_allocate_queues(struct adapter *);
238 static int	em_setup_msix(struct adapter *);
239 static void	em_free_pci_resources(struct adapter *);
240 static void	em_local_timer(void *);
241 static void	em_reset(struct adapter *);
242 static int	em_setup_interface(device_t, struct adapter *);
243 static void	em_flush_desc_rings(struct adapter *);
244 
245 static void	em_setup_transmit_structures(struct adapter *);
246 static void	em_initialize_transmit_unit(struct adapter *);
247 static int	em_allocate_transmit_buffers(struct tx_ring *);
248 static void	em_free_transmit_structures(struct adapter *);
249 static void	em_free_transmit_buffers(struct tx_ring *);
250 
251 static int	em_setup_receive_structures(struct adapter *);
252 static int	em_allocate_receive_buffers(struct rx_ring *);
253 static void	em_initialize_receive_unit(struct adapter *);
254 static void	em_free_receive_structures(struct adapter *);
255 static void	em_free_receive_buffers(struct rx_ring *);
256 
257 static void	em_enable_intr(struct adapter *);
258 static void	em_disable_intr(struct adapter *);
259 static void	em_update_stats_counters(struct adapter *);
260 static void	em_add_hw_stats(struct adapter *adapter);
261 static void	em_txeof(struct tx_ring *);
262 static bool	em_rxeof(struct rx_ring *, int, int *);
263 #ifndef __NO_STRICT_ALIGNMENT
264 static int	em_fixup_rx(struct rx_ring *);
265 #endif
266 static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
267 		    const struct em_rxbuffer *rxbuf);
268 static void	em_receive_checksum(uint32_t status, struct mbuf *);
269 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
270 		    struct ip *, u32 *, u32 *);
271 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
272 		    struct tcphdr *, u32 *, u32 *);
273 static void	em_set_promisc(struct adapter *);
274 static void	em_disable_promisc(struct adapter *);
275 static void	em_set_multi(struct adapter *);
276 static void	em_update_link_status(struct adapter *);
277 static void	em_refresh_mbufs(struct rx_ring *, int);
278 static void	em_register_vlan(void *, if_t, u16);
279 static void	em_unregister_vlan(void *, if_t, u16);
280 static void	em_setup_vlan_hw_support(struct adapter *);
281 static int	em_xmit(struct tx_ring *, struct mbuf **);
282 static int	em_dma_malloc(struct adapter *, bus_size_t,
283 		    struct em_dma_alloc *, int);
284 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
285 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
286 static void	em_print_nvm_info(struct adapter *);
287 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
288 static void	em_print_debug_info(struct adapter *);
289 static int 	em_is_valid_ether_addr(u8 *);
290 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
291 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
292 		    const char *, struct em_int_delay_info *, int, int);
293 /* Management and WOL Support */
294 static void	em_init_manageability(struct adapter *);
295 static void	em_release_manageability(struct adapter *);
296 static void     em_get_hw_control(struct adapter *);
297 static void     em_release_hw_control(struct adapter *);
298 static void	em_get_wakeup(device_t);
299 static void     em_enable_wakeup(device_t);
300 static int	em_enable_phy_wakeup(struct adapter *);
301 static void	em_led_func(void *, int);
302 static void	em_disable_aspm(struct adapter *);
303 
304 static int	em_irq_fast(void *);
305 
306 /* MSIX handlers */
307 static void	em_msix_tx(void *);
308 static void	em_msix_rx(void *);
309 static void	em_msix_link(void *);
310 static void	em_handle_tx(void *context, int pending);
311 static void	em_handle_rx(void *context, int pending);
312 static void	em_handle_link(void *context, int pending);
313 
314 #ifdef EM_MULTIQUEUE
315 static void	em_enable_vectors_82574(struct adapter *);
316 #endif
317 
318 static void	em_set_sysctl_value(struct adapter *, const char *,
319 		    const char *, int *, int);
320 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
321 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
322 
323 static __inline void em_rx_discard(struct rx_ring *, int);
324 
325 #ifdef DEVICE_POLLING
326 static poll_handler_t em_poll;
327 #endif /* DEVICE_POLLING */
328 
329 /*********************************************************************
330  *  FreeBSD Device Interface Entry Points
331  *********************************************************************/
332 
333 static device_method_t em_methods[] = {
334 	/* Device interface */
335 	DEVMETHOD(device_probe, em_probe),
336 	DEVMETHOD(device_attach, em_attach),
337 	DEVMETHOD(device_detach, em_detach),
338 	DEVMETHOD(device_shutdown, em_shutdown),
339 	DEVMETHOD(device_suspend, em_suspend),
340 	DEVMETHOD(device_resume, em_resume),
341 	DEVMETHOD_END
342 };
343 
344 static driver_t em_driver = {
345 	"em", em_methods, sizeof(struct adapter),
346 };
347 
348 devclass_t em_devclass;
349 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
350 MODULE_DEPEND(em, pci, 1, 1, 1);
351 MODULE_DEPEND(em, ether, 1, 1, 1);
352 #ifdef DEV_NETMAP
353 MODULE_DEPEND(em, netmap, 1, 1, 1);
354 #endif /* DEV_NETMAP */
355 
356 /*********************************************************************
357  *  Tunable default values.
358  *********************************************************************/
359 
360 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
361 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
362 #define M_TSO_LEN			66
363 
364 #define MAX_INTS_PER_SEC	8000
365 #define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
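/*
 * Unit math for the conversions above: the RDTR/TIDV/RADV/TADV delay
 * timers tick in 1.024 usec increments, hence the rounded x1024/1000
 * macros.  The ITR register counts 256 nsec increments, so DEFAULT_ITR
 * works out to 10^9 / (8000 * 256) ~= 488, i.e. at most ~8000
 * interrupts per second.
 */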
366 
367 /* Allow common code without TSO */
368 #ifndef CSUM_TSO
369 #define CSUM_TSO	0
370 #endif
371 
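/*
 * Based on its use in em_xmit() (not shown in this section), this
 * appears to be the size in bytes of the small sentinel descriptor
 * split off the end of a TSO frame as a hardware errata workaround.
 */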
372 #define TSO_WORKAROUND	4
373 
374 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
375 
376 static int em_disable_crc_stripping = 0;
377 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
378     &em_disable_crc_stripping, 0, "Disable CRC Stripping");
379 
380 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
381 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
382 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
383     0, "Default transmit interrupt delay in usecs");
384 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
385     0, "Default receive interrupt delay in usecs");
386 
387 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
388 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
389 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
390     &em_tx_abs_int_delay_dflt, 0,
391     "Default transmit interrupt delay limit in usecs");
392 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
393     &em_rx_abs_int_delay_dflt, 0,
394     "Default receive interrupt delay limit in usecs");
395 
396 static int em_rxd = EM_DEFAULT_RXD;
397 static int em_txd = EM_DEFAULT_TXD;
398 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
399     "Number of receive descriptors per queue");
400 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
401     "Number of transmit descriptors per queue");
402 
403 static int em_smart_pwr_down = FALSE;
404 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
405     0, "Set to true to leave smart power down enabled on newer adapters");
406 
407 /* Controls whether promiscuous also shows bad packets */
408 static int em_debug_sbp = FALSE;
409 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
410     "Show bad packets in promiscuous mode");
411 
412 static int em_enable_msix = TRUE;
413 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
414     "Enable MSI-X interrupts");
415 
416 #ifdef EM_MULTIQUEUE
417 static int em_num_queues = 1;
418 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
419     "82574 only: Number of queues to configure, 0 indicates autoconfigure");
420 #endif
421 
422 /*
423 ** Global variable storing the last CPU used when binding queues
424 ** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and advances
425 ** each time a queue is bound to a CPU.
426 */
427 static int em_last_bind_cpu = -1;
428 
429 /* How many packets rxeof tries to clean at a time */
430 static int em_rx_process_limit = 100;
431 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
432     &em_rx_process_limit, 0,
433     "Maximum number of received packets to process "
434     "at a time, -1 means unlimited");
435 
436 /* Energy Efficient Ethernet - default to OFF (1 here means EEE disabled) */
437 static int eee_setting = 1;
438 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
439     "Enable Energy Efficient Ethernet");
440 
441 /* Global used in WOL setup with multiport cards */
442 static int global_quad_port_a = 0;
443 
444 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
445 #include <dev/netmap/if_em_netmap.h>
446 #endif /* DEV_NETMAP */
447 
448 /*********************************************************************
449  *  Device identification routine
450  *
451  *  em_probe determines whether the driver should be loaded for
452  *  a given adapter, based on its PCI vendor/device ID.
453  *
454  *  return BUS_PROBE_DEFAULT on success, positive on failure
455  *********************************************************************/
456 
457 static int
458 em_probe(device_t dev)
459 {
460 	char		adapter_name[60];
461 	uint16_t	pci_vendor_id = 0;
462 	uint16_t	pci_device_id = 0;
463 	uint16_t	pci_subvendor_id = 0;
464 	uint16_t	pci_subdevice_id = 0;
465 	em_vendor_info_t *ent;
466 
467 	INIT_DEBUGOUT("em_probe: begin");
468 
469 	pci_vendor_id = pci_get_vendor(dev);
470 	if (pci_vendor_id != EM_VENDOR_ID)
471 		return (ENXIO);
472 
473 	pci_device_id = pci_get_device(dev);
474 	pci_subvendor_id = pci_get_subvendor(dev);
475 	pci_subdevice_id = pci_get_subdevice(dev);
476 
477 	ent = em_vendor_info_array;
478 	while (ent->vendor_id != 0) {
479 		if ((pci_vendor_id == ent->vendor_id) &&
480 		    (pci_device_id == ent->device_id) &&
481 
482 		    ((pci_subvendor_id == ent->subvendor_id) ||
483 		    (ent->subvendor_id == PCI_ANY_ID)) &&
484 
485 		    ((pci_subdevice_id == ent->subdevice_id) ||
486 		    (ent->subdevice_id == PCI_ANY_ID))) {
487 			sprintf(adapter_name, "%s %s",
488 				em_strings[ent->index],
489 				em_driver_version);
490 			device_set_desc_copy(dev, adapter_name);
491 			return (BUS_PROBE_DEFAULT);
492 		}
493 		ent++;
494 	}
495 
496 	return (ENXIO);
497 }
498 
499 /*********************************************************************
500  *  Device initialization routine
501  *
502  *  The attach entry point is called when the driver is being loaded.
503  *  This routine identifies the type of hardware, allocates all resources
504  *  and initializes the hardware.
505  *
506  *  return 0 on success, positive on failure
507  *********************************************************************/
508 
509 static int
510 em_attach(device_t dev)
511 {
512 	struct adapter	*adapter;
513 	struct e1000_hw	*hw;
514 	int		error = 0;
515 
516 	INIT_DEBUGOUT("em_attach: begin");
517 
518 	if (resource_disabled("em", device_get_unit(dev))) {
519 		device_printf(dev, "Disabled by device hint\n");
520 		return (ENXIO);
521 	}
522 
523 	adapter = device_get_softc(dev);
524 	adapter->dev = adapter->osdep.dev = dev;
525 	hw = &adapter->hw;
526 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
527 
528 	/* SYSCTL stuff */
529 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
530 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
531 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
532 	    em_sysctl_nvm_info, "I", "NVM Information");
533 
534 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
535 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
536 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
537 	    em_sysctl_debug_info, "I", "Debug Information");
538 
539 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
540 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
541 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
542 	    em_set_flowcntl, "I", "Flow Control");
543 
544 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
545 
546 	/* Determine hardware and mac info */
547 	em_identify_hardware(adapter);
548 
549 	/* Setup PCI resources */
550 	if (em_allocate_pci_resources(adapter)) {
551 		device_printf(dev, "Allocation of PCI resources failed\n");
552 		error = ENXIO;
553 		goto err_pci;
554 	}
555 
556 	/*
557 	** For ICH8 and family we need to
558 	** map the flash memory, and this
559 	** must happen after the MAC is
560 	** identified
561 	*/
562 	if ((hw->mac.type == e1000_ich8lan) ||
563 	    (hw->mac.type == e1000_ich9lan) ||
564 	    (hw->mac.type == e1000_ich10lan) ||
565 	    (hw->mac.type == e1000_pchlan) ||
566 	    (hw->mac.type == e1000_pch2lan) ||
567 	    (hw->mac.type == e1000_pch_lpt)) {
568 		int rid = EM_BAR_TYPE_FLASH;
569 		adapter->flash = bus_alloc_resource_any(dev,
570 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
571 		if (adapter->flash == NULL) {
572 			device_printf(dev, "Mapping of Flash failed\n");
573 			error = ENXIO;
574 			goto err_pci;
575 		}
576 		/* This is used in the shared code */
577 		hw->flash_address = (u8 *)adapter->flash;
578 		adapter->osdep.flash_bus_space_tag =
579 		    rman_get_bustag(adapter->flash);
580 		adapter->osdep.flash_bus_space_handle =
581 		    rman_get_bushandle(adapter->flash);
582 	}
583 	/*
584 	** In the new SPT device the flash is not a
585 	** separate BAR; rather, it lives within BAR0,
586 	** so use the same tag and an offset handle for the
587 	** FLASH read/write macros in the shared code.
588 	*/
589 	else if (hw->mac.type == e1000_pch_spt) {
590 		adapter->osdep.flash_bus_space_tag =
591 		    adapter->osdep.mem_bus_space_tag;
592 		adapter->osdep.flash_bus_space_handle =
593 		    adapter->osdep.mem_bus_space_handle
594 		    + E1000_FLASH_BASE_ADDR;
595 	}
596 
597 	/* Do Shared Code initialization */
598 	error = e1000_setup_init_funcs(hw, TRUE);
599 	if (error) {
600 		device_printf(dev, "Setup of Shared code failed, error %d\n",
601 		    error);
602 		error = ENXIO;
603 		goto err_pci;
604 	}
605 
606 	/*
607 	 * Setup MSI/X or MSI if PCI Express
608 	 */
609 	adapter->msix = em_setup_msix(adapter);
610 
611 	e1000_get_bus_info(hw);
612 
613 	/* Set up some sysctls for the tunable interrupt delays */
614 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
615 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
616 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
617 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
618 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
619 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
620 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
621 	    "receive interrupt delay limit in usecs",
622 	    &adapter->rx_abs_int_delay,
623 	    E1000_REGISTER(hw, E1000_RADV),
624 	    em_rx_abs_int_delay_dflt);
625 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
626 	    "transmit interrupt delay limit in usecs",
627 	    &adapter->tx_abs_int_delay,
628 	    E1000_REGISTER(hw, E1000_TADV),
629 	    em_tx_abs_int_delay_dflt);
630 	em_add_int_delay_sysctl(adapter, "itr",
631 	    "interrupt delay limit in usecs/4",
632 	    &adapter->tx_itr,
633 	    E1000_REGISTER(hw, E1000_ITR),
634 	    DEFAULT_ITR);
635 
636 	/* Sysctl for limiting the amount of work done in the taskqueue */
637 	em_set_sysctl_value(adapter, "rx_processing_limit",
638 	    "max number of rx packets to process", &adapter->rx_process_limit,
639 	    em_rx_process_limit);
640 
641 	/*
642 	 * Validate the number of transmit and receive descriptors.  The
643 	 * count must not exceed the hardware maximum, and the ring size
644 	 * in bytes must be a multiple of EM_DBA_ALIGN.
645 	 */
646 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
647 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
648 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
649 		    EM_DEFAULT_TXD, em_txd);
650 		adapter->num_tx_desc = EM_DEFAULT_TXD;
651 	} else
652 		adapter->num_tx_desc = em_txd;
653 
654 	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
655 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
656 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
657 		    EM_DEFAULT_RXD, em_rxd);
658 		adapter->num_rx_desc = EM_DEFAULT_RXD;
659 	} else
660 		adapter->num_rx_desc = em_rxd;
661 
662 	hw->mac.autoneg = DO_AUTO_NEG;
663 	hw->phy.autoneg_wait_to_complete = FALSE;
664 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
665 
666 	/* Copper options */
667 	if (hw->phy.media_type == e1000_media_type_copper) {
668 		hw->phy.mdix = AUTO_ALL_MODES;
669 		hw->phy.disable_polarity_correction = FALSE;
670 		hw->phy.ms_type = EM_MASTER_SLAVE;
671 	}
672 
673 	/*
674 	 * Set the frame limits assuming
675 	 * standard ethernet sized frames.
676 	 */
677 	adapter->hw.mac.max_frame_size =
678 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
679 
680 	/*
681 	 * This controls when hardware reports transmit completion
682 	 * status.
683 	 */
684 	hw->mac.report_tx_early = 1;
685 
686 	/*
687 	** Get queue/ring memory
688 	*/
689 	if (em_allocate_queues(adapter)) {
690 		error = ENOMEM;
691 		goto err_pci;
692 	}
693 
694 	/* Allocate multicast array memory. */
695 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
696 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
697 	if (adapter->mta == NULL) {
698 		device_printf(dev, "Can not allocate multicast setup array\n");
699 		error = ENOMEM;
700 		goto err_late;
701 	}
702 
703 	/* Check SOL/IDER usage */
704 	if (e1000_check_reset_block(hw))
705 		device_printf(dev, "PHY reset is blocked"
706 		    " due to SOL/IDER session.\n");
707 
708 	/* Sysctl for setting Energy Efficient Ethernet */
709 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
710 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
711 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
712 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
713 	    adapter, 0, em_sysctl_eee, "I",
714 	    "Disable Energy Efficient Ethernet");
715 
716 	/*
717 	** Start from a known state; this is
718 	** important for reliably reading the
719 	** NVM and MAC address afterwards.
720 	*/
721 	e1000_reset_hw(hw);
722 
723 
724 	/* Make sure we have a good EEPROM before we read from it */
725 	if (e1000_validate_nvm_checksum(hw) < 0) {
726 		/*
727 		** Some PCI-E parts fail the first check due to
728 		** the link being in a sleep state; call it again.
729 		** If it fails a second time, it's a real issue.
730 		*/
731 		if (e1000_validate_nvm_checksum(hw) < 0) {
732 			device_printf(dev,
733 			    "The EEPROM Checksum Is Not Valid\n");
734 			error = EIO;
735 			goto err_late;
736 		}
737 	}
738 
739 	/* Copy the permanent MAC address out of the EEPROM */
740 	if (e1000_read_mac_addr(hw) < 0) {
741 		device_printf(dev, "EEPROM read error while reading MAC"
742 		    " address\n");
743 		error = EIO;
744 		goto err_late;
745 	}
746 
747 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
748 		device_printf(dev, "Invalid MAC address\n");
749 		error = EIO;
750 		goto err_late;
751 	}
752 
753 	/* Disable ULP support */
754 	e1000_disable_ulp_lpt_lp(hw, TRUE);
755 
756 	/*
757 	**  Do interrupt configuration
758 	*/
759 	if (adapter->msix > 1) /* Do MSIX */
760 		error = em_allocate_msix(adapter);
761 	else  /* MSI or Legacy */
762 		error = em_allocate_legacy(adapter);
763 	if (error)
764 		goto err_late;
765 
766 	/*
767 	 * Get Wake-on-Lan and Management info for later use
768 	 */
769 	em_get_wakeup(dev);
770 
771 	/* Setup OS specific network interface */
772 	if (em_setup_interface(dev, adapter) != 0)
773 		goto err_late;
774 
775 	em_reset(adapter);
776 
777 	/* Initialize statistics */
778 	em_update_stats_counters(adapter);
779 
780 	hw->mac.get_link_status = 1;
781 	em_update_link_status(adapter);
782 
783 	/* Register for VLAN events */
784 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
785 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
786 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
787 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
788 
789 	em_add_hw_stats(adapter);
790 
791 	/* Non-AMT based hardware can now take control from firmware */
792 	if (adapter->has_manage && !adapter->has_amt)
793 		em_get_hw_control(adapter);
794 
795 	/* Tell the stack that the interface is not active */
796 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
797 
798 	adapter->led_dev = led_create(em_led_func, adapter,
799 	    device_get_nameunit(dev));
800 #ifdef DEV_NETMAP
801 	em_netmap_attach(adapter);
802 #endif /* DEV_NETMAP */
803 
804 	INIT_DEBUGOUT("em_attach: end");
805 
806 	return (0);
807 
808 err_late:
809 	em_free_transmit_structures(adapter);
810 	em_free_receive_structures(adapter);
811 	em_release_hw_control(adapter);
812 	if (adapter->ifp != NULL)
813 		if_free(adapter->ifp);
814 err_pci:
815 	em_free_pci_resources(adapter);
816 	free(adapter->mta, M_DEVBUF);
817 	EM_CORE_LOCK_DESTROY(adapter);
818 
819 	return (error);
820 }
821 
822 /*********************************************************************
823  *  Device removal routine
824  *
825  *  The detach entry point is called when the driver is being removed.
826  *  This routine stops the adapter and deallocates all the resources
827  *  that were allocated for driver operation.
828  *
829  *  return 0 on success, positive on failure
830  *********************************************************************/
831 
832 static int
833 em_detach(device_t dev)
834 {
835 	struct adapter	*adapter = device_get_softc(dev);
836 	if_t ifp = adapter->ifp;
837 
838 	INIT_DEBUGOUT("em_detach: begin");
839 
840 	/* Make sure VLANS are not using driver */
841 	if (if_vlantrunkinuse(ifp)) {
842 		device_printf(dev,"Vlan in use, detach first\n");
843 		return (EBUSY);
844 	}
845 
846 #ifdef DEVICE_POLLING
847 	if (if_getcapenable(ifp) & IFCAP_POLLING)
848 		ether_poll_deregister(ifp);
849 #endif
850 
851 	if (adapter->led_dev != NULL)
852 		led_destroy(adapter->led_dev);
853 
854 	EM_CORE_LOCK(adapter);
855 	adapter->in_detach = 1;
856 	em_stop(adapter);
857 	EM_CORE_UNLOCK(adapter);
858 	EM_CORE_LOCK_DESTROY(adapter);
859 
860 	e1000_phy_hw_reset(&adapter->hw);
861 
862 	em_release_manageability(adapter);
863 	em_release_hw_control(adapter);
864 
865 	/* Unregister VLAN events */
866 	if (adapter->vlan_attach != NULL)
867 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
868 	if (adapter->vlan_detach != NULL)
869 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
870 
871 	ether_ifdetach(adapter->ifp);
872 	callout_drain(&adapter->timer);
873 
874 #ifdef DEV_NETMAP
875 	netmap_detach(ifp);
876 #endif /* DEV_NETMAP */
877 
878 	em_free_pci_resources(adapter);
879 	bus_generic_detach(dev);
880 	if_free(ifp);
881 
882 	em_free_transmit_structures(adapter);
883 	em_free_receive_structures(adapter);
884 
885 	em_release_hw_control(adapter);
886 	free(adapter->mta, M_DEVBUF);
887 
888 	return (0);
889 }
890 
891 /*********************************************************************
892  *
893  *  Shutdown entry point
894  *
895  **********************************************************************/
896 
897 static int
898 em_shutdown(device_t dev)
899 {
900 	return em_suspend(dev);
901 }
902 
903 /*
904  * Suspend/resume device methods.
905  */
906 static int
907 em_suspend(device_t dev)
908 {
909 	struct adapter *adapter = device_get_softc(dev);
910 
911 	EM_CORE_LOCK(adapter);
912 
913 	em_release_manageability(adapter);
914 	em_release_hw_control(adapter);
915 	em_enable_wakeup(dev);
916 
917 	EM_CORE_UNLOCK(adapter);
918 
919 	return bus_generic_suspend(dev);
920 }
921 
922 static int
923 em_resume(device_t dev)
924 {
925 	struct adapter *adapter = device_get_softc(dev);
926 	struct tx_ring	*txr = adapter->tx_rings;
927 	if_t ifp = adapter->ifp;
928 
929 	EM_CORE_LOCK(adapter);
930 	if (adapter->hw.mac.type == e1000_pch2lan)
931 		e1000_resume_workarounds_pchlan(&adapter->hw);
932 	em_init_locked(adapter);
933 	em_init_manageability(adapter);
934 
935 	if ((if_getflags(ifp) & IFF_UP) &&
936 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
937 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
938 			EM_TX_LOCK(txr);
939 #ifdef EM_MULTIQUEUE
940 			if (!drbr_empty(ifp, txr->br))
941 				em_mq_start_locked(ifp, txr);
942 #else
943 			if (!if_sendq_empty(ifp))
944 				em_start_locked(ifp, txr);
945 #endif
946 			EM_TX_UNLOCK(txr);
947 		}
948 	}
949 	EM_CORE_UNLOCK(adapter);
950 
951 	return bus_generic_resume(dev);
952 }
953 
954 
955 #ifndef EM_MULTIQUEUE
956 static void
957 em_start_locked(if_t ifp, struct tx_ring *txr)
958 {
959 	struct adapter	*adapter = if_getsoftc(ifp);
960 	struct mbuf	*m_head;
961 
962 	EM_TX_LOCK_ASSERT(txr);
963 
964 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
965 	    IFF_DRV_RUNNING)
966 		return;
967 
968 	if (!adapter->link_active)
969 		return;
970 
971 	while (!if_sendq_empty(ifp)) {
972 		/* Call cleanup if the number of free TX descriptors is low */
973 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
974 			em_txeof(txr);
975 		if (txr->tx_avail < EM_MAX_SCATTER) {
976 			if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
977 			break;
978 		}
979 		m_head = if_dequeue(ifp);
980 		if (m_head == NULL)
981 			break;
982 		/*
983 		 *  Encapsulation can modify our pointer, and/or make it
984 		 *  NULL on failure.  In that event, we can't requeue.
985 		 */
986 		if (em_xmit(txr, &m_head)) {
987 			if (m_head == NULL)
988 				break;
989 			if_sendq_prepend(ifp, m_head);
990 			break;
991 		}
992 
993 		/* Mark the queue as having work */
994 		if (txr->busy == EM_TX_IDLE)
995 			txr->busy = EM_TX_BUSY;
996 
997 		/* Send a copy of the frame to the BPF listener */
998 		ETHER_BPF_MTAP(ifp, m_head);
999 
1000 	}
1001 
1002 	return;
1003 }
1004 
1005 static void
1006 em_start(if_t ifp)
1007 {
1008 	struct adapter	*adapter = if_getsoftc(ifp);
1009 	struct tx_ring	*txr = adapter->tx_rings;
1010 
1011 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1012 		EM_TX_LOCK(txr);
1013 		em_start_locked(ifp, txr);
1014 		EM_TX_UNLOCK(txr);
1015 	}
1016 	return;
1017 }
1018 #else /* EM_MULTIQUEUE */
1019 /*********************************************************************
1020  *  Multiqueue Transmit routines
1021  *
1022  *  em_mq_start is called by the stack to initiate a transmit.
1023  *  However, if the ring is busy the driver can queue the request
1024  *  rather than do an immediate send.  It is this queuing, rather
1025  *  than the multiple tx queues alone, that is this driver's advantage.
1026  **********************************************************************/
1027 /*
1028 ** Multiqueue capable stack interface
1029 */
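/*
** As the code below shows: when the mbuf carries a flow hash it selects
** the ring (flowid % num_queues); otherwise the sending CPU picks one.
** The packet is always enqueued to the buf_ring first; if the TX lock
** is busy the taskqueue drains it later, so this path never blocks.
*/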
1030 static int
1031 em_mq_start(if_t ifp, struct mbuf *m)
1032 {
1033 	struct adapter	*adapter = if_getsoftc(ifp);
1034 	struct tx_ring	*txr = adapter->tx_rings;
1035 	unsigned int	i, error;
1036 
1037 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1038 		i = m->m_pkthdr.flowid % adapter->num_queues;
1039 	else
1040 		i = curcpu % adapter->num_queues;
1041 
1042 	txr = &adapter->tx_rings[i];
1043 
1044 	error = drbr_enqueue(ifp, txr->br, m);
1045 	if (error)
1046 		return (error);
1047 
1048 	if (EM_TX_TRYLOCK(txr)) {
1049 		em_mq_start_locked(ifp, txr);
1050 		EM_TX_UNLOCK(txr);
1051 	} else
1052 		taskqueue_enqueue(txr->tq, &txr->tx_task);
1053 
1054 	return (0);
1055 }
1056 
1057 static int
1058 em_mq_start_locked(if_t ifp, struct tx_ring *txr)
1059 {
1060 	struct adapter  *adapter = txr->adapter;
1061 	struct mbuf	*next;
1062 	int		err = 0, enq = 0;
1063 
1064 	EM_TX_LOCK_ASSERT(txr);
1065 
1066 	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
1067 	    adapter->link_active == 0) {
1068 		return (ENETDOWN);
1069 	}
1070 
1071 	/* Process the queue */
1072 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1073 		if ((err = em_xmit(txr, &next)) != 0) {
1074 			if (next == NULL) {
1075 				/* It was freed, move forward */
1076 				drbr_advance(ifp, txr->br);
1077 			} else {
1078 				/*
1079 				 * Still have one left, it may not be
1080 				 * the same since the transmit function
1081 				 * may have changed it.
1082 				 */
1083 				drbr_putback(ifp, txr->br, next);
1084 			}
1085 			break;
1086 		}
1087 		drbr_advance(ifp, txr->br);
1088 		enq++;
1089 		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
1090 		if (next->m_flags & M_MCAST)
1091 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1092 		ETHER_BPF_MTAP(ifp, next);
1093 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
1094 			break;
1095 	}
1096 
1097 	/* Mark the queue as having work */
1098 	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1099 		txr->busy = EM_TX_BUSY;
1100 
1101 	if (txr->tx_avail < EM_MAX_SCATTER)
1102 		em_txeof(txr);
1103 	if (txr->tx_avail < EM_MAX_SCATTER) {
1104 		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
1105 	}
1106 	return (err);
1107 }
1108 
1109 /*
1110 ** Flush all ring buffers
1111 */
1112 static void
1113 em_qflush(if_t ifp)
1114 {
1115 	struct adapter  *adapter = if_getsoftc(ifp);
1116 	struct tx_ring  *txr = adapter->tx_rings;
1117 	struct mbuf     *m;
1118 
1119 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1120 		EM_TX_LOCK(txr);
1121 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1122 			m_freem(m);
1123 		EM_TX_UNLOCK(txr);
1124 	}
1125 	if_qflush(ifp);
1126 }
1127 #endif /* EM_MULTIQUEUE */
1128 
1129 /*********************************************************************
1130  *  Ioctl entry point
1131  *
1132  *  em_ioctl is called when the user wants to configure the
1133  *  interface.
1134  *
1135  *  return 0 on success, positive on failure
1136  **********************************************************************/
1137 
1138 static int
1139 em_ioctl(if_t ifp, u_long command, caddr_t data)
1140 {
1141 	struct adapter	*adapter = if_getsoftc(ifp);
1142 	struct ifreq	*ifr = (struct ifreq *)data;
1143 #if defined(INET) || defined(INET6)
1144 	struct ifaddr	*ifa = (struct ifaddr *)data;
1145 #endif
1146 	bool		avoid_reset = FALSE;
1147 	int		error = 0;
1148 
1149 	if (adapter->in_detach)
1150 		return (error);
1151 
1152 	switch (command) {
1153 	case SIOCSIFADDR:
1154 #ifdef INET
1155 		if (ifa->ifa_addr->sa_family == AF_INET)
1156 			avoid_reset = TRUE;
1157 #endif
1158 #ifdef INET6
1159 		if (ifa->ifa_addr->sa_family == AF_INET6)
1160 			avoid_reset = TRUE;
1161 #endif
1162 		/*
1163 		** Calling init results in link renegotiation,
1164 		** so we avoid doing it when possible.
1165 		*/
1166 		if (avoid_reset) {
1167 			if_setflagbits(ifp,IFF_UP,0);
1168 			if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
1169 				em_init(adapter);
1170 #ifdef INET
1171 			if (!(if_getflags(ifp) & IFF_NOARP))
1172 				arp_ifinit(ifp, ifa);
1173 #endif
1174 		} else
1175 			error = ether_ioctl(ifp, command, data);
1176 		break;
1177 	case SIOCSIFMTU:
1178 	    {
1179 		int max_frame_size;
1180 
1181 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1182 
1183 		EM_CORE_LOCK(adapter);
1184 		switch (adapter->hw.mac.type) {
1185 		case e1000_82571:
1186 		case e1000_82572:
1187 		case e1000_ich9lan:
1188 		case e1000_ich10lan:
1189 		case e1000_pch2lan:
1190 		case e1000_pch_lpt:
1191 		case e1000_pch_spt:
1192 		case e1000_82574:
1193 		case e1000_82583:
1194 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1195 			max_frame_size = 9234;
1196 			break;
1197 		case e1000_pchlan:
1198 			max_frame_size = 4096;
1199 			break;
1200 			/* Adapters that do not support jumbo frames */
1201 		case e1000_ich8lan:
1202 			max_frame_size = ETHER_MAX_LEN;
1203 			break;
1204 		default:
1205 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1206 		}
1207 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1208 		    ETHER_CRC_LEN) {
1209 			EM_CORE_UNLOCK(adapter);
1210 			error = EINVAL;
1211 			break;
1212 		}
1213 
1214 		if_setmtu(ifp, ifr->ifr_mtu);
1215 		adapter->hw.mac.max_frame_size =
1216 		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
1217 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1218 			em_init_locked(adapter);
1219 		EM_CORE_UNLOCK(adapter);
1220 		break;
1221 	    }
1222 	case SIOCSIFFLAGS:
1223 		IOCTL_DEBUGOUT("ioctl rcv'd:\
1224 		    SIOCSIFFLAGS (Set Interface Flags)");
1225 		EM_CORE_LOCK(adapter);
1226 		if (if_getflags(ifp) & IFF_UP) {
1227 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1228 				if ((if_getflags(ifp) ^ adapter->if_flags) &
1229 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1230 					em_disable_promisc(adapter);
1231 					em_set_promisc(adapter);
1232 				}
1233 			} else
1234 				em_init_locked(adapter);
1235 		} else
1236 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1237 				em_stop(adapter);
1238 		adapter->if_flags = if_getflags(ifp);
1239 		EM_CORE_UNLOCK(adapter);
1240 		break;
1241 	case SIOCADDMULTI:
1242 	case SIOCDELMULTI:
1243 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1244 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1245 			EM_CORE_LOCK(adapter);
1246 			em_disable_intr(adapter);
1247 			em_set_multi(adapter);
1248 #ifdef DEVICE_POLLING
1249 			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
1250 #endif
1251 				em_enable_intr(adapter);
1252 			EM_CORE_UNLOCK(adapter);
1253 		}
1254 		break;
1255 	case SIOCSIFMEDIA:
1256 		/* Check SOL/IDER usage */
1257 		EM_CORE_LOCK(adapter);
1258 		if (e1000_check_reset_block(&adapter->hw)) {
1259 			EM_CORE_UNLOCK(adapter);
1260 			device_printf(adapter->dev, "Media change is"
1261 			    " blocked due to SOL/IDER session.\n");
1262 			break;
1263 		}
1264 		EM_CORE_UNLOCK(adapter);
1265 		/* falls thru */
1266 	case SIOCGIFMEDIA:
1267 		IOCTL_DEBUGOUT("ioctl rcv'd: \
1268 		    SIOCxIFMEDIA (Get/Set Interface Media)");
1269 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1270 		break;
1271 	case SIOCSIFCAP:
1272 	    {
1273 		int mask, reinit;
1274 
1275 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1276 		reinit = 0;
1277 		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1278 #ifdef DEVICE_POLLING
1279 		if (mask & IFCAP_POLLING) {
1280 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1281 				error = ether_poll_register(em_poll, ifp);
1282 				if (error)
1283 					return (error);
1284 				EM_CORE_LOCK(adapter);
1285 				em_disable_intr(adapter);
1286 				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
1287 				EM_CORE_UNLOCK(adapter);
1288 			} else {
1289 				error = ether_poll_deregister(ifp);
1290 				/* Enable interrupt even in error case */
1291 				EM_CORE_LOCK(adapter);
1292 				em_enable_intr(adapter);
1293 				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
1294 				EM_CORE_UNLOCK(adapter);
1295 			}
1296 		}
1297 #endif
1298 		if (mask & IFCAP_HWCSUM) {
1299 			if_togglecapenable(ifp,IFCAP_HWCSUM);
1300 			reinit = 1;
1301 		}
1302 		if (mask & IFCAP_TSO4) {
1303 			if_togglecapenable(ifp,IFCAP_TSO4);
1304 			reinit = 1;
1305 		}
1306 		if (mask & IFCAP_VLAN_HWTAGGING) {
1307 			if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
1308 			reinit = 1;
1309 		}
1310 		if (mask & IFCAP_VLAN_HWFILTER) {
1311 			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1312 			reinit = 1;
1313 		}
1314 		if (mask & IFCAP_VLAN_HWTSO) {
1315 			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1316 			reinit = 1;
1317 		}
1318 		if ((mask & IFCAP_WOL) &&
1319 		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
1320 			if (mask & IFCAP_WOL_MCAST)
1321 				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
1322 			if (mask & IFCAP_WOL_MAGIC)
1323 				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
1324 		}
1325 		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1326 			em_init(adapter);
1327 		if_vlancap(ifp);
1328 		break;
1329 	    }
1330 
1331 	default:
1332 		error = ether_ioctl(ifp, command, data);
1333 		break;
1334 	}
1335 
1336 	return (error);
1337 }
1338 
1339 
1340 /*********************************************************************
1341  *  Init entry point
1342  *
1343  *  This routine is used in two ways. It is used by the stack as
1344  *  init entry point in network interface structure. It is also used
1345  *  by the driver as a hw/sw initialization routine to get to a
1346  *  consistent state.
1347  *
1348  *  return 0 on success, positive on failure
1349  **********************************************************************/
1350 
1351 static void
1352 em_init_locked(struct adapter *adapter)
1353 {
1354 	if_t ifp = adapter->ifp;
1355 	device_t	dev = adapter->dev;
1356 
1357 	INIT_DEBUGOUT("em_init: begin");
1358 
1359 	EM_CORE_LOCK_ASSERT(adapter);
1360 
1361 	em_disable_intr(adapter);
1362 	callout_stop(&adapter->timer);
1363 
1364 	/* Get the latest MAC address; the user may have set an LAA */
1365 	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1366 	    ETHER_ADDR_LEN);
1367 
1368 	/* Put the address into the Receive Address Array */
1369 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1370 
1371 	/*
1372 	 * With the 82571 adapter, RAR[0] may be overwritten
1373 	 * when the other port is reset.  We make a duplicate
1374 	 * in RAR[14] for that eventuality, which assures
1375 	 * the interface continues to function.
1376 	 */
1377 	if (adapter->hw.mac.type == e1000_82571) {
1378 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1379 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1380 		    E1000_RAR_ENTRIES - 1);
1381 	}
1382 
1383 	/* Initialize the hardware */
1384 	em_reset(adapter);
1385 	em_update_link_status(adapter);
1386 
1387 	/* Setup VLAN support, basic and offload if available */
1388 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1389 
1390 	/* Set hardware offload abilities */
1391 	if_clearhwassist(ifp);
1392 	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1393 		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1394 	/*
1395 	** There have proven to be problems with TSO when not
1396 	** at full gigabit speed, so disable the assist automatically
1397 	** when at lower speeds.  -jfv
1398 	*/
1399 	if (if_getcapenable(ifp) & IFCAP_TSO4) {
1400 		if (adapter->link_speed == SPEED_1000)
1401 			if_sethwassistbits(ifp, CSUM_TSO, 0);
1402 	}
1403 
1404 	/* Configure for OS presence */
1405 	em_init_manageability(adapter);
1406 
1407 	/* Prepare transmit descriptors and buffers */
1408 	em_setup_transmit_structures(adapter);
1409 	em_initialize_transmit_unit(adapter);
1410 
1411 	/* Setup Multicast table */
1412 	em_set_multi(adapter);
1413 
1414 	/*
1415 	** Figure out the desired mbuf
1416 	** pool for doing jumbos
1417 	*/
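	/*
	** (MJUMPAGESIZE is PAGE_SIZE, typically 4KB, so mid-size jumbo
	** frames avoid tying up 9KB clusters.)
	*/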
1418 	if (adapter->hw.mac.max_frame_size <= 2048)
1419 		adapter->rx_mbuf_sz = MCLBYTES;
1420 	else if (adapter->hw.mac.max_frame_size <= 4096)
1421 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1422 	else
1423 		adapter->rx_mbuf_sz = MJUM9BYTES;
1424 
1425 	/* Prepare receive descriptors and buffers */
1426 	if (em_setup_receive_structures(adapter)) {
1427 		device_printf(dev, "Could not setup receive structures\n");
1428 		em_stop(adapter);
1429 		return;
1430 	}
1431 	em_initialize_receive_unit(adapter);
1432 
1433 	/* Use real VLAN Filter support? */
1434 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1435 		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1436 			/* Use real VLAN Filter support */
1437 			em_setup_vlan_hw_support(adapter);
1438 		else {
1439 			u32 ctrl;
1440 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1441 			ctrl |= E1000_CTRL_VME;
1442 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1443 		}
1444 	}
1445 
1446 	/* Don't lose promiscuous settings */
1447 	em_set_promisc(adapter);
1448 
1449 	/* Set the interface as ACTIVE */
1450 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1451 
1452 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1453 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1454 
1455 	/* MSI/X configuration for 82574 */
1456 	if (adapter->hw.mac.type == e1000_82574) {
1457 		int tmp;
1458 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1459 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1460 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1461 		/* Set the IVAR - interrupt vector routing. */
1462 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1463 	}
1464 
1465 #ifdef DEVICE_POLLING
1466 	/*
1467 	 * Only enable interrupts if we are not polling; make sure
1468 	 * they are off otherwise.
1469 	 */
1470 	if (if_getcapenable(ifp) & IFCAP_POLLING)
1471 		em_disable_intr(adapter);
1472 	else
1473 #endif /* DEVICE_POLLING */
1474 		em_enable_intr(adapter);
1475 
1476 	/* AMT based hardware can now take control from firmware */
1477 	if (adapter->has_manage && adapter->has_amt)
1478 		em_get_hw_control(adapter);
1479 }
1480 
1481 static void
1482 em_init(void *arg)
1483 {
1484 	struct adapter *adapter = arg;
1485 
1486 	EM_CORE_LOCK(adapter);
1487 	em_init_locked(adapter);
1488 	EM_CORE_UNLOCK(adapter);
1489 }
1490 
1491 
1492 #ifdef DEVICE_POLLING
1493 /*********************************************************************
1494  *
1495  *  Legacy polling routine: note this only works with single queue
1496  *
1497  *********************************************************************/
1498 static int
1499 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1500 {
1501 	struct adapter *adapter = if_getsoftc(ifp);
1502 	struct tx_ring	*txr = adapter->tx_rings;
1503 	struct rx_ring	*rxr = adapter->rx_rings;
1504 	u32		reg_icr;
1505 	int		rx_done;
1506 
1507 	EM_CORE_LOCK(adapter);
1508 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1509 		EM_CORE_UNLOCK(adapter);
1510 		return (0);
1511 	}
1512 
1513 	if (cmd == POLL_AND_CHECK_STATUS) {
1514 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1515 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1516 			callout_stop(&adapter->timer);
1517 			adapter->hw.mac.get_link_status = 1;
1518 			em_update_link_status(adapter);
1519 			callout_reset(&adapter->timer, hz,
1520 			    em_local_timer, adapter);
1521 		}
1522 	}
1523 	EM_CORE_UNLOCK(adapter);
1524 
1525 	em_rxeof(rxr, count, &rx_done);
1526 
1527 	EM_TX_LOCK(txr);
1528 	em_txeof(txr);
1529 #ifdef EM_MULTIQUEUE
1530 	if (!drbr_empty(ifp, txr->br))
1531 		em_mq_start_locked(ifp, txr);
1532 #else
1533 	if (!if_sendq_empty(ifp))
1534 		em_start_locked(ifp, txr);
1535 #endif
1536 	EM_TX_UNLOCK(txr);
1537 
1538 	return (rx_done);
1539 }
1540 #endif /* DEVICE_POLLING */
1541 
1542 
1543 /*********************************************************************
1544  *
1545  *  Fast Legacy/MSI Combined Interrupt Service routine
1546  *
1547  *********************************************************************/
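/*
 * This is registered as a filter (fast) handler, so it runs in primary
 * interrupt context: it only reads/acks ICR, masks further interrupts,
 * and defers the real work to the taskqueue (em_handle_que).
 */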
1548 static int
1549 em_irq_fast(void *arg)
1550 {
1551 	struct adapter	*adapter = arg;
1552 	if_t ifp;
1553 	u32		reg_icr;
1554 
1555 	ifp = adapter->ifp;
1556 
1557 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1558 
1559 	/* Hot eject?  An all-ones read means the device is gone. */
1560 	if (reg_icr == 0xffffffff)
1561 		return FILTER_STRAY;
1562 
1563 	/* Definitely not our interrupt.  */
1564 	if (reg_icr == 0x0)
1565 		return FILTER_STRAY;
1566 
1567 	/*
1568 	 * Starting with the 82571 chip, bit 31 should be used to
1569 	 * determine whether the interrupt belongs to us.
1570 	 */
1571 	if (adapter->hw.mac.type >= e1000_82571 &&
1572 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1573 		return FILTER_STRAY;
1574 
1575 	em_disable_intr(adapter);
1576 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1577 
1578 	/* Link status change */
1579 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1580 		adapter->hw.mac.get_link_status = 1;
1581 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1582 	}
1583 
1584 	if (reg_icr & E1000_ICR_RXO)
1585 		adapter->rx_overruns++;
1586 	return FILTER_HANDLED;
1587 }
1588 
1589 /* Combined RX/TX handler, used by Legacy and MSI */
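/*
 * Runs with the interrupt still masked by em_irq_fast().  If em_rxeof()
 * reports more work pending, the task requeues itself and returns
 * without re-enabling the interrupt; it is only unmasked once the
 * backlog is fully drained.
 */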
1590 static void
1591 em_handle_que(void *context, int pending)
1592 {
1593 	struct adapter	*adapter = context;
1594 	if_t ifp = adapter->ifp;
1595 	struct tx_ring	*txr = adapter->tx_rings;
1596 	struct rx_ring	*rxr = adapter->rx_rings;
1597 
1598 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1599 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1600 
1601 		EM_TX_LOCK(txr);
1602 		em_txeof(txr);
1603 #ifdef EM_MULTIQUEUE
1604 		if (!drbr_empty(ifp, txr->br))
1605 			em_mq_start_locked(ifp, txr);
1606 #else
1607 		if (!if_sendq_empty(ifp))
1608 			em_start_locked(ifp, txr);
1609 #endif
1610 		EM_TX_UNLOCK(txr);
1611 		if (more) {
1612 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1613 			return;
1614 		}
1615 	}
1616 
1617 	em_enable_intr(adapter);
1618 	return;
1619 }
1620 
1621 
1622 /*********************************************************************
1623  *
1624  *  MSIX Interrupt Service Routines
1625  *
1626  **********************************************************************/
1627 static void
1628 em_msix_tx(void *arg)
1629 {
1630 	struct tx_ring *txr = arg;
1631 	struct adapter *adapter = txr->adapter;
1632 	if_t ifp = adapter->ifp;
1633 
1634 	++txr->tx_irq;
1635 	EM_TX_LOCK(txr);
1636 	em_txeof(txr);
1637 #ifdef EM_MULTIQUEUE
1638 	if (!drbr_empty(ifp, txr->br))
1639 		em_mq_start_locked(ifp, txr);
1640 #else
1641 	if (!if_sendq_empty(ifp))
1642 		em_start_locked(ifp, txr);
1643 #endif
1644 
1645 	/* Reenable this interrupt */
1646 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1647 	EM_TX_UNLOCK(txr);
1648 	return;
1649 }
1650 
1651 /*********************************************************************
1652  *
1653  *  MSIX RX Interrupt Service routine
1654  *
1655  **********************************************************************/
1656 
1657 static void
1658 em_msix_rx(void *arg)
1659 {
1660 	struct rx_ring	*rxr = arg;
1661 	struct adapter	*adapter = rxr->adapter;
1662 	bool		more;
1663 
1664 	++rxr->rx_irq;
1665 	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1666 		return;
1667 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1668 	if (more)
1669 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1670 	else {
1671 		/* Reenable this interrupt */
1672 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1673 	}
1674 	return;
1675 }
1676 
1677 /*********************************************************************
1678  *
1679  *  MSIX Link Fast Interrupt Service routine
1680  *
1681  **********************************************************************/
1682 static void
1683 em_msix_link(void *arg)
1684 {
1685 	struct adapter	*adapter = arg;
1686 	u32		reg_icr;
1687 
1688 	++adapter->link_irq;
1689 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1690 
1691 	if (reg_icr & E1000_ICR_RXO)
1692 		adapter->rx_overruns++;
1693 
1694 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1695 		adapter->hw.mac.get_link_status = 1;
1696 		em_handle_link(adapter, 0);
1697 	} else
1698 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1699 		    EM_MSIX_LINK | E1000_IMS_LSC);
1700 	/*
1701 	** Because we must read the ICR for this interrupt,
1702 	** autoclear may clear other pending causes as well;
1703 	** for this reason we simply raise a soft interrupt
1704 	** for all these vectors.
1705 	*/
1706 	if (reg_icr) {
1707 		E1000_WRITE_REG(&adapter->hw,
1708 			E1000_ICS, adapter->ims);
1709 	}
1710 	return;
1711 }
1712 
1713 static void
1714 em_handle_rx(void *context, int pending)
1715 {
1716 	struct rx_ring	*rxr = context;
1717 	struct adapter	*adapter = rxr->adapter;
1718 	bool		more;
1719 
1720 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1721 	if (more)
1722 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1723 	else {
1724 		/* Reenable this interrupt */
1725 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1726 	}
1727 }
1728 
1729 static void
1730 em_handle_tx(void *context, int pending)
1731 {
1732 	struct tx_ring	*txr = context;
1733 	struct adapter	*adapter = txr->adapter;
1734 	if_t ifp = adapter->ifp;
1735 
1736 	EM_TX_LOCK(txr);
1737 	em_txeof(txr);
1738 #ifdef EM_MULTIQUEUE
1739 	if (!drbr_empty(ifp, txr->br))
1740 		em_mq_start_locked(ifp, txr);
1741 #else
1742 	if (!if_sendq_empty(ifp))
1743 		em_start_locked(ifp, txr);
1744 #endif
1745 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1746 	EM_TX_UNLOCK(txr);
1747 }
1748 
1749 static void
1750 em_handle_link(void *context, int pending)
1751 {
1752 	struct adapter	*adapter = context;
1753 	struct tx_ring	*txr = adapter->tx_rings;
1754 	if_t ifp = adapter->ifp;
1755 
1756 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1757 		return;
1758 
1759 	EM_CORE_LOCK(adapter);
1760 	callout_stop(&adapter->timer);
1761 	em_update_link_status(adapter);
1762 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1763 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1764 	    EM_MSIX_LINK | E1000_IMS_LSC);
1765 	if (adapter->link_active) {
1766 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1767 			EM_TX_LOCK(txr);
1768 #ifdef EM_MULTIQUEUE
1769 			if (!drbr_empty(ifp, txr->br))
1770 				em_mq_start_locked(ifp, txr);
1771 #else
1772 			if (!if_sendq_empty(ifp))
1773 				em_start_locked(ifp, txr);
1774 #endif
1775 			EM_TX_UNLOCK(txr);
1776 		}
1777 	}
1778 	EM_CORE_UNLOCK(adapter);
1779 }
1780 
1781 
1782 /*********************************************************************
1783  *
1784  *  Media Ioctl callback
1785  *
1786  *  This routine is called whenever the user queries the status of
1787  *  the interface using ifconfig.
1788  *
1789  **********************************************************************/
1790 static void
1791 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1792 {
1793 	struct adapter *adapter = if_getsoftc(ifp);
1794 	u_char fiber_type = IFM_1000_SX;
1795 
1796 	INIT_DEBUGOUT("em_media_status: begin");
1797 
1798 	EM_CORE_LOCK(adapter);
1799 	em_update_link_status(adapter);
1800 
1801 	ifmr->ifm_status = IFM_AVALID;
1802 	ifmr->ifm_active = IFM_ETHER;
1803 
1804 	if (!adapter->link_active) {
1805 		EM_CORE_UNLOCK(adapter);
1806 		return;
1807 	}
1808 
1809 	ifmr->ifm_status |= IFM_ACTIVE;
1810 
1811 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1812 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1813 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1814 	} else {
1815 		switch (adapter->link_speed) {
1816 		case 10:
1817 			ifmr->ifm_active |= IFM_10_T;
1818 			break;
1819 		case 100:
1820 			ifmr->ifm_active |= IFM_100_TX;
1821 			break;
1822 		case 1000:
1823 			ifmr->ifm_active |= IFM_1000_T;
1824 			break;
1825 		}
1826 		if (adapter->link_duplex == FULL_DUPLEX)
1827 			ifmr->ifm_active |= IFM_FDX;
1828 		else
1829 			ifmr->ifm_active |= IFM_HDX;
1830 	}
1831 	EM_CORE_UNLOCK(adapter);
1832 }
1833 
1834 /*********************************************************************
1835  *
1836  *  Media Ioctl callback
1837  *
1838  *  This routine is called when the user changes speed/duplex using
1839  *  media/mediaopt option with ifconfig.
1840  *
1841  **********************************************************************/
1842 static int
1843 em_media_change(if_t ifp)
1844 {
1845 	struct adapter *adapter = if_getsoftc(ifp);
1846 	struct ifmedia  *ifm = &adapter->media;
1847 
1848 	INIT_DEBUGOUT("em_media_change: begin");
1849 
1850 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1851 		return (EINVAL);
1852 
1853 	EM_CORE_LOCK(adapter);
1854 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1855 	case IFM_AUTO:
1856 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1857 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1858 		break;
1859 	case IFM_1000_LX:
1860 	case IFM_1000_SX:
1861 	case IFM_1000_T:
1862 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1863 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1864 		break;
1865 	case IFM_100_TX:
1866 		adapter->hw.mac.autoneg = FALSE;
1867 		adapter->hw.phy.autoneg_advertised = 0;
1868 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1869 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1870 		else
1871 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1872 		break;
1873 	case IFM_10_T:
1874 		adapter->hw.mac.autoneg = FALSE;
1875 		adapter->hw.phy.autoneg_advertised = 0;
1876 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1877 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1878 		else
1879 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1880 		break;
1881 	default:
1882 		device_printf(adapter->dev, "Unsupported media type\n");
1883 	}
1884 
1885 	em_init_locked(adapter);
1886 	EM_CORE_UNLOCK(adapter);
1887 
1888 	return (0);
1889 }
1890 
1891 /*********************************************************************
1892  *
1893  *  This routine maps the mbufs to tx descriptors.
1894  *
1895  *  return 0 on success, positive on failure
1896  **********************************************************************/
1897 
1898 static int
1899 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1900 {
1901 	struct adapter		*adapter = txr->adapter;
1902 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1903 	bus_dmamap_t		map;
1904 	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
1905 	struct e1000_tx_desc	*ctxd = NULL;
1906 	struct mbuf		*m_head;
1907 	struct ether_header	*eh;
1908 	struct ip		*ip = NULL;
1909 	struct tcphdr		*tp = NULL;
1910 	u32			txd_upper = 0, txd_lower = 0;
1911 	int			ip_off, poff;
1912 	int			nsegs, i, j, first, last = 0;
1913 	int			error;
1914 	bool			do_tso, tso_desc, remap = TRUE;
1915 
1916 	m_head = *m_headp;
1917 	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1918 	tso_desc = FALSE;
1919 	ip_off = poff = 0;
1920 
1921 	/*
1922 	 * Intel recommends that the entire IP/TCP header reside in a single
1923 	 * buffer. If multiple descriptors are used to describe the IP and
1924 	 * TCP header, each descriptor should describe one or more
1925 	 * complete headers; descriptors referencing only parts of headers
1926 	 * are not supported. If all layer headers are not coalesced into
1927 	 * a single buffer, each buffer should not cross a 4KB boundary,
1928 	 * or be larger than the maximum read request size.
1929 	 * The controller also requires modifying the IP/TCP header to make
1930 	 * TSO work, so we first get a writable mbuf chain, then coalesce the
1931 	 * ethernet/IP/TCP header into a single buffer to meet the
1932 	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1933 	 * offloading, which has similar restrictions.
1934 	 */
1935 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1936 		if (do_tso || (m_head->m_next != NULL &&
1937 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1938 			if (M_WRITABLE(*m_headp) == 0) {
1939 				m_head = m_dup(*m_headp, M_NOWAIT);
1940 				m_freem(*m_headp);
1941 				if (m_head == NULL) {
1942 					*m_headp = NULL;
1943 					return (ENOBUFS);
1944 				}
1945 				*m_headp = m_head;
1946 			}
1947 		}
1948 		/*
1949 		 * XXX
1950 		 * Assume IPv4, we don't have TSO/checksum offload support
1951 		 * for IPv6 yet.
1952 		 */
1953 		ip_off = sizeof(struct ether_header);
1954 		if (m_head->m_len < ip_off) {
1955 			m_head = m_pullup(m_head, ip_off);
1956 			if (m_head == NULL) {
1957 				*m_headp = NULL;
1958 				return (ENOBUFS);
1959 			}
1960 		}
1961 		eh = mtod(m_head, struct ether_header *);
1962 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1963 			ip_off = sizeof(struct ether_vlan_header);
1964 			if (m_head->m_len < ip_off) {
1965 				m_head = m_pullup(m_head, ip_off);
1966 				if (m_head == NULL) {
1967 					*m_headp = NULL;
1968 					return (ENOBUFS);
1969 				}
1970 			}
1971 		}
1972 		if (m_head->m_len < ip_off + sizeof(struct ip)) {
1973 			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1974 			if (m_head == NULL) {
1975 				*m_headp = NULL;
1976 				return (ENOBUFS);
1977 			}
1978 		}
1979 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1980 		poff = ip_off + (ip->ip_hl << 2);
1981 
1982 		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1983 			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1984 				m_head = m_pullup(m_head, poff +
1985 				    sizeof(struct tcphdr));
1986 				if (m_head == NULL) {
1987 					*m_headp = NULL;
1988 					return (ENOBUFS);
1989 				}
1990 			}
1991 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1992 			/*
1993 			 * TSO workaround: pull TSO_WORKAROUND (4) more
1994 			 * bytes of payload into the header mbuf.
1995 			 */
1996 			if (m_head->m_len < poff + (tp->th_off << 2)) {
1997 				m_head = m_pullup(m_head, poff +
1998 				                 (tp->th_off << 2) +
1999 				                 TSO_WORKAROUND);
2000 				if (m_head == NULL) {
2001 					*m_headp = NULL;
2002 					return (ENOBUFS);
2003 				}
2004 			}
2005 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2006 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2007 			if (do_tso) {
2008 				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2009 				                  (ip->ip_hl << 2) +
2010 				                  (tp->th_off << 2));
2011 				ip->ip_sum = 0;
2012 				/*
2013 				 * The pseudo TCP checksum does not include
2014 				 * the TCP payload length, so the driver must
2015 				 * recompute here the checksum the hardware
2016 				 * expects to see. This is in adherence to
2017 				 * Microsoft's Large Send specification.
2018 				 */
2019 				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2020 				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2021 			}
2022 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2023 			if (m_head->m_len < poff + sizeof(struct udphdr)) {
2024 				m_head = m_pullup(m_head, poff +
2025 				    sizeof(struct udphdr));
2026 				if (m_head == NULL) {
2027 					*m_headp = NULL;
2028 					return (ENOBUFS);
2029 				}
2030 			}
2031 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2032 		}
2033 		*m_headp = m_head;
2034 	}
2035 
2036 	/*
2037 	 * Map the packet for DMA
2038 	 *
2039 	 * Capture the first descriptor index,
2040 	 * this descriptor will have the index
2041 	 * of the EOP which is the only one that
2042 	 * now gets a DONE bit writeback.
2043 	 */
2044 	first = txr->next_avail_desc;
2045 	tx_buffer = &txr->tx_buffers[first];
2046 	tx_buffer_mapped = tx_buffer;
2047 	map = tx_buffer->map;
2048 
2049 retry:
2050 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2051 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2052 
2053 	/*
2054 	 * There are two types of errors we can (try) to handle:
2055 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2056 	 *   out of segments.  Defragment the mbuf chain and try again.
2057 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2058 	 *   at this point in time.  Defer sending and try again later.
2059 	 * All other errors, in particular EINVAL, are fatal and prevent the
2060 	 * mbuf chain from ever going through.  Drop it and report error.
2061 	 */
2062 	if (error == EFBIG && remap) {
2063 		struct mbuf *m;
2064 
2065 		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2066 		if (m == NULL) {
2067 			adapter->mbuf_defrag_failed++;
2068 			m_freem(*m_headp);
2069 			*m_headp = NULL;
2070 			return (ENOBUFS);
2071 		}
2072 		*m_headp = m;
2073 
2074 		/* Try it again, but only once */
2075 		remap = FALSE;
2076 		goto retry;
2077 	} else if (error != 0) {
2078 		adapter->no_tx_dma_setup++;
2079 		m_freem(*m_headp);
2080 		*m_headp = NULL;
2081 		return (error);
2082 	}
2083 
2084 	/*
2085 	 * TSO Hardware workaround, if this packet is not
2086 	 * TSO, and is only a single descriptor long, and
2087 	 * it follows a TSO burst, then we need to add a
2088 	 * sentinel descriptor to prevent premature writeback.
2089 	 */
2090 	if ((!do_tso) && (txr->tx_tso == TRUE)) {
2091 		if (nsegs == 1)
2092 			tso_desc = TRUE;
2093 		txr->tx_tso = FALSE;
2094 	}
2095 
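	/*
	 * Leave EM_MAX_SCATTER descriptors of headroom beyond this
	 * packet's segments; this also keeps room for the extra
	 * sentinel descriptor the TSO workaround below may consume.
	 */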
2096 	if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2097 		txr->no_desc_avail++;
2098 		bus_dmamap_unload(txr->txtag, map);
2099 		return (ENOBUFS);
2100 	}
2101 	m_head = *m_headp;
2102 
2103 	/* Do hardware assists */
2104 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2105 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2106 		    &txd_upper, &txd_lower);
2107 		/* we need to make a final sentinel transmit desc */
2108 		tso_desc = TRUE;
2109 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2110 		em_transmit_checksum_setup(txr, m_head,
2111 		    ip_off, ip, &txd_upper, &txd_lower);
2112 
2113 	if (m_head->m_flags & M_VLANTAG) {
2114 		/* Set the vlan id. */
2115 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2116 		/* Tell hardware to add tag */
2117 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2118 	}
2119 
2120 	i = txr->next_avail_desc;
2121 
2122 	/* Set up our transmit descriptors */
2123 	for (j = 0; j < nsegs; j++) {
2124 		bus_size_t seg_len;
2125 		bus_addr_t seg_addr;
2126 
2127 		tx_buffer = &txr->tx_buffers[i];
2128 		ctxd = &txr->tx_base[i];
2129 		seg_addr = segs[j].ds_addr;
2130 		seg_len  = segs[j].ds_len;
2131 		/*
2132 		** TSO Workaround:
2133 		** If this is the last descriptor, we want to
2134 		** split it so we have a small final sentinel
2135 		*/
2136 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2137 			seg_len -= TSO_WORKAROUND;
2138 			ctxd->buffer_addr = htole64(seg_addr);
2139 			ctxd->lower.data = htole32(
2140 				adapter->txd_cmd | txd_lower | seg_len);
2141 			ctxd->upper.data = htole32(txd_upper);
2142 			if (++i == adapter->num_tx_desc)
2143 				i = 0;
2144 
2145 			/* Now make the sentinel */
2146 			txr->tx_avail--;
2147 			ctxd = &txr->tx_base[i];
2148 			tx_buffer = &txr->tx_buffers[i];
2149 			ctxd->buffer_addr =
2150 			    htole64(seg_addr + seg_len);
2151 			ctxd->lower.data = htole32(
2152 			    adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2153 			ctxd->upper.data =
2154 			    htole32(txd_upper);
2155 			last = i;
2156 			if (++i == adapter->num_tx_desc)
2157 				i = 0;
2158 		} else {
2159 			ctxd->buffer_addr = htole64(seg_addr);
2160 			ctxd->lower.data = htole32(
2161 			    adapter->txd_cmd | txd_lower | seg_len);
2162 			ctxd->upper.data = htole32(txd_upper);
2163 			last = i;
2164 			if (++i == adapter->num_tx_desc)
2165 				i = 0;
2166 		}
2167 		tx_buffer->m_head = NULL;
2168 		tx_buffer->next_eop = -1;
2169 	}
2170 
2171 	txr->next_avail_desc = i;
2172 	txr->tx_avail -= nsegs;
2173 
2174 	tx_buffer->m_head = m_head;
2175 	/*
2176 	** Here we swap the map so the last descriptor,
2177 	** which gets the completion interrupt has the
2178 	** real map, and the first descriptor gets the
2179 	** unused map from this descriptor.
2180 	*/
2181 	tx_buffer_mapped->map = tx_buffer->map;
2182 	tx_buffer->map = map;
2183 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2184 
2185 	/*
2186 	 * The last descriptor of the packet
2187 	 * needs End Of Packet (EOP)
2188 	 * and Report Status (RS).
2189 	 */
2190 	ctxd->lower.data |=
2191 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2192 	/*
2193 	 * Keep track in the first buffer which
2194 	 * descriptor will be written back
2195 	 */
2196 	tx_buffer = &txr->tx_buffers[first];
2197 	tx_buffer->next_eop = last;
2198 
2199 	/*
2200 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2201 	 * that this frame is available to transmit.
2202 	 */
2203 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2204 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2205 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2206 
2207 	return (0);
2208 }
2209 
2210 static void
2211 em_set_promisc(struct adapter *adapter)
2212 {
2213 	if_t ifp = adapter->ifp;
2214 	u32		reg_rctl;
2215 
2216 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2217 
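	/* The demotion path (clearing UPE/MPE/SBP) is handled by em_disable_promisc() */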
2218 	if (if_getflags(ifp) & IFF_PROMISC) {
2219 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2220 		/* Turn this on if you want to see bad packets */
2221 		if (em_debug_sbp)
2222 			reg_rctl |= E1000_RCTL_SBP;
2223 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2224 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2225 		reg_rctl |= E1000_RCTL_MPE;
2226 		reg_rctl &= ~E1000_RCTL_UPE;
2227 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2228 	}
2229 }
2230 
2231 static void
2232 em_disable_promisc(struct adapter *adapter)
2233 {
2234 	if_t		ifp = adapter->ifp;
2235 	u32		reg_rctl;
2236 	int		mcnt = 0;
2237 
2238 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2239 	reg_rctl &=  (~E1000_RCTL_UPE);
2240 	if (if_getflags(ifp) & IFF_ALLMULTI)
2241 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2242 	else
2243 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2244 	/* Don't disable if in MAX groups */
2245 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2246 		reg_rctl &=  (~E1000_RCTL_MPE);
2247 	reg_rctl &=  (~E1000_RCTL_SBP);
2248 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2249 }
2250 
2251 
2252 /*********************************************************************
2253  *  Multicast Update
2254  *
2255  *  This routine is called whenever multicast address list is updated.
2256  *
2257  **********************************************************************/
2258 
2259 static void
2260 em_set_multi(struct adapter *adapter)
2261 {
2262 	if_t ifp = adapter->ifp;
2263 	u32 reg_rctl = 0;
2264 	u8  *mta; /* Multicast array memory */
2265 	int mcnt = 0;
2266 
2267 	IOCTL_DEBUGOUT("em_set_multi: begin");
2268 
2269 	mta = adapter->mta;
2270 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2271 
2272 	if (adapter->hw.mac.type == e1000_82542 &&
2273 	    adapter->hw.revision_id == E1000_REVISION_2) {
2274 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2275 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2276 			e1000_pci_clear_mwi(&adapter->hw);
2277 		reg_rctl |= E1000_RCTL_RST;
2278 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2279 		msec_delay(5);
2280 	}
2281 
2282 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2283 
2284 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2285 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2286 		reg_rctl |= E1000_RCTL_MPE;
2287 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2288 	} else
2289 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2290 
2291 	if (adapter->hw.mac.type == e1000_82542 &&
2292 	    adapter->hw.revision_id == E1000_REVISION_2) {
2293 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2294 		reg_rctl &= ~E1000_RCTL_RST;
2295 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2296 		msec_delay(5);
2297 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2298 			e1000_pci_set_mwi(&adapter->hw);
2299 	}
2300 }
2301 
2302 
2303 /*********************************************************************
2304  *  Timer routine
2305  *
2306  *  This routine checks for link status and updates statistics.
2307  *
2308  **********************************************************************/
2309 
2310 static void
2311 em_local_timer(void *arg)
2312 {
2313 	struct adapter	*adapter = arg;
2314 	if_t ifp = adapter->ifp;
2315 	struct tx_ring	*txr = adapter->tx_rings;
2316 	struct rx_ring	*rxr = adapter->rx_rings;
2317 	u32		trigger = 0;
2318 
2319 	EM_CORE_LOCK_ASSERT(adapter);
2320 
2321 	em_update_link_status(adapter);
2322 	em_update_stats_counters(adapter);
2323 
2324 	/* Reset LAA into RAR[0] on 82571 */
2325 	if ((adapter->hw.mac.type == e1000_82571) &&
2326 	    e1000_get_laa_state_82571(&adapter->hw))
2327 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2328 
2329 	/* Mask to use in the irq trigger */
2330 	if (adapter->msix_mem) {
2331 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2332 			trigger |= rxr->ims;
2333 		rxr = adapter->rx_rings;
2334 	} else
2335 		trigger = E1000_ICS_RXDMT0;
2336 
2337 	/*
2338 	** Check on the state of the TX queue(s); this
2339 	** can be done without the lock because it's read-only
2340 	** and the HUNG state will be static if set.
2341 	*/
2342 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2343 		if (txr->busy == EM_TX_HUNG)
2344 			goto hung;
2345 		if (txr->busy >= EM_TX_MAXTRIES)
2346 			txr->busy = EM_TX_HUNG;
2347 		/* Schedule a TX task if needed */
2348 		if (txr->tx_avail <= EM_MAX_SCATTER)
2349 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2350 	}
2351 
2352 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2353 #ifndef DEVICE_POLLING
2354 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2355 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2356 #endif
2357 	return;
2358 hung:
2359 	/* Looks like we're hung */
2360 	device_printf(adapter->dev, "Watchdog timeout Queue[%d] -- resetting\n",
2361 			txr->me);
2362 	em_print_debug_info(adapter);
2363 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2364 	adapter->watchdog_events++;
2365 	em_init_locked(adapter);
2366 }
2367 
2368 
2369 static void
2370 em_update_link_status(struct adapter *adapter)
2371 {
2372 	struct e1000_hw *hw = &adapter->hw;
2373 	if_t ifp = adapter->ifp;
2374 	device_t dev = adapter->dev;
2375 	struct tx_ring *txr = adapter->tx_rings;
2376 	u32 link_check = 0;
2377 
2378 	/* Get the cached link value or read phy for real */
2379 	switch (hw->phy.media_type) {
2380 	case e1000_media_type_copper:
2381 		if (hw->mac.get_link_status) {
2382 			if (hw->mac.type == e1000_pch_spt)
2383 				msec_delay(50);
2384 			/* Do the work to read phy */
2385 			e1000_check_for_link(hw);
2386 			link_check = !hw->mac.get_link_status;
2387 			if (link_check) /* ESB2 fix */
2388 				e1000_cfg_on_link_up(hw);
2389 		} else
2390 			link_check = TRUE;
2391 		break;
2392 	case e1000_media_type_fiber:
2393 		e1000_check_for_link(hw);
2394 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2395                                  E1000_STATUS_LU);
2396 		break;
2397 	case e1000_media_type_internal_serdes:
2398 		e1000_check_for_link(hw);
2399 		link_check = adapter->hw.mac.serdes_has_link;
2400 		break;
2401 	default:
2402 	case e1000_media_type_unknown:
2403 		break;
2404 	}
2405 
2406 	/* Now check for a transition */
2407 	if (link_check && (adapter->link_active == 0)) {
2408 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2409 		    &adapter->link_duplex);
2410 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2411 		if ((adapter->link_speed != SPEED_1000) &&
2412 		    ((hw->mac.type == e1000_82571) ||
2413 		    (hw->mac.type == e1000_82572))) {
2414 			int tarc0;
2415 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2416 			tarc0 &= ~TARC_SPEED_MODE_BIT;
2417 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2418 		}
2419 		if (bootverbose)
2420 			device_printf(dev, "Link is up %d Mbps %s\n",
2421 			    adapter->link_speed,
2422 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2423 			    "Full Duplex" : "Half Duplex"));
2424 		adapter->link_active = 1;
2425 		adapter->smartspeed = 0;
2426 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2427 		if_link_state_change(ifp, LINK_STATE_UP);
2428 	} else if (!link_check && (adapter->link_active == 1)) {
2429 		if_setbaudrate(ifp, 0);
2430 		adapter->link_speed = 0;
2431 		adapter->link_duplex = 0;
2432 		if (bootverbose)
2433 			device_printf(dev, "Link is Down\n");
2434 		adapter->link_active = 0;
2435 		/* Link down, disable hang detection */
2436 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2437 			txr->busy = EM_TX_IDLE;
2438 		if_link_state_change(ifp, LINK_STATE_DOWN);
2439 	}
2440 }
2441 
2442 /*********************************************************************
2443  *
2444  *  This routine disables all traffic on the adapter by issuing a
2445  *  global reset on the MAC and deallocates TX/RX buffers.
2446  *
2447  *  This routine should always be called with BOTH the CORE
2448  *  and TX locks.
2449  **********************************************************************/
2450 
2451 static void
2452 em_stop(void *arg)
2453 {
2454 	struct adapter	*adapter = arg;
2455 	if_t ifp = adapter->ifp;
2456 	struct tx_ring	*txr = adapter->tx_rings;
2457 
2458 	EM_CORE_LOCK_ASSERT(adapter);
2459 
2460 	INIT_DEBUGOUT("em_stop: begin");
2461 
2462 	em_disable_intr(adapter);
2463 	callout_stop(&adapter->timer);
2464 
2465 	/* Tell the stack that the interface is no longer active */
2466 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2467 
2468 	/* Disarm Hang Detection. */
2469 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2470 		EM_TX_LOCK(txr);
2471 		txr->busy = EM_TX_IDLE;
2472 		EM_TX_UNLOCK(txr);
2473 	}
2474 
2475 	/* I219 needs some special flushing to avoid hangs */
2476 	if (adapter->hw.mac.type == e1000_pch_spt)
2477 		em_flush_desc_rings(adapter);
2478 
2479 	e1000_reset_hw(&adapter->hw);
2480 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2481 
2482 	e1000_led_off(&adapter->hw);
2483 	e1000_cleanup_led(&adapter->hw);
2484 }
2485 
2486 
2487 /*********************************************************************
2488  *
2489  *  Determine hardware revision.
2490  *
2491  **********************************************************************/
2492 static void
2493 em_identify_hardware(struct adapter *adapter)
2494 {
2495 	device_t dev = adapter->dev;
2496 
2497 	/* Make sure our PCI config space has the necessary stuff set */
2498 	pci_enable_busmaster(dev);
2499 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2500 
2501 	/* Save off the information about this board */
2502 	adapter->hw.vendor_id = pci_get_vendor(dev);
2503 	adapter->hw.device_id = pci_get_device(dev);
2504 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2505 	adapter->hw.subsystem_vendor_id =
2506 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2507 	adapter->hw.subsystem_device_id =
2508 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2509 
2510 	/* Do Shared Code Init and Setup */
2511 	if (e1000_set_mac_type(&adapter->hw)) {
2512 		device_printf(dev, "Setup init failure\n");
2513 		return;
2514 	}
2515 }
2516 
2517 static int
2518 em_allocate_pci_resources(struct adapter *adapter)
2519 {
2520 	device_t	dev = adapter->dev;
2521 	int		rid;
2522 
2523 	rid = PCIR_BAR(0);
2524 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2525 	    &rid, RF_ACTIVE);
2526 	if (adapter->memory == NULL) {
2527 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2528 		return (ENXIO);
2529 	}
2530 	adapter->osdep.mem_bus_space_tag =
2531 	    rman_get_bustag(adapter->memory);
2532 	adapter->osdep.mem_bus_space_handle =
2533 	    rman_get_bushandle(adapter->memory);
2534 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2535 
2536 	adapter->hw.back = &adapter->osdep;
2537 
2538 	return (0);
2539 }
2540 
2541 /*********************************************************************
2542  *
2543  *  Setup the Legacy or MSI Interrupt handler
2544  *
2545  **********************************************************************/
2546 int
2547 em_allocate_legacy(struct adapter *adapter)
2548 {
2549 	device_t dev = adapter->dev;
2550 	struct tx_ring	*txr = adapter->tx_rings;
2551 	int error, rid = 0;
2552 
2553 	/* Manually turn off all interrupts */
2554 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2555 
2556 	if (adapter->msix == 1) /* using MSI */
2557 		rid = 1;
2558 	/* We allocate a single interrupt resource */
2559 	adapter->res = bus_alloc_resource_any(dev,
2560 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2561 	if (adapter->res == NULL) {
2562 		device_printf(dev, "Unable to allocate bus resource: "
2563 		    "interrupt\n");
2564 		return (ENXIO);
2565 	}
2566 
2567 	/*
2568 	 * Allocate a fast interrupt and the associated
2569 	 * deferred processing contexts.
2570 	 */
2571 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2572 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2573 	    taskqueue_thread_enqueue, &adapter->tq);
2574 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2575 	    device_get_nameunit(adapter->dev));
2576 	/* Use a TX only tasklet for local timer */
2577 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2578 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2579 	    taskqueue_thread_enqueue, &txr->tq);
2580 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2581 	    device_get_nameunit(adapter->dev));
2582 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2583 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2584 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2585 		device_printf(dev, "Failed to register fast interrupt "
2586 			    "handler: %d\n", error);
2587 		taskqueue_free(adapter->tq);
2588 		adapter->tq = NULL;
2589 		return (error);
2590 	}
2591 
2592 	return (0);
2593 }
2594 
2595 /*********************************************************************
2596  *
2597  *  Setup the MSIX Interrupt handlers
2598  *   This is not really Multiqueue, rather
2599  *   it's just separate interrupt vectors
2600  *   for TX, RX, and Link.
2601  *
2602  **********************************************************************/
2603 int
2604 em_allocate_msix(struct adapter *adapter)
2605 {
2606 	device_t	dev = adapter->dev;
2607 	struct		tx_ring *txr = adapter->tx_rings;
2608 	struct		rx_ring *rxr = adapter->rx_rings;
2609 	int		error, rid, vector = 0;
2610 	int		cpu_id = 0;
2611 
2612 
2613 	/* Make sure all interrupts are disabled */
2614 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2615 
2616 	/* First set up ring resources */
2617 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2618 
2619 		/* RX ring */
2620 		rid = vector + 1;
2621 
2622 		rxr->res = bus_alloc_resource_any(dev,
2623 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2624 		if (rxr->res == NULL) {
2625 			device_printf(dev,
2626 			    "Unable to allocate bus resource: "
2627 			    "RX MSIX Interrupt %d\n", i);
2628 			return (ENXIO);
2629 		}
2630 		if ((error = bus_setup_intr(dev, rxr->res,
2631 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2632 		    rxr, &rxr->tag)) != 0) {
2633 			device_printf(dev, "Failed to register RX handler");
2634 			return (error);
2635 		}
2636 #if __FreeBSD_version >= 800504
2637 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2638 #endif
2639 		rxr->msix = vector;
2640 
2641 		if (em_last_bind_cpu < 0)
2642 			em_last_bind_cpu = CPU_FIRST();
2643 		cpu_id = em_last_bind_cpu;
2644 		bus_bind_intr(dev, rxr->res, cpu_id);
2645 
2646 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2647 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2648 		    taskqueue_thread_enqueue, &rxr->tq);
2649 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2650 		    device_get_nameunit(adapter->dev), cpu_id);
2651 		/*
2652 		** Set the bit to enable interrupt
2653 		** in E1000_IMS -- bits 20 and 21
2654 		** are for RX0 and RX1, note this has
2655 		** NOTHING to do with the MSIX vector
2656 		*/
2657 		rxr->ims = 1 << (20 + i);
2658 		adapter->ims |= rxr->ims;
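		/*
		 * IVAR entry for this cause: the MSIX vector number in
		 * the low bits with the valid bit (the 8) set, placed
		 * in the 4-bit field for RX queue i.
		 */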
2659 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2660 
2661 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2662 	}
2663 
2664 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2665 		/* TX ring */
2666 		rid = vector + 1;
2667 		txr->res = bus_alloc_resource_any(dev,
2668 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2669 		if (txr->res == NULL) {
2670 			device_printf(dev,
2671 			    "Unable to allocate bus resource: "
2672 			    "TX MSIX Interrupt %d\n", i);
2673 			return (ENXIO);
2674 		}
2675 		if ((error = bus_setup_intr(dev, txr->res,
2676 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2677 		    txr, &txr->tag)) != 0) {
2678 			device_printf(dev, "Failed to register TX handler");
2679 			return (error);
2680 		}
2681 #if __FreeBSD_version >= 800504
2682 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2683 #endif
2684 		txr->msix = vector;
2685 
2686 		if (em_last_bind_cpu < 0)
2687 			em_last_bind_cpu = CPU_FIRST();
2688 		cpu_id = em_last_bind_cpu;
2689 		bus_bind_intr(dev, txr->res, cpu_id);
2690 
2691 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2692 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2693 		    taskqueue_thread_enqueue, &txr->tq);
2694 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2695 		    device_get_nameunit(adapter->dev), cpu_id);
2696 		/*
2697 		** Set the bit to enable interrupt
2698 		** in E1000_IMS -- bits 22 and 23
2699 		** are for TX0 and TX1, note this has
2700 		** NOTHING to do with the MSIX vector
2701 		*/
2702 		txr->ims = 1 << (22 + i);
2703 		adapter->ims |= txr->ims;
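		/* The TX IVAR fields follow the RX ones, starting at bit 8 */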
2704 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2705 
2706 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2707 	}
2708 
2709 	/* Link interrupt */
2710 	rid = vector + 1;
2711 	adapter->res = bus_alloc_resource_any(dev,
2712 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2713 	if (!adapter->res) {
2714 		device_printf(dev,"Unable to allocate "
2715 		    "bus resource: Link interrupt [%d]\n", rid);
2716 		return (ENXIO);
2717 	}
2718 	/* Set the link handler function */
2719 	error = bus_setup_intr(dev, adapter->res,
2720 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2721 	    em_msix_link, adapter, &adapter->tag);
2722 	if (error) {
2723 		adapter->res = NULL;
2724 		device_printf(dev, "Failed to register LINK handler");
2725 		return (error);
2726 	}
2727 #if __FreeBSD_version >= 800504
2728 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2729 #endif
2730 	adapter->linkvec = vector;
2731 	adapter->ivars |=  (8 | vector) << 16;
2732 	adapter->ivars |= 0x80000000;
2733 
2734 	return (0);
2735 }
2736 
2737 
2738 static void
2739 em_free_pci_resources(struct adapter *adapter)
2740 {
2741 	device_t	dev = adapter->dev;
2742 	struct tx_ring	*txr;
2743 	struct rx_ring	*rxr;
2744 	int		rid;
2745 
2746 
2747 	/*
2748 	** Release all the queue interrupt resources:
2749 	*/
2750 	for (int i = 0; i < adapter->num_queues; i++) {
2751 		txr = &adapter->tx_rings[i];
2752 		/* an early abort? */
2753 		if (txr == NULL)
2754 			break;
2755 		rid = txr->msix + 1;
2756 		if (txr->tag != NULL) {
2757 			bus_teardown_intr(dev, txr->res, txr->tag);
2758 			txr->tag = NULL;
2759 		}
2760 		if (txr->res != NULL)
2761 			bus_release_resource(dev, SYS_RES_IRQ,
2762 			    rid, txr->res);
2763 
2764 		rxr = &adapter->rx_rings[i];
2765 		/* an early abort? */
2766 		if (rxr == NULL)
2767 			break;
2768 		rid = rxr->msix + 1;
2769 		if (rxr->tag != NULL) {
2770 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2771 			rxr->tag = NULL;
2772 		}
2773 		if (rxr->res != NULL)
2774 			bus_release_resource(dev, SYS_RES_IRQ,
2775 			    rid, rxr->res);
2776 	}
2777 
2778 	if (adapter->linkvec) /* we are doing MSIX */
2779 		rid = adapter->linkvec + 1;
2780 	else
2781 		rid = (adapter->msix != 0) ? 1 : 0;
2782 
2783 	if (adapter->tag != NULL) {
2784 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2785 		adapter->tag = NULL;
2786 	}
2787 
2788 	if (adapter->res != NULL)
2789 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2790 
2791 
2792 	if (adapter->msix)
2793 		pci_release_msi(dev);
2794 
2795 	if (adapter->msix_mem != NULL)
2796 		bus_release_resource(dev, SYS_RES_MEMORY,
2797 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2798 
2799 	if (adapter->memory != NULL)
2800 		bus_release_resource(dev, SYS_RES_MEMORY,
2801 		    PCIR_BAR(0), adapter->memory);
2802 
2803 	if (adapter->flash != NULL)
2804 		bus_release_resource(dev, SYS_RES_MEMORY,
2805 		    EM_FLASH, adapter->flash);
2806 }
2807 
2808 /*
2809  * Setup MSI or MSI/X
2810  */
2811 static int
2812 em_setup_msix(struct adapter *adapter)
2813 {
2814 	device_t dev = adapter->dev;
2815 	int val;
2816 
2817 	/* Nearly always going to use one queue */
2818 	adapter->num_queues = 1;
2819 
2820 	/*
2821 	** Try using MSI-X for Hartwell adapters
2822 	*/
2823 	if ((adapter->hw.mac.type == e1000_82574) &&
2824 	    (em_enable_msix == TRUE)) {
2825 #ifdef EM_MULTIQUEUE
2826 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2827 		if (adapter->num_queues > 1)
2828 			em_enable_vectors_82574(adapter);
2829 #endif
2830 		/* Map the MSIX BAR */
2831 		int rid = PCIR_BAR(EM_MSIX_BAR);
2832 		adapter->msix_mem = bus_alloc_resource_any(dev,
2833 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2834 		if (adapter->msix_mem == NULL) {
2835 			/* May not be enabled */
2836 			device_printf(adapter->dev,
2837 			    "Unable to map MSIX table\n");
2838 			goto msi;
2839 		}
2840 		val = pci_msix_count(dev);
2841 
2842 #ifdef EM_MULTIQUEUE
2843 		/* We need 5 vectors in the multiqueue case */
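		/* (two RX, two TX, and one link vector) */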
2844 		if (adapter->num_queues > 1) {
2845 			if (val >= 5)
2846 				val = 5;
2847 			else {
2848 				adapter->num_queues = 1;
2849 				device_printf(adapter->dev,
2850 				    "Insufficient MSIX vectors for >1 queue, "
2851 				    "using single queue...\n");
2852 				goto msix_one;
2853 			}
2854 		} else {
2855 msix_one:
2856 #endif
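			/* A single queue needs 3 vectors: RX, TX, and link */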
2857 			if (val >= 3)
2858 				val = 3;
2859 			else {
2860 				device_printf(adapter->dev,
2861 				    "Insufficient MSIX vectors, using MSI\n");
2862 				goto msi;
2863 			}
2864 #ifdef EM_MULTIQUEUE
2865 		}
2866 #endif
2867 
2868 		if (pci_alloc_msix(dev, &val) == 0) {
2869 			device_printf(adapter->dev,
2870 			    "Using MSIX interrupts "
2871 			    "with %d vectors\n", val);
2872 			return (val);
2873 		}
2874 
2875 		/*
2876 		** If MSIX alloc failed or provided us with
2877 		** less than needed, free and fall through to MSI
2878 		*/
2879 		pci_release_msi(dev);
2880 	}
2881 msi:
2882 	if (adapter->msix_mem != NULL) {
2883 		bus_release_resource(dev, SYS_RES_MEMORY,
2884 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2885 		adapter->msix_mem = NULL;
2886 	}
2887 	val = 1;
2888 	if (pci_alloc_msi(dev, &val) == 0) {
2889 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2890 		return (val);
2891 	}
2892 	/* Should only happen due to manual configuration */
2893 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2894 	return (0);
2895 }
2896 
2897 
2898 /*
2899 ** The three following flush routines are used as a workaround on the
2900 ** I219 client parts, and only for them.
2901 **
2902 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2903 **
2904 ** We want to clear all pending descriptors from the TX ring;
2905 ** clearing happens when the HW reads the descriptors. We assign the
2906 ** ring itself as the data buffer of the dummy descriptor; the data
2907 ** does not matter since we are about to reset the HW.
2908 */
2909 static void
2910 em_flush_tx_ring(struct adapter *adapter)
2911 {
2912 	struct e1000_hw		*hw = &adapter->hw;
2913 	struct tx_ring		*txr = adapter->tx_rings;
2914 	struct e1000_tx_desc	*txd;
2915 	u32			tctl, txd_lower = E1000_TXD_CMD_IFCS;
2916 	u16			size = 512;
2917 
2918 	tctl = E1000_READ_REG(hw, E1000_TCTL);
2919 	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2920 
2921 	txd = &txr->tx_base[txr->next_avail_desc++];
2922 	if (txr->next_avail_desc == adapter->num_tx_desc)
2923 		txr->next_avail_desc = 0;
2924 
2925 	/* Just use the ring as a dummy buffer addr */
2926 	txd->buffer_addr = txr->txdma.dma_paddr;
2927 	txd->lower.data = htole32(txd_lower | size);
2928 	txd->upper.data = 0;
2929 
2930 	/* flush descriptors to memory before notifying the HW */
2931 	wmb();
2932 
2933 	E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2934 	mb();
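	/* give the hardware time to fetch and process the dummy descriptor */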
2935 	usec_delay(250);
2936 }
2937 
2938 /*
2939 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2940 **
2941 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
2942 */
2943 static void
2944 em_flush_rx_ring(struct adapter *adapter)
2945 {
2946 	struct e1000_hw	*hw = &adapter->hw;
2947 	u32		rctl, rxdctl;
2948 
2949 	rctl = E1000_READ_REG(hw, E1000_RCTL);
2950 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2951 	E1000_WRITE_FLUSH(hw);
2952 	usec_delay(150);
2953 
2954 	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2955 	/* zero the lower 14 bits (prefetch and host thresholds) */
2956 	rxdctl &= 0xffffc000;
2957 	/*
2958 	 * update thresholds: prefetch threshold to 31, host threshold to 1
2959 	 * and make sure the granularity is "descriptors" and not "cache lines"
2960 	 */
2961 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2962 	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
2963 
2964 	/* momentarily enable the RX ring for the changes to take effect */
2965 	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2966 	E1000_WRITE_FLUSH(hw);
2967 	usec_delay(150);
2968 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2969 }
2970 
2971 /*
2972 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
2973 **
2974 ** In i219, the descriptor rings must be emptied before resetting the HW
2975 ** or before changing the device state to D3 during runtime (runtime PM).
2976 **
2977 ** Failure to do this will cause the HW to enter a unit hang state which can
2978 ** only be released by a PCI reset of the device.
2979 **
2980 */
2981 static void
2982 em_flush_desc_rings(struct adapter *adapter)
2983 {
2984 	struct e1000_hw	*hw = &adapter->hw;
2985 	device_t	dev = adapter->dev;
2986 	u16		hang_state;
2987 	u32		fext_nvm11, tdlen;
2988 
2989 	/* First, disable MULR fix in FEXTNVM11 */
2990 	fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
2991 	fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
2992 	E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
2993 
2994 	/* do nothing if we're not in faulty state, or if the queue is empty */
2995 	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
2996 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
2997 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
2998 		return;
2999 	em_flush_tx_ring(adapter);
3000 
3001 	/* recheck, maybe the fault is caused by the rx ring */
3002 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3003 	if (hang_state & FLUSH_DESC_REQUIRED)
3004 		em_flush_rx_ring(adapter);
3005 }
3006 
3007 
3008 /*********************************************************************
3009  *
3010  *  Initialize the hardware to a configuration
3011  *  as specified by the adapter structure.
3012  *
3013  **********************************************************************/
3014 static void
3015 em_reset(struct adapter *adapter)
3016 {
3017 	device_t	dev = adapter->dev;
3018 	if_t ifp = adapter->ifp;
3019 	struct e1000_hw	*hw = &adapter->hw;
3020 	u16		rx_buffer_size;
3021 	u32		pba;
3022 
3023 	INIT_DEBUGOUT("em_reset: begin");
3024 
3025 	/* Set up smart power down as default off on newer adapters. */
3026 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3027 	    hw->mac.type == e1000_82572)) {
3028 		u16 phy_tmp = 0;
3029 
3030 		/* Speed up time to link by disabling smart power down. */
3031 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3032 		phy_tmp &= ~IGP02E1000_PM_SPD;
3033 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3034 	}
3035 
3036 	/*
3037 	 * Packet Buffer Allocation (PBA)
3038 	 * Writing PBA sets the receive portion of the buffer;
3039 	 * the remainder is used for the transmit buffer.
3040 	 */
3041 	switch (hw->mac.type) {
3042 	/* Total Packet Buffer on these is 48K */
3043 	case e1000_82571:
3044 	case e1000_82572:
3045 	case e1000_80003es2lan:
3046 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3047 		break;
3048 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3049 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3050 		break;
3051 	case e1000_82574:
3052 	case e1000_82583:
3053 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3054 		break;
3055 	case e1000_ich8lan:
3056 		pba = E1000_PBA_8K;
3057 		break;
3058 	case e1000_ich9lan:
3059 	case e1000_ich10lan:
3060 		/* Boost Receive side for jumbo frames */
3061 		if (adapter->hw.mac.max_frame_size > 4096)
3062 			pba = E1000_PBA_14K;
3063 		else
3064 			pba = E1000_PBA_10K;
3065 		break;
3066 	case e1000_pchlan:
3067 	case e1000_pch2lan:
3068 	case e1000_pch_lpt:
3069 	case e1000_pch_spt:
3070 		pba = E1000_PBA_26K;
3071 		break;
3072 	default:
3073 		if (adapter->hw.mac.max_frame_size > 8192)
3074 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3075 		else
3076 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3077 	}
3078 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3079 
3080 	/*
3081 	 * These parameters control the automatic generation (Tx) and
3082 	 * response (Rx) to Ethernet PAUSE frames.
3083 	 * - High water mark should allow for at least two frames to be
3084 	 *   received after sending an XOFF.
3085 	 * - Low water mark works best when it is very near the high water mark.
3086 	 *   This allows the receiver to restart by sending XON when it has
3087 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
3088 	 *   restart after one full frame is pulled from the buffer. There
3089 	 *   could be several smaller frames in the buffer and if so they will
3090 	 *   not trigger the XON until their total number reduces the buffer
3091 	 *   by 1500.
3092 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3093 	 */
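	/* The low 16 bits of PBA give the RX buffer size in KB; <<10 converts to bytes */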
3094 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3095 	hw->fc.high_water = rx_buffer_size -
3096 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
3097 	hw->fc.low_water = hw->fc.high_water - 1500;
3098 
3099 	if (adapter->fc) /* locally set flow control value? */
3100 		hw->fc.requested_mode = adapter->fc;
3101 	else
3102 		hw->fc.requested_mode = e1000_fc_full;
3103 
3104 	if (hw->mac.type == e1000_80003es2lan)
3105 		hw->fc.pause_time = 0xFFFF;
3106 	else
3107 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
3108 
3109 	hw->fc.send_xon = TRUE;
3110 
3111 	/* Device specific overrides/settings */
3112 	switch (hw->mac.type) {
3113 	case e1000_pchlan:
3114 		/* Workaround: no TX flow ctrl for PCH */
3115 		hw->fc.requested_mode = e1000_fc_rx_pause;
3116 		hw->fc.pause_time = 0xFFFF; /* override */
3117 		if (if_getmtu(ifp) > ETHERMTU) {
3118 			hw->fc.high_water = 0x3500;
3119 			hw->fc.low_water = 0x1500;
3120 		} else {
3121 			hw->fc.high_water = 0x5000;
3122 			hw->fc.low_water = 0x3000;
3123 		}
3124 		hw->fc.refresh_time = 0x1000;
3125 		break;
3126 	case e1000_pch2lan:
3127 	case e1000_pch_lpt:
3128 	case e1000_pch_spt:
3129 		hw->fc.high_water = 0x5C20;
3130 		hw->fc.low_water = 0x5048;
3131 		hw->fc.pause_time = 0x0650;
3132 		hw->fc.refresh_time = 0x0400;
3133 		/* Jumbos need adjusted PBA */
3134 		if (if_getmtu(ifp) > ETHERMTU)
3135 			E1000_WRITE_REG(hw, E1000_PBA, 12);
3136 		else
3137 			E1000_WRITE_REG(hw, E1000_PBA, 26);
3138 		break;
3139 	case e1000_ich9lan:
3140 	case e1000_ich10lan:
3141 		if (if_getmtu(ifp) > ETHERMTU) {
3142 			hw->fc.high_water = 0x2800;
3143 			hw->fc.low_water = hw->fc.high_water - 8;
3144 			break;
3145 		}
3146 		/* else fall thru */
3147 	default:
3148 		if (hw->mac.type == e1000_80003es2lan)
3149 			hw->fc.pause_time = 0xFFFF;
3150 		break;
3151 	}
3152 
3153 	/* I219 needs some special flushing to avoid hangs */
3154 	if (hw->mac.type == e1000_pch_spt)
3155 		em_flush_desc_rings(adapter);
3156 
3157 	/* Issue a global reset */
3158 	e1000_reset_hw(hw);
3159 	E1000_WRITE_REG(hw, E1000_WUC, 0);
3160 	em_disable_aspm(adapter);
3161 	/* and a re-init */
3162 	if (e1000_init_hw(hw) < 0) {
3163 		device_printf(dev, "Hardware Initialization Failed\n");
3164 		return;
3165 	}
3166 
3167 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3168 	e1000_get_phy_info(hw);
3169 	e1000_check_for_link(hw);
3170 	return;
3171 }
3172 
3173 /*********************************************************************
3174  *
3175  *  Setup networking device structure and register an interface.
3176  *
3177  **********************************************************************/
3178 static int
3179 em_setup_interface(device_t dev, struct adapter *adapter)
3180 {
3181 	if_t ifp;
3182 
3183 	INIT_DEBUGOUT("em_setup_interface: begin");
3184 
3185 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3186 	if (ifp == NULL) {
3187 		device_printf(dev, "cannot allocate ifnet structure\n");
3188 		return (-1);
3189 	}
3190 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3191 	if_setdev(ifp, dev);
3192 	if_setinitfn(ifp, em_init);
3193 	if_setsoftc(ifp, adapter);
3194 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3195 	if_setioctlfn(ifp, em_ioctl);
3196 	if_setgetcounterfn(ifp, em_get_counter);
3197 
3198 	/* TSO parameters */
3199 	ifp->if_hw_tsomax = IP_MAXPACKET;
3200 	/* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3201 	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3202 	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3203 
3204 #ifdef EM_MULTIQUEUE
3205 	/* Multiqueue stack interface */
3206 	if_settransmitfn(ifp, em_mq_start);
3207 	if_setqflushfn(ifp, em_qflush);
3208 #else
3209 	if_setstartfn(ifp, em_start);
3210 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3211 	if_setsendqready(ifp);
3212 #endif
3213 
3214 	ether_ifattach(ifp, adapter->hw.mac.addr);
3215 
3216 	if_setcapabilities(ifp, 0);
3217 	if_setcapenable(ifp, 0);
3218 
3219 
3220 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3221 	    IFCAP_TSO4, 0);
3222 	/*
3223 	 * Tell the upper layer(s) we
3224 	 * support full VLAN capability
3225 	 */
3226 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3227 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3228 	    IFCAP_VLAN_MTU, 0);
3229 	if_setcapenable(ifp, if_getcapabilities(ifp));
3230 
3231 	/*
3232 	** Don't turn this on by default: if vlans are
3233 	** created on another pseudo device (e.g. lagg),
3234 	** then vlan events are not passed through, breaking
3235 	** operation, but with HW FILTER off it works. If
3236 	** using vlans directly on the em driver you can
3237 	** enable this and get full hardware tag filtering.
3238 	*/
3239 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
3240 
3241 #ifdef DEVICE_POLLING
3242 	if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
3243 #endif
3244 
3245 	/* Enable only WOL MAGIC by default */
3246 	if (adapter->wol) {
3247 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3248 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3249 	}
3250 
3251 	/*
3252 	 * Specify the media types supported by this adapter and register
3253 	 * callbacks to update media and link information
3254 	 */
3255 	ifmedia_init(&adapter->media, IFM_IMASK,
3256 	    em_media_change, em_media_status);
3257 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3258 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3259 		u_char fiber_type = IFM_1000_SX;	/* default type */
3260 
3261 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3262 			    0, NULL);
3263 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3264 	} else {
3265 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3266 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3267 			    0, NULL);
3268 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3269 			    0, NULL);
3270 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3271 			    0, NULL);
3272 		if (adapter->hw.phy.type != e1000_phy_ife) {
3273 			ifmedia_add(&adapter->media,
3274 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3275 			ifmedia_add(&adapter->media,
3276 				IFM_ETHER | IFM_1000_T, 0, NULL);
3277 		}
3278 	}
3279 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3280 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3281 	return (0);
3282 }
3283 
3284 
3285 /*
3286  * Manage DMA'able memory.
3287  */
3288 static void
3289 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3290 {
3291 	if (error)
3292 		return;
3293 	*(bus_addr_t *) arg = segs[0].ds_addr;
3294 }
3295 
3296 static int
3297 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3298         struct em_dma_alloc *dma, int mapflags)
3299 {
3300 	int error;
3301 
3302 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3303 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3304 				BUS_SPACE_MAXADDR,	/* lowaddr */
3305 				BUS_SPACE_MAXADDR,	/* highaddr */
3306 				NULL, NULL,		/* filter, filterarg */
3307 				size,			/* maxsize */
3308 				1,			/* nsegments */
3309 				size,			/* maxsegsize */
3310 				0,			/* flags */
3311 				NULL,			/* lockfunc */
3312 				NULL,			/* lockarg */
3313 				&dma->dma_tag);
3314 	if (error) {
3315 		device_printf(adapter->dev,
3316 		    "%s: bus_dma_tag_create failed: %d\n",
3317 		    __func__, error);
3318 		goto fail_0;
3319 	}
3320 
3321 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3322 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3323 	if (error) {
3324 		device_printf(adapter->dev,
3325 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3326 		    __func__, (uintmax_t)size, error);
3327 		goto fail_2;
3328 	}
3329 
3330 	dma->dma_paddr = 0;
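	/* em_dmamap_cb() records the segment's bus address in dma_paddr */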
3331 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3332 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3333 	if (error || dma->dma_paddr == 0) {
3334 		device_printf(adapter->dev,
3335 		    "%s: bus_dmamap_load failed: %d\n",
3336 		    __func__, error);
3337 		goto fail_3;
3338 	}
3339 
3340 	return (0);
3341 
3342 fail_3:
3343 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3344 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3345 fail_2:
3346 	bus_dma_tag_destroy(dma->dma_tag);
3347 fail_0:
3348 	dma->dma_tag = NULL;
3349 
3350 	return (error);
3351 }
3352 
3353 static void
3354 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3355 {
3356 	if (dma->dma_tag == NULL)
3357 		return;
3358 	if (dma->dma_paddr != 0) {
3359 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3360 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3361 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3362 		dma->dma_paddr = 0;
3363 	}
3364 	if (dma->dma_vaddr != NULL) {
3365 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3366 		dma->dma_vaddr = NULL;
3367 	}
3368 	bus_dma_tag_destroy(dma->dma_tag);
3369 	dma->dma_tag = NULL;
3370 }
3371 
3372 
3373 /*********************************************************************
3374  *
3375  *  Allocate memory for the transmit and receive rings, and then
3376  *  the descriptors associated with each, called only once at attach.
3377  *
3378  **********************************************************************/
3379 static int
3380 em_allocate_queues(struct adapter *adapter)
3381 {
3382 	device_t		dev = adapter->dev;
3383 	struct tx_ring		*txr = NULL;
3384 	struct rx_ring		*rxr = NULL;
3385 	int rsize, tsize, error = E1000_SUCCESS;
3386 	int txconf = 0, rxconf = 0;
3387 
3388 
3389 	/* Allocate the TX ring struct memory */
3390 	if (!(adapter->tx_rings =
3391 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3392 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3393 		device_printf(dev, "Unable to allocate TX ring memory\n");
3394 		error = ENOMEM;
3395 		goto fail;
3396 	}
3397 
3398 	/* Now allocate the RX */
3399 	if (!(adapter->rx_rings =
3400 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3401 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3402 		device_printf(dev, "Unable to allocate RX ring memory\n");
3403 		error = ENOMEM;
3404 		goto rx_fail;
3405 	}
3406 
3407 	tsize = roundup2(adapter->num_tx_desc *
3408 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3409 	/*
3410 	 * Now set up the TX queues; txconf is needed to handle the
3411 	 * possibility that things fail midcourse and we need to
3412 	 * unwind the allocations gracefully
3413 	 */
3414 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3415 		/* Set up some basics */
3416 		txr = &adapter->tx_rings[i];
3417 		txr->adapter = adapter;
3418 		txr->me = i;
3419 
3420 		/* Initialize the TX lock */
3421 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3422 		    device_get_nameunit(dev), txr->me);
3423 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3424 
3425 		if (em_dma_malloc(adapter, tsize,
3426 			&txr->txdma, BUS_DMA_NOWAIT)) {
3427 			device_printf(dev,
3428 			    "Unable to allocate TX Descriptor memory\n");
3429 			error = ENOMEM;
3430 			goto err_tx_desc;
3431 		}
3432 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3433 		bzero((void *)txr->tx_base, tsize);
3434 
3435 		if (em_allocate_transmit_buffers(txr)) {
3436 			device_printf(dev,
3437 			    "Critical Failure setting up transmit buffers\n");
3438 			error = ENOMEM;
3439 			goto err_tx_desc;
3440 		}
3441 #if __FreeBSD_version >= 800000
3442 		/* Allocate a buf ring */
3443 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3444 		    M_WAITOK, &txr->tx_mtx);
3445 #endif
3446 	}
3447 
3448 	/*
3449 	 * Next the RX queues...
3450 	 */
3451 	rsize = roundup2(adapter->num_rx_desc *
3452 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3453 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3454 		rxr = &adapter->rx_rings[i];
3455 		rxr->adapter = adapter;
3456 		rxr->me = i;
3457 
3458 		/* Initialize the RX lock */
3459 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3460 		    device_get_nameunit(dev), rxr->me);
3461 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3462 
3463 		if (em_dma_malloc(adapter, rsize,
3464 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3465 			device_printf(dev,
3466 			    "Unable to allocate RX Descriptor memory\n");
3467 			error = ENOMEM;
3468 			goto err_rx_desc;
3469 		}
3470 		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3471 		bzero((void *)rxr->rx_base, rsize);
3472 
3473 		/* Allocate receive buffers for the ring */
3474 		if (em_allocate_receive_buffers(rxr)) {
3475 			device_printf(dev,
3476 			    "Critical Failure setting up receive buffers\n");
3477 			error = ENOMEM;
3478 			goto err_rx_desc;
3479 		}
3480 	}
3481 
3482 	return (0);
3483 
3484 err_rx_desc:
3485 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3486 		em_dma_free(adapter, &rxr->rxdma);
3487 err_tx_desc:
3488 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
3489 		em_dma_free(adapter, &txr->txdma);
3490 #if __FreeBSD_version >= 800000
3491 		if (txr->br != NULL)
3492 			buf_ring_free(txr->br, M_DEVBUF);
3493 #endif
3494 	}
3495 	free(adapter->rx_rings, M_DEVBUF);
3496 rx_fail:
3497 	free(adapter->tx_rings, M_DEVBUF);
3496 fail:
3497 	return (error);
3498 }
3499 
3500 
3501 /*********************************************************************
3502  *
3503  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3504  *  the information needed to transmit a packet on the wire. This is
3505  *  called only once at attach, setup is done every reset.
3506  *
3507  **********************************************************************/
3508 static int
3509 em_allocate_transmit_buffers(struct tx_ring *txr)
3510 {
3511 	struct adapter *adapter = txr->adapter;
3512 	device_t dev = adapter->dev;
3513 	struct em_txbuffer *txbuf;
3514 	int error, i;
3515 
3516 	/*
3517 	 * Setup DMA descriptor areas.
3518 	 */
3519 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3520 			       1, 0,			/* alignment, bounds */
3521 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3522 			       BUS_SPACE_MAXADDR,	/* highaddr */
3523 			       NULL, NULL,		/* filter, filterarg */
3524 			       EM_TSO_SIZE,		/* maxsize */
3525 			       EM_MAX_SCATTER,		/* nsegments */
3526 			       PAGE_SIZE,		/* maxsegsize */
3527 			       0,			/* flags */
3528 			       NULL,			/* lockfunc */
3529 			       NULL,			/* lockfuncarg */
3530 			       &txr->txtag))) {
3531 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3532 		goto fail;
3533 	}
3534 
3535 	if (!(txr->tx_buffers =
3536 	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3537 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3538 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3539 		error = ENOMEM;
3540 		goto fail;
3541 	}
3542 
3543 	/* Create the descriptor buffer dma maps */
3544 	txbuf = txr->tx_buffers;
3545 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3546 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3547 		if (error != 0) {
3548 			device_printf(dev, "Unable to create TX DMA map\n");
3549 			goto fail;
3550 		}
3551 	}
3552 
3553 	return (0);
3554 fail:
3555 	/* Free everything; this handles the partially-allocated case */
3556 	em_free_transmit_structures(adapter);
3557 	return (error);
3558 }
3559 
3560 /*********************************************************************
3561  *
3562  *  Initialize a transmit ring.
3563  *
3564  **********************************************************************/
3565 static void
3566 em_setup_transmit_ring(struct tx_ring *txr)
3567 {
3568 	struct adapter *adapter = txr->adapter;
3569 	struct em_txbuffer *txbuf;
3570 	int i;
3571 #ifdef DEV_NETMAP
3572 	struct netmap_slot *slot;
3573 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3574 #endif /* DEV_NETMAP */
3575 
3576 	/* Clear the old descriptor contents */
3577 	EM_TX_LOCK(txr);
3578 #ifdef DEV_NETMAP
3579 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3580 #endif /* DEV_NETMAP */
3581 
3582 	bzero((void *)txr->tx_base,
3583 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3584 	/* Reset indices */
3585 	txr->next_avail_desc = 0;
3586 	txr->next_to_clean = 0;
3587 
3588 	/* Free any existing tx buffers. */
3589 	txbuf = txr->tx_buffers;
3590 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3591 		if (txbuf->m_head != NULL) {
3592 			bus_dmamap_sync(txr->txtag, txbuf->map,
3593 			    BUS_DMASYNC_POSTWRITE);
3594 			bus_dmamap_unload(txr->txtag, txbuf->map);
3595 			m_freem(txbuf->m_head);
3596 			txbuf->m_head = NULL;
3597 		}
3598 #ifdef DEV_NETMAP
3599 		if (slot) {
3600 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3601 			uint64_t paddr;
3602 			void *addr;
3603 
3604 			addr = PNMB(na, slot + si, &paddr);
3605 			txr->tx_base[i].buffer_addr = htole64(paddr);
3606 			/* reload the map for netmap mode */
3607 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3608 		}
3609 #endif /* DEV_NETMAP */
3610 
3611 		/* clear the watch index */
3612 		txbuf->next_eop = -1;
3613 	}
3614 
3615 	/* Set number of descriptors available */
3616 	txr->tx_avail = adapter->num_tx_desc;
3617 	txr->busy = EM_TX_IDLE;
3618 
3619 	/* Clear checksum offload context. */
3620 	txr->last_hw_offload = 0;
3621 	txr->last_hw_ipcss = 0;
3622 	txr->last_hw_ipcso = 0;
3623 	txr->last_hw_tucss = 0;
3624 	txr->last_hw_tucso = 0;
3625 
3626 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3627 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3628 	EM_TX_UNLOCK(txr);
3629 }
3630 
3631 /*********************************************************************
3632  *
3633  *  Initialize all transmit rings.
3634  *
3635  **********************************************************************/
3636 static void
3637 em_setup_transmit_structures(struct adapter *adapter)
3638 {
3639 	struct tx_ring *txr = adapter->tx_rings;
3640 
3641 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3642 		em_setup_transmit_ring(txr);
3643 
3644 	return;
3645 }
3646 
3647 /*********************************************************************
3648  *
3649  *  Enable transmit unit.
3650  *
3651  **********************************************************************/
3652 static void
3653 em_initialize_transmit_unit(struct adapter *adapter)
3654 {
3655 	struct tx_ring	*txr = adapter->tx_rings;
3656 	struct e1000_hw	*hw = &adapter->hw;
3657 	u32	tctl, txdctl = 0, tarc, tipg = 0;
3658 
3659 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3660 
3661 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3662 		u64 bus_addr = txr->txdma.dma_paddr;
3663 		/* Base and Len of TX Ring */
3664 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3665 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3666 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3667 	    	    (u32)(bus_addr >> 32));
3668 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3669 	    	    (u32)bus_addr);
3670 		/* Init the HEAD/TAIL indices */
3671 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3672 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3673 
3674 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3675 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3676 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3677 
3678 		txr->busy = EM_TX_IDLE;
3679 		txdctl = 0; /* clear txdctl */
3680 		txdctl |= 0x1f; /* PTHRESH */
3681 		txdctl |= 1 << 8; /* HTHRESH */
3682 		txdctl |= 1 << 16; /* WTHRESH */
3683 		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3684 		txdctl |= E1000_TXDCTL_GRAN;
3685 		txdctl |= 1 << 25; /* LWTHRESH */
3686 
3687 		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3688 	}
3689 
3690 	/* Set the default values for the Tx Inter Packet Gap timer */
3691 	switch (adapter->hw.mac.type) {
3692 	case e1000_80003es2lan:
3693 		tipg = DEFAULT_82543_TIPG_IPGR1;
3694 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3695 		    E1000_TIPG_IPGR2_SHIFT;
3696 		break;
3697 	default:
3698 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3699 		    (adapter->hw.phy.media_type ==
3700 		    e1000_media_type_internal_serdes))
3701 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3702 		else
3703 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3704 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3705 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3706 	}
3707 
3708 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3709 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3710 
3711 	if (adapter->hw.mac.type >= e1000_82540)
3712 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3713 		    adapter->tx_abs_int_delay.value);
3714 
3715 	if ((adapter->hw.mac.type == e1000_82571) ||
3716 	    (adapter->hw.mac.type == e1000_82572)) {
3717 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3718 		tarc |= TARC_SPEED_MODE_BIT;
3719 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3720 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3721 		/* errata: program both queues to unweighted RR */
3722 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3723 		tarc |= 1;
3724 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3725 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3726 		tarc |= 1;
3727 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3728 	} else if (adapter->hw.mac.type == e1000_82574) {
3729 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3730 		tarc |= TARC_ERRATA_BIT;
3731 		if (adapter->num_queues > 1) {
3732 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3733 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3734 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3735 		} else
3736 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3737 	}
3738 
3739 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3740 	if (adapter->tx_int_delay.value > 0)
3741 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3742 
3743 	/* Program the Transmit Control Register */
3744 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3745 	tctl &= ~E1000_TCTL_CT;
3746 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3747 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3748 
3749 	if (adapter->hw.mac.type >= e1000_82571)
3750 		tctl |= E1000_TCTL_MULR;
3751 
3752 	/* This write will effectively turn on the transmit unit. */
3753 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3754 
3755 	if (hw->mac.type == e1000_pch_spt) {
3756 		u32 reg;
3757 		reg = E1000_READ_REG(hw, E1000_IOSFPC);
3758 		reg |= E1000_RCTL_RDMTS_HEX;
3759 		E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3760 		reg = E1000_READ_REG(hw, E1000_TARC(0));
3761 		reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3762 		E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3763 	}
3764 }
3765 
3766 
3767 /*********************************************************************
3768  *
3769  *  Free all transmit rings.
3770  *
3771  **********************************************************************/
3772 static void
3773 em_free_transmit_structures(struct adapter *adapter)
3774 {
3775 	struct tx_ring *txr = adapter->tx_rings;
3776 
3777 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3778 		EM_TX_LOCK(txr);
3779 		em_free_transmit_buffers(txr);
3780 		em_dma_free(adapter, &txr->txdma);
3781 		EM_TX_UNLOCK(txr);
3782 		EM_TX_LOCK_DESTROY(txr);
3783 	}
3784 
3785 	free(adapter->tx_rings, M_DEVBUF);
3786 }
3787 
3788 /*********************************************************************
3789  *
3790  *  Free transmit ring related data structures.
3791  *
3792  **********************************************************************/
3793 static void
3794 em_free_transmit_buffers(struct tx_ring *txr)
3795 {
3796 	struct adapter		*adapter = txr->adapter;
3797 	struct em_txbuffer	*txbuf;
3798 
3799 	INIT_DEBUGOUT("free_transmit_ring: begin");
3800 
3801 	if (txr->tx_buffers == NULL)
3802 		return;
3803 
3804 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3805 		txbuf = &txr->tx_buffers[i];
3806 		if (txbuf->m_head != NULL) {
3807 			bus_dmamap_sync(txr->txtag, txbuf->map,
3808 			    BUS_DMASYNC_POSTWRITE);
3809 			bus_dmamap_unload(txr->txtag,
3810 			    txbuf->map);
3811 			m_freem(txbuf->m_head);
3812 			txbuf->m_head = NULL;
3813 			if (txbuf->map != NULL) {
3814 				bus_dmamap_destroy(txr->txtag,
3815 				    txbuf->map);
3816 				txbuf->map = NULL;
3817 			}
3818 		} else if (txbuf->map != NULL) {
3819 			bus_dmamap_unload(txr->txtag,
3820 			    txbuf->map);
3821 			bus_dmamap_destroy(txr->txtag,
3822 			    txbuf->map);
3823 			txbuf->map = NULL;
3824 		}
3825 	}
3826 #if __FreeBSD_version >= 800000
3827 	if (txr->br != NULL) {
3828 		buf_ring_free(txr->br, M_DEVBUF);
3829 		txr->br = NULL; /* avoid double free on error paths */
3830 	}
3829 #endif
3830 	if (txr->tx_buffers != NULL) {
3831 		free(txr->tx_buffers, M_DEVBUF);
3832 		txr->tx_buffers = NULL;
3833 	}
3834 	if (txr->txtag != NULL) {
3835 		bus_dma_tag_destroy(txr->txtag);
3836 		txr->txtag = NULL;
3837 	}
3838 	return;
3839 }
3840 
3841 
3842 /*********************************************************************
3843  *  The offload context is protocol specific (TCP/UDP) and thus
3844  *  only needs to be set when the protocol changes. A context
3845  *  change can be a performance detriment, however, so offload
3846  *  might be better left disabled. The reason lies in the way the
3847  *  controller pipelines requests from the Tx data DMA: up to four
3848  *  requests can be pipelined, and they may belong to the same
3849  *  packet or to multiple packets. All requests for one packet are
3850  *  issued before any request for a subsequent packet, and if a
3851  *  request for the next packet requires a context change, that
3852  *  request stalls until the previous requests complete. Setting up
3853  *  a new context therefore effectively disables pipelined Tx data
3854  *  DMA, which greatly slows down the sending of small frames.
3855  **********************************************************************/
3858 static void
3859 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3860     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3861 {
3862 	struct adapter			*adapter = txr->adapter;
3863 	struct e1000_context_desc	*TXD = NULL;
3864 	struct em_txbuffer		*tx_buffer;
3865 	int				cur, hdr_len;
3866 	u32				cmd = 0;
3867 	u16				offload = 0;
3868 	u8				ipcso, ipcss, tucso, tucss;
3869 
3870 	ipcss = ipcso = tucss = tucso = 0;
3871 	hdr_len = ip_off + (ip->ip_hl << 2);
3872 	cur = txr->next_avail_desc;
3873 
3874 	/* Setup of IP header checksum. */
3875 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3876 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3877 		offload |= CSUM_IP;
3878 		ipcss = ip_off;
3879 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3880 		/*
3881 		 * Start offset for header checksum calculation.
3882 		 * End offset for header checksum calculation.
3883 		 * Offset of place to put the checksum.
3884 		 */
3885 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3886 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3887 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3888 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3889 		cmd |= E1000_TXD_CMD_IP;
3890 	}
3891 
3892 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3893  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3894  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3895  		offload |= CSUM_TCP;
3896  		tucss = hdr_len;
3897  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3898 		/*
3899 		 * The 82574L can only remember the *last* context used,
3900 		 * regardless of which queue it was used for.  We cannot
3901 		 * reuse contexts on this hardware platform and must
3902 		 * generate a new context every time.  82574L hardware
3903 		 * spec, section 7.2.6, second note.
3904 		 */
3905 		if (adapter->num_queues < 2) {
3906 			/*
3907 			 * Setting up a new checksum offload context for
3908 			 * every frame takes a lot of processing time for
3909 			 * the hardware.  This also hurts performance a lot
3910 			 * for small frames, so avoid it if the driver can
3911 			 * reuse a previously configured context.
3912 			 */
3913  			if (txr->last_hw_offload == offload) {
3914  				if (offload & CSUM_IP) {
3915  					if (txr->last_hw_ipcss == ipcss &&
3916  				    	txr->last_hw_ipcso == ipcso &&
3917  				    	txr->last_hw_tucss == tucss &&
3918  				    	txr->last_hw_tucso == tucso)
3919  						return;
3920  				} else {
3921  					if (txr->last_hw_tucss == tucss &&
3922  				    	txr->last_hw_tucso == tucso)
3923  						return;
3924  				}
3925   			}
3926  			txr->last_hw_offload = offload;
3927  			txr->last_hw_tucss = tucss;
3928  			txr->last_hw_tucso = tucso;
3929 		}
3930  		/*
3931  		 * Start offset for payload checksum calculation.
3932  		 * End offset for payload checksum calculation.
3933  		 * Offset of place to put the checksum.
3934  		 */
3935 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3936 		TXD->upper_setup.tcp_fields.tucss = tucss;
3937  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3938  		TXD->upper_setup.tcp_fields.tucso = tucso;
3939  		cmd |= E1000_TXD_CMD_TCP;
3940  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3941  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3942  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3943  		tucss = hdr_len;
3944  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3945 		/*
3946 		 * The 82574L can only remember the *last* context used,
3947 		 * regardless of which queue it was used for.  We cannot
3948 		 * reuse contexts on this hardware platform and must
3949 		 * generate a new context every time.  82574L hardware
3950 		 * spec, section 7.2.6, second note.
3951 		 */
3952 		if (adapter->num_queues < 2) {
3953 			/*
3954 			 * Setting up a new checksum offload context for
3955 			 * every frame takes a lot of processing time for
3956 			 * the hardware.  This also hurts performance a lot
3957 			 * for small frames, so avoid it if the driver can
3958 			 * reuse a previously configured context.
3959 			 */
3960  			if (txr->last_hw_offload == offload) {
3961  				if (offload & CSUM_IP) {
3962  					if (txr->last_hw_ipcss == ipcss &&
3963  				    	txr->last_hw_ipcso == ipcso &&
3964  				    	txr->last_hw_tucss == tucss &&
3965  				    	txr->last_hw_tucso == tucso)
3966  						return;
3967  				} else {
3968  					if (txr->last_hw_tucss == tucss &&
3969  				    	txr->last_hw_tucso == tucso)
3970  						return;
3971  				}
3972  			}
3973  			txr->last_hw_offload = offload;
3974  			txr->last_hw_tucss = tucss;
3975  			txr->last_hw_tucso = tucso;
3976 		}
3977 		/*
3978 		 * Start offset for payload checksum calculation.
3979 		 * End offset for payload checksum calculation.
3980 		 * Offset of place to put the checksum.
3981 		 */
3982 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3983  		TXD->upper_setup.tcp_fields.tucss = tucss;
3984  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3985  		TXD->upper_setup.tcp_fields.tucso = tucso;
3986   	}
3987 
3988  	if (offload & CSUM_IP) {
3989  		txr->last_hw_ipcss = ipcss;
3990  		txr->last_hw_ipcso = ipcso;
3991   	}
3992 
3993 	TXD->tcp_seg_setup.data = htole32(0);
3994 	TXD->cmd_and_length =
3995 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3996 	tx_buffer = &txr->tx_buffers[cur];
3997 	tx_buffer->m_head = NULL;
3998 	tx_buffer->next_eop = -1;
3999 
4000 	if (++cur == adapter->num_tx_desc)
4001 		cur = 0;
4002 
4003 	txr->tx_avail--;
4004 	txr->next_avail_desc = cur;
4005 }
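/*
 * Illustrative sketch (not driver code): how the transmit path uses the
 * routine above.  The caller (em_xmit(), not shown in this excerpt)
 * passes the descriptor option words by reference and ORs the results
 * into each data descriptor it builds; the locals are hypothetical.
 *
 *	u32 txd_upper = 0, txd_lower = 0;
 *
 *	if (mp->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
 *		em_transmit_checksum_setup(txr, mp, ip_off, ip,
 *		    &txd_upper, &txd_lower);
 *	// txd_upper now carries the POPTS bits (IXSM/TXSM); txd_lower
 *	// carries the extended-descriptor command bits.
 */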
4006 
4007 
4008 /**********************************************************************
4009  *
4010  *  Setup work for hardware segmentation offload (TSO)
4011  *
4012  **********************************************************************/
4013 static void
4014 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4015     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4016 {
4017 	struct adapter			*adapter = txr->adapter;
4018 	struct e1000_context_desc	*TXD;
4019 	struct em_txbuffer		*tx_buffer;
4020 	int cur, hdr_len;
4021 
4022 	/*
4023 	 * In theory we could reuse the same TSO context if and only if
4024 	 * the frame is the same type (IP/TCP) and has the same MSS.
4025 	 * However, checking whether a frame has the same IP/TCP header
4026 	 * layout is difficult, so just ignore that and always establish
4027 	 * a new TSO context.
4028 	 */
4029 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4030 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
4031 		      E1000_TXD_DTYP_D |	/* Data descr type */
4032 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
4033 
4034 	/* IP and/or TCP header checksum calculation and insertion. */
4035 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4036 
4037 	cur = txr->next_avail_desc;
4038 	tx_buffer = &txr->tx_buffers[cur];
4039 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4040 
4041 	/*
4042 	 * Start offset for header checksum calculation.
4043 	 * End offset for header checksum calculation.
4044 	 * Offset of place to put the checksum.
4045 	 */
4046 	TXD->lower_setup.ip_fields.ipcss = ip_off;
4047 	TXD->lower_setup.ip_fields.ipcse =
4048 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
4049 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4050 	/*
4051 	 * Start offset for payload checksum calculation.
4052 	 * End offset for payload checksum calculation.
4053 	 * Offset of place to put the checksum.
4054 	 */
4055 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4056 	TXD->upper_setup.tcp_fields.tucse = 0;
4057 	TXD->upper_setup.tcp_fields.tucso =
4058 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4059 	/*
4060 	 * Payload size per packet w/o any headers.
4061 	 * Length of all headers up to payload.
4062 	 */
4063 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4064 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4065 
4066 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
4067 				E1000_TXD_CMD_DEXT |	/* Extended descr */
4068 				E1000_TXD_CMD_TSE |	/* TSE context */
4069 				E1000_TXD_CMD_IP |	/* Do IP csum */
4070 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
4071 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
4072 
4073 	tx_buffer->m_head = NULL;
4074 	tx_buffer->next_eop = -1;
4075 
4076 	if (++cur == adapter->num_tx_desc)
4077 		cur = 0;
4078 
4079 	txr->tx_avail--;
4080 	txr->next_avail_desc = cur;
4081 	txr->tx_tso = TRUE;
4082 }
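/*
 * Worked example (illustrative): for a plain IPv4/TCP frame with no
 * options, ip->ip_hl == 5 and tp->th_off == 5, so with the usual
 * ip_off of ETHER_HDR_LEN (14):
 *
 *	hdr_len = 14 + (5 << 2) + (5 << 2) = 54
 *
 * The context descriptor above then reports 54 bytes of headers plus
 * the MSS from m_pkthdr.tso_segsz, and the hardware segments the
 * remaining (m_pkthdr.len - 54) bytes of payload into MSS-sized frames.
 */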
4083 
4084 
4085 /**********************************************************************
4086  *
4087  *  Examine each tx_buffer in the used queue. If the hardware is done
4088  *  processing the packet then free associated resources. The
4089  *  tx_buffer is put back on the free queue.
4090  *
4091  **********************************************************************/
4092 static void
4093 em_txeof(struct tx_ring *txr)
4094 {
4095 	struct adapter	*adapter = txr->adapter;
4096 	int first, last, done, processed;
4097 	struct em_txbuffer *tx_buffer;
4098 	struct e1000_tx_desc *tx_desc, *eop_desc;
4099 	if_t ifp = adapter->ifp;
4100 
4101 	EM_TX_LOCK_ASSERT(txr);
4102 #ifdef DEV_NETMAP
4103 	if (netmap_tx_irq(ifp, txr->me))
4104 		return;
4105 #endif /* DEV_NETMAP */
4106 
4107 	/* No work, make sure hang detection is disabled */
4108 	if (txr->tx_avail == adapter->num_tx_desc) {
4109 		txr->busy = EM_TX_IDLE;
4110 		return;
4111 	}
4112 
4113 	processed = 0;
4114 	first = txr->next_to_clean;
4115 	tx_desc = &txr->tx_base[first];
4116 	tx_buffer = &txr->tx_buffers[first];
4117 	last = tx_buffer->next_eop;
4118 	eop_desc = &txr->tx_base[last];
4119 
4120 	/*
4121 	 * What this does is get the index of the
4122 	 * first descriptor AFTER the EOP of the
4123 	 * first packet, that way we can do the
4124 	 * simple comparison on the inner while loop.
4125 	 */
4126 	if (++last == adapter->num_tx_desc)
4127 		last = 0;
4128 	done = last;
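	/*
	 * Worked example (illustrative): with num_tx_desc = 1024,
	 * first = 1020 and the EOP at descriptor 1023, "last" wraps to 0
	 * and done = 0, so the inner loop below cleans descriptors
	 * 1020..1023 and stops exactly at the wrap point.
	 */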
4129 
4130 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4131 	    BUS_DMASYNC_POSTREAD);
4132 
4133 	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4134 		/* We clean the range of the packet */
4135 		while (first != done) {
4136 			tx_desc->upper.data = 0;
4137 			tx_desc->lower.data = 0;
4138 			tx_desc->buffer_addr = 0;
4139 			++txr->tx_avail;
4140 			++processed;
4141 
4142 			if (tx_buffer->m_head) {
4143 				bus_dmamap_sync(txr->txtag,
4144 				    tx_buffer->map,
4145 				    BUS_DMASYNC_POSTWRITE);
4146 				bus_dmamap_unload(txr->txtag,
4147 				    tx_buffer->map);
4148 				m_freem(tx_buffer->m_head);
4149 				tx_buffer->m_head = NULL;
4150 			}
4151 			tx_buffer->next_eop = -1;
4152 
4153 			if (++first == adapter->num_tx_desc)
4154 				first = 0;
4155 
4156 			tx_buffer = &txr->tx_buffers[first];
4157 			tx_desc = &txr->tx_base[first];
4158 		}
4159 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4160 		/* See if we can continue to the next packet */
4161 		last = tx_buffer->next_eop;
4162 		if (last != -1) {
4163 			eop_desc = &txr->tx_base[last];
4164 			/* Get new done point */
4165 			if (++last == adapter->num_tx_desc) last = 0;
4166 			done = last;
4167 		} else
4168 			break;
4169 	}
4170 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4171 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4172 
4173 	txr->next_to_clean = first;
4174 
4175 	/*
4176 	** Hang detection: we know there's work outstanding
4177 	** or the entry return would have been taken, so no
4178 	** descriptor processed here indicates a potential hang.
4179 	** The local timer will examine this and do a reset if needed.
4180 	*/
4181 	if (processed == 0) {
4182 		if (txr->busy != EM_TX_HUNG)
4183 			++txr->busy;
4184 	} else /* At least one descriptor was cleaned */
4185 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4186 
4187 	/*
4188 	 * If we have a minimum free, clear IFF_DRV_OACTIVE
4189 	 * to tell the stack that it is OK to send packets.
4190 	 * Notice that all writes of OACTIVE happen under the
4191 	 * TX lock which, with a single queue, guarantees
4192 	 * sanity.
4193 	 */
4194 	if (txr->tx_avail >= EM_MAX_SCATTER) {
4195 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4196 	}
4197 
4198 	/* Disable hang detection if all clean */
4199 	if (txr->tx_avail == adapter->num_tx_desc)
4200 		txr->busy = EM_TX_IDLE;
4201 }
4202 
4203 /*********************************************************************
4204  *
4205  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4206  *
4207  **********************************************************************/
4208 static void
4209 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4210 {
4211 	struct adapter		*adapter = rxr->adapter;
4212 	struct mbuf		*m;
4213 	bus_dma_segment_t	segs;
4214 	struct em_rxbuffer	*rxbuf;
4215 	int			i, j, error, nsegs;
4216 	bool			cleaned = FALSE;
4217 
4218 	i = j = rxr->next_to_refresh;
4219 	/*
4220 	** Get one descriptor beyond
4221 	** our work mark to control
4222 	** the loop.
4223 	*/
4224 	if (++j == adapter->num_rx_desc)
4225 		j = 0;
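	/*
	 * Worked example (illustrative): with num_rx_desc = 1024 and
	 * next_to_refresh = 1023, j wraps to 0 and the loop refreshes
	 * 1023, 0, 1, ... until j reaches "limit"; the descriptor at
	 * "limit" itself is never touched.
	 */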
4226 
4227 	while (j != limit) {
4228 		rxbuf = &rxr->rx_buffers[i];
4229 		if (rxbuf->m_head == NULL) {
4230 			m = m_getjcl(M_NOWAIT, MT_DATA,
4231 			    M_PKTHDR, adapter->rx_mbuf_sz);
4232 			/*
4233 			** If we have a temporary resource shortage
4234 			** that causes a failure, just abort refresh
4235 			** for now, we will return to this point when
4236 			** reinvoked from em_rxeof.
4237 			*/
4238 			if (m == NULL)
4239 				goto update;
4240 		} else
4241 			m = rxbuf->m_head;
4242 
4243 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4244 		m->m_flags |= M_PKTHDR;
4245 		m->m_data = m->m_ext.ext_buf;
4246 
4247 		/* Use bus_dma machinery to setup the memory mapping  */
4248 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4249 		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
4250 		if (error != 0) {
4251 			printf("Refresh mbufs: hdr dmamap load"
4252 			    " failure - %d\n", error);
4253 			m_free(m);
4254 			rxbuf->m_head = NULL;
4255 			goto update;
4256 		}
4257 		rxbuf->m_head = m;
4258 		rxbuf->paddr = segs.ds_addr;
4259 		bus_dmamap_sync(rxr->rxtag,
4260 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4261 		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4262 		cleaned = TRUE;
4263 
4264 		i = j; /* Next is precalculated for us */
4265 		rxr->next_to_refresh = i;
4266 		/* Calculate next controlling index */
4267 		if (++j == adapter->num_rx_desc)
4268 			j = 0;
4269 	}
4270 update:
4271 	/*
4272 	** Update the tail pointer only if,
4273 	** and only as far as, we have refreshed.
4274 	*/
4275 	if (cleaned)
4276 		E1000_WRITE_REG(&adapter->hw,
4277 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4278 
4279 	return;
4280 }
4281 
4282 
4283 /*********************************************************************
4284  *
4285  *  Allocate memory for rx_buffer structures. Since we use one
4286  *  rx_buffer per received packet, the maximum number of rx_buffer's
4287  *  that we'll need is equal to the number of receive descriptors
4288  *  that we've allocated.
4289  *
4290  **********************************************************************/
4291 static int
4292 em_allocate_receive_buffers(struct rx_ring *rxr)
4293 {
4294 	struct adapter		*adapter = rxr->adapter;
4295 	device_t		dev = adapter->dev;
4296 	struct em_rxbuffer	*rxbuf;
4297 	int			error;
4298 
4299 	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4300 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4301 	if (rxr->rx_buffers == NULL) {
4302 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4303 		return (ENOMEM);
4304 	}
4305 
4306 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4307 				1, 0,			/* alignment, bounds */
4308 				BUS_SPACE_MAXADDR,	/* lowaddr */
4309 				BUS_SPACE_MAXADDR,	/* highaddr */
4310 				NULL, NULL,		/* filter, filterarg */
4311 				MJUM9BYTES,		/* maxsize */
4312 				1,			/* nsegments */
4313 				MJUM9BYTES,		/* maxsegsize */
4314 				0,			/* flags */
4315 				NULL,			/* lockfunc */
4316 				NULL,			/* lockarg */
4317 				&rxr->rxtag);
4318 	if (error) {
4319 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4320 		    __func__, error);
4321 		goto fail;
4322 	}
4323 
4324 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4325 		rxbuf = &rxr->rx_buffers[i];
4327 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4328 		if (error) {
4329 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4330 			    __func__, error);
4331 			goto fail;
4332 		}
4333 	}
4334 
4335 	return (0);
4336 
4337 fail:
4338 	em_free_receive_structures(adapter);
4339 	return (error);
4340 }
4341 
4342 
4343 /*********************************************************************
4344  *
4345  *  Initialize a receive ring and its buffers.
4346  *
4347  **********************************************************************/
4348 static int
4349 em_setup_receive_ring(struct rx_ring *rxr)
4350 {
4351 	struct	adapter 	*adapter = rxr->adapter;
4352 	struct em_rxbuffer	*rxbuf;
4353 	bus_dma_segment_t	seg[1];
4354 	int			rsize, nsegs, error = 0;
4355 #ifdef DEV_NETMAP
4356 	struct netmap_slot *slot;
4357 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4358 #endif
4359 
4360 
4361 	/* Clear the ring contents */
4362 	EM_RX_LOCK(rxr);
4363 	rsize = roundup2(adapter->num_rx_desc *
4364 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4365 	bzero((void *)rxr->rx_base, rsize);
4366 #ifdef DEV_NETMAP
4367 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4368 #endif
4369 
4370 	/*
4371 	** Free current RX buffer structs and their mbufs
4372 	*/
4373 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4374 		rxbuf = &rxr->rx_buffers[i];
4375 		if (rxbuf->m_head != NULL) {
4376 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4377 			    BUS_DMASYNC_POSTREAD);
4378 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4379 			m_freem(rxbuf->m_head);
4380 			rxbuf->m_head = NULL; /* mark as freed */
4381 		}
4382 	}
4383 
4384 	/* Now replenish the mbufs */
4385 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4386 		rxbuf = &rxr->rx_buffers[j];
4387 #ifdef DEV_NETMAP
4388 		if (slot) {
4389 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4390 			uint64_t paddr;
4391 			void *addr;
4392 
4393 			addr = PNMB(na, slot + si, &paddr);
4394 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4395 			rxbuf->paddr = paddr;
4396 			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4397 			continue;
4398 		}
4399 #endif /* DEV_NETMAP */
4400 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4401 		    M_PKTHDR, adapter->rx_mbuf_sz);
4402 		if (rxbuf->m_head == NULL) {
4403 			error = ENOBUFS;
4404 			goto fail;
4405 		}
4406 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4407 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4408 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4409 
4410 		/* Get the memory mapping */
4411 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4412 		    rxbuf->map, rxbuf->m_head, seg,
4413 		    &nsegs, BUS_DMA_NOWAIT);
4414 		if (error != 0) {
4415 			m_freem(rxbuf->m_head);
4416 			rxbuf->m_head = NULL;
4417 			goto fail;
4418 		}
4419 		bus_dmamap_sync(rxr->rxtag,
4420 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4421 
4422 		rxbuf->paddr = seg[0].ds_addr;
4423 		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4424 	}
4425 	rxr->next_to_check = 0;
4426 	rxr->next_to_refresh = 0;
4427 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4428 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4429 
4430 fail:
4431 	EM_RX_UNLOCK(rxr);
4432 	return (error);
4433 }
4434 
4435 /*********************************************************************
4436  *
4437  *  Initialize all receive rings.
4438  *
4439  **********************************************************************/
4440 static int
4441 em_setup_receive_structures(struct adapter *adapter)
4442 {
4443 	struct rx_ring *rxr = adapter->rx_rings;
4444 	int q;
4445 
4446 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4447 		if (em_setup_receive_ring(rxr))
4448 			goto fail;
4449 
4450 	return (0);
4451 fail:
4452 	/*
4453 	 * Free RX buffers allocated so far; we will only handle
4454 	 * the rings that completed, the failing case will have
4455 	 * cleaned up for itself. 'q' failed, so it is the terminus.
4456 	 */
4457 	for (int i = 0; i < q; ++i) {
4458 		rxr = &adapter->rx_rings[i];
4459 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4460 			struct em_rxbuffer *rxbuf;
4461 			rxbuf = &rxr->rx_buffers[n];
4462 			if (rxbuf->m_head != NULL) {
4463 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4464 			  	  BUS_DMASYNC_POSTREAD);
4465 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4466 				m_freem(rxbuf->m_head);
4467 				rxbuf->m_head = NULL;
4468 			}
4469 		}
4470 		rxr->next_to_check = 0;
4471 		rxr->next_to_refresh = 0;
4472 	}
4473 
4474 	return (ENOBUFS);
4475 }
4476 
4477 /*********************************************************************
4478  *
4479  *  Free all receive rings.
4480  *
4481  **********************************************************************/
4482 static void
4483 em_free_receive_structures(struct adapter *adapter)
4484 {
4485 	struct rx_ring *rxr = adapter->rx_rings;
4486 
4487 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4488 		em_free_receive_buffers(rxr);
4489 		/* Free the ring memory as well */
4490 		em_dma_free(adapter, &rxr->rxdma);
4491 		EM_RX_LOCK_DESTROY(rxr);
4492 	}
4493 
4494 	free(adapter->rx_rings, M_DEVBUF);
4495 }
4496 
4497 
4498 /*********************************************************************
4499  *
4500  *  Free receive ring data structures
4501  *
4502  **********************************************************************/
4503 static void
4504 em_free_receive_buffers(struct rx_ring *rxr)
4505 {
4506 	struct adapter		*adapter = rxr->adapter;
4507 	struct em_rxbuffer	*rxbuf = NULL;
4508 
4509 	INIT_DEBUGOUT("free_receive_buffers: begin");
4510 
4511 	if (rxr->rx_buffers != NULL) {
4512 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4513 			rxbuf = &rxr->rx_buffers[i];
4514 			if (rxbuf->map != NULL) {
4515 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4516 				    BUS_DMASYNC_POSTREAD);
4517 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4518 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4519 			}
4520 			if (rxbuf->m_head != NULL) {
4521 				m_freem(rxbuf->m_head);
4522 				rxbuf->m_head = NULL;
4523 			}
4524 		}
4525 		free(rxr->rx_buffers, M_DEVBUF);
4526 		rxr->rx_buffers = NULL;
4527 		rxr->next_to_check = 0;
4528 		rxr->next_to_refresh = 0;
4529 	}
4530 
4531 	if (rxr->rxtag != NULL) {
4532 		bus_dma_tag_destroy(rxr->rxtag);
4533 		rxr->rxtag = NULL;
4534 	}
4535 
4536 	return;
4537 }
4538 
4539 
4540 /*********************************************************************
4541  *
4542  *  Enable receive unit.
4543  *
4544  **********************************************************************/
4545 
4546 static void
4547 em_initialize_receive_unit(struct adapter *adapter)
4548 {
4549 	struct rx_ring *rxr = adapter->rx_rings;
4550 	if_t ifp = adapter->ifp;
4551 	struct e1000_hw	*hw = &adapter->hw;
4552 	u32	rctl, rxcsum, rfctl;
4553 
4554 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4555 
4556 	/*
4557 	 * Make sure receives are disabled while setting
4558 	 * up the descriptor ring
4559 	 */
4560 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4561 	/* Do not disable if ever enabled on this hardware */
4562 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4563 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4564 
4565 	/* Setup the Receive Control Register */
4566 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4567 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4568 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4569 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4570 
4571 	/* Do not store bad packets */
4572 	rctl &= ~E1000_RCTL_SBP;
4573 
4574 	/* Enable Long Packet receive */
4575 	if (if_getmtu(ifp) > ETHERMTU)
4576 		rctl |= E1000_RCTL_LPE;
4577 	else
4578 		rctl &= ~E1000_RCTL_LPE;
4579 
4580 	/* Strip the CRC */
4581 	if (!em_disable_crc_stripping)
4582 		rctl |= E1000_RCTL_SECRC;
4583 
4584 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4585 	    adapter->rx_abs_int_delay.value);
4586 
4587 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4588 	    adapter->rx_int_delay.value);
4589 	/*
4590 	 * Set the interrupt throttling rate. Value is calculated
4591 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4592 	 */
4593 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
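	/*
	 * Worked example (illustrative, assuming the if_em.h defaults of
	 * MAX_INTS_PER_SEC = 8000 and DEFAULT_ITR =
	 * 1000000000/(MAX_INTS_PER_SEC * 256)): the ITR register counts
	 * in 256 ns units, so DEFAULT_ITR ~= 488, i.e. a minimum
	 * inter-interrupt interval of roughly 125 us.
	 */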
4594 
4595 	/* Use extended rx descriptor formats */
4596 	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4597 	rfctl |= E1000_RFCTL_EXTEN;
4598 	/*
4599 	** When using MSIX interrupts we need to throttle
4600 	** using the EITR register (82574 only)
4601 	*/
4602 	if (hw->mac.type == e1000_82574) {
4603 		for (int i = 0; i < 4; i++)
4604 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4605 			    DEFAULT_ITR);
4606 		/* Disable accelerated acknowledge */
4607 		rfctl |= E1000_RFCTL_ACK_DIS;
4608 	}
4609 	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4610 
4611 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4612 	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4613 #ifdef EM_MULTIQUEUE
4614 		rxcsum |= E1000_RXCSUM_TUOFL |
4615 			  E1000_RXCSUM_IPOFL |
4616 			  E1000_RXCSUM_PCSD;
4617 #else
4618 		rxcsum |= E1000_RXCSUM_TUOFL;
4619 #endif
4620 	} else
4621 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4622 
4623 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4624 
4625 #ifdef EM_MULTIQUEUE
4626 #define RSSKEYLEN 10
4627 	if (adapter->num_queues > 1) {
4628 		uint8_t  rss_key[4 * RSSKEYLEN];
4629 		uint32_t reta = 0;
4630 		int i;
4631 
4632 		/*
4633 		* Configure RSS key
4634 		*/
4635 		arc4rand(rss_key, sizeof(rss_key), 0);
4636 		for (i = 0; i < RSSKEYLEN; ++i) {
4637 			uint32_t rssrk = 0;
4638 
4639 			rssrk = EM_RSSRK_VAL(rss_key, i);
4640 			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4641 		}
4642 
4643 		/*
4644 		* Configure RSS redirect table in following fashion:
4645 		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4646 		*/
4647 		for (i = 0; i < sizeof(reta); ++i) {
4648 			uint32_t q;
4649 
4650 			q = (i % adapter->num_queues) << 7;
4651 			reta |= q << (8 * i);
4652 		}
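		/*
		 * Worked example (illustrative): with num_queues == 2 the
		 * loop above yields q = 0x00, 0x80, 0x00, 0x80 for
		 * i = 0..3, so reta = 0x80008000; the 32 identical RETA
		 * writes below then alternate hash buckets between queue 0
		 * and queue 1 (the queue number sits in bit 7 of each
		 * table byte).
		 */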
4653 
4654 		for (i = 0; i < 32; ++i) {
4655 			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4656 		}
4657 
4658 		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4659 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4660 				E1000_MRQC_RSS_FIELD_IPV4 |
4661 				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4662 				E1000_MRQC_RSS_FIELD_IPV6_EX |
4663 				E1000_MRQC_RSS_FIELD_IPV6);
4664 	}
4665 #endif
4666 	/*
4667 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4668 	** long latencies are observed, like Lenovo X60. This
4669 	** change eliminates the problem, but since having positive
4670 	** values in RDTR is a known source of problems on other
4671 	** platforms another solution is being sought.
4672 	*/
4673 	if (hw->mac.type == e1000_82573)
4674 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4675 
4676 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4677 		/* Setup the Base and Length of the Rx Descriptor Ring */
4678 		u64 bus_addr = rxr->rxdma.dma_paddr;
4679 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4680 
4681 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4682 		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4683 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4684 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4685 		/* Setup the Head and Tail Descriptor Pointers */
4686 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4687 #ifdef DEV_NETMAP
4688 		/*
4689 		 * an init() while a netmap client is active must
4690 		 * preserve the rx buffers passed to userspace.
4691 		 */
4692 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4693 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4694 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4695 		}
4696 #endif /* DEV_NETMAP */
4697 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4698 	}
4699 
4700 	/*
4701 	 * Set PTHRESH for improved jumbo performance
4702 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4703 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4704 	 * Only write to RXDCTL(1) if there is a need for different
4705 	 * settings.
4706 	 */
4707 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4708 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4709 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4710 	    (if_getmtu(ifp) > ETHERMTU)) {
4711 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4712 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4713 	} else if (adapter->hw.mac.type == e1000_82574) {
4714 		for (int i = 0; i < adapter->num_queues; i++) {
4715 			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4716 
4717 			rxdctl |= 0x20; /* PTHRESH */
4718 			rxdctl |= 4 << 8; /* HTHRESH */
4719 			rxdctl |= 4 << 16;/* WTHRESH */
4720 			rxdctl |= 1 << 24; /* Switch to granularity */
4721 			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4722 		}
4723 	}
4724 
4725 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4726 		if (if_getmtu(ifp) > ETHERMTU)
4727 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4728 		else
4729 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4730 	}
4731 
4732 	/* Make sure VLAN Filters are off */
4733 	rctl &= ~E1000_RCTL_VFE;
4734 
4735 	if (adapter->rx_mbuf_sz == MCLBYTES)
4736 		rctl |= E1000_RCTL_SZ_2048;
4737 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4738 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4739 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4740 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4741 
4742 	/* Ensure we use a DTYPE of 00 by clearing RCTL bits 11:10 */
4743 	rctl &= ~0x00000C00;
4744 	/* Write out the settings */
4745 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4746 
4747 	return;
4748 }
4749 
4750 
4751 /*********************************************************************
4752  *
4753  *  This routine executes in interrupt context. It replenishes
4754  *  the mbufs in the descriptor and sends data which has been
4755  *  dma'ed into host memory to upper layer.
4756  *
4757  *  We loop at most count times if count is > 0, or until done if
4758  *  count < 0.
4759  *
4760  *  For polling we also now return the number of cleaned packets
4761  *********************************************************************/
4762 static bool
4763 em_rxeof(struct rx_ring *rxr, int count, int *done)
4764 {
4765 	struct adapter		*adapter = rxr->adapter;
4766 	if_t ifp = adapter->ifp;
4767 	struct mbuf		*mp, *sendmp;
4768 	u32			status = 0;
4769 	u16 			len;
4770 	int			i, processed, rxdone = 0;
4771 	bool			eop;
4772 	union e1000_rx_desc_extended	*cur;
4773 
4774 	EM_RX_LOCK(rxr);
4775 
4776 	/* Sync the ring */
4777 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4778 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4779 
4780 
4781 #ifdef DEV_NETMAP
4782 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4783 		EM_RX_UNLOCK(rxr);
4784 		return (FALSE);
4785 	}
4786 #endif /* DEV_NETMAP */
4787 
4788 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4789 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4790 			break;
4791 
4792 		cur = &rxr->rx_base[i];
4793 		status = le32toh(cur->wb.upper.status_error);
4794 		mp = sendmp = NULL;
4795 
4796 		if ((status & E1000_RXD_STAT_DD) == 0)
4797 			break;
4798 
4799 		len = le16toh(cur->wb.upper.length);
4800 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4801 
4802 		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4803 		    (rxr->discard == TRUE)) {
4804 			adapter->dropped_pkts++;
4805 			++rxr->rx_discarded;
4806 			if (!eop) /* Catch subsequent segs */
4807 				rxr->discard = TRUE;
4808 			else
4809 				rxr->discard = FALSE;
4810 			em_rx_discard(rxr, i);
4811 			goto next_desc;
4812 		}
4813 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4814 
4815 		/* Assign correct length to the current fragment */
4816 		mp = rxr->rx_buffers[i].m_head;
4817 		mp->m_len = len;
4818 
4819 		/* Trigger for refresh */
4820 		rxr->rx_buffers[i].m_head = NULL;
4821 
4822 		/* First segment? */
4823 		if (rxr->fmp == NULL) {
4824 			mp->m_pkthdr.len = len;
4825 			rxr->fmp = rxr->lmp = mp;
4826 		} else {
4827 			/* Chain mbuf's together */
4828 			mp->m_flags &= ~M_PKTHDR;
4829 			rxr->lmp->m_next = mp;
4830 			rxr->lmp = mp;
4831 			rxr->fmp->m_pkthdr.len += len;
4832 		}
4833 
4834 		if (eop) {
4835 			--count;
4836 			sendmp = rxr->fmp;
4837 			if_setrcvif(sendmp, ifp);
4838 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4839 			em_receive_checksum(status, sendmp);
4840 #ifndef __NO_STRICT_ALIGNMENT
4841 			if (adapter->hw.mac.max_frame_size >
4842 			    (MCLBYTES - ETHER_ALIGN) &&
4843 			    em_fixup_rx(rxr) != 0)
4844 				goto skip;
4845 #endif
4846 			if (status & E1000_RXD_STAT_VP) {
4847 				if_setvtag(sendmp,
4848 				    le16toh(cur->wb.upper.vlan));
4849 				sendmp->m_flags |= M_VLANTAG;
4850 			}
4851 #ifndef __NO_STRICT_ALIGNMENT
4852 skip:
4853 #endif
4854 			rxr->fmp = rxr->lmp = NULL;
4855 		}
4856 next_desc:
4857 		/* Sync the ring */
4858 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4859 	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4860 
4861 		/* Zero out the receive descriptors status. */
4862 		cur->wb.upper.status_error &= htole32(~0xFF);
4863 		++rxdone;	/* cumulative for POLL */
4864 		++processed;
4865 
4866 		/* Advance our pointers to the next descriptor. */
4867 		if (++i == adapter->num_rx_desc)
4868 			i = 0;
4869 
4870 		/* Send to the stack */
4871 		if (sendmp != NULL) {
4872 			rxr->next_to_check = i;
4873 			EM_RX_UNLOCK(rxr);
4874 			if_input(ifp, sendmp);
4875 			EM_RX_LOCK(rxr);
4876 			i = rxr->next_to_check;
4877 		}
4878 
4879 		/* Only refresh mbufs every 8 descriptors */
4880 		if (processed == 8) {
4881 			em_refresh_mbufs(rxr, i);
4882 			processed = 0;
4883 		}
4884 	}
4885 
4886 	/* Catch any remaining refresh work */
4887 	if (e1000_rx_unrefreshed(rxr))
4888 		em_refresh_mbufs(rxr, i);
4889 
4890 	rxr->next_to_check = i;
4891 	if (done != NULL)
4892 		*done = rxdone;
4893 	EM_RX_UNLOCK(rxr);
4894 
4895 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4896 }
4897 
4898 static __inline void
4899 em_rx_discard(struct rx_ring *rxr, int i)
4900 {
4901 	struct em_rxbuffer	*rbuf;
4902 
4903 	rbuf = &rxr->rx_buffers[i];
4904 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4905 
4906 	/* Free any previous pieces */
4907 	if (rxr->fmp != NULL) {
4908 		rxr->fmp->m_flags |= M_PKTHDR;
4909 		m_freem(rxr->fmp);
4910 		rxr->fmp = NULL;
4911 		rxr->lmp = NULL;
4912 	}
4913 	/*
4914 	** Free buffer and allow em_refresh_mbufs()
4915 	** to clean up and recharge buffer.
4916 	*/
4917 	if (rbuf->m_head) {
4918 		m_free(rbuf->m_head);
4919 		rbuf->m_head = NULL;
4920 	}
4921 	return;
4922 }
4923 
4924 #ifndef __NO_STRICT_ALIGNMENT
4925 /*
4926  * When jumbo frames are enabled we should realign the entire payload on
4927  * architectures with strict alignment. This is a serious design mistake
4928  * of the 8254x as it nullifies DMA operations: the 8254x only allows RX
4929  * buffer sizes of 2048/4096/8192/16384, while what we really want is
4930  * 2048 - ETHER_ALIGN so as to align the payload. On architectures
4931  * without strict alignment restrictions the 8254x still performs
4932  * unaligned memory accesses, which reduces performance too. To avoid
4933  * copying an entire frame to realign it, we allocate a new mbuf, copy
4934  * the ethernet header into it, and prepend it onto the existing chain.
4935  *
4936  * Be aware, the best performance of the 8254x is achieved only when
4937  * jumbo frames are not used at all on architectures with strict alignment.
4938  */
4939 static int
4940 em_fixup_rx(struct rx_ring *rxr)
4941 {
4942 	struct adapter *adapter = rxr->adapter;
4943 	struct mbuf *m, *n;
4944 	int error;
4945 
4946 	error = 0;
4947 	m = rxr->fmp;
4948 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4949 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4950 		m->m_data += ETHER_HDR_LEN;
4951 	} else {
4952 		MGETHDR(n, M_NOWAIT, MT_DATA);
4953 		if (n != NULL) {
4954 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4955 			m->m_data += ETHER_HDR_LEN;
4956 			m->m_len -= ETHER_HDR_LEN;
4957 			n->m_len = ETHER_HDR_LEN;
4958 			M_MOVE_PKTHDR(n, m);
4959 			n->m_next = m;
4960 			rxr->fmp = n;
4961 		} else {
4962 			adapter->dropped_pkts++;
4963 			m_freem(rxr->fmp);
4964 			rxr->fmp = NULL;
4965 			error = ENOMEM;
4966 		}
4967 	}
4968 
4969 	return (error);
4970 }
4971 #endif
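/*
 * Alignment arithmetic behind em_fixup_rx() (illustrative): mbuf cluster
 * data starts 4-byte aligned, so the IP header of a received frame sits
 * at offset ETHER_HDR_LEN (14), which is misaligned.  Shifting the frame
 * forward by ETHER_HDR_LEN moves the IP header to offset 28, which is
 * 4-byte aligned again.
 */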
4972 
4973 static void
4974 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4975 {
4976 	rxd->read.buffer_addr = htole64(rxbuf->paddr);
4977 	/* DD bits must be cleared */
4978 	rxd->wb.upper.status_error= 0;
4979 	rxd->wb.upper.status_error = 0;
4980 
4981 /*********************************************************************
4982  *
4983  *  Verify that the hardware indicated that the checksum is valid.
4984  *  Inform the stack about the status of checksum so that stack
4985  *  doesn't spend time verifying the checksum.
4986  *
4987  *********************************************************************/
4988 static void
4989 em_receive_checksum(uint32_t status, struct mbuf *mp)
4990 {
4991 	mp->m_pkthdr.csum_flags = 0;
4992 
4993 	/* Ignore Checksum bit is set */
4994 	if (status & E1000_RXD_STAT_IXSM)
4995 		return;
4996 
4997 	/* If the IP checksum exists and there is no IP Checksum error */
4998 	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
4999 		E1000_RXD_STAT_IPCS) {
5000 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5001 	}
5002 
5003 	/* TCP or UDP checksum */
5004 	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5005 	    E1000_RXD_STAT_TCPCS) {
5006 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5007 		mp->m_pkthdr.csum_data = htons(0xffff);
5008 	}
5009 	if (status & E1000_RXD_STAT_UDPCS) {
5010 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5011 		mp->m_pkthdr.csum_data = htons(0xffff);
5012 	}
5013 }
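/*
 * Illustrative result: for a good IPv4/TCP frame the hardware reports
 * IPCS and TCPCS with neither IPE nor TCPE set, so the routine above
 * hands the stack csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR and csum_data = 0xffff, and the
 * stack skips software checksum verification entirely.
 */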
5014 
5015 /*
5016  * This routine is run via a vlan
5017  * config EVENT
5018  */
5019 static void
5020 em_register_vlan(void *arg, if_t ifp, u16 vtag)
5021 {
5022 	struct adapter	*adapter = if_getsoftc(ifp);
5023 	u32		index, bit;
5024 
5025 	if ((void *)adapter != arg)	/* Not our event */
5026 		return;
5027 
5028 	if ((vtag == 0) || (vtag > 4095))	/* Invalid ID */
5029 		return;
5030 
5031 	EM_CORE_LOCK(adapter);
5032 	index = (vtag >> 5) & 0x7F;
5033 	bit = vtag & 0x1F;
5034 	adapter->shadow_vfta[index] |= (1 << bit);
5035 	++adapter->num_vlans;
5036 	/* Re-init to load the changes */
5037 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5038 		em_init_locked(adapter);
5039 	EM_CORE_UNLOCK(adapter);
5040 }
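/*
 * Worked example (illustrative): for vtag = 100, index = (100 >> 5) &
 * 0x7F = 3 and bit = 100 & 0x1F = 4, so the tag is recorded as
 * shadow_vfta[3] |= (1 << 4) and later replayed into the hardware VFTA
 * by em_setup_vlan_hw_support().
 */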
5041 
5042 /*
5043  * This routine is run via a vlan
5044  * unconfig EVENT
5045  */
5046 static void
5047 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5048 {
5049 	struct adapter	*adapter = if_getsoftc(ifp);
5050 	u32		index, bit;
5051 
5052 	if (adapter != arg)
5053 		return;
5054 
5055 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
5056 		return;
5057 
5058 	EM_CORE_LOCK(adapter);
5059 	index = (vtag >> 5) & 0x7F;
5060 	bit = vtag & 0x1F;
5061 	adapter->shadow_vfta[index] &= ~(1 << bit);
5062 	--adapter->num_vlans;
5063 	/* Re-init to load the changes */
5064 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5065 		em_init_locked(adapter);
5066 	EM_CORE_UNLOCK(adapter);
5067 }
5068 
5069 static void
5070 em_setup_vlan_hw_support(struct adapter *adapter)
5071 {
5072 	struct e1000_hw *hw = &adapter->hw;
5073 	u32             reg;
5074 
5075 	/*
5076 	** We get here thru init_locked, meaning
5077 	** a soft reset, this has already cleared
5078 	** the VFTA and other state, so if there
5079 	** have been no vlan's registered do nothing.
5080 	*/
5081 	if (adapter->num_vlans == 0)
5082 		return;
5083 
5084 	/*
5085 	** A soft reset zeroes out the VFTA, so
5086 	** we need to repopulate it now.
5087 	*/
5088 	for (int i = 0; i < EM_VFTA_SIZE; i++)
5089 		if (adapter->shadow_vfta[i] != 0)
5090 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5091 			    i, adapter->shadow_vfta[i]);
5092 
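	/* CTRL.VME turns on hardware VLAN tag stripping/insertion */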
5093 	reg = E1000_READ_REG(hw, E1000_CTRL);
5094 	reg |= E1000_CTRL_VME;
5095 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5096 
5097 	/* Enable the Filter Table */
5098 	reg = E1000_READ_REG(hw, E1000_RCTL);
5099 	reg &= ~E1000_RCTL_CFIEN;
5100 	reg |= E1000_RCTL_VFE;
5101 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
5102 }
5103 
5104 static void
5105 em_enable_intr(struct adapter *adapter)
5106 {
5107 	struct e1000_hw *hw = &adapter->hw;
5108 	u32 ims_mask = IMS_ENABLE_MASK;
5109 
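	/*
	 * With MSI-X on the 82574, EIAC marks the queue/link causes as
	 * auto-clear, so those bits are folded into the enable mask too.
	 */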
5110 	if (hw->mac.type == e1000_82574) {
5111 		E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
5112 		ims_mask |= adapter->ims;
5113 	}
5114 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5115 }
5116 
5117 static void
5118 em_disable_intr(struct adapter *adapter)
5119 {
5120 	struct e1000_hw *hw = &adapter->hw;
5121 
5122 	if (hw->mac.type == e1000_82574)
5123 		E1000_WRITE_REG(hw, EM_EIAC, 0);
5124 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5125 }
5126 
5127 /*
5128  * Bit of a misnomer, what this really means is
5129  * to enable OS management of the system... aka
5130  * to disable special hardware management features
5131  */
5132 static void
5133 em_init_manageability(struct adapter *adapter)
5134 {
5135 	/* A shared code workaround */
5136 #define E1000_82542_MANC2H E1000_MANC2H
5137 	if (adapter->has_manage) {
		u32 manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
		u32 manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5140 
5141 		/* disable hardware interception of ARP */
5142 		manc &= ~(E1000_MANC_ARP_EN);
5143 
		/* enable receiving management packets to the host */
5145 		manc |= E1000_MANC_EN_MNG2HOST;
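		/* ports 623 and 664 are the standard RMCP/ASF management ports */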
5146 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5147 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5148 		manc2h |= E1000_MNG2HOST_PORT_623;
5149 		manc2h |= E1000_MNG2HOST_PORT_664;
5150 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5151 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5152 	}
5153 }
5154 
5155 /*
5156  * Give control back to hardware management
5157  * controller if there is one.
5158  */
5159 static void
5160 em_release_manageability(struct adapter *adapter)
5161 {
5162 	if (adapter->has_manage) {
5163 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5164 
5165 		/* re-enable hardware interception of ARP */
5166 		manc |= E1000_MANC_ARP_EN;
5167 		manc &= ~E1000_MANC_EN_MNG2HOST;
5168 
5169 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5170 	}
5171 }
5172 
5173 /*
5174  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5175  * For ASF and Pass Through versions of f/w this means
5176  * that the driver is loaded. For AMT version type f/w
5177  * this means that the network i/f is open.
5178  */
5179 static void
5180 em_get_hw_control(struct adapter *adapter)
5181 {
5182 	u32 ctrl_ext, swsm;
5183 
5184 	if (adapter->hw.mac.type == e1000_82573) {
5185 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5186 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5187 		    swsm | E1000_SWSM_DRV_LOAD);
5188 		return;
5189 	}
5190 	/* else */
5191 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5192 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5193 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5194 	return;
5195 }
5196 
5197 /*
5198  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5199  * For ASF and Pass Through versions of f/w this means that
5200  * the driver is no longer loaded. For AMT versions of the
5201  * f/w this means that the network i/f is closed.
5202  */
5203 static void
5204 em_release_hw_control(struct adapter *adapter)
5205 {
5206 	u32 ctrl_ext, swsm;
5207 
5208 	if (!adapter->has_manage)
5209 		return;
5210 
5211 	if (adapter->hw.mac.type == e1000_82573) {
5212 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5213 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5214 		    swsm & ~E1000_SWSM_DRV_LOAD);
5215 		return;
5216 	}
5217 	/* else */
5218 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5219 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5220 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5221 	return;
5222 }
5223 
5224 static int
5225 em_is_valid_ether_addr(u8 *addr)
5226 {
	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
5228 
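	/* Reject multicast addresses (I/G bit set) and the all-zero address */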
5229 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5230 		return (FALSE);
5231 	}
5232 
5233 	return (TRUE);
5234 }
5235 
5236 /*
5237 ** Parse the interface capabilities with regard
5238 ** to both system management and wake-on-lan for
5239 ** later use.
5240 */
5241 static void
5242 em_get_wakeup(device_t dev)
5243 {
5244 	struct adapter	*adapter = device_get_softc(dev);
5245 	u16		eeprom_data = 0, device_id, apme_mask;
5246 
5247 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5248 	apme_mask = EM_EEPROM_APME;
5249 
5250 	switch (adapter->hw.mac.type) {
5251 	case e1000_82573:
5252 	case e1000_82583:
5253 		adapter->has_amt = TRUE;
		/* FALLTHROUGH */
5255 	case e1000_82571:
5256 	case e1000_82572:
5257 	case e1000_80003es2lan:
		if (adapter->hw.bus.func == 1)
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
		else
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
5266 	case e1000_ich8lan:
5267 	case e1000_ich9lan:
5268 	case e1000_ich10lan:
5269 	case e1000_pchlan:
5270 	case e1000_pch2lan:
5271 		apme_mask = E1000_WUC_APME;
5272 		adapter->has_amt = TRUE;
5273 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5274 		break;
5275 	default:
5276 		e1000_read_nvm(&adapter->hw,
5277 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5278 		break;
5279 	}
5280 	if (eeprom_data & apme_mask)
5281 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5282 	/*
	 * We have the eeprom settings, now apply the special cases
	 * where the eeprom may be wrong or the board won't support
	 * wake on lan on a particular port
5286 	 */
5287 	device_id = pci_get_device(dev);
	switch (device_id) {
5289 	case E1000_DEV_ID_82571EB_FIBER:
5290 		/* Wake events only supported on port A for dual fiber
5291 		 * regardless of eeprom setting */
5292 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5293 		    E1000_STATUS_FUNC_1)
5294 			adapter->wol = 0;
5295 		break;
5296 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5297 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5298 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
		/* if quad port adapter, disable WoL on all but port A */
5300 		if (global_quad_port_a != 0)
5301 			adapter->wol = 0;
5302 		/* Reset for multiple quad port adapters */
5303 		if (++global_quad_port_a == 4)
5304 			global_quad_port_a = 0;
		break;
5306 	}
5307 	return;
5308 }
5309 
5310 
5311 /*
5312  * Enable PCI Wake On Lan capability
5313  */
5314 static void
5315 em_enable_wakeup(device_t dev)
5316 {
5317 	struct adapter	*adapter = device_get_softc(dev);
5318 	if_t ifp = adapter->ifp;
5319 	u32		pmc, ctrl, ctrl_ext, rctl;
5320 	u16     	status;
5321 
5322 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5323 		return;
5324 
5325 	/* Advertise the wakeup capability */
5326 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5327 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5328 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5329 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5330 
5331 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5332 	    (adapter->hw.mac.type == e1000_pchlan) ||
5333 	    (adapter->hw.mac.type == e1000_ich9lan) ||
5334 	    (adapter->hw.mac.type == e1000_ich10lan))
5335 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5336 
5337 	/* Keep the laser running on Fiber adapters */
5338 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5339 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5340 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5341 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5342 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5343 	}
5344 
5345 	/*
5346 	** Determine type of Wakeup: note that wol
5347 	** is set with all bits on by default.
5348 	*/
5349 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5350 		adapter->wol &= ~E1000_WUFC_MAG;
5351 
5352 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5353 		adapter->wol &= ~E1000_WUFC_MC;
5354 	else {
5355 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5356 		rctl |= E1000_RCTL_MPE;
5357 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5358 	}
5359 
5360 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5361 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5362 		if (em_enable_phy_wakeup(adapter))
5363 			return;
5364 	} else {
5365 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5366 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5367 	}
5368 
5369 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5370 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5371 
	/* Request PME */
	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (if_getcapenable(ifp) & IFCAP_WOL)
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5378 
5379 	return;
5380 }
5381 
5382 /*
5383 ** WOL in the newer chipset interfaces (pchlan)
** requires settings to be copied into the PHY
5385 */
5386 static int
5387 em_enable_phy_wakeup(struct adapter *adapter)
5388 {
5389 	struct e1000_hw *hw = &adapter->hw;
5390 	u32 mreg, ret = 0;
5391 	u16 preg;
5392 
5393 	/* copy MAC RARs to PHY RARs */
5394 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5395 
5396 	/* copy MAC MTA to PHY MTA */
5397 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
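		/* each 32-bit MTA entry spans two 16-bit PHY registers */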
5398 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5399 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5400 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5401 		    (u16)((mreg >> 16) & 0xFFFF));
5402 	}
5403 
5404 	/* configure PHY Rx Control register */
5405 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5406 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5407 	if (mreg & E1000_RCTL_UPE)
5408 		preg |= BM_RCTL_UPE;
5409 	if (mreg & E1000_RCTL_MPE)
5410 		preg |= BM_RCTL_MPE;
5411 	preg &= ~(BM_RCTL_MO_MASK);
5412 	if (mreg & E1000_RCTL_MO_3)
5413 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5414 				<< BM_RCTL_MO_SHIFT);
5415 	if (mreg & E1000_RCTL_BAM)
5416 		preg |= BM_RCTL_BAM;
5417 	if (mreg & E1000_RCTL_PMCF)
5418 		preg |= BM_RCTL_PMCF;
5419 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5420 	if (mreg & E1000_CTRL_RFCE)
5421 		preg |= BM_RCTL_RFCE;
5422 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5423 
5424 	/* enable PHY wakeup in MAC register */
5425 	E1000_WRITE_REG(hw, E1000_WUC,
5426 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5427 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5428 
5429 	/* configure and enable PHY wakeup in PHY registers */
5430 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5431 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5432 
5433 	/* activate PHY wakeup */
5434 	ret = hw->phy.ops.acquire(hw);
5435 	if (ret) {
		device_printf(adapter->dev, "Could not acquire PHY\n");
5437 		return ret;
5438 	}
5439 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5440 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5441 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5442 	if (ret) {
		device_printf(adapter->dev, "Could not read PHY page 769\n");
5444 		goto out;
5445 	}
5446 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5447 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5448 	if (ret)
		device_printf(adapter->dev, "Could not set PHY Host Wakeup bit\n");
5450 out:
5451 	hw->phy.ops.release(hw);
5452 
5453 	return ret;
5454 }
5455 
5456 static void
5457 em_led_func(void *arg, int onoff)
5458 {
5459 	struct adapter	*adapter = arg;
5460 
5461 	EM_CORE_LOCK(adapter);
5462 	if (onoff) {
5463 		e1000_setup_led(&adapter->hw);
5464 		e1000_led_on(&adapter->hw);
5465 	} else {
5466 		e1000_led_off(&adapter->hw);
5467 		e1000_cleanup_led(&adapter->hw);
5468 	}
5469 	EM_CORE_UNLOCK(adapter);
5470 }
5471 
5472 /*
5473 ** Disable the L0S and L1 LINK states
5474 */
5475 static void
5476 em_disable_aspm(struct adapter *adapter)
5477 {
5478 	int		base, reg;
	u16		link_cap, link_ctrl;
5480 	device_t	dev = adapter->dev;
5481 
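	/* Only the 82573/82574/82583 need this; other MACs are left alone */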
5482 	switch (adapter->hw.mac.type) {
5483 		case e1000_82573:
5484 		case e1000_82574:
5485 		case e1000_82583:
5486 			break;
5487 		default:
5488 			return;
5489 	}
5490 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5491 		return;
5492 	reg = base + PCIER_LINK_CAP;
5493 	link_cap = pci_read_config(dev, reg, 2);
5494 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5495 		return;
5496 	reg = base + PCIER_LINK_CTL;
5497 	link_ctrl = pci_read_config(dev, reg, 2);
5498 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5499 	pci_write_config(dev, reg, link_ctrl, 2);
5500 	return;
5501 }
5502 
5503 /**********************************************************************
5504  *
5505  *  Update the board statistics counters.
5506  *
5507  **********************************************************************/
5508 static void
5509 em_update_stats_counters(struct adapter *adapter)
5510 {
5511 
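	/*
	 * Symbol/sequence error counts can be bogus on fiber/serdes parts
	 * without link, so only read them on copper or with link up.
	 */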
	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5513 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5514 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5515 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5516 	}
5517 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5518 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5519 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5520 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5521 
5522 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5523 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5524 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5525 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5526 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5527 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5528 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5529 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5530 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5531 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5532 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5533 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5534 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5535 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5536 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5537 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5538 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5539 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5540 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5541 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5542 
5543 	/* For the 64-bit byte counters the low dword must be read first. */
5544 	/* Both registers clear on the read of the high dword */
5545 
5546 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5547 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5548 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5549 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5550 
5551 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5552 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5553 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5554 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5555 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5556 
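	/* XXX: unlike GORC/GOTC above, only the high dword is read here */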
5557 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5558 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5559 
5560 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5561 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5562 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5563 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5564 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5565 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5566 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5567 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5568 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5569 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5570 
5571 	/* Interrupt Counts */
5572 
5573 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5574 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5575 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5576 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5577 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5578 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5579 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5580 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5581 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5582 
5583 	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5596 	}
5597 }
5598 
5599 static uint64_t
5600 em_get_counter(if_t ifp, ift_counter cnt)
5601 {
5602 	struct adapter *adapter;
5603 
5604 	adapter = if_getsoftc(ifp);
5605 
5606 	switch (cnt) {
5607 	case IFCOUNTER_COLLISIONS:
5608 		return (adapter->stats.colc);
5609 	case IFCOUNTER_IERRORS:
5610 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5611 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5612 		    adapter->stats.ruc + adapter->stats.roc +
5613 		    adapter->stats.mpc + adapter->stats.cexterr);
5614 	case IFCOUNTER_OERRORS:
5615 		return (adapter->stats.ecol + adapter->stats.latecol +
5616 		    adapter->watchdog_events);
5617 	default:
5618 		return (if_get_counter_default(ifp, cnt));
5619 	}
5620 }
5621 
5622 /* Export a single 32-bit register via a read-only sysctl. */
5623 static int
5624 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5625 {
5626 	struct adapter *adapter;
5627 	u_int val;
5628 
5629 	adapter = oidp->oid_arg1;
5630 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5631 	return (sysctl_handle_int(oidp, &val, 0, req));
5632 }
5633 
5634 /*
5635  * Add sysctl variables, one per statistic, to the system.
5636  */
5637 static void
5638 em_add_hw_stats(struct adapter *adapter)
5639 {
5640 	device_t dev = adapter->dev;
5641 
5642 	struct tx_ring *txr = adapter->tx_rings;
5643 	struct rx_ring *rxr = adapter->rx_rings;
5644 
5645 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5646 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5647 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5648 	struct e1000_hw_stats *stats = &adapter->stats;
5649 
5650 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5651 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5652 
5653 #define QUEUE_NAME_LEN 32
5654 	char namebuf[QUEUE_NAME_LEN];
5655 
5656 	/* Driver Statistics */
5657 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5658 			CTLFLAG_RD, &adapter->dropped_pkts,
5659 			"Driver dropped packets");
5660 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5661 			CTLFLAG_RD, &adapter->link_irq,
5662 			"Link MSIX IRQ Handled");
5663 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5664 			 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5665 			 "Defragmenting mbuf chain failed");
5666 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5667 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5668 			"Driver tx dma failure in xmit");
5669 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5670 			CTLFLAG_RD, &adapter->rx_overruns,
5671 			"RX overruns");
5672 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5673 			CTLFLAG_RD, &adapter->watchdog_events,
5674 			"Watchdog timeouts");
5675 
5676 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5677 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5678 			em_sysctl_reg_handler, "IU",
5679 			"Device Control Register");
5680 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5681 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5682 			em_sysctl_reg_handler, "IU",
5683 			"Receiver Control Register");
5684 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5685 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5686 			"Flow Control High Watermark");
5687 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5688 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5689 			"Flow Control Low Watermark");
5690 
5691 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5692 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5693 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5694 					    CTLFLAG_RD, NULL, "TX Queue Name");
5695 		queue_list = SYSCTL_CHILDREN(queue_node);
5696 
5697 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5698 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5699 				E1000_TDH(txr->me),
5700 				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Head");
5702 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5703 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5704 				E1000_TDT(txr->me),
5705 				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Tail");
5707 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5708 				CTLFLAG_RD, &txr->tx_irq,
5709 				"Queue MSI-X Transmit Interrupts");
5710 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5711 				CTLFLAG_RD, &txr->no_desc_avail,
5712 				"Queue No Descriptor Available");
5713 
5714 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5715 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5716 					    CTLFLAG_RD, NULL, "RX Queue Name");
5717 		queue_list = SYSCTL_CHILDREN(queue_node);
5718 
5719 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5720 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5721 				E1000_RDH(rxr->me),
5722 				em_sysctl_reg_handler, "IU",
5723 				"Receive Descriptor Head");
5724 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5725 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5726 				E1000_RDT(rxr->me),
5727 				em_sysctl_reg_handler, "IU",
5728 				"Receive Descriptor Tail");
5729 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5730 				CTLFLAG_RD, &rxr->rx_irq,
5731 				"Queue MSI-X Receive Interrupts");
5732 	}
5733 
5734 	/* MAC stats get their own sub node */
5735 
5736 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5737 				    CTLFLAG_RD, NULL, "Statistics");
5738 	stat_list = SYSCTL_CHILDREN(stat_node);
5739 
5740 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5741 			CTLFLAG_RD, &stats->ecol,
5742 			"Excessive collisions");
5743 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5744 			CTLFLAG_RD, &stats->scc,
5745 			"Single collisions");
5746 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5747 			CTLFLAG_RD, &stats->mcc,
5748 			"Multiple collisions");
5749 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5750 			CTLFLAG_RD, &stats->latecol,
5751 			"Late collisions");
5752 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5753 			CTLFLAG_RD, &stats->colc,
5754 			"Collision Count");
5755 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5756 			CTLFLAG_RD, &adapter->stats.symerrs,
5757 			"Symbol Errors");
5758 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5759 			CTLFLAG_RD, &adapter->stats.sec,
5760 			"Sequence Errors");
5761 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5762 			CTLFLAG_RD, &adapter->stats.dc,
5763 			"Defer Count");
5764 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5765 			CTLFLAG_RD, &adapter->stats.mpc,
5766 			"Missed Packets");
5767 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5768 			CTLFLAG_RD, &adapter->stats.rnbc,
5769 			"Receive No Buffers");
5770 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5771 			CTLFLAG_RD, &adapter->stats.ruc,
5772 			"Receive Undersize");
5773 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5774 			CTLFLAG_RD, &adapter->stats.rfc,
			"Fragmented Packets Received");
5776 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5777 			CTLFLAG_RD, &adapter->stats.roc,
5778 			"Oversized Packets Received");
5779 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5780 			CTLFLAG_RD, &adapter->stats.rjc,
			"Received Jabber");
5782 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5783 			CTLFLAG_RD, &adapter->stats.rxerrc,
5784 			"Receive Errors");
5785 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5786 			CTLFLAG_RD, &adapter->stats.crcerrs,
5787 			"CRC errors");
5788 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5789 			CTLFLAG_RD, &adapter->stats.algnerrc,
5790 			"Alignment Errors");
5791 	/* On 82575 these are collision counts */
5792 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5793 			CTLFLAG_RD, &adapter->stats.cexterr,
5794 			"Collision/Carrier extension errors");
5795 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5796 			CTLFLAG_RD, &adapter->stats.xonrxc,
5797 			"XON Received");
5798 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5799 			CTLFLAG_RD, &adapter->stats.xontxc,
5800 			"XON Transmitted");
5801 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5802 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5803 			"XOFF Received");
5804 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5805 			CTLFLAG_RD, &adapter->stats.xofftxc,
5806 			"XOFF Transmitted");
5807 
5808 	/* Packet Reception Stats */
5809 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5810 			CTLFLAG_RD, &adapter->stats.tpr,
			"Total Packets Received");
5812 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5813 			CTLFLAG_RD, &adapter->stats.gprc,
5814 			"Good Packets Received");
5815 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5816 			CTLFLAG_RD, &adapter->stats.bprc,
5817 			"Broadcast Packets Received");
5818 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5819 			CTLFLAG_RD, &adapter->stats.mprc,
5820 			"Multicast Packets Received");
5821 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5822 			CTLFLAG_RD, &adapter->stats.prc64,
			"64 byte frames received");
5824 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5825 			CTLFLAG_RD, &adapter->stats.prc127,
5826 			"65-127 byte frames received");
5827 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5828 			CTLFLAG_RD, &adapter->stats.prc255,
5829 			"128-255 byte frames received");
5830 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5831 			CTLFLAG_RD, &adapter->stats.prc511,
5832 			"256-511 byte frames received");
5833 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5834 			CTLFLAG_RD, &adapter->stats.prc1023,
5835 			"512-1023 byte frames received");
5836 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5837 			CTLFLAG_RD, &adapter->stats.prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &adapter->stats.gorc,
			"Good Octets Received");
5842 
5843 	/* Packet Transmission Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &adapter->stats.gotc,
			"Good Octets Transmitted");
5847 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5848 			CTLFLAG_RD, &adapter->stats.tpt,
5849 			"Total Packets Transmitted");
5850 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5851 			CTLFLAG_RD, &adapter->stats.gptc,
5852 			"Good Packets Transmitted");
5853 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5854 			CTLFLAG_RD, &adapter->stats.bptc,
5855 			"Broadcast Packets Transmitted");
5856 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5857 			CTLFLAG_RD, &adapter->stats.mptc,
5858 			"Multicast Packets Transmitted");
5859 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5860 			CTLFLAG_RD, &adapter->stats.ptc64,
			"64 byte frames transmitted");
5862 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5863 			CTLFLAG_RD, &adapter->stats.ptc127,
5864 			"65-127 byte frames transmitted");
5865 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5866 			CTLFLAG_RD, &adapter->stats.ptc255,
5867 			"128-255 byte frames transmitted");
5868 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5869 			CTLFLAG_RD, &adapter->stats.ptc511,
5870 			"256-511 byte frames transmitted");
5871 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5872 			CTLFLAG_RD, &adapter->stats.ptc1023,
5873 			"512-1023 byte frames transmitted");
5874 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5875 			CTLFLAG_RD, &adapter->stats.ptc1522,
5876 			"1024-1522 byte frames transmitted");
5877 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5878 			CTLFLAG_RD, &adapter->stats.tsctc,
5879 			"TSO Contexts Transmitted");
5880 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5881 			CTLFLAG_RD, &adapter->stats.tsctfc,
5882 			"TSO Contexts Failed");
5883 
5885 	/* Interrupt Stats */
5886 
5887 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5888 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5889 	int_list = SYSCTL_CHILDREN(int_node);
5890 
5891 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5892 			CTLFLAG_RD, &adapter->stats.iac,
5893 			"Interrupt Assertion Count");
5894 
5895 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5896 			CTLFLAG_RD, &adapter->stats.icrxptc,
5897 			"Interrupt Cause Rx Pkt Timer Expire Count");
5898 
5899 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5900 			CTLFLAG_RD, &adapter->stats.icrxatc,
5901 			"Interrupt Cause Rx Abs Timer Expire Count");
5902 
5903 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5904 			CTLFLAG_RD, &adapter->stats.ictxptc,
5905 			"Interrupt Cause Tx Pkt Timer Expire Count");
5906 
5907 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5908 			CTLFLAG_RD, &adapter->stats.ictxatc,
5909 			"Interrupt Cause Tx Abs Timer Expire Count");
5910 
5911 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5912 			CTLFLAG_RD, &adapter->stats.ictxqec,
5913 			"Interrupt Cause Tx Queue Empty Count");
5914 
5915 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5916 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5917 			"Interrupt Cause Tx Queue Min Thresh Count");
5918 
5919 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5920 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5921 			"Interrupt Cause Rx Desc Min Thresh Count");
5922 
5923 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5924 			CTLFLAG_RD, &adapter->stats.icrxoc,
5925 			"Interrupt Cause Receiver Overrun Count");
5926 }
5927 
5928 /**********************************************************************
5929  *
5930  *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. Only the first 32 words are
 *  dumped; everything that matters lies within that range.
5933  *
5934  **********************************************************************/
5935 static int
5936 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5937 {
5938 	struct adapter *adapter = (struct adapter *)arg1;
5939 	int error;
5940 	int result;
5941 
5942 	result = -1;
5943 	error = sysctl_handle_int(oidp, &result, 0, req);
5944 
5945 	if (error || !req->newptr)
5946 		return (error);
5947 
5948 	/*
5949 	 * This value will cause a hex dump of the
5950 	 * first 32 16-bit words of the EEPROM to
5951 	 * the screen.
5952 	 */
5953 	if (result == 1)
5954 		em_print_nvm_info(adapter);
5955 
5956 	return (error);
5957 }
5958 
5959 static void
5960 em_print_nvm_info(struct adapter *adapter)
5961 {
5962 	u16	eeprom_data;
5963 	int	i, j, row = 0;
5964 
	/* It's a bit crude, but it gets the job done */
5966 	printf("\nInterface EEPROM Dump:\n");
5967 	printf("Offset\n0x0000  ");
5968 	for (i = 0, j = 0; i < 32; i++, j++) {
5969 		if (j == 8) { /* Make the offset block */
5970 			j = 0; ++row;
			printf("\n0x00%x0  ", row);
5972 		}
5973 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5974 		printf("%04x ", eeprom_data);
5975 	}
5976 	printf("\n");
5977 }
5978 
5979 static int
5980 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5981 {
5982 	struct em_int_delay_info *info;
5983 	struct adapter *adapter;
5984 	u32 regval;
5985 	int error, usecs, ticks;
5986 
5987 	info = (struct em_int_delay_info *)arg1;
5988 	usecs = info->value;
5989 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5990 	if (error != 0 || req->newptr == NULL)
5991 		return (error);
5992 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5993 		return (EINVAL);
5994 	info->value = usecs;
5995 	ticks = EM_USECS_TO_TICKS(usecs);
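	/* EM_USECS_TO_TICKS() works in ~1.024us units; the ITR counts 256ns */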
5996 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5997 		ticks *= 4;
5998 
5999 	adapter = info->adapter;
6000 
6001 	EM_CORE_LOCK(adapter);
6002 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6003 	regval = (regval & ~0xffff) | (ticks & 0xffff);
6004 	/* Handle a few special cases. */
6005 	switch (info->offset) {
6006 	case E1000_RDTR:
6007 		break;
6008 	case E1000_TIDV:
6009 		if (ticks == 0) {
6010 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6011 			/* Don't write 0 into the TIDV register. */
6012 			regval++;
6013 		} else
6014 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6015 		break;
6016 	}
6017 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6018 	EM_CORE_UNLOCK(adapter);
6019 	return (0);
6020 }
6021 
6022 static void
6023 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6024 	const char *description, struct em_int_delay_info *info,
6025 	int offset, int value)
6026 {
6027 	info->adapter = adapter;
6028 	info->offset = offset;
6029 	info->value = value;
6030 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6031 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6032 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6033 	    info, 0, em_sysctl_int_delay, "I", description);
6034 }
6035 
6036 static void
6037 em_set_sysctl_value(struct adapter *adapter, const char *name,
6038 	const char *description, int *limit, int value)
6039 {
6040 	*limit = value;
6041 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6042 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6043 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6044 }
6045 
6046 
6047 /*
6048 ** Set flow control using sysctl:
6049 ** Flow control values:
6050 **      0 - off
6051 **      1 - rx pause
6052 **      2 - tx pause
6053 **      3 - full
6054 */
6055 static int
6056 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6057 {
	int		error;
	int		input;
	struct adapter	*adapter = (struct adapter *) arg1;

	/* Seed with the current value so each adapter reports its own setting */
	input = adapter->fc;
	error = sysctl_handle_int(oidp, &input, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	if (input == adapter->fc) /* no change? */
		return (error);

	switch (input) {
	case e1000_fc_rx_pause:
	case e1000_fc_tx_pause:
	case e1000_fc_full:
	case e1000_fc_none:
		adapter->hw.fc.requested_mode = input;
		adapter->fc = input;
		break;
	default:
		/* Invalid value */
		return (EINVAL);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
6086 }
6087 
6088 /*
6089 ** Manage Energy Efficient Ethernet:
6090 ** Control values:
6091 **     0/1 - enabled/disabled
6092 */
6093 static int
6094 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6095 {
	struct adapter *adapter = (struct adapter *) arg1;
	int		error, value;

	value = adapter->hw.dev_spec.ich8lan.eee_disable;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	EM_CORE_LOCK(adapter);
	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
	return (0);
6108 }
6109 
6110 static int
6111 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6112 {
6113 	struct adapter *adapter;
6114 	int error;
6115 	int result;
6116 
6117 	result = -1;
6118 	error = sysctl_handle_int(oidp, &result, 0, req);
6119 
6120 	if (error || !req->newptr)
6121 		return (error);
6122 
6123 	if (result == 1) {
6124 		adapter = (struct adapter *)arg1;
6125 		em_print_debug_info(adapter);
	}
6127 
6128 	return (error);
6129 }
6130 
6131 /*
6132 ** This routine is meant to be fluid, add whatever is
6133 ** needed for debugging a problem.  -jfv
6134 */
6135 static void
6136 em_print_debug_info(struct adapter *adapter)
6137 {
6138 	device_t dev = adapter->dev;
6139 	struct tx_ring *txr = adapter->tx_rings;
6140 	struct rx_ring *rxr = adapter->rx_rings;
6141 
6142 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
6143 		printf("Interface is RUNNING ");
6144 	else
6145 		printf("Interface is NOT RUNNING\n");
6146 
6147 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
6148 		printf("and INACTIVE\n");
6149 	else
6150 		printf("and ACTIVE\n");
6151 
6152 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6153 		device_printf(dev, "TX Queue %d ------\n", i);
6154 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6155 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6156 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6157 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6158 		device_printf(dev, "TX descriptors avail = %d\n",
6159 	    		txr->tx_avail);
		device_printf(dev, "Tx Descriptors avail failure = %lu\n",
6161 	    		txr->no_desc_avail);
6162 		device_printf(dev, "RX Queue %d ------\n", i);
6163 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6164 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6165 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
		device_printf(dev, "RX discarded packets = %lu\n",
6167 	    		rxr->rx_discarded);
6168 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6169 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6170 	}
6171 }
6172 
6173 #ifdef EM_MULTIQUEUE
6174 /*
6175  * 82574 only:
6176  * Write a new value to the EEPROM increasing the number of MSIX
6177  * vectors from 3 to 5, for proper multiqueue support.
6178  */
6179 static void
6180 em_enable_vectors_82574(struct adapter *adapter)
6181 {
6182 	struct e1000_hw *hw = &adapter->hw;
6183 	device_t dev = adapter->dev;
6184 	u16 edata;
6185 
6186 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
	device_printf(dev, "Current cap: %#06x\n", edata);
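	/* The NVM field appears to encode (vectors - 1): 4 => 5 MSI-X vectors */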
6188 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6189 		device_printf(dev, "Writing to eeprom: increasing "
6190 		    "reported MSIX vectors from 3 to 5...\n");
6191 		edata &= ~(EM_NVM_MSIX_N_MASK);
6192 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6193 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6194 		e1000_update_nvm_checksum(hw);
6195 		device_printf(dev, "Writing to eeprom: done\n");
6196 	}
6197 }
6198 #endif
6199 
6200 #ifdef DDB
6201 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6202 {
6203 	devclass_t	dc;
6204 	int max_em;
6205 
6206 	dc = devclass_find("em");
6207 	max_em = devclass_get_maxunit(dc);
6208 
	for (int index = 0; index < max_em; index++) {
		device_t dev;
		dev = devclass_get_device(dc, index);
		if (dev != NULL && device_get_driver(dev) == &em_driver) {
6213 			struct adapter *adapter = device_get_softc(dev);
6214 			EM_CORE_LOCK(adapter);
6215 			em_init_locked(adapter);
6216 			EM_CORE_UNLOCK(adapter);
6217 		}
6218 	}
6219 }
6220 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6221 {
6222 	devclass_t	dc;
6223 	int max_em;
6224 
6225 	dc = devclass_find("em");
6226 	max_em = devclass_get_maxunit(dc);
6227 
	for (int index = 0; index < max_em; index++) {
		device_t dev;
		dev = devclass_get_device(dc, index);
		if (dev != NULL && device_get_driver(dev) == &em_driver)
6232 			em_print_debug_info(device_get_softc(dev));
6233 	}
6235 }
6236 #endif
6237