/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.8";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_drv_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
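/*
 * Unit check for the arithmetic above: the adapter's delay timers tick
 * in 1.024 usec units, which is what EM_TICKS_TO_USECS() and
 * EM_USECS_TO_TICKS() convert to and from (with rounding), while the
 * ITR register counts 256 ns increments, so DEFAULT_ITR works out to
 * 1000000000 / (8000 * 256) = 488 for 8000 interrupts per second.
 */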

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy Efficient Ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");
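/*
 * Note that this tunable is copied into hw->dev_spec.ich8lan.eee_disable
 * at attach time, so the default of 1 leaves EEE disabled and 0 enables it.
 */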

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

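	/*
	 * Walk the device table; PCI_ANY_ID in the subvendor/subdevice
	 * fields acts as a wildcard, so for those entries only the
	 * vendor and device IDs have to match exactly.
	 */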
	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  The
	 * count must not exceed the hardware maximum, and the ring size
	 * in bytes must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;
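	/*
	 * Each legacy descriptor is 16 bytes, so with the usual
	 * EM_DBA_ALIGN of 128 the checks above require the ring length
	 * to be a multiple of 8 descriptors.
	 */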

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard Ethernet-sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state: this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state, so retry the
		** check; if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free_drv(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev, "VLAN in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister_drv(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach_drv(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free_drv(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit;
 *  if the hardware is busy, the driver can queue the request
 *  rather than doing an immediate send.  That cheap software
 *  queueing is the advantage here, rather than also having
 *  multiple hardware tx queues.
 **********************************************************************/
static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m != NULL) {
		err = drbr_enqueue(ifp, txr->br, m);
		if (err)
			return (err);
	}

	/* Process the queue */
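	/*
	 * drbr_peek() returns the head of the ring without removing it.
	 * When em_xmit() succeeds the packet is consumed with
	 * drbr_advance(); when it fails with the mbuf left intact the
	 * packet is returned with drbr_putback() for a later retry, and
	 * when em_xmit() freed the mbuf (next == NULL) we must still
	 * advance past the stale slot.
	 */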
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_incobytes(ifp, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_incomcasts(ifp, 1);
		if_etherbpfmtap(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		if_etherbpfmtap(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit_drv(ifp, ifa);
#endif
		} else
			error = ether_ioctl_drv(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
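		/*
		 * The largest valid MTU is the frame limit minus the L2
		 * overhead (14-byte header plus 4-byte CRC); e.g. a
		 * 9234-byte frame limit allows an MTU of up to 9216.
		 */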
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl_drv(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register_drv(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister_drv(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp, IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp, IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl_drv(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: the stack calls it as the init
 *  entry point in the network interface structure, and the driver
 *  also uses it as a hw/sw initialization routine to get back to a
 *  consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}
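	/* E1000_RAR_ENTRIES is 15 in the shared code, so the copy lands in RAR[14]. */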

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	if (if_getcapenable(ifp) & IFCAP_TSO4)
		if_sethwassistbits(ifp, CSUM_TSO, 0);

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
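	/*
	 * For reference: MCLBYTES is the standard 2KB cluster,
	 * MJUMPAGESIZE is one page (typically 4KB), and MJUM9BYTES is
	 * the 9KB jumbo cluster.
	 */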

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; otherwise
	 * make sure they are off.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
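/*
 * This runs as an interrupt filter, so it must not sleep or take any
 * sleepable locks; it only classifies the interrupt cause and defers
 * the real work to the que/link taskqueues.
 */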
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}

/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}
1658 
1659 static void
1660 em_handle_link(void *context, int pending)
1661 {
1662 	struct adapter	*adapter = context;
1663 	struct tx_ring	*txr = adapter->tx_rings;
1664 	if_t ifp = adapter->ifp;
1665 
1666 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1667 		return;
1668 
1669 	EM_CORE_LOCK(adapter);
1670 	callout_stop(&adapter->timer);
1671 	em_update_link_status(adapter);
1672 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1673 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1674 	    EM_MSIX_LINK | E1000_IMS_LSC);
1675 	if (adapter->link_active) {
1676 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1677 			EM_TX_LOCK(txr);
1678 #ifdef EM_MULTIQUEUE
1679 			if (!drbr_empty(ifp, txr->br))
1680 				em_mq_start_locked(ifp, txr, NULL);
1681 #else
1682 			if (!if_sendq_empty(ifp))
1683 				em_start_locked(ifp, txr);
1684 #endif
1685 			EM_TX_UNLOCK(txr);
1686 		}
1687 	}
1688 	EM_CORE_UNLOCK(adapter);
1689 }
1690 
1691 
1692 /*********************************************************************
1693  *
1694  *  Media Ioctl callback
1695  *
1696  *  This routine is called whenever the user queries the status of
1697  *  the interface using ifconfig.
1698  *
1699  **********************************************************************/
1700 static void
1701 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1702 {
1703 	struct adapter *adapter = if_getsoftc(ifp);
1704 	u_char fiber_type = IFM_1000_SX;
1705 
1706 	INIT_DEBUGOUT("em_media_status: begin");
1707 
1708 	EM_CORE_LOCK(adapter);
1709 	em_update_link_status(adapter);
1710 
1711 	ifmr->ifm_status = IFM_AVALID;
1712 	ifmr->ifm_active = IFM_ETHER;
1713 
1714 	if (!adapter->link_active) {
1715 		EM_CORE_UNLOCK(adapter);
1716 		return;
1717 	}
1718 
1719 	ifmr->ifm_status |= IFM_ACTIVE;
1720 
1721 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1722 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1723 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1724 	} else {
1725 		switch (adapter->link_speed) {
1726 		case 10:
1727 			ifmr->ifm_active |= IFM_10_T;
1728 			break;
1729 		case 100:
1730 			ifmr->ifm_active |= IFM_100_TX;
1731 			break;
1732 		case 1000:
1733 			ifmr->ifm_active |= IFM_1000_T;
1734 			break;
1735 		}
1736 		if (adapter->link_duplex == FULL_DUPLEX)
1737 			ifmr->ifm_active |= IFM_FDX;
1738 		else
1739 			ifmr->ifm_active |= IFM_HDX;
1740 	}
1741 	EM_CORE_UNLOCK(adapter);
1742 }
1743 
1744 /*********************************************************************
1745  *
1746  *  Media Ioctl callback
1747  *
1748  *  This routine is called when the user changes speed/duplex using
1749  *  media/mediaopt options with ifconfig.
1750  *
1751  **********************************************************************/
1752 static int
1753 em_media_change(if_t ifp)
1754 {
1755 	struct adapter *adapter = if_getsoftc(ifp);
1756 	struct ifmedia  *ifm = &adapter->media;
1757 
1758 	INIT_DEBUGOUT("em_media_change: begin");
1759 
1760 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1761 		return (EINVAL);
1762 
1763 	EM_CORE_LOCK(adapter);
1764 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1765 	case IFM_AUTO:
1766 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1767 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1768 		break;
1769 	case IFM_1000_LX:
1770 	case IFM_1000_SX:
1771 	case IFM_1000_T:
1772 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1773 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1774 		break;
1775 	case IFM_100_TX:
1776 		adapter->hw.mac.autoneg = FALSE;
1777 		adapter->hw.phy.autoneg_advertised = 0;
1778 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1779 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1780 		else
1781 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1782 		break;
1783 	case IFM_10_T:
1784 		adapter->hw.mac.autoneg = FALSE;
1785 		adapter->hw.phy.autoneg_advertised = 0;
1786 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1787 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1788 		else
1789 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1790 		break;
1791 	default:
1792 		device_printf(adapter->dev, "Unsupported media type\n");
1793 	}
1794 
1795 	em_init_locked(adapter);
1796 	EM_CORE_UNLOCK(adapter);
1797 
1798 	return (0);
1799 }
1800 
1801 /*********************************************************************
1802  *
1803  *  This routine maps the mbufs to tx descriptors.
1804  *
1805  *  return 0 on success, positive on failure
1806  **********************************************************************/
1807 
1808 static int
1809 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1810 {
1811 	struct adapter		*adapter = txr->adapter;
1812 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1813 	bus_dmamap_t		map;
1814 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1815 	struct e1000_tx_desc	*ctxd = NULL;
1816 	struct mbuf		*m_head;
1817 	struct ether_header	*eh;
1818 	struct ip		*ip = NULL;
1819 	struct tcphdr		*tp = NULL;
1820 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1821 	int			ip_off, poff;
1822 	int			nsegs, i, j, first, last = 0;
1823 	int			error, do_tso, tso_desc = 0, remap = 1;
1824 
1825 retry:
1826 	m_head = *m_headp;
1827 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1828 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1829 	ip_off = poff = 0;
1830 
1831 	/*
1832 	 * Intel recommends entire IP/TCP header length reside in a single
1833 	 * buffer. If multiple descriptors are used to describe the IP and
1834 	 * TCP header, each descriptor should describe one or more
1835 	 * complete headers; descriptors referencing only parts of headers
1836 	 * are not supported. If all layer headers are not coalesced into
1837 	 * a single buffer, each buffer should not cross a 4KB boundary,
1838 	 * or be larger than the maximum read request size.
1839 	 * The controller also requires modifying the IP/TCP header to
1840 	 * make TSO work, so we first get a writable mbuf chain, then
1841 	 * coalesce the ethernet/IP/TCP headers into a single buffer to
1842 	 * meet the controller's requirements. This also simplifies
1843 	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1844 	 */
1845 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1846 		if (do_tso || (m_head->m_next != NULL &&
1847 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1848 			if (M_WRITABLE(*m_headp) == 0) {
1849 				m_head = m_dup(*m_headp, M_NOWAIT);
1850 				m_freem(*m_headp);
1851 				if (m_head == NULL) {
1852 					*m_headp = NULL;
1853 					return (ENOBUFS);
1854 				}
1855 				*m_headp = m_head;
1856 			}
1857 		}
1858 		/*
1859 		 * XXX
1860 		 * Assume IPv4, we don't have TSO/checksum offload support
1861 		 * for IPv6 yet.
1862 		 */
1863 		ip_off = sizeof(struct ether_header);
1864 		m_head = m_pullup(m_head, ip_off);
1865 		if (m_head == NULL) {
1866 			*m_headp = NULL;
1867 			return (ENOBUFS);
1868 		}
1869 		eh = mtod(m_head, struct ether_header *);
1870 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1871 			ip_off = sizeof(struct ether_vlan_header);
1872 			m_head = m_pullup(m_head, ip_off);
1873 			if (m_head == NULL) {
1874 				*m_headp = NULL;
1875 				return (ENOBUFS);
1876 			}
1877 		}
1878 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1879 		if (m_head == NULL) {
1880 			*m_headp = NULL;
1881 			return (ENOBUFS);
1882 		}
1883 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1884 		poff = ip_off + (ip->ip_hl << 2);
1885 		if (do_tso) {
1886 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1887 			if (m_head == NULL) {
1888 				*m_headp = NULL;
1889 				return (ENOBUFS);
1890 			}
1891 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1892 			/*
1893 			 * TSO workaround:
1894 			 *   pull 4 more bytes of data into it.
1895 			 */
1896 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1897 			if (m_head == NULL) {
1898 				*m_headp = NULL;
1899 				return (ENOBUFS);
1900 			}
1901 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1902 			ip->ip_len = 0;
1903 			ip->ip_sum = 0;
1904 			/*
1905 			 * The TCP pseudo checksum must not include the TCP
1906 			 * payload length, so the driver recomputes it here to
1907 			 * match what the hardware expects, in adherence to
1908 			 * Microsoft's Large Send specification.
1909 			 */
1910 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1911 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1912 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1913 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1914 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1915 			if (m_head == NULL) {
1916 				*m_headp = NULL;
1917 				return (ENOBUFS);
1918 			}
1919 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1920 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1921 			if (m_head == NULL) {
1922 				*m_headp = NULL;
1923 				return (ENOBUFS);
1924 			}
1925 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1926 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1927 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1928 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1929 			if (m_head == NULL) {
1930 				*m_headp = NULL;
1931 				return (ENOBUFS);
1932 			}
1933 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1934 		}
1935 		*m_headp = m_head;
1936 	}
1937 
1938 	/*
1939 	 * Map the packet for DMA
1940 	 *
1941 	 * Capture the first descriptor index,
1942 	 * this descriptor will have the index
1943 	 * of the EOP which is the only one that
1944 	 * now gets a DONE bit writeback.
1945 	 */
1946 	first = txr->next_avail_desc;
1947 	tx_buffer = &txr->tx_buffers[first];
1948 	tx_buffer_mapped = tx_buffer;
1949 	map = tx_buffer->map;
1950 
1951 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1952 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1953 
1954 	/*
1955 	 * There are two types of errors we can (try) to handle:
1956 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1957 	 *   out of segments.  Defragment the mbuf chain and try again.
1958 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1959 	 *   at this point in time.  Defer sending and try again later.
1960 	 * All other errors, in particular EINVAL, are fatal and prevent the
1961 	 * mbuf chain from ever going through.  Drop it and report error.
1962 	 */
1963 	if (error == EFBIG && remap) {
1964 		struct mbuf *m;
1965 
1966 		m = m_defrag(*m_headp, M_NOWAIT);
1967 		if (m == NULL) {
1968 			adapter->mbuf_alloc_failed++;
1969 			m_freem(*m_headp);
1970 			*m_headp = NULL;
1971 			return (ENOBUFS);
1972 		}
1973 		*m_headp = m;
1974 
1975 		/* Try it again, but only once */
1976 		remap = 0;
1977 		goto retry;
1978 	} else if (error == ENOMEM) {
1979 		adapter->no_tx_dma_setup++;
1980 		return (error);
1981 	} else if (error != 0) {
1982 		adapter->no_tx_dma_setup++;
1983 		m_freem(*m_headp);
1984 		*m_headp = NULL;
1985 		return (error);
1986 	}
1987 
1988 	/*
1989 	 * TSO Hardware workaround, if this packet is not
1990 	 * TSO, and is only a single descriptor long, and
1991 	 * it follows a TSO burst, then we need to add a
1992 	 * sentinel descriptor to prevent premature writeback.
1993 	 */
1994 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1995 		if (nsegs == 1)
1996 			tso_desc = TRUE;
1997 		txr->tx_tso = FALSE;
1998 	}
1999 
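	/*
	 * Ensure at least two spare descriptors remain (room for a
	 * possible TSO sentinel, and so the ring never fills completely).
	 */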
2000 	if (nsegs > (txr->tx_avail - 2)) {
2001 		txr->no_desc_avail++;
2002 		bus_dmamap_unload(txr->txtag, map);
2003 		return (ENOBUFS);
2004 	}
2005 	m_head = *m_headp;
2006 
2007 	/* Do hardware assists */
2008 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2009 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2010 		    &txd_upper, &txd_lower);
2011 		/* we need to make a final sentinel transmit desc */
2012 		tso_desc = TRUE;
2013 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2014 		em_transmit_checksum_setup(txr, m_head,
2015 		    ip_off, ip, &txd_upper, &txd_lower);
2016 
2017 	if (m_head->m_flags & M_VLANTAG) {
2018 		/* Set the vlan id. */
2019 		txd_upper |= htole16((if_getvtag(m_head)) << 16);
2020 		/* Tell hardware to add tag */
2021 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2022 	}
2023 
2024 	i = txr->next_avail_desc;
2025 
2026 	/* Set up our transmit descriptors */
2027 	for (j = 0; j < nsegs; j++) {
2028 		bus_size_t seg_len;
2029 		bus_addr_t seg_addr;
2030 
2031 		tx_buffer = &txr->tx_buffers[i];
2032 		ctxd = &txr->tx_base[i];
2033 		seg_addr = segs[j].ds_addr;
2034 		seg_len  = segs[j].ds_len;
2035 		/*
2036 		** TSO Workaround:
2037 		** If this is the last descriptor, we want to
2038 		** split it so we have a small final sentinel
2039 		*/
2040 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2041 			seg_len -= 4;
2042 			ctxd->buffer_addr = htole64(seg_addr);
2043 			ctxd->lower.data = htole32(
2044 			adapter->txd_cmd | txd_lower | seg_len);
2045 			ctxd->upper.data =
2046 			    htole32(txd_upper);
2047 			if (++i == adapter->num_tx_desc)
2048 				i = 0;
2049 			/* Now make the sentinel */
2050 			++txd_used; /* using an extra txd */
2051 			ctxd = &txr->tx_base[i];
2052 			tx_buffer = &txr->tx_buffers[i];
2053 			ctxd->buffer_addr =
2054 			    htole64(seg_addr + seg_len);
2055 			ctxd->lower.data = htole32(
2056 			adapter->txd_cmd | txd_lower | 4);
2057 			ctxd->upper.data =
2058 			    htole32(txd_upper);
2059 			last = i;
2060 			if (++i == adapter->num_tx_desc)
2061 				i = 0;
2062 		} else {
2063 			ctxd->buffer_addr = htole64(seg_addr);
2064 			ctxd->lower.data = htole32(
2065 			adapter->txd_cmd | txd_lower | seg_len);
2066 			ctxd->upper.data =
2067 			    htole32(txd_upper);
2068 			last = i;
2069 			if (++i == adapter->num_tx_desc)
2070 				i = 0;
2071 		}
2072 		tx_buffer->m_head = NULL;
2073 		tx_buffer->next_eop = -1;
2074 	}
2075 
2076 	txr->next_avail_desc = i;
2077 	txr->tx_avail -= nsegs;
2078 	if (tso_desc) /* TSO used an extra for sentinel */
2079 		txr->tx_avail -= txd_used;
2080 
2081 	tx_buffer->m_head = m_head;
2082 	/*
2083 	** Here we swap the map so the last descriptor,
2084 	** which gets the completion interrupt has the
2085 	** real map, and the first descriptor gets the
2086 	** unused map from this descriptor.
2087 	*/
2088 	tx_buffer_mapped->map = tx_buffer->map;
2089 	tx_buffer->map = map;
2090 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2091 
2092 	/*
2093 	 * The last descriptor of the packet needs
2094 	 * End Of Packet (EOP) and
2095 	 * Report Status (RS).
2096 	 */
2097 	ctxd->lower.data |=
2098 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2099 	/*
2100 	 * Keep track in the first buffer which
2101 	 * descriptor will be written back
2102 	 */
2103 	tx_buffer = &txr->tx_buffers[first];
2104 	tx_buffer->next_eop = last;
2105 	/* Update the watchdog time early and often */
2106 	txr->watchdog_time = ticks;
2107 
2108 	/*
2109 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2110 	 * that this frame is available to transmit.
2111 	 */
2112 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2113 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2114 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2115 
2116 	return (0);
2117 }
2118 
2119 static void
2120 em_set_promisc(struct adapter *adapter)
2121 {
2122 	if_t ifp = adapter->ifp;
2123 	u32		reg_rctl;
2124 
2125 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2126 
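	/* UPE/MPE are unicast/multicast promiscuous; SBP passes bad packets. */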
2127 	if (if_getflags(ifp) & IFF_PROMISC) {
2128 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2129 		/* Turn this on if you want to see bad packets */
2130 		if (em_debug_sbp)
2131 			reg_rctl |= E1000_RCTL_SBP;
2132 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2133 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2134 		reg_rctl |= E1000_RCTL_MPE;
2135 		reg_rctl &= ~E1000_RCTL_UPE;
2136 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2137 	}
2138 }
2139 
2140 static void
2141 em_disable_promisc(struct adapter *adapter)
2142 {
2143 	if_t		ifp = adapter->ifp;
2144 	u32		reg_rctl;
2145 	int		mcnt = 0;
2146 
2147 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2148 	reg_rctl &=  (~E1000_RCTL_UPE);
2149 	if (if_getflags(ifp) & IFF_ALLMULTI)
2150 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2151 	else
2152 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2153 	/* Don't disable if in MAX groups */
2154 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2155 		reg_rctl &=  (~E1000_RCTL_MPE);
2156 	reg_rctl &=  (~E1000_RCTL_SBP);
2157 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2158 }
2159 
2160 
2161 /*********************************************************************
2162  *  Multicast Update
2163  *
2164  *  This routine is called whenever multicast address list is updated.
2165  *
2166  **********************************************************************/
2167 
2168 static void
2169 em_set_multi(struct adapter *adapter)
2170 {
2171 	if_t ifp = adapter->ifp;
2172 	u32 reg_rctl = 0;
2173 	u8  *mta; /* Multicast array memory */
2174 	int mcnt = 0;
2175 
2176 	IOCTL_DEBUGOUT("em_set_multi: begin");
2177 
2178 	mta = adapter->mta;
2179 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2180 
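	/*
	 * Errata workaround: rev 2 of the 82542 must have its receiver
	 * held in reset (RCTL_RST) while the multicast table is
	 * rewritten, with MWI disabled for the duration.
	 */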
2181 	if (adapter->hw.mac.type == e1000_82542 &&
2182 	    adapter->hw.revision_id == E1000_REVISION_2) {
2183 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2184 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2185 			e1000_pci_clear_mwi(&adapter->hw);
2186 		reg_rctl |= E1000_RCTL_RST;
2187 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2188 		msec_delay(5);
2189 	}
2190 
2191 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2192 
2193 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2194 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2195 		reg_rctl |= E1000_RCTL_MPE;
2196 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2197 	} else
2198 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2199 
2200 	if (adapter->hw.mac.type == e1000_82542 &&
2201 	    adapter->hw.revision_id == E1000_REVISION_2) {
2202 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2203 		reg_rctl &= ~E1000_RCTL_RST;
2204 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2205 		msec_delay(5);
2206 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2207 			e1000_pci_set_mwi(&adapter->hw);
2208 	}
2209 }
2210 
2211 
2212 /*********************************************************************
2213  *  Timer routine
2214  *
2215  *  This routine checks for link status and updates statistics.
2216  *
2217  **********************************************************************/
2218 
2219 static void
2220 em_local_timer(void *arg)
2221 {
2222 	struct adapter	*adapter = arg;
2223 	if_t ifp = adapter->ifp;
2224 	struct tx_ring	*txr = adapter->tx_rings;
2225 	struct rx_ring	*rxr = adapter->rx_rings;
2226 	u32		trigger;
2227 
2228 	EM_CORE_LOCK_ASSERT(adapter);
2229 
2230 	em_update_link_status(adapter);
2231 	em_update_stats_counters(adapter);
2232 
2233 	/* Reset LAA into RAR[0] on 82571 */
2234 	if ((adapter->hw.mac.type == e1000_82571) &&
2235 	    e1000_get_laa_state_82571(&adapter->hw))
2236 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2237 
2238 	/* Mask to use in the irq trigger */
2239 	if (adapter->msix_mem)
2240 		trigger = rxr->ims;
2241 	else
2242 		trigger = E1000_ICS_RXDMT0;
2243 
2244 	/*
2245 	** Check on the state of the TX queue(s); this
2246 	** can be done without the lock because it is RO
2247 	** and the HUNG state will be static if set.
2248 	*/
2249 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2250 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2251 		    (adapter->pause_frames == 0))
2252 			goto hung;
2253 		/* Schedule a TX tasklet if needed */
2254 		if (txr->tx_avail <= EM_MAX_SCATTER)
2255 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2256 	}
2257 
2258 	adapter->pause_frames = 0;
2259 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2260 #ifndef DEVICE_POLLING
2261 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2262 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2263 #endif
2264 	return;
2265 hung:
2266 	/* Looks like we're hung */
2267 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2268 	device_printf(adapter->dev,
2269 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2270 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2271 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2272 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2273 	    "Next TX to Clean = %d\n",
2274 	    txr->me, txr->tx_avail, txr->next_to_clean);
2275 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2276 	adapter->watchdog_events++;
2277 	adapter->pause_frames = 0;
2278 	em_init_locked(adapter);
2279 }
2280 
2281 
2282 static void
2283 em_update_link_status(struct adapter *adapter)
2284 {
2285 	struct e1000_hw *hw = &adapter->hw;
2286 	if_t ifp = adapter->ifp;
2287 	device_t dev = adapter->dev;
2288 	struct tx_ring *txr = adapter->tx_rings;
2289 	u32 link_check = 0;
2290 
2291 	/* Get the cached link value or read phy for real */
2292 	switch (hw->phy.media_type) {
2293 	case e1000_media_type_copper:
2294 		if (hw->mac.get_link_status) {
2295 			/* Do the work to read phy */
2296 			e1000_check_for_link(hw);
2297 			link_check = !hw->mac.get_link_status;
2298 			if (link_check) /* ESB2 fix */
2299 				e1000_cfg_on_link_up(hw);
2300 		} else
2301 			link_check = TRUE;
2302 		break;
2303 	case e1000_media_type_fiber:
2304 		e1000_check_for_link(hw);
2305 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2306                                  E1000_STATUS_LU);
2307 		break;
2308 	case e1000_media_type_internal_serdes:
2309 		e1000_check_for_link(hw);
2310 		link_check = adapter->hw.mac.serdes_has_link;
2311 		break;
2312 	default:
2313 	case e1000_media_type_unknown:
2314 		break;
2315 	}
2316 
2317 	/* Now check for a transition */
2318 	if (link_check && (adapter->link_active == 0)) {
2319 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2320 		    &adapter->link_duplex);
2321 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2322 		if ((adapter->link_speed != SPEED_1000) &&
2323 		    ((hw->mac.type == e1000_82571) ||
2324 		    (hw->mac.type == e1000_82572))) {
2325 			int tarc0;
2326 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2327 			tarc0 &= ~SPEED_MODE_BIT;
2328 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2329 		}
2330 		if (bootverbose)
2331 			device_printf(dev, "Link is up %d Mbps %s\n",
2332 			    adapter->link_speed,
2333 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2334 			    "Full Duplex" : "Half Duplex"));
2335 		adapter->link_active = 1;
2336 		adapter->smartspeed = 0;
2337 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2338 		if_linkstate_change_drv(ifp, LINK_STATE_UP);
2339 	} else if (!link_check && (adapter->link_active == 1)) {
2340 		if_setbaudrate(ifp, 0);
2341 		adapter->link_speed = 0;
2342 		adapter->link_duplex = 0;
2343 		if (bootverbose)
2344 			device_printf(dev, "Link is Down\n");
2345 		adapter->link_active = 0;
2346 		/* Link down, disable watchdog */
2347 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2348 			txr->queue_status = EM_QUEUE_IDLE;
2349 		if_linkstate_change_drv(ifp, LINK_STATE_DOWN);
2350 	}
2351 }
2352 
2353 /*********************************************************************
2354  *
2355  *  This routine disables all traffic on the adapter by issuing a
2356  *  global reset on the MAC and deallocating the TX/RX buffers.
2357  *
2358  *  This routine should always be called with BOTH the CORE
2359  *  and TX locks.
2360  **********************************************************************/
2361 
2362 static void
2363 em_stop(void *arg)
2364 {
2365 	struct adapter	*adapter = arg;
2366 	if_t ifp = adapter->ifp;
2367 	struct tx_ring	*txr = adapter->tx_rings;
2368 
2369 	EM_CORE_LOCK_ASSERT(adapter);
2370 
2371 	INIT_DEBUGOUT("em_stop: begin");
2372 
2373 	em_disable_intr(adapter);
2374 	callout_stop(&adapter->timer);
2375 
2376 	/* Tell the stack that the interface is no longer active */
2377 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2378 
2379         /* Unarm watchdog timer. */
2380 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2381 		EM_TX_LOCK(txr);
2382 		txr->queue_status = EM_QUEUE_IDLE;
2383 		EM_TX_UNLOCK(txr);
2384 	}
2385 
2386 	e1000_reset_hw(&adapter->hw);
2387 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2388 
2389 	e1000_led_off(&adapter->hw);
2390 	e1000_cleanup_led(&adapter->hw);
2391 }
2392 
2393 
2394 /*********************************************************************
2395  *
2396  *  Determine hardware revision.
2397  *
2398  **********************************************************************/
2399 static void
2400 em_identify_hardware(struct adapter *adapter)
2401 {
2402 	device_t dev = adapter->dev;
2403 
2404 	/* Make sure our PCI config space has the necessary stuff set */
2405 	pci_enable_busmaster(dev);
2406 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2407 
2408 	/* Save off the information about this board */
2409 	adapter->hw.vendor_id = pci_get_vendor(dev);
2410 	adapter->hw.device_id = pci_get_device(dev);
2411 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2412 	adapter->hw.subsystem_vendor_id =
2413 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2414 	adapter->hw.subsystem_device_id =
2415 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2416 
2417 	/* Do Shared Code Init and Setup */
2418 	if (e1000_set_mac_type(&adapter->hw)) {
2419 		device_printf(dev, "Setup init failure\n");
2420 		return;
2421 	}
2422 }
2423 
2424 static int
2425 em_allocate_pci_resources(struct adapter *adapter)
2426 {
2427 	device_t	dev = adapter->dev;
2428 	int		rid;
2429 
2430 	rid = PCIR_BAR(0);
2431 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2432 	    &rid, RF_ACTIVE);
2433 	if (adapter->memory == NULL) {
2434 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2435 		return (ENXIO);
2436 	}
2437 	adapter->osdep.mem_bus_space_tag =
2438 	    rman_get_bustag(adapter->memory);
2439 	adapter->osdep.mem_bus_space_handle =
2440 	    rman_get_bushandle(adapter->memory);
2441 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2442 
2443 	/* Default to a single queue */
2444 	adapter->num_queues = 1;
2445 
2446 	/*
2447 	 * Setup MSI/X or MSI if PCI Express
2448 	 */
2449 	adapter->msix = em_setup_msix(adapter);
2450 
2451 	adapter->hw.back = &adapter->osdep;
2452 
2453 	return (0);
2454 }
2455 
2456 /*********************************************************************
2457  *
2458  *  Setup the Legacy or MSI Interrupt handler
2459  *
2460  **********************************************************************/
2461 int
2462 em_allocate_legacy(struct adapter *adapter)
2463 {
2464 	device_t dev = adapter->dev;
2465 	struct tx_ring	*txr = adapter->tx_rings;
2466 	int error, rid = 0;
2467 
2468 	/* Manually turn off all interrupts */
2469 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2470 
2471 	if (adapter->msix == 1) /* using MSI */
2472 		rid = 1;
2473 	/* We allocate a single interrupt resource */
2474 	adapter->res = bus_alloc_resource_any(dev,
2475 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2476 	if (adapter->res == NULL) {
2477 		device_printf(dev, "Unable to allocate bus resource: "
2478 		    "interrupt\n");
2479 		return (ENXIO);
2480 	}
2481 
2482 	/*
2483 	 * Allocate a fast interrupt and the associated
2484 	 * deferred processing contexts.
2485 	 */
2486 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2487 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2488 	    taskqueue_thread_enqueue, &adapter->tq);
2489 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2490 	    device_get_nameunit(adapter->dev));
2491 	/* Use a TX only tasklet for local timer */
2492 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2493 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2494 	    taskqueue_thread_enqueue, &txr->tq);
2495 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2496 	    device_get_nameunit(adapter->dev));
2497 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2498 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2499 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2500 		device_printf(dev, "Failed to register fast interrupt "
2501 			    "handler: %d\n", error);
2502 		taskqueue_free(adapter->tq);
2503 		adapter->tq = NULL;
2504 		return (error);
2505 	}
2506 
2507 	return (0);
2508 }
2509 
2510 /*********************************************************************
2511  *
2512  *  Setup the MSIX Interrupt handlers
2513  *   This is not really multiqueue; rather,
2514  *   it is just separate interrupt vectors
2515  *   for TX, RX, and Link.
2516  *
2517  **********************************************************************/
2518 int
2519 em_allocate_msix(struct adapter *adapter)
2520 {
2521 	device_t	dev = adapter->dev;
2522 	struct		tx_ring *txr = adapter->tx_rings;
2523 	struct		rx_ring *rxr = adapter->rx_rings;
2524 	int		error, rid, vector = 0;
2525 
2526 
2527 	/* Make sure all interrupts are disabled */
2528 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2529 
2530 	/* First set up ring resources */
2531 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2532 
2533 		/* RX ring */
2534 		rid = vector + 1;
2535 
2536 		rxr->res = bus_alloc_resource_any(dev,
2537 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2538 		if (rxr->res == NULL) {
2539 			device_printf(dev,
2540 			    "Unable to allocate bus resource: "
2541 			    "RX MSIX Interrupt %d\n", i);
2542 			return (ENXIO);
2543 		}
2544 		if ((error = bus_setup_intr(dev, rxr->res,
2545 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2546 		    rxr, &rxr->tag)) != 0) {
2547 			device_printf(dev, "Failed to register RX handler");
2548 			return (error);
2549 		}
2550 #if __FreeBSD_version >= 800504
2551 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2552 #endif
2553 		rxr->msix = vector++; /* NOTE increment vector for TX */
2554 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2555 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2556 		    taskqueue_thread_enqueue, &rxr->tq);
2557 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2558 		    device_get_nameunit(adapter->dev));
2559 		/*
2560 		** Set the bit to enable interrupt
2561 		** in E1000_IMS -- bits 20 and 21
2562 		** are for RX0 and RX1, note this has
2563 		** NOTHING to do with the MSIX vector
2564 		*/
2565 		rxr->ims = 1 << (20 + i);
2566 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2567 
2568 		/* TX ring */
2569 		rid = vector + 1;
2570 		txr->res = bus_alloc_resource_any(dev,
2571 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2572 		if (txr->res == NULL) {
2573 			device_printf(dev,
2574 			    "Unable to allocate bus resource: "
2575 			    "TX MSIX Interrupt %d\n", i);
2576 			return (ENXIO);
2577 		}
2578 		if ((error = bus_setup_intr(dev, txr->res,
2579 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2580 		    txr, &txr->tag)) != 0) {
2581 			device_printf(dev, "Failed to register TX handler");
2582 			return (error);
2583 		}
2584 #if __FreeBSD_version >= 800504
2585 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2586 #endif
2587 		txr->msix = vector++; /* Increment vector for next pass */
2588 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2589 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2590 		    taskqueue_thread_enqueue, &txr->tq);
2591 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2592 		    device_get_nameunit(adapter->dev));
2593 		/*
2594 		** Set the bit to enable interrupt
2595 		** in E1000_IMS -- bits 22 and 23
2596 		** are for TX0 and TX1, note this has
2597 		** NOTHING to do with the MSIX vector
2598 		*/
2599 		txr->ims = 1 << (22 + i);
2600 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2601 	}
2602 
2603 	/* Link interrupt */
2604 	++rid;
2605 	adapter->res = bus_alloc_resource_any(dev,
2606 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2607 	if (!adapter->res) {
2608 		device_printf(dev, "Unable to allocate "
2609 		    "bus resource: Link interrupt [%d]\n", rid);
2610 		return (ENXIO);
2611 	}
2612 	/* Set the link handler function */
2613 	error = bus_setup_intr(dev, adapter->res,
2614 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2615 	    em_msix_link, adapter, &adapter->tag);
2616 	if (error) {
2617 		adapter->res = NULL;
2618 		device_printf(dev, "Failed to register LINK handler");
2619 		return (error);
2620 	}
2621 #if __FreeBSD_version >= 800504
2622 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2623 #endif
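	/*
	 * Record the link vector and finish the 82574 IVAR image built
	 * above: each 4-bit field holds an MSIX vector number with 0x8
	 * as its valid bit -- RX causes in the low fields, TX causes at
	 * offset 8, and the link cause at offset 16.
	 */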
2624 	adapter->linkvec = vector;
2625 	adapter->ivars |=  (8 | vector) << 16;
2626 	adapter->ivars |= 0x80000000;
2627 
2628 	return (0);
2629 }
2630 
2631 
2632 static void
2633 em_free_pci_resources(struct adapter *adapter)
2634 {
2635 	device_t	dev = adapter->dev;
2636 	struct tx_ring	*txr;
2637 	struct rx_ring	*rxr;
2638 	int		rid;
2639 
2640 
2641 	/*
2642 	** Release all the queue interrupt resources:
2643 	*/
2644 	for (int i = 0; i < adapter->num_queues; i++) {
2645 		txr = &adapter->tx_rings[i];
2646 		rxr = &adapter->rx_rings[i];
2647 		/* an early abort? */
2648 		if ((txr == NULL) || (rxr == NULL))
2649 			break;
2650 		rid = txr->msix + 1;
2651 		if (txr->tag != NULL) {
2652 			bus_teardown_intr(dev, txr->res, txr->tag);
2653 			txr->tag = NULL;
2654 		}
2655 		if (txr->res != NULL)
2656 			bus_release_resource(dev, SYS_RES_IRQ,
2657 			    rid, txr->res);
2658 		rid = rxr->msix + 1;
2659 		if (rxr->tag != NULL) {
2660 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2661 			rxr->tag = NULL;
2662 		}
2663 		if (rxr->res != NULL)
2664 			bus_release_resource(dev, SYS_RES_IRQ,
2665 			    rid, rxr->res);
2666 	}
2667 
2668 	if (adapter->linkvec) /* we are doing MSIX */
2669 		rid = adapter->linkvec + 1;
2670 	else
2671 		rid = (adapter->msix != 0) ? 1 : 0;
2672 
2673 	if (adapter->tag != NULL) {
2674 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2675 		adapter->tag = NULL;
2676 	}
2677 
2678 	if (adapter->res != NULL)
2679 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2680 
2681 
2682 	if (adapter->msix)
2683 		pci_release_msi(dev);
2684 
2685 	if (adapter->msix_mem != NULL)
2686 		bus_release_resource(dev, SYS_RES_MEMORY,
2687 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2688 
2689 	if (adapter->memory != NULL)
2690 		bus_release_resource(dev, SYS_RES_MEMORY,
2691 		    PCIR_BAR(0), adapter->memory);
2692 
2693 	if (adapter->flash != NULL)
2694 		bus_release_resource(dev, SYS_RES_MEMORY,
2695 		    EM_FLASH, adapter->flash);
2696 }
2697 
2698 /*
2699  * Setup MSI or MSI/X
2700  */
2701 static int
2702 em_setup_msix(struct adapter *adapter)
2703 {
2704 	device_t dev = adapter->dev;
2705 	int val;
2706 
2707 	/*
2708 	** Setup MSI/X for Hartwell: tests have shown
2709 	** use of two queues to be unstable, and to
2710 	** provide no great gain anyway, so we simply
2711 	** separate the interrupts and use a single queue.
2712 	*/
2713 	if ((adapter->hw.mac.type == e1000_82574) &&
2714 	    (em_enable_msix == TRUE)) {
2715 		/* Map the MSIX BAR */
2716 		int rid = PCIR_BAR(EM_MSIX_BAR);
2717 		adapter->msix_mem = bus_alloc_resource_any(dev,
2718 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2719 		if (adapter->msix_mem == NULL) {
2720 			/* May not be enabled */
2721 			device_printf(adapter->dev,
2722 			    "Unable to map MSIX table\n");
2723 			goto msi;
2724 		}
2725 		val = pci_msix_count(dev);
2726 		/* We only need/want 3 vectors: RX, TX, and link */
2727 		if (val >= 3)
2728 			val = 3;
2729 		else {
2730 			device_printf(adapter->dev,
2731 			    "MSIX: insufficient vectors, using MSI\n");
2732 			goto msi;
2733 		}
2734 
2735 		if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2736 			device_printf(adapter->dev,
2737 			    "Using MSIX interrupts "
2738 			    "with %d vectors\n", val);
2739 			return (val);
2740 		}
2741 
2742 		/*
2743 		** If MSIX alloc failed or provided us with
2744 		** less than needed, free and fall through to MSI
2745 		*/
2746 		pci_release_msi(dev);
2747 	}
2748 msi:
2749 	if (adapter->msix_mem != NULL) {
2750 		bus_release_resource(dev, SYS_RES_MEMORY,
2751 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2752 		adapter->msix_mem = NULL;
2753 	}
2754 	val = 1;
2755 	if (pci_alloc_msi(dev, &val) == 0) {
2756 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2757 		return (val);
2758 	}
2759 	/* Should only happen due to manual configuration */
2760 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2761 	return (0);
2762 }
2763 
2764 
2765 /*********************************************************************
2766  *
2767  *  Initialize the hardware to a configuration
2768  *  as specified by the adapter structure.
2769  *
2770  **********************************************************************/
2771 static void
2772 em_reset(struct adapter *adapter)
2773 {
2774 	device_t	dev = adapter->dev;
2775 	if_t ifp = adapter->ifp;
2776 	struct e1000_hw	*hw = &adapter->hw;
2777 	u16		rx_buffer_size;
2778 	u32		pba;
2779 
2780 	INIT_DEBUGOUT("em_reset: begin");
2781 
2782 	/* Set up smart power down as default off on newer adapters. */
2783 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2784 	    hw->mac.type == e1000_82572)) {
2785 		u16 phy_tmp = 0;
2786 
2787 		/* Speed up time to link by disabling smart power down. */
2788 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2789 		phy_tmp &= ~IGP02E1000_PM_SPD;
2790 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2791 	}
2792 
2793 	/*
2794 	 * Packet Buffer Allocation (PBA)
2795 	 * Writing PBA sets the receive portion of the buffer;
2796 	 * the remainder is used for the transmit buffer.
2797 	 */
2798 	switch (hw->mac.type) {
2799 	/* Total Packet Buffer on these is 48K */
2800 	case e1000_82571:
2801 	case e1000_82572:
2802 	case e1000_80003es2lan:
2803 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2804 		break;
2805 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2806 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2807 		break;
2808 	case e1000_82574:
2809 	case e1000_82583:
2810 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2811 		break;
2812 	case e1000_ich8lan:
2813 		pba = E1000_PBA_8K;
2814 		break;
2815 	case e1000_ich9lan:
2816 	case e1000_ich10lan:
2817 		/* Boost Receive side for jumbo frames */
2818 		if (adapter->hw.mac.max_frame_size > 4096)
2819 			pba = E1000_PBA_14K;
2820 		else
2821 			pba = E1000_PBA_10K;
2822 		break;
2823 	case e1000_pchlan:
2824 	case e1000_pch2lan:
2825 	case e1000_pch_lpt:
2826 		pba = E1000_PBA_26K;
2827 		break;
2828 	default:
2829 		if (adapter->hw.mac.max_frame_size > 8192)
2830 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2831 		else
2832 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2833 	}
2834 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2835 
2836 	/*
2837 	 * These parameters control the automatic generation (Tx) and
2838 	 * response (Rx) to Ethernet PAUSE frames.
2839 	 * - High water mark should allow for at least two frames to be
2840 	 *   received after sending an XOFF.
2841 	 * - Low water mark works best when it is very near the high water mark.
2842 	 *   This allows the receiver to restart by sending XON when it has
2843 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2844 	 *   restart after one full frame is pulled from the buffer. There
2845 	 *   could be several smaller frames in the buffer and if so they will
2846 	 *   not trigger the XON until their total number reduces the buffer
2847 	 *   by 1500.
2848 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2849 	 */
2850 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2851 	hw->fc.high_water = rx_buffer_size -
2852 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2853 	hw->fc.low_water = hw->fc.high_water - 1500;
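	/*
	 * Example: with a 48K PBA and a 1522-byte max frame,
	 * high_water = 49152 - roundup2(1522, 1024) = 47104
	 * and low_water = 47104 - 1500 = 45604.
	 */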
2854 
2855 	if (adapter->fc) /* locally set flow control value? */
2856 		hw->fc.requested_mode = adapter->fc;
2857 	else
2858 		hw->fc.requested_mode = e1000_fc_full;
2859 
2860 	if (hw->mac.type == e1000_80003es2lan)
2861 		hw->fc.pause_time = 0xFFFF;
2862 	else
2863 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2864 
2865 	hw->fc.send_xon = TRUE;
2866 
2867 	/* Device specific overrides/settings */
2868 	switch (hw->mac.type) {
2869 	case e1000_pchlan:
2870 		/* Workaround: no TX flow ctrl for PCH */
2871 		hw->fc.requested_mode = e1000_fc_rx_pause;
2872 		hw->fc.pause_time = 0xFFFF; /* override */
2873 		if (if_getmtu(ifp) > ETHERMTU) {
2874 			hw->fc.high_water = 0x3500;
2875 			hw->fc.low_water = 0x1500;
2876 		} else {
2877 			hw->fc.high_water = 0x5000;
2878 			hw->fc.low_water = 0x3000;
2879 		}
2880 		hw->fc.refresh_time = 0x1000;
2881 		break;
2882 	case e1000_pch2lan:
2883 	case e1000_pch_lpt:
2884 		hw->fc.high_water = 0x5C20;
2885 		hw->fc.low_water = 0x5048;
2886 		hw->fc.pause_time = 0x0650;
2887 		hw->fc.refresh_time = 0x0400;
2888 		/* Jumbos need adjusted PBA */
2889 		if (if_getmtu(ifp) > ETHERMTU)
2890 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2891 		else
2892 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2893 		break;
2894 	case e1000_ich9lan:
2895 	case e1000_ich10lan:
2896 		if (if_getmtu(ifp) > ETHERMTU) {
2897 			hw->fc.high_water = 0x2800;
2898 			hw->fc.low_water = hw->fc.high_water - 8;
2899 			break;
2900 		}
2901 		/* else fall thru */
2902 	default:
2903 		if (hw->mac.type == e1000_80003es2lan)
2904 			hw->fc.pause_time = 0xFFFF;
2905 		break;
2906 	}
2907 
2908 	/* Issue a global reset */
2909 	e1000_reset_hw(hw);
2910 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2911 	em_disable_aspm(adapter);
2912 	/* and a re-init */
2913 	if (e1000_init_hw(hw) < 0) {
2914 		device_printf(dev, "Hardware Initialization Failed\n");
2915 		return;
2916 	}
2917 
2918 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2919 	e1000_get_phy_info(hw);
2920 	e1000_check_for_link(hw);
2921 	return;
2922 }
2923 
2924 /*********************************************************************
2925  *
2926  *  Setup networking device structure and register an interface.
2927  *
2928  **********************************************************************/
2929 static int
2930 em_setup_interface(device_t dev, struct adapter *adapter)
2931 {
2932 	if_t ifp;
2933 
2934 	INIT_DEBUGOUT("em_setup_interface: begin");
2935 
2936 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2937 	if (ifp == NULL) {
2938 		device_printf(dev, "can not allocate ifnet structure\n");
2939 		return (-1);
2940 	}
2941 	if_initname_drv(ifp, device_get_name(dev), device_get_unit(dev));
2942 	if_setdev(ifp, dev);
2943 	if_setinitfn(ifp, em_init);
2944 	if_setsoftc(ifp, adapter);
2945 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2946 	if_setioctlfn(ifp, em_ioctl);
2947 #ifdef EM_MULTIQUEUE
2948 	/* Multiqueue stack interface */
2949 	if_settransmitfn(ifp, em_mq_start);
2950 	if_setqflushfn(ifp, em_qflush);
2951 #else
2952 	if_setstartfn(ifp, em_start);
2953 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
2954 	if_setsendqready(ifp);
2955 #endif
2956 
2957 	ether_ifattach_drv(ifp, adapter->hw.mac.addr);
2958 
2959 	if_setcapabilities(ifp, 0);
2960 	if_setcapenable(ifp, 0);
2961 
2962 
2963 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
2964 	    IFCAP_TSO4, 0);
2965 	/*
2966 	 * Tell the upper layer(s) we
2967 	 * support full VLAN capability
2968 	 */
2969 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2970 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
2971 	    IFCAP_VLAN_MTU, 0);
2972 	if_setcapenable(ifp, if_getcapabilities(ifp));
2973 
2974 	/*
2975 	** Don't turn this on by default, if vlans are
2976 	** created on another pseudo device (eg. lagg)
2977 	** then vlan events are not passed thru, breaking
2978 	** operation, but with HW FILTER off it works. If
2979 	** using vlans directly on the em driver you can
2980 	** enable this and get full hardware tag filtering.
2981 	*/
2982 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
2983 
2984 #ifdef DEVICE_POLLING
2985 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
2986 #endif
2987 
2988 	/* Enable only WOL MAGIC by default */
2989 	if (adapter->wol) {
2990 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
2991 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
2992 	}
2993 
2994 	/*
2995 	 * Specify the media types supported by this adapter and register
2996 	 * callbacks to update media and link information
2997 	 */
2998 	ifmedia_init_drv(&adapter->media, IFM_IMASK,
2999 	    em_media_change, em_media_status);
3000 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3001 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3002 		u_char fiber_type = IFM_1000_SX;	/* default type */
3003 
3004 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3005 			    0, NULL);
3006 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3007 	} else {
3008 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3009 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3010 			    0, NULL);
3011 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3012 			    0, NULL);
3013 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3014 			    0, NULL);
3015 		if (adapter->hw.phy.type != e1000_phy_ife) {
3016 			ifmedia_add(&adapter->media,
3017 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3018 			ifmedia_add(&adapter->media,
3019 				IFM_ETHER | IFM_1000_T, 0, NULL);
3020 		}
3021 	}
3022 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3023 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3024 	return (0);
3025 }
3026 
3027 
3028 /*
3029  * Manage DMA'able memory.
3030  */
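/*
 * bus_dmamap_load() callback: stores the physical address of the
 * first (and only) DMA segment in the caller-supplied bus_addr_t.
 */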
3031 static void
3032 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3033 {
3034 	if (error)
3035 		return;
3036 	*(bus_addr_t *) arg = segs[0].ds_addr;
3037 }
3038 
3039 static int
3040 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3041         struct em_dma_alloc *dma, int mapflags)
3042 {
3043 	int error;
3044 
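	/*
	 * Standard three-step busdma setup: create a tag describing
	 * the constraints, allocate DMA-safe memory, then load the
	 * map so em_dmamap_cb can capture the physical address.
	 */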
3045 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3046 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3047 				BUS_SPACE_MAXADDR,	/* lowaddr */
3048 				BUS_SPACE_MAXADDR,	/* highaddr */
3049 				NULL, NULL,		/* filter, filterarg */
3050 				size,			/* maxsize */
3051 				1,			/* nsegments */
3052 				size,			/* maxsegsize */
3053 				0,			/* flags */
3054 				NULL,			/* lockfunc */
3055 				NULL,			/* lockarg */
3056 				&dma->dma_tag);
3057 	if (error) {
3058 		device_printf(adapter->dev,
3059 		    "%s: bus_dma_tag_create failed: %d\n",
3060 		    __func__, error);
3061 		goto fail_0;
3062 	}
3063 
3064 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3065 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3066 	if (error) {
3067 		device_printf(adapter->dev,
3068 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3069 		    __func__, (uintmax_t)size, error);
3070 		goto fail_2;
3071 	}
3072 
3073 	dma->dma_paddr = 0;
3074 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3075 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3076 	if (error || dma->dma_paddr == 0) {
3077 		device_printf(adapter->dev,
3078 		    "%s: bus_dmamap_load failed: %d\n",
3079 		    __func__, error);
3080 		goto fail_3;
3081 	}
3082 
3083 	return (0);
3084 
3085 fail_3:
3086 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3087 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3088 fail_2:
3089 	bus_dma_tag_destroy(dma->dma_tag);
3090 fail_0:
3091 	dma->dma_map = NULL;
3092 	dma->dma_tag = NULL;
3093 
3094 	return (error);
3095 }
3096 
3097 static void
3098 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3099 {
3100 	if (dma->dma_tag == NULL)
3101 		return;
3102 	if (dma->dma_map != NULL) {
3103 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3104 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3105 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3106 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3107 		dma->dma_map = NULL;
3108 	}
3109 	bus_dma_tag_destroy(dma->dma_tag);
3110 	dma->dma_tag = NULL;
3111 }
3112 
3113 
3114 /*********************************************************************
3115  *
3116  *  Allocate memory for the transmit and receive rings, and then
3117  *  the descriptors associated with each, called only once at attach.
3118  *
3119  **********************************************************************/
3120 static int
3121 em_allocate_queues(struct adapter *adapter)
3122 {
3123 	device_t		dev = adapter->dev;
3124 	struct tx_ring		*txr = NULL;
3125 	struct rx_ring		*rxr = NULL;
3126 	int rsize, tsize, error = E1000_SUCCESS;
3127 	int txconf = 0, rxconf = 0;
3128 
3129 
3130 	/* Allocate the TX ring struct memory */
3131 	if (!(adapter->tx_rings =
3132 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3133 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3134 		device_printf(dev, "Unable to allocate TX ring memory\n");
3135 		error = ENOMEM;
3136 		goto fail;
3137 	}
3138 
3139 	/* Now allocate the RX */
3140 	if (!(adapter->rx_rings =
3141 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3142 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3143 		device_printf(dev, "Unable to allocate RX ring memory\n");
3144 		error = ENOMEM;
3145 		goto rx_fail;
3146 	}
3147 
3148 	tsize = roundup2(adapter->num_tx_desc *
3149 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3150 	/*
3151 	 * Now set up the TX queues, txconf is needed to handle the
3152 	 * possibility that things fail midcourse and we need to
3153 	 * undo memory gracefully
3154 	 */
3155 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3156 		/* Set up some basics */
3157 		txr = &adapter->tx_rings[i];
3158 		txr->adapter = adapter;
3159 		txr->me = i;
3160 
3161 		/* Initialize the TX lock */
3162 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3163 		    device_get_nameunit(dev), txr->me);
3164 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3165 
3166 		if (em_dma_malloc(adapter, tsize,
3167 			&txr->txdma, BUS_DMA_NOWAIT)) {
3168 			device_printf(dev,
3169 			    "Unable to allocate TX Descriptor memory\n");
3170 			error = ENOMEM;
3171 			goto err_tx_desc;
3172 		}
3173 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3174 		bzero((void *)txr->tx_base, tsize);
3175 
3176 		if (em_allocate_transmit_buffers(txr)) {
3177 			device_printf(dev,
3178 			    "Critical Failure setting up transmit buffers\n");
3179 			error = ENOMEM;
3180 			goto err_tx_desc;
3181 		}
3182 #if __FreeBSD_version >= 800000
3183 		/* Allocate a buf ring */
3184 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3185 		    M_WAITOK, &txr->tx_mtx);
3186 #endif
3187 	}
3188 
3189 	/*
3190 	 * Next the RX queues...
3191 	 */
3192 	rsize = roundup2(adapter->num_rx_desc *
3193 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3194 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3195 		rxr = &adapter->rx_rings[i];
3196 		rxr->adapter = adapter;
3197 		rxr->me = i;
3198 
3199 		/* Initialize the RX lock */
3200 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3201 		    device_get_nameunit(dev), rxr->me);
3202 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3203 
3204 		if (em_dma_malloc(adapter, rsize,
3205 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3206 			device_printf(dev,
3207 			    "Unable to allocate RX Descriptor memory\n");
3208 			error = ENOMEM;
3209 			goto err_rx_desc;
3210 		}
3211 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3212 		bzero((void *)rxr->rx_base, rsize);
3213 
3214 		/* Allocate receive buffers for the ring */
3215 		if (em_allocate_receive_buffers(rxr)) {
3216 			device_printf(dev,
3217 			    "Critical Failure setting up receive buffers\n");
3218 			error = ENOMEM;
3219 			goto err_rx_desc;
3220 		}
3221 	}
3222 
3223 	return (0);
3224 
3225 err_rx_desc:
3226 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3227 		em_dma_free(adapter, &rxr->rxdma);
3228 err_tx_desc:
3229 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3230 		em_dma_free(adapter, &txr->txdma);
3231 	free(adapter->rx_rings, M_DEVBUF);
3232 rx_fail:
3233 #if __FreeBSD_version >= 800000
3234 	buf_ring_free(txr->br, M_DEVBUF);
3235 #endif
3236 	free(adapter->tx_rings, M_DEVBUF);
3237 fail:
3238 	return (error);
3239 }
3240 
3241 
3242 /*********************************************************************
3243  *
3244  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3245  *  the information needed to transmit a packet on the wire. This is
3246  *  called only once at attach, setup is done every reset.
3247  *
3248  **********************************************************************/
3249 static int
3250 em_allocate_transmit_buffers(struct tx_ring *txr)
3251 {
3252 	struct adapter *adapter = txr->adapter;
3253 	device_t dev = adapter->dev;
3254 	struct em_buffer *txbuf;
3255 	int error, i;
3256 
3257 	/*
3258 	 * Setup DMA descriptor areas.
3259 	 */
3260 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3261 			       1, 0,			/* alignment, bounds */
3262 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3263 			       BUS_SPACE_MAXADDR,	/* highaddr */
3264 			       NULL, NULL,		/* filter, filterarg */
3265 			       EM_TSO_SIZE,		/* maxsize */
3266 			       EM_MAX_SCATTER,		/* nsegments */
3267 			       PAGE_SIZE,		/* maxsegsize */
3268 			       0,			/* flags */
3269 			       NULL,			/* lockfunc */
3270 			       NULL,			/* lockfuncarg */
3271 			       &txr->txtag))) {
3272 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3273 		goto fail;
3274 	}
3275 
3276 	if (!(txr->tx_buffers =
3277 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3278 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3279 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3280 		error = ENOMEM;
3281 		goto fail;
3282 	}
3283 
3284 	/* Create the descriptor buffer dma maps */
3285 	txbuf = txr->tx_buffers;
3286 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3287 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3288 		if (error != 0) {
3289 			device_printf(dev, "Unable to create TX DMA map\n");
3290 			goto fail;
3291 		}
3292 	}
3293 
3294 	return (0);
3295 fail:
3296 	/* We free all, it handles case where we are in the middle */
3297 	em_free_transmit_structures(adapter);
3298 	return (error);
3299 }
3300 
3301 /*********************************************************************
3302  *
3303  *  Initialize a transmit ring.
3304  *
3305  **********************************************************************/
3306 static void
3307 em_setup_transmit_ring(struct tx_ring *txr)
3308 {
3309 	struct adapter *adapter = txr->adapter;
3310 	struct em_buffer *txbuf;
3311 	int i;
3312 #ifdef DEV_NETMAP
3313 	struct netmap_slot *slot;
3314 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3315 #endif /* DEV_NETMAP */
3316 
3317 	/* Clear the old descriptor contents */
3318 	EM_TX_LOCK(txr);
3319 #ifdef DEV_NETMAP
3320 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3321 #endif /* DEV_NETMAP */
3322 
3323 	bzero((void *)txr->tx_base,
3324 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3325 	/* Reset indices */
3326 	txr->next_avail_desc = 0;
3327 	txr->next_to_clean = 0;
3328 
3329 	/* Free any existing tx buffers. */
3330 	txbuf = txr->tx_buffers;
3331 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3332 		if (txbuf->m_head != NULL) {
3333 			bus_dmamap_sync(txr->txtag, txbuf->map,
3334 			    BUS_DMASYNC_POSTWRITE);
3335 			bus_dmamap_unload(txr->txtag, txbuf->map);
3336 			m_freem(txbuf->m_head);
3337 			txbuf->m_head = NULL;
3338 		}
3339 #ifdef DEV_NETMAP
3340 		if (slot) {
3341 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3342 			uint64_t paddr;
3343 			void *addr;
3344 
3345 			addr = PNMB(slot + si, &paddr);
3346 			txr->tx_base[i].buffer_addr = htole64(paddr);
3347 			/* reload the map for netmap mode */
3348 			netmap_load_map(txr->txtag, txbuf->map, addr);
3349 		}
3350 #endif /* DEV_NETMAP */
3351 
3352 		/* clear the watch index */
3353 		txbuf->next_eop = -1;
3354 	}
3355 
3356 	/* Set number of descriptors available */
3357 	txr->tx_avail = adapter->num_tx_desc;
3358 	txr->queue_status = EM_QUEUE_IDLE;
3359 
3360 	/* Clear checksum offload context. */
3361 	txr->last_hw_offload = 0;
3362 	txr->last_hw_ipcss = 0;
3363 	txr->last_hw_ipcso = 0;
3364 	txr->last_hw_tucss = 0;
3365 	txr->last_hw_tucso = 0;
3366 
3367 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3368 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3369 	EM_TX_UNLOCK(txr);
3370 }
3371 
3372 /*********************************************************************
3373  *
3374  *  Initialize all transmit rings.
3375  *
3376  **********************************************************************/
3377 static void
3378 em_setup_transmit_structures(struct adapter *adapter)
3379 {
3380 	struct tx_ring *txr = adapter->tx_rings;
3381 
3382 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3383 		em_setup_transmit_ring(txr);
3384 
3385 	return;
3386 }
3387 
3388 /*********************************************************************
3389  *
3390  *  Enable transmit unit.
3391  *
3392  **********************************************************************/
3393 static void
3394 em_initialize_transmit_unit(struct adapter *adapter)
3395 {
3396 	struct tx_ring	*txr = adapter->tx_rings;
3397 	struct e1000_hw	*hw = &adapter->hw;
3398 	u32	tctl, tarc, tipg = 0;
3399 
3400 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3401 
3402 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3403 		u64 bus_addr = txr->txdma.dma_paddr;
3404 		/* Base and Len of TX Ring */
3405 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3406 		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3407 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3408 		    (u32)(bus_addr >> 32));
3409 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3410 		    (u32)bus_addr);
3411 		/* Init the HEAD/TAIL indices */
3412 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3413 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3414 
3415 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3416 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3417 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3418 
3419 		txr->queue_status = EM_QUEUE_IDLE;
3420 	}
3421 
3422 	/* Set the default values for the Tx Inter Packet Gap timer */
3423 	switch (adapter->hw.mac.type) {
3424 	case e1000_80003es2lan:
3425 		tipg = DEFAULT_82543_TIPG_IPGR1;
3426 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3427 		    E1000_TIPG_IPGR2_SHIFT;
3428 		break;
3429 	default:
3430 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3431 		    (adapter->hw.phy.media_type ==
3432 		    e1000_media_type_internal_serdes))
3433 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3434 		else
3435 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3436 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3437 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3438 	}
3439 
3440 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3441 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3442 
3443 	if (adapter->hw.mac.type >= e1000_82540)
3444 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3445 		    adapter->tx_abs_int_delay.value);
3446 
3447 	if ((adapter->hw.mac.type == e1000_82571) ||
3448 	    (adapter->hw.mac.type == e1000_82572)) {
3449 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3450 		tarc |= SPEED_MODE_BIT;
3451 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3452 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3453 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3454 		tarc |= 1;
3455 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3456 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3457 		tarc |= 1;
3458 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3459 	}
3460 
3461 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3462 	if (adapter->tx_int_delay.value > 0)
3463 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3464 
3465 	/* Program the Transmit Control Register */
3466 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3467 	tctl &= ~E1000_TCTL_CT;
3468 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3469 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3470 
3471 	if (adapter->hw.mac.type >= e1000_82571)
3472 		tctl |= E1000_TCTL_MULR;
3473 
3474 	/* This write will effectively turn on the transmit unit. */
3475 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3476 
3477 }
3478 
3479 
3480 /*********************************************************************
3481  *
3482  *  Free all transmit rings.
3483  *
3484  **********************************************************************/
3485 static void
3486 em_free_transmit_structures(struct adapter *adapter)
3487 {
3488 	struct tx_ring *txr = adapter->tx_rings;
3489 
3490 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3491 		EM_TX_LOCK(txr);
3492 		em_free_transmit_buffers(txr);
3493 		em_dma_free(adapter, &txr->txdma);
3494 		EM_TX_UNLOCK(txr);
3495 		EM_TX_LOCK_DESTROY(txr);
3496 	}
3497 
3498 	free(adapter->tx_rings, M_DEVBUF);
3499 }
3500 
3501 /*********************************************************************
3502  *
3503  *  Free transmit ring related data structures.
3504  *
3505  **********************************************************************/
3506 static void
3507 em_free_transmit_buffers(struct tx_ring *txr)
3508 {
3509 	struct adapter		*adapter = txr->adapter;
3510 	struct em_buffer	*txbuf;
3511 
3512 	INIT_DEBUGOUT("free_transmit_ring: begin");
3513 
3514 	if (txr->tx_buffers == NULL)
3515 		return;
3516 
3517 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3518 		txbuf = &txr->tx_buffers[i];
3519 		if (txbuf->m_head != NULL) {
3520 			bus_dmamap_sync(txr->txtag, txbuf->map,
3521 			    BUS_DMASYNC_POSTWRITE);
3522 			bus_dmamap_unload(txr->txtag,
3523 			    txbuf->map);
3524 			m_freem(txbuf->m_head);
3525 			txbuf->m_head = NULL;
3526 			if (txbuf->map != NULL) {
3527 				bus_dmamap_destroy(txr->txtag,
3528 				    txbuf->map);
3529 				txbuf->map = NULL;
3530 			}
3531 		} else if (txbuf->map != NULL) {
3532 			bus_dmamap_unload(txr->txtag,
3533 			    txbuf->map);
3534 			bus_dmamap_destroy(txr->txtag,
3535 			    txbuf->map);
3536 			txbuf->map = NULL;
3537 		}
3538 	}
3539 #if __FreeBSD_version >= 800000
3540 	if (txr->br != NULL)
3541 		buf_ring_free(txr->br, M_DEVBUF);
3542 #endif
3543 	if (txr->tx_buffers != NULL) {
3544 		free(txr->tx_buffers, M_DEVBUF);
3545 		txr->tx_buffers = NULL;
3546 	}
3547 	if (txr->txtag != NULL) {
3548 		bus_dma_tag_destroy(txr->txtag);
3549 		txr->txtag = NULL;
3550 	}
3551 	return;
3552 }
3553 
3554 
3555 /*********************************************************************
3556  *  The offload context is protocol specific (TCP/UDP) and thus
3557  *  only needs to be set when the protocol changes. A context
3558  *  change, however, can be a performance detriment, and it
3559  *  might be better just to disable it. The reason lies in the
3560  *  way the controller supports pipelined requests from the
3561  *  Tx data DMA. Up to four requests can be pipelined, and they
3562  *  may belong to the same packet or to multiple packets. However,
3563  *  all requests for one packet are issued before a request is
3564  *  issued for a subsequent packet, and if a request for the next
3565  *  packet requires a context change, that request will be stalled
3566  *  until the previous request completes. This means setting up
3567  *  a new context effectively disables pipelined Tx data DMA,
3568  *  which in turn greatly slows down performance when sending
3569  *  small-sized frames.
3570  **********************************************************************/
3571 static void
3572 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3573     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3574 {
3575 	struct adapter			*adapter = txr->adapter;
3576 	struct e1000_context_desc	*TXD = NULL;
3577 	struct em_buffer		*tx_buffer;
3578 	int				cur, hdr_len;
3579 	u32				cmd = 0;
3580 	u16				offload = 0;
3581 	u8				ipcso, ipcss, tucso, tucss;
3582 
3583 	ipcss = ipcso = tucss = tucso = 0;
3584 	hdr_len = ip_off + (ip->ip_hl << 2);
3585 	cur = txr->next_avail_desc;
3586 
3587 	/* Setup of IP header checksum. */
3588 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3589 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3590 		offload |= CSUM_IP;
3591 		ipcss = ip_off;
3592 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3593 		/*
3594 		 * Start offset for header checksum calculation.
3595 		 * End offset for header checksum calculation.
3596 		 * Offset of place to put the checksum.
3597 		 */
3598 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3599 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3600 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3601 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3602 		cmd |= E1000_TXD_CMD_IP;
3603 	}
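
	/*
	 * Worked example (illustrative, not from the original source):
	 * for a plain Ethernet frame ip_off = 14, so with a 20-byte IP
	 * header ipcss = 14, ipcse = 14 + 20 = 34, and
	 * ipcso = 14 + 10 = 24, since ip_sum sits at offset 10 within
	 * struct ip.
	 */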
3604 
3605 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3606  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3607  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3608  		offload |= CSUM_TCP;
3609  		tucss = hdr_len;
3610  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3611 		/*
3612 		 * Setting up a new checksum offload context for every
3613 		 * frame takes a lot of processing time in hardware. It
3614 		 * also hurts performance for small-sized frames, so avoid
3615 		 * it if the driver can reuse a previously configured
3616 		 * checksum offload context.
3617 		 */
3618  		if (txr->last_hw_offload == offload) {
3619  			if (offload & CSUM_IP) {
3620  				if (txr->last_hw_ipcss == ipcss &&
3621  				    txr->last_hw_ipcso == ipcso &&
3622  				    txr->last_hw_tucss == tucss &&
3623  				    txr->last_hw_tucso == tucso)
3624  					return;
3625  			} else {
3626  				if (txr->last_hw_tucss == tucss &&
3627  				    txr->last_hw_tucso == tucso)
3628  					return;
3629  			}
3630   		}
3631  		txr->last_hw_offload = offload;
3632  		txr->last_hw_tucss = tucss;
3633  		txr->last_hw_tucso = tucso;
3634  		/*
3635  		 * Start offset for payload checksum calculation.
3636  		 * End offset for payload checksum calculation.
3637  		 * Offset of place to put the checksum.
3638  		 */
3639 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3640 		TXD->upper_setup.tcp_fields.tucss = tucss;
3641  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3642  		TXD->upper_setup.tcp_fields.tucso = tucso;
3643  		cmd |= E1000_TXD_CMD_TCP;
3644  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3645  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3646  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3647  		tucss = hdr_len;
3648  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3649 		/*
3650 		 * Setting up a new checksum offload context for every
3651 		 * frame takes a lot of processing time in hardware. It
3652 		 * also hurts performance for small-sized frames, so avoid
3653 		 * it if the driver can reuse a previously configured
3654 		 * checksum offload context.
3655 		 */
3656  		if (txr->last_hw_offload == offload) {
3657  			if (offload & CSUM_IP) {
3658  				if (txr->last_hw_ipcss == ipcss &&
3659  				    txr->last_hw_ipcso == ipcso &&
3660  				    txr->last_hw_tucss == tucss &&
3661  				    txr->last_hw_tucso == tucso)
3662  					return;
3663  			} else {
3664  				if (txr->last_hw_tucss == tucss &&
3665  				    txr->last_hw_tucso == tucso)
3666  					return;
3667  			}
3668  		}
3669  		txr->last_hw_offload = offload;
3670  		txr->last_hw_tucss = tucss;
3671  		txr->last_hw_tucso = tucso;
3672  		/*
3673  		 * Start offset for header checksum calculation.
3674  		 * End offset for header checksum calculation.
3675  		 * Offset of place to put the checksum.
3676  		 */
3677 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3678  		TXD->upper_setup.tcp_fields.tucss = tucss;
3679  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3680  		TXD->upper_setup.tcp_fields.tucso = tucso;
3681   	}
3682 
3683  	if (offload & CSUM_IP) {
3684  		txr->last_hw_ipcss = ipcss;
3685  		txr->last_hw_ipcso = ipcso;
3686   	}
3687 
3688 	TXD->tcp_seg_setup.data = htole32(0);
3689 	TXD->cmd_and_length =
3690 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3691 	tx_buffer = &txr->tx_buffers[cur];
3692 	tx_buffer->m_head = NULL;
3693 	tx_buffer->next_eop = -1;
3694 
3695 	if (++cur == adapter->num_tx_desc)
3696 		cur = 0;
3697 
3698 	txr->tx_avail--;
3699 	txr->next_avail_desc = cur;
3700 }
3701 
3702 
3703 /**********************************************************************
3704  *
3705  *  Setup work for hardware segmentation offload (TSO)
3706  *
3707  **********************************************************************/
3708 static void
3709 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3710     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3711 {
3712 	struct adapter			*adapter = txr->adapter;
3713 	struct e1000_context_desc	*TXD;
3714 	struct em_buffer		*tx_buffer;
3715 	int cur, hdr_len;
3716 
3717 	/*
3718 	 * In theory we can reuse the same TSO context if and only if
3719 	 * the frame is the same type (IP/TCP) and has the same MSS.
3720 	 * However, checking whether a frame has the same IP/TCP
3721 	 * structure is a hard thing, so just ignore that and always
3722 	 * re-establish a new TSO context.
3723 	 */
3724 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
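	/*
	 * Worked example (illustrative, not from the original source):
	 * with ip_off = 14 (Ethernet header), ip_hl = 5 (20-byte IP
	 * header) and th_off = 5 (20-byte TCP header),
	 * hdr_len = 14 + 20 + 20 = 54 bytes.
	 */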
3725 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3726 		      E1000_TXD_DTYP_D |	/* Data descr type */
3727 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3728 
3729 	/* IP and/or TCP header checksum calculation and insertion. */
3730 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3731 
3732 	cur = txr->next_avail_desc;
3733 	tx_buffer = &txr->tx_buffers[cur];
3734 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3735 
3736 	/*
3737 	 * Start offset for header checksum calculation.
3738 	 * End offset for header checksum calculation.
3739 	 * Offset of place to put the checksum.
3740 	 */
3741 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3742 	TXD->lower_setup.ip_fields.ipcse =
3743 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3744 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3745 	/*
3746 	 * Start offset for payload checksum calculation.
3747 	 * End offset for payload checksum calculation.
3748 	 * Offset of place to put the checksum.
3749 	 */
3750 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3751 	TXD->upper_setup.tcp_fields.tucse = 0;
3752 	TXD->upper_setup.tcp_fields.tucso =
3753 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3754 	/*
3755 	 * Payload size per packet w/o any headers.
3756 	 * Length of all headers up to payload.
3757 	 */
3758 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3759 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3760 
3761 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3762 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3763 				E1000_TXD_CMD_TSE |	/* TSE context */
3764 				E1000_TXD_CMD_IP |	/* Do IP csum */
3765 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3766 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3767 
3768 	tx_buffer->m_head = NULL;
3769 	tx_buffer->next_eop = -1;
3770 
3771 	if (++cur == adapter->num_tx_desc)
3772 		cur = 0;
3773 
3774 	txr->tx_avail--;
3775 	txr->next_avail_desc = cur;
3776 	txr->tx_tso = TRUE;
3777 }
3778 
3779 
3780 /**********************************************************************
3781  *
3782  *  Examine each tx_buffer in the used queue. If the hardware is done
3783  *  processing the packet then free associated resources. The
3784  *  tx_buffer is put back on the free queue.
3785  *
3786  **********************************************************************/
3787 static void
3788 em_txeof(struct tx_ring *txr)
3789 {
3790 	struct adapter	*adapter = txr->adapter;
3791 	int first, last, done, processed;
3792 	struct em_buffer *tx_buffer;
3793 	struct e1000_tx_desc *tx_desc, *eop_desc;
3794 	if_t ifp = adapter->ifp;
3795 
3796 	EM_TX_LOCK_ASSERT(txr);
3797 #ifdef DEV_NETMAP
3798 	if (netmap_tx_irq(ifp, txr->me))
3799 		return;
3800 #endif /* DEV_NETMAP */
3801 
3802 	/* No work, make sure watchdog is off */
3803 	if (txr->tx_avail == adapter->num_tx_desc) {
3804 		txr->queue_status = EM_QUEUE_IDLE;
3805 		return;
3806 	}
3807 
3808 	processed = 0;
3809 	first = txr->next_to_clean;
3810 	tx_desc = &txr->tx_base[first];
3811 	tx_buffer = &txr->tx_buffers[first];
3812 	last = tx_buffer->next_eop;
3813 	eop_desc = &txr->tx_base[last];
3814 
3815 	/*
3816 	 * What this does is get the index of the
3817 	 * first descriptor AFTER the EOP of the
3818 	 * first packet, that way we can do the
3819 	 * simple comparison on the inner while loop.
3820 	 */
3821 	if (++last == adapter->num_tx_desc)
3822 		last = 0;
3823 	done = last;
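	/*
	 * Worked example (illustrative, not from the original source):
	 * with 1024 descriptors, first = 1020 and the EOP at 1023,
	 * 'last' increments and wraps to 0, so done = 0 and the inner
	 * loop below cleans descriptors 1020 through 1023.
	 */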
3824 
3825 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3826 	    BUS_DMASYNC_POSTREAD);
3827 
3828 	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3829 		/* We clean the range of the packet */
3830 		while (first != done) {
3831 			tx_desc->upper.data = 0;
3832 			tx_desc->lower.data = 0;
3833 			tx_desc->buffer_addr = 0;
3834 			++txr->tx_avail;
3835 			++processed;
3836 
3837 			if (tx_buffer->m_head) {
3838 				bus_dmamap_sync(txr->txtag,
3839 				    tx_buffer->map,
3840 				    BUS_DMASYNC_POSTWRITE);
3841 				bus_dmamap_unload(txr->txtag,
3842 				    tx_buffer->map);
3843 				m_freem(tx_buffer->m_head);
3844 				tx_buffer->m_head = NULL;
3845 			}
3846 			tx_buffer->next_eop = -1;
3847 			txr->watchdog_time = ticks;
3848 
3849 			if (++first == adapter->num_tx_desc)
3850 				first = 0;
3851 
3852 			tx_buffer = &txr->tx_buffers[first];
3853 			tx_desc = &txr->tx_base[first];
3854 		}
3855 		if_incopackets(ifp, 1);
3856 		/* See if we can continue to the next packet */
3857 		last = tx_buffer->next_eop;
3858 		if (last != -1) {
3859 			eop_desc = &txr->tx_base[last];
3860 			/* Get new done point */
3861 			if (++last == adapter->num_tx_desc) last = 0;
3862 			done = last;
3863 		} else
3864 			break;
3865 	}
3866         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3867             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3868 
3869 	txr->next_to_clean = first;
3870 
3871 	/*
3872 	** Watchdog calculation: we know there's
3873 	** work outstanding or the first return
3874 	** would have been taken, so nothing processed
3875 	** for too long indicates a hang. The local timer
3876 	** will examine this and do a reset if needed.
3877 	*/
3878 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3879 		txr->queue_status = EM_QUEUE_HUNG;
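	/*
	 * Illustrative numbers (assumed, not from this file): with
	 * hz = 1000 and EM_WATCHDOG defined as a multiple of hz such
	 * as 10 * hz, the queue is marked hung after roughly ten
	 * seconds in which no descriptors were reclaimed.
	 */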
3880 
3881 	/*
3882 	 * If we have a minimum free, clear IFF_DRV_OACTIVE
3883 	 * to tell the stack that it is OK to send packets.
3884 	 * Notice that all writes of OACTIVE happen under the
3885 	 * TX lock which, with a single queue, guarantees
3886 	 * sanity.
3887 	 */
3888 	if (txr->tx_avail >= EM_MAX_SCATTER)
3889 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
3890 
3891 	/* Disable watchdog if all clean */
3892 	if (txr->tx_avail == adapter->num_tx_desc) {
3893 		txr->queue_status = EM_QUEUE_IDLE;
3894 	}
3895 }
3896 
3897 
3898 /*********************************************************************
3899  *
3900  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3901  *
3902  **********************************************************************/
3903 static void
3904 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3905 {
3906 	struct adapter		*adapter = rxr->adapter;
3907 	struct mbuf		*m;
3908 	bus_dma_segment_t	segs[1];
3909 	struct em_buffer	*rxbuf;
3910 	int			i, j, error, nsegs;
3911 	bool			cleaned = FALSE;
3912 
3913 	i = j = rxr->next_to_refresh;
3914 	/*
3915 	** Get one descriptor beyond
3916 	** our work mark to control
3917 	** the loop.
3918 	*/
3919 	if (++j == adapter->num_rx_desc)
3920 		j = 0;
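	/*
	 * Illustrative note: 'i' is the slot being refreshed and 'j'
	 * always runs one slot ahead of it (modulo num_rx_desc), so
	 * the loop below stops one descriptor short of 'limit' and
	 * never overruns the portion of the ring still owned by the
	 * hardware.
	 */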
3921 
3922 	while (j != limit) {
3923 		rxbuf = &rxr->rx_buffers[i];
3924 		if (rxbuf->m_head == NULL) {
3925 			m = m_getjcl(M_NOWAIT, MT_DATA,
3926 			    M_PKTHDR, adapter->rx_mbuf_sz);
3927 			/*
3928 			** If we have a temporary resource shortage
3929 			** that causes a failure, just abort refresh
3930 			** for now, we will return to this point when
3931 			** reinvoked from em_rxeof.
3932 			*/
3933 			if (m == NULL)
3934 				goto update;
3935 		} else
3936 			m = rxbuf->m_head;
3937 
3938 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3939 		m->m_flags |= M_PKTHDR;
3940 		m->m_data = m->m_ext.ext_buf;
3941 
3942 		/* Use bus_dma machinery to setup the memory mapping  */
3943 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3944 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3945 		if (error != 0) {
3946 			printf("Refresh mbufs: hdr dmamap load"
3947 			    " failure - %d\n", error);
3948 			m_free(m);
3949 			rxbuf->m_head = NULL;
3950 			goto update;
3951 		}
3952 		rxbuf->m_head = m;
3953 		bus_dmamap_sync(rxr->rxtag,
3954 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3955 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3956 		cleaned = TRUE;
3957 
3958 		i = j; /* Next is precalculated for us */
3959 		rxr->next_to_refresh = i;
3960 		/* Calculate next controlling index */
3961 		if (++j == adapter->num_rx_desc)
3962 			j = 0;
3963 	}
3964 update:
3965 	/*
3966 	** Update the tail pointer only if, and
3967 	** only as far as, we have refreshed.
3968 	*/
3969 	if (cleaned)
3970 		E1000_WRITE_REG(&adapter->hw,
3971 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3972 
3973 	return;
3974 }
3975 
3976 
3977 /*********************************************************************
3978  *
3979  *  Allocate memory for rx_buffer structures. Since we use one
3980  *  rx_buffer per received packet, the maximum number of rx_buffer's
3981  *  that we'll need is equal to the number of receive descriptors
3982  *  that we've allocated.
3983  *
3984  **********************************************************************/
3985 static int
3986 em_allocate_receive_buffers(struct rx_ring *rxr)
3987 {
3988 	struct adapter		*adapter = rxr->adapter;
3989 	device_t		dev = adapter->dev;
3990 	struct em_buffer	*rxbuf;
3991 	int			error;
3992 
3993 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3994 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3995 	if (rxr->rx_buffers == NULL) {
3996 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3997 		return (ENOMEM);
3998 	}
3999 
4000 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4001 				1, 0,			/* alignment, bounds */
4002 				BUS_SPACE_MAXADDR,	/* lowaddr */
4003 				BUS_SPACE_MAXADDR,	/* highaddr */
4004 				NULL, NULL,		/* filter, filterarg */
4005 				MJUM9BYTES,		/* maxsize */
4006 				1,			/* nsegments */
4007 				MJUM9BYTES,		/* maxsegsize */
4008 				0,			/* flags */
4009 				NULL,			/* lockfunc */
4010 				NULL,			/* lockarg */
4011 				&rxr->rxtag);
4012 	if (error) {
4013 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4014 		    __func__, error);
4015 		goto fail;
4016 	}
4017 
4018 	rxbuf = rxr->rx_buffers;
4019 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4020 		rxbuf = &rxr->rx_buffers[i];
4021 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4022 		if (error) {
4023 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4024 			    __func__, error);
4025 			goto fail;
4026 		}
4027 	}
4028 
4029 	return (0);
4030 
4031 fail:
4032 	em_free_receive_structures(adapter);
4033 	return (error);
4034 }
4035 
4036 
4037 /*********************************************************************
4038  *
4039  *  Initialize a receive ring and its buffers.
4040  *
4041  **********************************************************************/
4042 static int
4043 em_setup_receive_ring(struct rx_ring *rxr)
4044 {
4045 	struct	adapter 	*adapter = rxr->adapter;
4046 	struct em_buffer	*rxbuf;
4047 	bus_dma_segment_t	seg[1];
4048 	int			rsize, nsegs, error = 0;
4049 #ifdef DEV_NETMAP
4050 	struct netmap_slot *slot;
4051 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4052 #endif
4053 
4054 
4055 	/* Clear the ring contents */
4056 	EM_RX_LOCK(rxr);
4057 	rsize = roundup2(adapter->num_rx_desc *
4058 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4059 	bzero((void *)rxr->rx_base, rsize);
4060 #ifdef DEV_NETMAP
4061 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4062 #endif
4063 
4064 	/*
4065 	** Free current RX buffer structs and their mbufs
4066 	*/
4067 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4068 		rxbuf = &rxr->rx_buffers[i];
4069 		if (rxbuf->m_head != NULL) {
4070 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4071 			    BUS_DMASYNC_POSTREAD);
4072 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4073 			m_freem(rxbuf->m_head);
4074 			rxbuf->m_head = NULL; /* mark as freed */
4075 		}
4076 	}
4077 
4078 	/* Now replenish the mbufs */
4079 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4080 		rxbuf = &rxr->rx_buffers[j];
4081 #ifdef DEV_NETMAP
4082 		if (slot) {
4083 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4084 			uint64_t paddr;
4085 			void *addr;
4086 
4087 			addr = PNMB(slot + si, &paddr);
4088 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4089 			/* Update descriptor */
4090 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4091 			continue;
4092 		}
4093 #endif /* DEV_NETMAP */
4094 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4095 		    M_PKTHDR, adapter->rx_mbuf_sz);
4096 		if (rxbuf->m_head == NULL) {
4097 			error = ENOBUFS;
4098 			goto fail;
4099 		}
4100 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4101 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4102 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4103 
4104 		/* Get the memory mapping */
4105 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4106 		    rxbuf->map, rxbuf->m_head, seg,
4107 		    &nsegs, BUS_DMA_NOWAIT);
4108 		if (error != 0) {
4109 			m_freem(rxbuf->m_head);
4110 			rxbuf->m_head = NULL;
4111 			goto fail;
4112 		}
4113 		bus_dmamap_sync(rxr->rxtag,
4114 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4115 
4116 		/* Update descriptor */
4117 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4118 	}
4119 	rxr->next_to_check = 0;
4120 	rxr->next_to_refresh = 0;
4121 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4122 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4123 
4124 fail:
4125 	EM_RX_UNLOCK(rxr);
4126 	return (error);
4127 }
4128 
4129 /*********************************************************************
4130  *
4131  *  Initialize all receive rings.
4132  *
4133  **********************************************************************/
4134 static int
4135 em_setup_receive_structures(struct adapter *adapter)
4136 {
4137 	struct rx_ring *rxr = adapter->rx_rings;
4138 	int q;
4139 
4140 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4141 		if (em_setup_receive_ring(rxr))
4142 			goto fail;
4143 
4144 	return (0);
4145 fail:
4146 	/*
4147 	 * Free RX buffers allocated so far; we will only handle
4148 	 * the rings that completed, since the failing case will have
4149 	 * cleaned up after itself. 'q' failed, so it's the terminus.
4150 	 */
4151 	for (int i = 0; i < q; ++i) {
4152 		rxr = &adapter->rx_rings[i];
4153 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4154 			struct em_buffer *rxbuf;
4155 			rxbuf = &rxr->rx_buffers[n];
4156 			if (rxbuf->m_head != NULL) {
4157 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4158 			  	  BUS_DMASYNC_POSTREAD);
4159 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4160 				m_freem(rxbuf->m_head);
4161 				rxbuf->m_head = NULL;
4162 			}
4163 		}
4164 		rxr->next_to_check = 0;
4165 		rxr->next_to_refresh = 0;
4166 	}
4167 
4168 	return (ENOBUFS);
4169 }
4170 
4171 /*********************************************************************
4172  *
4173  *  Free all receive rings.
4174  *
4175  **********************************************************************/
4176 static void
4177 em_free_receive_structures(struct adapter *adapter)
4178 {
4179 	struct rx_ring *rxr = adapter->rx_rings;
4180 
4181 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4182 		em_free_receive_buffers(rxr);
4183 		/* Free the ring memory as well */
4184 		em_dma_free(adapter, &rxr->rxdma);
4185 		EM_RX_LOCK_DESTROY(rxr);
4186 	}
4187 
4188 	free(adapter->rx_rings, M_DEVBUF);
4189 }
4190 
4191 
4192 /*********************************************************************
4193  *
4194  *  Free receive ring data structures
4195  *
4196  **********************************************************************/
4197 static void
4198 em_free_receive_buffers(struct rx_ring *rxr)
4199 {
4200 	struct adapter		*adapter = rxr->adapter;
4201 	struct em_buffer	*rxbuf = NULL;
4202 
4203 	INIT_DEBUGOUT("free_receive_buffers: begin");
4204 
4205 	if (rxr->rx_buffers != NULL) {
4206 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4207 			rxbuf = &rxr->rx_buffers[i];
4208 			if (rxbuf->map != NULL) {
4209 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4210 				    BUS_DMASYNC_POSTREAD);
4211 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4212 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4213 			}
4214 			if (rxbuf->m_head != NULL) {
4215 				m_freem(rxbuf->m_head);
4216 				rxbuf->m_head = NULL;
4217 			}
4218 		}
4219 		free(rxr->rx_buffers, M_DEVBUF);
4220 		rxr->rx_buffers = NULL;
4221 		rxr->next_to_check = 0;
4222 		rxr->next_to_refresh = 0;
4223 	}
4224 
4225 	if (rxr->rxtag != NULL) {
4226 		bus_dma_tag_destroy(rxr->rxtag);
4227 		rxr->rxtag = NULL;
4228 	}
4229 
4230 	return;
4231 }
4232 
4233 
4234 /*********************************************************************
4235  *
4236  *  Enable receive unit.
4237  *
4238  **********************************************************************/
4239 
4240 static void
4241 em_initialize_receive_unit(struct adapter *adapter)
4242 {
4243 	struct rx_ring	*rxr = adapter->rx_rings;
4244 	if_t ifp = adapter->ifp;
4245 	struct e1000_hw	*hw = &adapter->hw;
4246 	u64	bus_addr;
4247 	u32	rctl, rxcsum;
4248 
4249 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4250 
4251 	/*
4252 	 * Make sure receives are disabled while setting
4253 	 * up the descriptor ring
4254 	 */
4255 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4256 	/* Do not disable if ever enabled on this hardware */
4257 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4258 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4259 
4260 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4261 	    adapter->rx_abs_int_delay.value);
4262 	/*
4263 	 * Set the interrupt throttling rate. Value is calculated
4264 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4265 	 */
4266 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
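	/*
	 * Worked example (illustrative; the 8000/sec target is an
	 * assumption for this example only): the ITR register counts
	 * in 256 ns units, so for 8000 interrupts/sec the programmed
	 * value is 1 / (8000 * 256e-9) ~= 488.
	 */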
4267 
4268 	/*
4269 	** When using MSIX interrupts we need to throttle
4270 	** using the EITR register (82574 only)
4271 	*/
4272 	if (hw->mac.type == e1000_82574) {
4273 		for (int i = 0; i < 4; i++)
4274 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4275 			    DEFAULT_ITR);
4276 		/* Disable accelerated acknowledge */
4277 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4278 	}
4279 
4280 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4281 	if (if_getcapenable(ifp) & IFCAP_RXCSUM)
4282 		rxcsum |= E1000_RXCSUM_TUOFL;
4283 	else
4284 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4285 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4286 
4287 	/*
4288 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4289 	** long latencies are observed, e.g. on the Lenovo X60. This
4290 	** change eliminates the problem, but since having positive
4291 	** values in RDTR is a known source of problems on other
4292 	** platforms another solution is being sought.
4293 	*/
4294 	if (hw->mac.type == e1000_82573)
4295 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4296 
4297 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4298 		/* Setup the Base and Length of the Rx Descriptor Ring */
4299 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4300 
4301 		bus_addr = rxr->rxdma.dma_paddr;
4302 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4303 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4304 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4305 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4306 		/* Setup the Head and Tail Descriptor Pointers */
4307 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4308 #ifdef DEV_NETMAP
4309 		/*
4310 		 * An init() while a netmap client is active must
4311 		 * preserve the rx buffers passed to userspace.
4312 		 */
4313 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4314 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4315 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4316 		}
4317 #endif /* DEV_NETMAP */
4318 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4319 	}
4320 
4321 	/* Set PTHRESH for improved jumbo performance */
4322 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4323 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4324 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4325 	    (if_getmtu(ifp) > ETHERMTU)) {
4326 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4327 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4328 	}
4329 
4330 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4331 		if (if_getmtu(ifp) > ETHERMTU)
4332 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4333 		else
4334 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4335 	}
4336 
4337 	/* Setup the Receive Control Register */
4338 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4339 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4340 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4341 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4342 
4343 	/* Strip the CRC */
4344 	rctl |= E1000_RCTL_SECRC;
4345 
4346 	/* Make sure VLAN Filters are off */
4347 	rctl &= ~E1000_RCTL_VFE;
4348 	rctl &= ~E1000_RCTL_SBP;
4349 
4350 	if (adapter->rx_mbuf_sz == MCLBYTES)
4351 		rctl |= E1000_RCTL_SZ_2048;
4352 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4353 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4354 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4355 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4356 
4357 	if (if_getmtu(ifp) > ETHERMTU)
4358 		rctl |= E1000_RCTL_LPE;
4359 	else
4360 		rctl &= ~E1000_RCTL_LPE;
4361 
4362 	/* Write out the settings */
4363 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4364 
4365 	return;
4366 }
4367 
4368 
4369 /*********************************************************************
4370  *
4371  *  This routine executes in interrupt context. It replenishes
4372  *  the mbufs in the descriptor and sends data which has been
4373  *  dma'ed into host memory to upper layer.
4374  *
4375  *  We loop at most count times if count is > 0, or until done if
4376  *  count < 0.
4377  *
4378  *  For polling we also now return the number of cleaned packets
4379  *********************************************************************/
4380 static bool
4381 em_rxeof(struct rx_ring *rxr, int count, int *done)
4382 {
4383 	struct adapter		*adapter = rxr->adapter;
4384 	if_t ifp = adapter->ifp;
4385 	struct mbuf		*mp, *sendmp;
4386 	u8			status = 0;
4387 	u16 			len;
4388 	int			i, processed, rxdone = 0;
4389 	bool			eop;
4390 	struct e1000_rx_desc	*cur;
4391 
4392 	EM_RX_LOCK(rxr);
4393 
4394 #ifdef DEV_NETMAP
4395 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4396 		EM_RX_UNLOCK(rxr);
4397 		return (FALSE);
4398 	}
4399 #endif /* DEV_NETMAP */
4400 
4401 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4402 
4403 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4404 			break;
4405 
4406 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4407 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4408 
4409 		cur = &rxr->rx_base[i];
4410 		status = cur->status;
4411 		mp = sendmp = NULL;
4412 
4413 		if ((status & E1000_RXD_STAT_DD) == 0)
4414 			break;
4415 
4416 		len = le16toh(cur->length);
4417 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4418 
4419 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4420 		    (rxr->discard == TRUE)) {
4421 			adapter->dropped_pkts++;
4422 			++rxr->rx_discarded;
4423 			if (!eop) /* Catch subsequent segs */
4424 				rxr->discard = TRUE;
4425 			else
4426 				rxr->discard = FALSE;
4427 			em_rx_discard(rxr, i);
4428 			goto next_desc;
4429 		}
4430 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4431 
4432 		/* Assign correct length to the current fragment */
4433 		mp = rxr->rx_buffers[i].m_head;
4434 		mp->m_len = len;
4435 
4436 		/* Trigger for refresh */
4437 		rxr->rx_buffers[i].m_head = NULL;
4438 
4439 		/* First segment? */
4440 		if (rxr->fmp == NULL) {
4441 			mp->m_pkthdr.len = len;
4442 			rxr->fmp = rxr->lmp = mp;
4443 		} else {
4444 			/* Chain mbuf's together */
4445 			mp->m_flags &= ~M_PKTHDR;
4446 			rxr->lmp->m_next = mp;
4447 			rxr->lmp = mp;
4448 			rxr->fmp->m_pkthdr.len += len;
4449 		}
4450 
4451 		if (eop) {
4452 			--count;
4453 			sendmp = rxr->fmp;
4454 			if_setrcvif(sendmp, ifp);
4455 			if_incipackets(ifp, 1);
4456 			em_receive_checksum(cur, sendmp);
4457 #ifndef __NO_STRICT_ALIGNMENT
4458 			if (adapter->hw.mac.max_frame_size >
4459 			    (MCLBYTES - ETHER_ALIGN) &&
4460 			    em_fixup_rx(rxr) != 0)
4461 				goto skip;
4462 #endif
4463 			if (status & E1000_RXD_STAT_VP) {
4464 				if_setvtag(sendmp,
4465 				    le16toh(cur->special));
4466 				sendmp->m_flags |= M_VLANTAG;
4467 			}
4468 #ifndef __NO_STRICT_ALIGNMENT
4469 skip:
4470 #endif
4471 			rxr->fmp = rxr->lmp = NULL;
4472 		}
4473 next_desc:
4474 		/* Zero out the receive descriptors status. */
4475 		cur->status = 0;
4476 		++rxdone;	/* cumulative for POLL */
4477 		++processed;
4478 
4479 		/* Advance our pointers to the next descriptor. */
4480 		if (++i == adapter->num_rx_desc)
4481 			i = 0;
4482 
4483 		/* Send to the stack */
4484 		if (sendmp != NULL) {
4485 			rxr->next_to_check = i;
4486 			EM_RX_UNLOCK(rxr);
4487 			if_input(ifp, sendmp);
4488 			EM_RX_LOCK(rxr);
4489 			i = rxr->next_to_check;
4490 		}
4491 
4492 		/* Only refresh mbufs every 8 descriptors */
4493 		if (processed == 8) {
4494 			em_refresh_mbufs(rxr, i);
4495 			processed = 0;
4496 		}
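		/*
		 * Design note: batching refreshes in groups of eight
		 * amortizes the mbuf allocations and the RDT tail
		 * register write, an uncached PCI access, over
		 * several descriptors.
		 */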
4497 	}
4498 
4499 	/* Catch any remaining refresh work */
4500 	if (e1000_rx_unrefreshed(rxr))
4501 		em_refresh_mbufs(rxr, i);
4502 
4503 	rxr->next_to_check = i;
4504 	if (done != NULL)
4505 		*done = rxdone;
4506 	EM_RX_UNLOCK(rxr);
4507 
4508 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4509 }
4510 
4511 static __inline void
4512 em_rx_discard(struct rx_ring *rxr, int i)
4513 {
4514 	struct em_buffer	*rbuf;
4515 
4516 	rbuf = &rxr->rx_buffers[i];
4517 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4518 
4519 	/* Free any previous pieces */
4520 	if (rxr->fmp != NULL) {
4521 		rxr->fmp->m_flags |= M_PKTHDR;
4522 		m_freem(rxr->fmp);
4523 		rxr->fmp = NULL;
4524 		rxr->lmp = NULL;
4525 	}
4526 	/*
4527 	** Free buffer and allow em_refresh_mbufs()
4528 	** to clean up and recharge buffer.
4529 	*/
4530 	if (rbuf->m_head) {
4531 		m_free(rbuf->m_head);
4532 		rbuf->m_head = NULL;
4533 	}
4534 	return;
4535 }
4536 
4537 #ifndef __NO_STRICT_ALIGNMENT
4538 /*
4539  * When jumbo frames are enabled we should realign the entire payload on
4540  * architectures with strict alignment. This is a serious design mistake of
4541  * the 8254x, as it nullifies DMA operations. The 8254x only allows RX buffer
4542  * sizes of 2048/4096/8192/16384; what we really want is 2048 - ETHER_ALIGN
4543  * to align the payload. On architectures without strict alignment
4544  * restrictions the 8254x still performs unaligned memory accesses, which
4545  * reduce performance too. To avoid copying an entire frame to align it, we
4546  * allocate a new mbuf, copy the ethernet header into it, and prepend the
4547  * new mbuf onto the existing mbuf chain.
4548  *
4549  * Be aware, the best performance of the 8254x is achieved only when jumbo
4550  * frames are not used at all on architectures with strict alignment.
4551  */
4552 static int
4553 em_fixup_rx(struct rx_ring *rxr)
4554 {
4555 	struct adapter *adapter = rxr->adapter;
4556 	struct mbuf *m, *n;
4557 	int error;
4558 
4559 	error = 0;
4560 	m = rxr->fmp;
4561 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4562 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4563 		m->m_data += ETHER_HDR_LEN;
4564 	} else {
4565 		MGETHDR(n, M_NOWAIT, MT_DATA);
4566 		if (n != NULL) {
4567 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4568 			m->m_data += ETHER_HDR_LEN;
4569 			m->m_len -= ETHER_HDR_LEN;
4570 			n->m_len = ETHER_HDR_LEN;
4571 			M_MOVE_PKTHDR(n, m);
4572 			n->m_next = m;
4573 			rxr->fmp = n;
4574 		} else {
4575 			adapter->dropped_pkts++;
4576 			m_freem(rxr->fmp);
4577 			rxr->fmp = NULL;
4578 			error = ENOMEM;
4579 		}
4580 	}
4581 
4582 	return (error);
4583 }
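
/*
 * A minimal sketch (illustrative only, not part of the driver) of the
 * alignment problem handled above: with a naturally aligned receive
 * buffer, the IP header lands at offset ETHER_HDR_LEN (14), which is
 * not 4-byte aligned, so strict-alignment machines would fault on it.
 */
#if 0
static int
em_example_payload_misaligned(char *buf)
{
	/* The IP header starts right after the 14-byte Ethernet header */
	uintptr_t ip_hdr = (uintptr_t)(buf + ETHER_HDR_LEN);

	return ((ip_hdr & 3) != 0);	/* 14 % 4 != 0 -> misaligned */
}
#endif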
4584 #endif
4585 
4586 /*********************************************************************
4587  *
4588  *  Verify that the hardware indicated that the checksum is valid.
4589  *  Inform the stack about the status of checksum so that stack
4590  *  doesn't spend time verifying the checksum.
4591  *
4592  *********************************************************************/
4593 static void
4594 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4595 {
4596 	mp->m_pkthdr.csum_flags = 0;
4597 
4598 	/* Ignore Checksum bit is set */
4599 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4600 		return;
4601 
4602 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4603 		return;
4604 
4605 	/* IP Checksum Good? */
4606 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4607 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4608 
4609 	/* TCP or UDP checksum */
4610 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4611 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4612 		mp->m_pkthdr.csum_data = htons(0xffff);
4613 	}
4614 }
4615 
4616 /*
4617  * This routine is run via a vlan
4618  * config EVENT
4619  */
4620 static void
4621 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4622 {
4623 	struct adapter	*adapter = if_getsoftc(ifp);
4624 	u32		index, bit;
4625 
4626 	if ((void *)adapter != arg)   /* Not our event */
4627 		return;
4628 
4629 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4630 		return;
4631 
4632 	EM_CORE_LOCK(adapter);
4633 	index = (vtag >> 5) & 0x7F;
4634 	bit = vtag & 0x1F;
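	/*
	 * Worked example (illustrative, not from the original source):
	 * for vtag 100, index = (100 >> 5) & 0x7F = 3 and
	 * bit = 100 & 0x1F = 4, so bit 4 of shadow_vfta[3] represents
	 * VLAN 100 within the 4096-bit filter table.
	 */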
4635 	adapter->shadow_vfta[index] |= (1 << bit);
4636 	++adapter->num_vlans;
4637 	/* Re-init to load the changes */
4638 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4639 		em_init_locked(adapter);
4640 	EM_CORE_UNLOCK(adapter);
4641 }
4642 
4643 /*
4644  * This routine is run via a vlan
4645  * unconfig EVENT
4646  */
4647 static void
4648 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4649 {
4650 	struct adapter	*adapter = if_getsoftc(ifp);
4651 	u32		index, bit;
4652 
4653 	if (adapter != arg)
4654 		return;
4655 
4656 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4657 		return;
4658 
4659 	EM_CORE_LOCK(adapter);
4660 	index = (vtag >> 5) & 0x7F;
4661 	bit = vtag & 0x1F;
4662 	adapter->shadow_vfta[index] &= ~(1 << bit);
4663 	--adapter->num_vlans;
4664 	/* Re-init to load the changes */
4665 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4666 		em_init_locked(adapter);
4667 	EM_CORE_UNLOCK(adapter);
4668 }
4669 
4670 static void
4671 em_setup_vlan_hw_support(struct adapter *adapter)
4672 {
4673 	struct e1000_hw *hw = &adapter->hw;
4674 	u32             reg;
4675 
4676 	/*
4677 	** We get here thru init_locked, meaning
4678 	** a soft reset; this has already cleared
4679 	** the VFTA and other state, so if no
4680 	** vlans have been registered, do nothing.
4681 	*/
4682 	if (adapter->num_vlans == 0)
4683 		return;
4684 
4685 	/*
4686 	** A soft reset zeroes out the VFTA, so
4687 	** we need to repopulate it now.
4688 	*/
4689 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4690 		if (adapter->shadow_vfta[i] != 0)
4691 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4692 			    i, adapter->shadow_vfta[i]);
4693 
4694 	reg = E1000_READ_REG(hw, E1000_CTRL);
4695 	reg |= E1000_CTRL_VME;
4696 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4697 
4698 	/* Enable the Filter Table */
4699 	reg = E1000_READ_REG(hw, E1000_RCTL);
4700 	reg &= ~E1000_RCTL_CFIEN;
4701 	reg |= E1000_RCTL_VFE;
4702 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4703 }
4704 
4705 static void
4706 em_enable_intr(struct adapter *adapter)
4707 {
4708 	struct e1000_hw *hw = &adapter->hw;
4709 	u32 ims_mask = IMS_ENABLE_MASK;
4710 
4711 	if (hw->mac.type == e1000_82574) {
4712 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4713 		ims_mask |= EM_MSIX_MASK;
4714 	}
4715 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4716 }
4717 
4718 static void
4719 em_disable_intr(struct adapter *adapter)
4720 {
4721 	struct e1000_hw *hw = &adapter->hw;
4722 
4723 	if (hw->mac.type == e1000_82574)
4724 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4725 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4726 }
4727 
4728 /*
4729  * Bit of a misnomer: what this really means is
4730  * to enable OS management of the system, i.e.
4731  * to disable special hardware management features.
4732  */
4733 static void
4734 em_init_manageability(struct adapter *adapter)
4735 {
4736 	/* A shared code workaround */
4737 #define E1000_82542_MANC2H E1000_MANC2H
4738 	if (adapter->has_manage) {
4739 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4740 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4741 
4742 		/* disable hardware interception of ARP */
4743 		manc &= ~(E1000_MANC_ARP_EN);
4744 
4745 		/* enable receiving management packets to the host */
4746 		manc |= E1000_MANC_EN_MNG2HOST;
4747 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4748 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4749 		manc2h |= E1000_MNG2HOST_PORT_623;
4750 		manc2h |= E1000_MNG2HOST_PORT_664;
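		/*
		 * Illustrative note: 623 and 664 are the standard RMCP
		 * and secure-RMCP UDP ports used by ASF/IPMI management
		 * traffic; the MANC2H bits above allow such packets to
		 * reach the host.
		 */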
4751 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4752 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4753 	}
4754 }
4755 
4756 /*
4757  * Give control back to hardware management
4758  * controller if there is one.
4759  */
4760 static void
4761 em_release_manageability(struct adapter *adapter)
4762 {
4763 	if (adapter->has_manage) {
4764 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4765 
4766 		/* re-enable hardware interception of ARP */
4767 		manc |= E1000_MANC_ARP_EN;
4768 		manc &= ~E1000_MANC_EN_MNG2HOST;
4769 
4770 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4771 	}
4772 }
4773 
4774 /*
4775  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4776  * For ASF and Pass Through versions of f/w this means
4777  * that the driver is loaded. For AMT version type f/w
4778  * this means that the network i/f is open.
4779  */
4780 static void
4781 em_get_hw_control(struct adapter *adapter)
4782 {
4783 	u32 ctrl_ext, swsm;
4784 
4785 	if (adapter->hw.mac.type == e1000_82573) {
4786 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4787 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4788 		    swsm | E1000_SWSM_DRV_LOAD);
4789 		return;
4790 	}
4791 	/* else */
4792 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4793 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4794 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4795 	return;
4796 }
4797 
4798 /*
4799  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4800  * For ASF and Pass Through versions of f/w this means that
4801  * the driver is no longer loaded. For AMT versions of the
4802  * f/w this means that the network i/f is closed.
4803  */
4804 static void
4805 em_release_hw_control(struct adapter *adapter)
4806 {
4807 	u32 ctrl_ext, swsm;
4808 
4809 	if (!adapter->has_manage)
4810 		return;
4811 
4812 	if (adapter->hw.mac.type == e1000_82573) {
4813 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4814 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4815 		    swsm & ~E1000_SWSM_DRV_LOAD);
4816 		return;
4817 	}
4818 	/* else */
4819 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4820 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4821 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4822 	return;
4823 }
4824 
4825 static int
4826 em_is_valid_ether_addr(u8 *addr)
4827 {
4828 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4829 
4830 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4831 		return (FALSE);
4832 	}
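
	/*
	 * Illustrative note: addr[0] & 1 tests the IEEE 802 I/G bit,
	 * so multicast addresses such as 01:00:5e:00:00:01 are
	 * rejected along with the all-zero address.
	 */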
4833 
4834 	return (TRUE);
4835 }
4836 
4837 /*
4838 ** Parse the interface capabilities with regard
4839 ** to both system management and wake-on-lan for
4840 ** later use.
4841 */
4842 static void
4843 em_get_wakeup(device_t dev)
4844 {
4845 	struct adapter	*adapter = device_get_softc(dev);
4846 	u16		eeprom_data = 0, device_id, apme_mask;
4847 
4848 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4849 	apme_mask = EM_EEPROM_APME;
4850 
4851 	switch (adapter->hw.mac.type) {
4852 	case e1000_82573:
4853 	case e1000_82583:
4854 		adapter->has_amt = TRUE;
4855 		/* Falls thru */
4856 	case e1000_82571:
4857 	case e1000_82572:
4858 	case e1000_80003es2lan:
4859 		if (adapter->hw.bus.func == 1) {
4860 			e1000_read_nvm(&adapter->hw,
4861 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4862 			break;
4863 		} else
4864 			e1000_read_nvm(&adapter->hw,
4865 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4866 		break;
4867 	case e1000_ich8lan:
4868 	case e1000_ich9lan:
4869 	case e1000_ich10lan:
4870 	case e1000_pchlan:
4871 	case e1000_pch2lan:
4872 		apme_mask = E1000_WUC_APME;
4873 		adapter->has_amt = TRUE;
4874 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4875 		break;
4876 	default:
4877 		e1000_read_nvm(&adapter->hw,
4878 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4879 		break;
4880 	}
4881 	if (eeprom_data & apme_mask)
4882 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4883 	/*
4884 	 * We have the eeprom settings, now apply the special cases
4885 	 * where the eeprom may be wrong or the board won't support
4886 	 * wake on lan on a particular port.
4887 	 */
4888 	device_id = pci_get_device(dev);
4889 	switch (device_id) {
4890 	case E1000_DEV_ID_82571EB_FIBER:
4891 		/* Wake events only supported on port A for dual fiber
4892 		 * regardless of eeprom setting */
4893 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4894 		    E1000_STATUS_FUNC_1)
4895 			adapter->wol = 0;
4896 		break;
4897 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4898 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4899 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4900 		/* if quad port adapter, disable WoL on all but port A */
4901 		if (global_quad_port_a != 0)
4902 			adapter->wol = 0;
4903 		/* Reset for multiple quad port adapters */
4904 		if (++global_quad_port_a == 4)
4905 			global_quad_port_a = 0;
4906 		break;
4907 	}
4908 	return;
4909 }
4910 
4911 
4912 /*
4913  * Enable PCI Wake On Lan capability
4914  */
4915 static void
4916 em_enable_wakeup(device_t dev)
4917 {
4918 	struct adapter	*adapter = device_get_softc(dev);
4919 	if_t ifp = adapter->ifp;
4920 	u32		pmc, ctrl, ctrl_ext, rctl;
4921 	u16     	status;
4922 
4923 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4924 		return;
4925 
4926 	/* Advertise the wakeup capability */
4927 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4928 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4929 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4930 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4931 
4932 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4933 	    (adapter->hw.mac.type == e1000_pchlan) ||
4934 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4935 	    (adapter->hw.mac.type == e1000_ich10lan))
4936 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4937 
4938 	/* Keep the laser running on Fiber adapters */
4939 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4940 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4941 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4942 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4943 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4944 	}
4945 
4946 	/*
4947 	** Determine type of Wakeup: note that wol
4948 	** is set with all bits on by default.
4949 	*/
4950 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
4951 		adapter->wol &= ~E1000_WUFC_MAG;
4952 
4953 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
4954 		adapter->wol &= ~E1000_WUFC_MC;
4955 	else {
4956 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4957 		rctl |= E1000_RCTL_MPE;
4958 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4959 	}
4960 
4961 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4962 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4963 		if (em_enable_phy_wakeup(adapter))
4964 			return;
4965 	} else {
4966 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4967 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4968 	}
4969 
4970 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4971 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4972 
4973 	/* Request PME */
4974 	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4975 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4976 	if (if_getcapenable(ifp) & IFCAP_WOL)
4977 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4978 	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4979 
4980 	return;
4981 }
4982 
4983 /*
4984 ** WOL in the newer chipset interfaces (pchlan)
4985 ** requires things to be copied into the PHY.
4986 */
4987 static int
4988 em_enable_phy_wakeup(struct adapter *adapter)
4989 {
4990 	struct e1000_hw *hw = &adapter->hw;
4991 	u32 mreg, ret = 0;
4992 	u16 preg;
4993 
4994 	/* copy MAC RARs to PHY RARs */
4995 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4996 
4997 	/* copy MAC MTA to PHY MTA */
4998 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4999 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5000 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5001 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5002 		    (u16)((mreg >> 16) & 0xFFFF));
5003 	}
5004 
5005 	/* configure PHY Rx Control register */
5006 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5007 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5008 	if (mreg & E1000_RCTL_UPE)
5009 		preg |= BM_RCTL_UPE;
5010 	if (mreg & E1000_RCTL_MPE)
5011 		preg |= BM_RCTL_MPE;
5012 	preg &= ~(BM_RCTL_MO_MASK);
5013 	if (mreg & E1000_RCTL_MO_3)
5014 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5015 				<< BM_RCTL_MO_SHIFT);
5016 	if (mreg & E1000_RCTL_BAM)
5017 		preg |= BM_RCTL_BAM;
5018 	if (mreg & E1000_RCTL_PMCF)
5019 		preg |= BM_RCTL_PMCF;
5020 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5021 	if (mreg & E1000_CTRL_RFCE)
5022 		preg |= BM_RCTL_RFCE;
5023 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5024 
5025 	/* enable PHY wakeup in MAC register */
5026 	E1000_WRITE_REG(hw, E1000_WUC,
5027 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5028 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5029 
5030 	/* configure and enable PHY wakeup in PHY registers */
5031 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5032 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5033 
5034 	/* activate PHY wakeup */
5035 	ret = hw->phy.ops.acquire(hw);
5036 	if (ret) {
5037 		printf("Could not acquire PHY\n");
5038 		return ret;
5039 	}
5040 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5041 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5042 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5043 	if (ret) {
5044 		printf("Could not read PHY page 769\n");
5045 		goto out;
5046 	}
5047 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5048 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5049 	if (ret)
5050 		printf("Could not set PHY Host Wakeup bit\n");
5051 out:
5052 	hw->phy.ops.release(hw);
5053 
5054 	return ret;
5055 }
5056 
5057 static void
5058 em_led_func(void *arg, int onoff)
5059 {
5060 	struct adapter	*adapter = arg;
5061 
5062 	EM_CORE_LOCK(adapter);
5063 	if (onoff) {
5064 		e1000_setup_led(&adapter->hw);
5065 		e1000_led_on(&adapter->hw);
5066 	} else {
5067 		e1000_led_off(&adapter->hw);
5068 		e1000_cleanup_led(&adapter->hw);
5069 	}
5070 	EM_CORE_UNLOCK(adapter);
5071 }
5072 
5073 /*
5074 ** Disable the L0S and L1 LINK states
5075 */
5076 static void
5077 em_disable_aspm(struct adapter *adapter)
5078 {
5079 	int		base, reg;
5080 	u16		link_cap, link_ctrl;
5081 	device_t	dev = adapter->dev;
5082 
5083 	switch (adapter->hw.mac.type) {
5084 		case e1000_82573:
5085 		case e1000_82574:
5086 		case e1000_82583:
5087 			break;
5088 		default:
5089 			return;
5090 	}
5091 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5092 		return;
5093 	reg = base + PCIER_LINK_CAP;
5094 	link_cap = pci_read_config(dev, reg, 2);
5095 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5096 		return;
5097 	reg = base + PCIER_LINK_CTL;
5098 	link_ctrl = pci_read_config(dev, reg, 2);
5099 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5100 	pci_write_config(dev, reg, link_ctrl, 2);
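	/*
	 * Illustrative note: PCIEM_LINK_CTL_ASPMC is the two-bit ASPM
	 * Control field (bits 1:0 of the PCIe Link Control register);
	 * clearing it disables both the L0s and L1 link power states.
	 */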
5101 	return;
5102 }
5103 
5104 /**********************************************************************
5105  *
5106  *  Update the board statistics counters.
5107  *
5108  **********************************************************************/
5109 static void
5110 em_update_stats_counters(struct adapter *adapter)
5111 {
5112 	if_t ifp;
5113 
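	/*
	 * Symbol and sequence errors are only gathered for copper
	 * media, or for fiber/serdes while the link is up.
	 */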
5114 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5115 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5116 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5117 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5118 	}
5119 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5120 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5121 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5122 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5123 
5124 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5125 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5126 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5127 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5128 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5129 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5130 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5131 	/*
5132 	** For watchdog management we need to know if we have been
5133 	** paused during the last interval, so capture that here.
5134 	*/
5135 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5136 	adapter->stats.xoffrxc += adapter->pause_frames;
5137 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5138 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5139 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5140 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5141 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5142 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5143 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5144 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5145 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5146 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5147 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5148 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5149 
5150 	/* For the 64-bit byte counters the low dword must be read first. */
5151 	/* Both registers clear on the read of the high dword */
5152 
5153 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5154 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5155 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5156 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5157 
5158 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5159 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5160 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5161 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5162 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5163 
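	/*
	 * XXX Only the high dwords are read for total octets below;
	 * per the note above that clears both halves, but it also
	 * discards the low 32 bits of each count.
	 */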
5164 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5165 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5166 
5167 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5168 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5169 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5170 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5171 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5172 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5173 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5174 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5175 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5176 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5177 
5178 	/* Interrupt Counts */
5179 
5180 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5181 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5182 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5183 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5184 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5185 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5186 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5187 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5188 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5189 
5190 	if (adapter->hw.mac.type >= e1000_82543) {
5191 		adapter->stats.algnerrc +=
5192 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5193 		adapter->stats.rxerrc +=
5194 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5195 		adapter->stats.tncrs +=
5196 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5197 		adapter->stats.cexterr +=
5198 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5199 		adapter->stats.tsctc +=
5200 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5201 		adapter->stats.tsctfc +=
5202 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5203 	}
5204 	ifp = adapter->ifp;
5205 
5206 	if_setcollisions(ifp, adapter->stats.colc);
5207 
5208 	/* Rx Errors */
5209 	if_setierrors(ifp, adapter->dropped_pkts + adapter->stats.rxerrc +
5210 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5211 	    adapter->stats.ruc + adapter->stats.roc +
5212 	    adapter->stats.mpc + adapter->stats.cexterr);
5213 
5214 	/* Tx Errors */
5215 	if_setoerrors(ifp, adapter->stats.ecol + adapter->stats.latecol +
5216 	    adapter->watchdog_events);
5217 }
5218 
5219 /* Export a single 32-bit register via a read-only sysctl. */
5220 static int
5221 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5222 {
5223 	struct adapter *adapter;
5224 	u_int val;
5225 
5226 	adapter = oidp->oid_arg1;
5227 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5228 	return (sysctl_handle_int(oidp, &val, 0, req));
5229 }
5230 
5231 /*
5232  * Add sysctl variables, one per statistic, to the system.
5233  */
5234 static void
5235 em_add_hw_stats(struct adapter *adapter)
5236 {
5237 	device_t dev = adapter->dev;
5238 
5239 	struct tx_ring *txr = adapter->tx_rings;
5240 	struct rx_ring *rxr = adapter->rx_rings;
5241 
5242 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5243 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5244 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5245 	struct e1000_hw_stats *stats = &adapter->stats;
5246 
5247 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5248 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5249 
5250 #define QUEUE_NAME_LEN 32
5251 	char namebuf[QUEUE_NAME_LEN];
5252 
5253 	/* Driver Statistics */
5254 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5255 			CTLFLAG_RD, &adapter->link_irq,
5256 			"Link MSIX IRQ Handled");
5257 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5258 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5259 			 "Std mbuf failed");
5260 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5261 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5262 			 "Std mbuf cluster failed");
5263 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5264 			CTLFLAG_RD, &adapter->dropped_pkts,
5265 			"Driver dropped packets");
5266 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5267 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5268 			"Driver tx dma failure in xmit");
5269 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5270 			CTLFLAG_RD, &adapter->rx_overruns,
5271 			"RX overruns");
5272 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5273 			CTLFLAG_RD, &adapter->watchdog_events,
5274 			"Watchdog timeouts");
5275 
5276 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5277 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5278 			em_sysctl_reg_handler, "IU",
5279 			"Device Control Register");
5280 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5281 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5282 			em_sysctl_reg_handler, "IU",
5283 			"Receiver Control Register");
5284 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5285 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5286 			"Flow Control High Watermark");
5287 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5288 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5289 			"Flow Control Low Watermark");
5290 
5291 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5292 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5293 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5294 					    CTLFLAG_RD, NULL, "Queue Name");
5295 		queue_list = SYSCTL_CHILDREN(queue_node);
5296 
5297 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5298 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5299 				E1000_TDH(txr->me),
5300 				em_sysctl_reg_handler, "IU",
5301  				"Transmit Descriptor Head");
5302 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5303 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5304 				E1000_TDT(txr->me),
5305 				em_sysctl_reg_handler, "IU",
5306  				"Transmit Descriptor Tail");
5307 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5308 				CTLFLAG_RD, &txr->tx_irq,
5309 				"Queue MSI-X Transmit Interrupts");
5310 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5311 				CTLFLAG_RD, &txr->no_desc_avail,
5312 				"Queue No Descriptor Available");
5313 
5314 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5315 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5316 				E1000_RDH(rxr->me),
5317 				em_sysctl_reg_handler, "IU",
5318 				"Receive Descriptor Head");
5319 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5320 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5321 				E1000_RDT(rxr->me),
5322 				em_sysctl_reg_handler, "IU",
5323 				"Receive Descriptor Tail");
5324 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5325 				CTLFLAG_RD, &rxr->rx_irq,
5326 				"Queue MSI-X Receive Interrupts");
5327 	}
5328 
5329 	/* MAC stats get their own sub node */
5330 
5331 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5332 				    CTLFLAG_RD, NULL, "Statistics");
5333 	stat_list = SYSCTL_CHILDREN(stat_node);
5334 
5335 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5336 			CTLFLAG_RD, &stats->ecol,
5337 			"Excessive collisions");
5338 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5339 			CTLFLAG_RD, &stats->scc,
5340 			"Single collisions");
5341 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5342 			CTLFLAG_RD, &stats->mcc,
5343 			"Multiple collisions");
5344 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5345 			CTLFLAG_RD, &stats->latecol,
5346 			"Late collisions");
5347 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5348 			CTLFLAG_RD, &stats->colc,
5349 			"Collision Count");
5350 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5351 			CTLFLAG_RD, &adapter->stats.symerrs,
5352 			"Symbol Errors");
5353 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5354 			CTLFLAG_RD, &adapter->stats.sec,
5355 			"Sequence Errors");
5356 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5357 			CTLFLAG_RD, &adapter->stats.dc,
5358 			"Defer Count");
5359 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5360 			CTLFLAG_RD, &adapter->stats.mpc,
5361 			"Missed Packets");
5362 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5363 			CTLFLAG_RD, &adapter->stats.rnbc,
5364 			"Receive No Buffers");
5365 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5366 			CTLFLAG_RD, &adapter->stats.ruc,
5367 			"Receive Undersize");
5368 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5369 			CTLFLAG_RD, &adapter->stats.rfc,
5370 			"Fragmented Packets Received ");
5371 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5372 			CTLFLAG_RD, &adapter->stats.roc,
5373 			"Oversized Packets Received");
5374 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5375 			CTLFLAG_RD, &adapter->stats.rjc,
5376 			"Recevied Jabber");
5377 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5378 			CTLFLAG_RD, &adapter->stats.rxerrc,
5379 			"Receive Errors");
5380 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5381 			CTLFLAG_RD, &adapter->stats.crcerrs,
5382 			"CRC errors");
5383 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5384 			CTLFLAG_RD, &adapter->stats.algnerrc,
5385 			"Alignment Errors");
5386 	/* On 82575 these are collision counts */
5387 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5388 			CTLFLAG_RD, &adapter->stats.cexterr,
5389 			"Collision/Carrier extension errors");
5390 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5391 			CTLFLAG_RD, &adapter->stats.xonrxc,
5392 			"XON Received");
5393 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5394 			CTLFLAG_RD, &adapter->stats.xontxc,
5395 			"XON Transmitted");
5396 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5397 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5398 			"XOFF Received");
5399 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5400 			CTLFLAG_RD, &adapter->stats.xofftxc,
5401 			"XOFF Transmitted");
5402 
5403 	/* Packet Reception Stats */
5404 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5405 			CTLFLAG_RD, &adapter->stats.tpr,
5406 			"Total Packets Received ");
5407 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5408 			CTLFLAG_RD, &adapter->stats.gprc,
5409 			"Good Packets Received");
5410 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5411 			CTLFLAG_RD, &adapter->stats.bprc,
5412 			"Broadcast Packets Received");
5413 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5414 			CTLFLAG_RD, &adapter->stats.mprc,
5415 			"Multicast Packets Received");
5416 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5417 			CTLFLAG_RD, &adapter->stats.prc64,
5418 			"64 byte frames received ");
5419 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5420 			CTLFLAG_RD, &adapter->stats.prc127,
5421 			"65-127 byte frames received");
5422 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5423 			CTLFLAG_RD, &adapter->stats.prc255,
5424 			"128-255 byte frames received");
5425 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5426 			CTLFLAG_RD, &adapter->stats.prc511,
5427 			"256-511 byte frames received");
5428 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5429 			CTLFLAG_RD, &adapter->stats.prc1023,
5430 			"512-1023 byte frames received");
5431 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5432 			CTLFLAG_RD, &adapter->stats.prc1522,
5433 			"1023-1522 byte frames received");
5434 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5435 			CTLFLAG_RD, &adapter->stats.gorc,
5436 			"Good Octets Received");
5437 
5438 	/* Packet Transmission Stats */
5439 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5440 			CTLFLAG_RD, &adapter->stats.gotc,
5441 			"Good Octets Transmitted");
5442 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5443 			CTLFLAG_RD, &adapter->stats.tpt,
5444 			"Total Packets Transmitted");
5445 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5446 			CTLFLAG_RD, &adapter->stats.gptc,
5447 			"Good Packets Transmitted");
5448 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5449 			CTLFLAG_RD, &adapter->stats.bptc,
5450 			"Broadcast Packets Transmitted");
5451 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5452 			CTLFLAG_RD, &adapter->stats.mptc,
5453 			"Multicast Packets Transmitted");
5454 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5455 			CTLFLAG_RD, &adapter->stats.ptc64,
5456 			"64 byte frames transmitted ");
5457 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5458 			CTLFLAG_RD, &adapter->stats.ptc127,
5459 			"65-127 byte frames transmitted");
5460 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5461 			CTLFLAG_RD, &adapter->stats.ptc255,
5462 			"128-255 byte frames transmitted");
5463 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5464 			CTLFLAG_RD, &adapter->stats.ptc511,
5465 			"256-511 byte frames transmitted");
5466 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5467 			CTLFLAG_RD, &adapter->stats.ptc1023,
5468 			"512-1023 byte frames transmitted");
5469 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5470 			CTLFLAG_RD, &adapter->stats.ptc1522,
5471 			"1024-1522 byte frames transmitted");
5472 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5473 			CTLFLAG_RD, &adapter->stats.tsctc,
5474 			"TSO Contexts Transmitted");
5475 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5476 			CTLFLAG_RD, &adapter->stats.tsctfc,
5477 			"TSO Contexts Failed");
5478 
5479 
5480 	/* Interrupt Stats */
5481 
5482 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5483 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5484 	int_list = SYSCTL_CHILDREN(int_node);
5485 
5486 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5487 			CTLFLAG_RD, &adapter->stats.iac,
5488 			"Interrupt Assertion Count");
5489 
5490 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5491 			CTLFLAG_RD, &adapter->stats.icrxptc,
5492 			"Interrupt Cause Rx Pkt Timer Expire Count");
5493 
5494 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5495 			CTLFLAG_RD, &adapter->stats.icrxatc,
5496 			"Interrupt Cause Rx Abs Timer Expire Count");
5497 
5498 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5499 			CTLFLAG_RD, &adapter->stats.ictxptc,
5500 			"Interrupt Cause Tx Pkt Timer Expire Count");
5501 
5502 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5503 			CTLFLAG_RD, &adapter->stats.ictxatc,
5504 			"Interrupt Cause Tx Abs Timer Expire Count");
5505 
5506 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5507 			CTLFLAG_RD, &adapter->stats.ictxqec,
5508 			"Interrupt Cause Tx Queue Empty Count");
5509 
5510 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5511 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5512 			"Interrupt Cause Tx Queue Min Thresh Count");
5513 
5514 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5515 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5516 			"Interrupt Cause Rx Desc Min Thresh Count");
5517 
5518 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5519 			CTLFLAG_RD, &adapter->stats.icrxoc,
5520 			"Interrupt Cause Receiver Overrun Count");
5521 }
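/*
** The nodes above land in the device's sysctl tree.  Example paths,
** assuming unit 0:
**
**	dev.em.0.mac_stats.excess_coll
**	dev.em.0.queue0.txd_head
**	dev.em.0.interrupts.asserts
*/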
5522 
5523 /**********************************************************************
5524  *
5525  *  This routine provides a way to dump out the adapter eeprom,
5526  *  often a useful debug/service tool. Only the first 32 words
5527  *  are dumped; the data that matters lives in that range.
5528  *
5529  **********************************************************************/
5530 static int
5531 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5532 {
5533 	struct adapter *adapter = (struct adapter *)arg1;
5534 	int error;
5535 	int result;
5536 
5537 	result = -1;
5538 	error = sysctl_handle_int(oidp, &result, 0, req);
5539 
5540 	if (error || !req->newptr)
5541 		return (error);
5542 
5543 	/*
5544 	 * This value will cause a hex dump of the
5545 	 * first 32 16-bit words of the EEPROM to
5546 	 * the screen.
5547 	 */
5548 	if (result == 1)
5549 		em_print_nvm_info(adapter);
5550 
5551 	return (error);
5552 }
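/*
 * Sketch of intended use, assuming unit 0 and that the handler is
 * registered under the name "nvm" (the registration happens elsewhere
 * in the driver):
 *
 *	sysctl dev.em.0.nvm=1
 *
 * Writing 1 triggers the dump; the value read back is always -1.
 */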
5553 
5554 static void
5555 em_print_nvm_info(struct adapter *adapter)
5556 {
5557 	u16	eeprom_data;
5558 	int	i, j, row = 0;
5559 
5560 	/* It's a bit crude, but it gets the job done */
5561 	printf("\nInterface EEPROM Dump:\n");
5562 	printf("Offset\n0x0000  ");
5563 	for (i = 0, j = 0; i < 32; i++, j++) {
5564 		if (j == 8) { /* Make the offset block */
5565 			j = 0; ++row;
5566 			printf("\n0x00%x0  ",row);
5567 		}
5568 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5569 		printf("%04x ", eeprom_data);
5570 	}
5571 	printf("\n");
5572 }
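/*
 * The dump prints eight 16-bit words per row with a byte-offset
 * column, in this shape (values elided):
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 */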
5573 
5574 static int
5575 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5576 {
5577 	struct em_int_delay_info *info;
5578 	struct adapter *adapter;
5579 	u32 regval;
5580 	int error, usecs, ticks;
5581 
5582 	info = (struct em_int_delay_info *)arg1;
5583 	usecs = info->value;
5584 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5585 	if (error != 0 || req->newptr == NULL)
5586 		return (error);
5587 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5588 		return (EINVAL);
5589 	info->value = usecs;
5590 	ticks = EM_USECS_TO_TICKS(usecs);
5591 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5592 		ticks *= 4;
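	/*
	 * EM_USECS_TO_TICKS() above yields the 1.024us units the delay
	 * timers use; the ITR counts 256ns units, hence the scale by
	 * 4 (1024 / 256).
	 */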
5593 
5594 	adapter = info->adapter;
5595 
5596 	EM_CORE_LOCK(adapter);
5597 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5598 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5599 	/* Handle a few special cases. */
5600 	switch (info->offset) {
5601 	case E1000_RDTR:
5602 		break;
5603 	case E1000_TIDV:
5604 		if (ticks == 0) {
5605 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5606 			/* Don't write 0 into the TIDV register. */
5607 			regval++;
5608 		} else
5609 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5610 		break;
5611 	}
5612 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5613 	EM_CORE_UNLOCK(adapter);
5614 	return (0);
5615 }
5616 
5617 static void
5618 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5619 	const char *description, struct em_int_delay_info *info,
5620 	int offset, int value)
5621 {
5622 	info->adapter = adapter;
5623 	info->offset = offset;
5624 	info->value = value;
5625 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5626 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5627 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5628 	    info, 0, em_sysctl_int_delay, "I", description);
5629 }
5630 
5631 static void
5632 em_set_sysctl_value(struct adapter *adapter, const char *name,
5633 	const char *description, int *limit, int value)
5634 {
5635 	*limit = value;
5636 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5637 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5638 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5639 }
5640 
5641 
5642 /*
5643 ** Set flow control using sysctl:
5644 ** Flow control values:
5645 **      0 - off
5646 **      1 - rx pause
5647 **      2 - tx pause
5648 **      3 - full
5649 */
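/*
** Example, assuming unit 0 and that this handler is registered as
** "fc":  "sysctl dev.em.0.fc=3" requests full flow control.  The
** values 0..3 correspond directly to the e1000_fc_none, rx_pause,
** tx_pause and full enum cases matched below.
*/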
5650 static int
5651 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5652 {
5653 	int		error;
5654 	static int	input = 3; /* default is full */
5655 	struct adapter	*adapter = (struct adapter *) arg1;
5656 
5657 	error = sysctl_handle_int(oidp, &input, 0, req);
5658 
5659 	if ((error) || (req->newptr == NULL))
5660 		return (error);
5661 
5662 	if (input == adapter->fc) /* no change? */
5663 		return (error);
5664 
5665 	switch (input) {
5666 	case e1000_fc_rx_pause:
5667 	case e1000_fc_tx_pause:
5668 	case e1000_fc_full:
5669 	case e1000_fc_none:
5670 		adapter->hw.fc.requested_mode = input;
5671 		adapter->fc = input;
5672 		break;
5673 	default:
5674 		/* Do nothing */
5675 		return (error);
5676 	}
5677 
5678 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5679 	e1000_force_mac_fc(&adapter->hw);
5680 	return (error);
5681 }
5682 
5683 /*
5684 ** Manage Energy Efficient Ethernet:
5685 ** Control values:
5686 **     0/1 - enabled/disabled
5687 */
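/*
** Note the inverted storage: the sysctl value is kept as eee_disable,
** so writing 1 turns EEE off.  The interface is reinitialized to
** apply the change.
*/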
5688 static int
5689 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5690 {
5691 	struct adapter *adapter = (struct adapter *) arg1;
5692 	int		error, value;
5693 
5694 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5695 	error = sysctl_handle_int(oidp, &value, 0, req);
5696 	if (error || req->newptr == NULL)
5697 		return (error);
5698 	EM_CORE_LOCK(adapter);
5699 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5700 	em_init_locked(adapter);
5701 	EM_CORE_UNLOCK(adapter);
5702 	return (0);
5703 }
5704 
5705 static int
5706 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5707 {
5708 	struct adapter *adapter;
5709 	int error;
5710 	int result;
5711 
5712 	result = -1;
5713 	error = sysctl_handle_int(oidp, &result, 0, req);
5714 
5715 	if (error || !req->newptr)
5716 		return (error);
5717 
5718 	if (result == 1) {
5719 		adapter = (struct adapter *)arg1;
5720 		em_print_debug_info(adapter);
5721 	}
5722 
5723 	return (error);
5724 }
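/*
 * As with the NVM sysctl above, writing 1 triggers the dump and the
 * value read back is always -1.
 */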
5725 
5726 /*
5727 ** This routine is meant to be fluid, add whatever is
5728 ** needed for debugging a problem.  -jfv
5729 */
5730 static void
5731 em_print_debug_info(struct adapter *adapter)
5732 {
5733 	device_t dev = adapter->dev;
5734 	struct tx_ring *txr = adapter->tx_rings;
5735 	struct rx_ring *rxr = adapter->rx_rings;
5736 
5737 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
5738 		printf("Interface is RUNNING ");
5739 	else
5740 		printf("Interface is NOT RUNNING\n");
5741 
5742 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
5743 		printf("and INACTIVE\n");
5744 	else
5745 		printf("and ACTIVE\n");
5746 
5747 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5748 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5749 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5750 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5751 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5752 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5753 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5754 	device_printf(dev, "TX descriptors avail = %d\n",
5755 	    txr->tx_avail);
5756 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5757 	    txr->no_desc_avail);
5758 	device_printf(dev, "RX discarded packets = %ld\n",
5759 	    rxr->rx_discarded);
5760 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5761 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5762 }
5763