/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.8";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
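
/*
 * DRIVER_MODULE() above registers the "em" driver with the pci bus and
 * its devclass; the MODULE_DEPEND() lines declare (minimum, preferred,
 * maximum) module version dependencies for the kernel linker.
 */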

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
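/*
 * The hardware delay timers tick in units of 1.024 usecs (hence the
 * 1024/1000 factors above), and the +500/+512 terms round to nearest;
 * e.g. EM_TICKS_TO_USECS(128) = (1024 * 128 + 500) / 1000 = 131 usecs.
 */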
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
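/*
 * Assuming the ITR register's 256 ns granularity (which the divisor
 * suggests), DEFAULT_ITR works out to 1000000000 / (8000 * 256) = 488,
 * i.e. at most one interrupt every 125 usecs (8000 interrupts/sec).
 */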

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");
/* Energy Efficient Ethernet - default to OFF (the value feeds eee_disable) */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter, based on its PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of EM_DBA_ALIGN.
	 */
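	/*
	 * (With the usual e1000 sizes - 16-byte descriptors and an
	 * EM_DBA_ALIGN of 128 - this means the counts below must be
	 * multiples of 8.)
	 */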
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
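	/* For a standard frame: 1500 (MTU) + 14 (header) + 4 (FCS) = 1518. */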

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address that follow.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the
 *  request rather than do an immediate send. This software
 *  queueing, rather than multiple hardware tx queues, is where
 *  this driver gains its advantage.
 **********************************************************************/
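
/*
 * A note on the drbr usage below: drbr_peek() leaves the mbuf on the
 * ring, so a slot is released with drbr_advance() once em_xmit()
 * succeeds (or has consumed the mbuf and set it NULL), while an
 * untouched mbuf is returned to the ring with drbr_putback().
 */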
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m != NULL) {
		err = drbr_enqueue(ifp, txr->br, m);
		if (err)
			return (err);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		ifp->if_obytes += next->m_pkthdr.len;
		if (next->m_flags & M_MCAST)
			ifp->if_omcasts++;
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
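		/*
		 * max_frame_size counts the entire frame, so the largest
		 * MTU excludes the Ethernet header and CRC: e.g. a
		 * 9234-byte frame limit allows an MTU of up to 9216.
		 */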
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset.  We keep a duplicate
	 * in the last entry, RAR[14] (E1000_RAR_ENTRIES - 1),
	 * so the interface continues to function in that event.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
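	/*
	** (MCLBYTES is the standard 2K cluster, MJUMPAGESIZE the
	** page-sized jumbo cluster - 4K on most platforms - and
	** MJUM9BYTES the 9K jumbo cluster; the smallest size that
	** fits a full frame is chosen.)
	*/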
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? Reads from a removed PCI device return all ones. */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

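	/*
	 * Defer the real work to the que taskqueue; interrupts stay
	 * masked until em_handle_que() re-enables them.
	 */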
1501 	em_disable_intr(adapter);
1502 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1503 
1504 	/* Link status change */
1505 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1506 		adapter->hw.mac.get_link_status = 1;
1507 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1508 	}
1509 
1510 	if (reg_icr & E1000_ICR_RXO)
1511 		adapter->rx_overruns++;
1512 	return FILTER_HANDLED;
1513 }
1514 
1515 /* Combined RX/TX handler, used by Legacy and MSI */
1516 static void
1517 em_handle_que(void *context, int pending)
1518 {
1519 	struct adapter	*adapter = context;
1520 	struct ifnet	*ifp = adapter->ifp;
1521 	struct tx_ring	*txr = adapter->tx_rings;
1522 	struct rx_ring	*rxr = adapter->rx_rings;
1523 
1524 
1525 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1526 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1527 		EM_TX_LOCK(txr);
1528 		em_txeof(txr);
1529 #ifdef EM_MULTIQUEUE
1530 		if (!drbr_empty(ifp, txr->br))
1531 			em_mq_start_locked(ifp, txr, NULL);
1532 #else
1533 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1534 			em_start_locked(ifp, txr);
1535 #endif
1536 		EM_TX_UNLOCK(txr);
1537 		if (more) {
1538 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1539 			return;
1540 		}
1541 	}
1542 
1543 	em_enable_intr(adapter);
1544 	return;
1545 }
1546 
1547 
1548 /*********************************************************************
1549  *
1550  *  MSIX Interrupt Service Routines
1551  *
1552  **********************************************************************/
1553 static void
1554 em_msix_tx(void *arg)
1555 {
1556 	struct tx_ring *txr = arg;
1557 	struct adapter *adapter = txr->adapter;
1558 	struct ifnet	*ifp = adapter->ifp;
1559 
1560 	++txr->tx_irq;
1561 	EM_TX_LOCK(txr);
1562 	em_txeof(txr);
1563 #ifdef EM_MULTIQUEUE
1564 	if (!drbr_empty(ifp, txr->br))
1565 		em_mq_start_locked(ifp, txr, NULL);
1566 #else
1567 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1568 		em_start_locked(ifp, txr);
1569 #endif
1570 	/* Reenable this interrupt */
1571 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1572 	EM_TX_UNLOCK(txr);
1573 	return;
1574 }
1575 
1576 /*********************************************************************
1577  *
1578  *  MSIX RX Interrupt Service routine
1579  *
1580  **********************************************************************/
1581 
1582 static void
1583 em_msix_rx(void *arg)
1584 {
1585 	struct rx_ring	*rxr = arg;
1586 	struct adapter	*adapter = rxr->adapter;
1587 	bool		more;
1588 
1589 	++rxr->rx_irq;
1590 	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1591 		return;
1592 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1593 	if (more)
1594 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1595 	else
1596 		/* Reenable this interrupt */
1597 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1598 	return;
1599 }
1600 
1601 /*********************************************************************
1602  *
1603  *  MSIX Link Fast Interrupt Service routine
1604  *
1605  **********************************************************************/
1606 static void
1607 em_msix_link(void *arg)
1608 {
1609 	struct adapter	*adapter = arg;
1610 	u32		reg_icr;
1611 
1612 	++adapter->link_irq;
1613 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1614 
1615 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1616 		adapter->hw.mac.get_link_status = 1;
1617 		em_handle_link(adapter, 0);
1618 	} else
1619 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1620 		    EM_MSIX_LINK | E1000_IMS_LSC);
1621 	return;
1622 }
1623 
1624 static void
1625 em_handle_rx(void *context, int pending)
1626 {
1627 	struct rx_ring	*rxr = context;
1628 	struct adapter	*adapter = rxr->adapter;
1629         bool            more;
1630 
1631 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1632 	if (more)
1633 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1634 	else
1635 		/* Reenable this interrupt */
1636 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1637 }
1638 
1639 static void
1640 em_handle_tx(void *context, int pending)
1641 {
1642 	struct tx_ring	*txr = context;
1643 	struct adapter	*adapter = txr->adapter;
1644 	struct ifnet	*ifp = adapter->ifp;
1645 
1646 	EM_TX_LOCK(txr);
1647 	em_txeof(txr);
1648 #ifdef EM_MULTIQUEUE
1649 	if (!drbr_empty(ifp, txr->br))
1650 		em_mq_start_locked(ifp, txr, NULL);
1651 #else
1652 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1653 		em_start_locked(ifp, txr);
1654 #endif
1655 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1656 	EM_TX_UNLOCK(txr);
1657 }
1658 
1659 static void
1660 em_handle_link(void *context, int pending)
1661 {
1662 	struct adapter	*adapter = context;
1663 	struct tx_ring	*txr = adapter->tx_rings;
1664 	struct ifnet *ifp = adapter->ifp;
1665 
1666 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1667 		return;
1668 
1669 	EM_CORE_LOCK(adapter);
1670 	callout_stop(&adapter->timer);
1671 	em_update_link_status(adapter);
1672 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1673 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1674 	    EM_MSIX_LINK | E1000_IMS_LSC);
1675 	if (adapter->link_active) {
1676 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1677 			EM_TX_LOCK(txr);
1678 #ifdef EM_MULTIQUEUE
1679 			if (!drbr_empty(ifp, txr->br))
1680 				em_mq_start_locked(ifp, txr, NULL);
1681 #else
1682 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1683 				em_start_locked(ifp, txr);
1684 #endif
1685 			EM_TX_UNLOCK(txr);
1686 		}
1687 	}
1688 	EM_CORE_UNLOCK(adapter);
1689 }
1690 
1691 
1692 /*********************************************************************
1693  *
1694  *  Media Ioctl callback
1695  *
1696  *  This routine is called whenever the user queries the status of
1697  *  the interface using ifconfig.
1698  *
1699  **********************************************************************/
1700 static void
1701 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1702 {
1703 	struct adapter *adapter = ifp->if_softc;
1704 	u_char fiber_type = IFM_1000_SX;
1705 
1706 	INIT_DEBUGOUT("em_media_status: begin");
1707 
1708 	EM_CORE_LOCK(adapter);
1709 	em_update_link_status(adapter);
1710 
1711 	ifmr->ifm_status = IFM_AVALID;
1712 	ifmr->ifm_active = IFM_ETHER;
1713 
1714 	if (!adapter->link_active) {
1715 		EM_CORE_UNLOCK(adapter);
1716 		return;
1717 	}
1718 
1719 	ifmr->ifm_status |= IFM_ACTIVE;
1720 
1721 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1722 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1723 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1724 	} else {
1725 		switch (adapter->link_speed) {
1726 		case 10:
1727 			ifmr->ifm_active |= IFM_10_T;
1728 			break;
1729 		case 100:
1730 			ifmr->ifm_active |= IFM_100_TX;
1731 			break;
1732 		case 1000:
1733 			ifmr->ifm_active |= IFM_1000_T;
1734 			break;
1735 		}
1736 		if (adapter->link_duplex == FULL_DUPLEX)
1737 			ifmr->ifm_active |= IFM_FDX;
1738 		else
1739 			ifmr->ifm_active |= IFM_HDX;
1740 	}
1741 	EM_CORE_UNLOCK(adapter);
1742 }
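/*
 * Illustrative sketch (example only): the copper branch of
 * em_media_status() above maps link_speed to an ifmedia subtype; the
 * same table expressed as a hypothetical helper:
 */
#if 0
static inline int
em_ifm_subtype_for_speed(u16 speed)
{
	switch (speed) {
	case 10:	return (IFM_10_T);
	case 100:	return (IFM_100_TX);
	case 1000:	return (IFM_1000_T);
	default:	return (IFM_NONE);
	}
}
#endif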
1743 
1744 /*********************************************************************
1745  *
1746  *  Media Ioctl callback
1747  *
1748  *  This routine is called when the user changes speed/duplex using
1749  *  media/mediaopt options with ifconfig.
1750  *
1751  **********************************************************************/
1752 static int
1753 em_media_change(struct ifnet *ifp)
1754 {
1755 	struct adapter *adapter = ifp->if_softc;
1756 	struct ifmedia  *ifm = &adapter->media;
1757 
1758 	INIT_DEBUGOUT("em_media_change: begin");
1759 
1760 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1761 		return (EINVAL);
1762 
1763 	EM_CORE_LOCK(adapter);
1764 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1765 	case IFM_AUTO:
1766 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1767 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1768 		break;
1769 	case IFM_1000_LX:
1770 	case IFM_1000_SX:
1771 	case IFM_1000_T:
1772 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1773 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1774 		break;
1775 	case IFM_100_TX:
1776 		adapter->hw.mac.autoneg = FALSE;
1777 		adapter->hw.phy.autoneg_advertised = 0;
1778 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1779 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1780 		else
1781 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1782 		break;
1783 	case IFM_10_T:
1784 		adapter->hw.mac.autoneg = FALSE;
1785 		adapter->hw.phy.autoneg_advertised = 0;
1786 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1787 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1788 		else
1789 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1790 		break;
1791 	default:
1792 		device_printf(adapter->dev, "Unsupported media type\n");
1793 	}
1794 
1795 	em_init_locked(adapter);
1796 	EM_CORE_UNLOCK(adapter);
1797 
1798 	return (0);
1799 }
1800 
1801 /*********************************************************************
1802  *
1803  *  This routine maps the mbufs to tx descriptors.
1804  *
1805  *  return 0 on success, positive on failure
1806  **********************************************************************/
1807 
1808 static int
1809 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1810 {
1811 	struct adapter		*adapter = txr->adapter;
1812 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1813 	bus_dmamap_t		map;
1814 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1815 	struct e1000_tx_desc	*ctxd = NULL;
1816 	struct mbuf		*m_head;
1817 	struct ether_header	*eh;
1818 	struct ip		*ip = NULL;
1819 	struct tcphdr		*tp = NULL;
1820 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1821 	int			ip_off, poff;
1822 	int			nsegs, i, j, first, last = 0;
1823 	int			error, do_tso, tso_desc = 0, remap = 1;
1824 
1825 retry:
1826 	m_head = *m_headp;
1827 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1828 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1829 	ip_off = poff = 0;
1830 
1831 	/*
1832 	 * Intel recommends entire IP/TCP header length reside in a single
1833 	 * buffer. If multiple descriptors are used to describe the IP and
1834 	 * TCP header, each descriptor should describe one or more
1835 	 * complete headers; descriptors referencing only parts of headers
1836 	 * are not supported. If all layer headers are not coalesced into
1837 	 * a single buffer, each buffer should not cross a 4KB boundary,
1838 	 * or be larger than the maximum read request size.
1839 	 * The controller also requires modifying the IP/TCP header to make
1840 	 * TSO work, so we first get a writable mbuf chain and then coalesce
1841 	 * the ethernet/IP/TCP headers into a single buffer to meet the
1842 	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1843 	 * offloading, which has similar restrictions.
1844 	 */
1845 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1846 		if (do_tso || (m_head->m_next != NULL &&
1847 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1848 			if (M_WRITABLE(*m_headp) == 0) {
1849 				m_head = m_dup(*m_headp, M_NOWAIT);
1850 				m_freem(*m_headp);
1851 				if (m_head == NULL) {
1852 					*m_headp = NULL;
1853 					return (ENOBUFS);
1854 				}
1855 				*m_headp = m_head;
1856 			}
1857 		}
1858 		/*
1859 		 * XXX
1860 		 * Assume IPv4, we don't have TSO/checksum offload support
1861 		 * for IPv6 yet.
1862 		 */
1863 		ip_off = sizeof(struct ether_header);
1864 		m_head = m_pullup(m_head, ip_off);
1865 		if (m_head == NULL) {
1866 			*m_headp = NULL;
1867 			return (ENOBUFS);
1868 		}
1869 		eh = mtod(m_head, struct ether_header *);
1870 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1871 			ip_off = sizeof(struct ether_vlan_header);
1872 			m_head = m_pullup(m_head, ip_off);
1873 			if (m_head == NULL) {
1874 				*m_headp = NULL;
1875 				return (ENOBUFS);
1876 			}
1877 		}
1878 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1879 		if (m_head == NULL) {
1880 			*m_headp = NULL;
1881 			return (ENOBUFS);
1882 		}
1883 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1884 		poff = ip_off + (ip->ip_hl << 2);
1885 		if (do_tso) {
1886 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1887 			if (m_head == NULL) {
1888 				*m_headp = NULL;
1889 				return (ENOBUFS);
1890 			}
1891 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1892 			/*
1893 			 * TSO workaround:
1894 			 *   pull 4 more bytes of data into the coalesced header mbuf.
1895 			 */
1896 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1897 			if (m_head == NULL) {
1898 				*m_headp = NULL;
1899 				return (ENOBUFS);
1900 			}
1901 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1902 			ip->ip_len = 0;
1903 			ip->ip_sum = 0;
1904 			/*
1905 			 * The pseudo TCP checksum does not include the TCP
1906 			 * payload length, so the driver must recompute here
1907 			 * the checksum that the hardware expects to see, in
1908 			 * adherence to Microsoft's Large Send specification.
1909 			 */
1910 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1911 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1912 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
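			/*
			 * (Note: in_pseudo() above folds in only the
			 * addresses and the protocol number; the length
			 * word is deliberately left out, as the hardware
			 * adds each segment's own length when it rewrites
			 * the checksum per segment.)
			 */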
1913 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1914 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1915 			if (m_head == NULL) {
1916 				*m_headp = NULL;
1917 				return (ENOBUFS);
1918 			}
1919 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1920 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1921 			if (m_head == NULL) {
1922 				*m_headp = NULL;
1923 				return (ENOBUFS);
1924 			}
1925 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1926 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1927 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1928 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1929 			if (m_head == NULL) {
1930 				*m_headp = NULL;
1931 				return (ENOBUFS);
1932 			}
1933 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1934 		}
1935 		*m_headp = m_head;
1936 	}
1937 
1938 	/*
1939 	 * Map the packet for DMA
1940 	 *
1941 	 * Capture the first descriptor index;
1942 	 * this descriptor will record the index
1943 	 * of the EOP descriptor, which is the only
1944 	 * one that now gets a DONE bit writeback.
1945 	 */
1946 	first = txr->next_avail_desc;
1947 	tx_buffer = &txr->tx_buffers[first];
1948 	tx_buffer_mapped = tx_buffer;
1949 	map = tx_buffer->map;
1950 
1951 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1952 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1953 
1954 	/*
1955 	 * There are two types of errors we can (try) to handle:
1956 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1957 	 *   out of segments.  Defragment the mbuf chain and try again.
1958 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1959 	 *   at this point in time.  Defer sending and try again later.
1960 	 * All other errors, in particular EINVAL, are fatal and prevent the
1961 	 * mbuf chain from ever going through.  Drop it and report error.
1962 	 */
1963 	if (error == EFBIG && remap) {
1964 		struct mbuf *m;
1965 
1966 		m = m_defrag(*m_headp, M_NOWAIT);
1967 		if (m == NULL) {
1968 			adapter->mbuf_alloc_failed++;
1969 			m_freem(*m_headp);
1970 			*m_headp = NULL;
1971 			return (ENOBUFS);
1972 		}
1973 		*m_headp = m;
1974 
1975 		/* Try it again, but only once */
1976 		remap = 0;
1977 		goto retry;
1978 	} else if (error == ENOMEM) {
1979 		adapter->no_tx_dma_setup++;
1980 		return (error);
1981 	} else if (error != 0) {
1982 		adapter->no_tx_dma_setup++;
1983 		m_freem(*m_headp);
1984 		*m_headp = NULL;
1985 		return (error);
1986 	}
1987 
1988 	/*
1989 	 * TSO Hardware workaround, if this packet is not
1990 	 * TSO, and is only a single descriptor long, and
1991 	 * it follows a TSO burst, then we need to add a
1992 	 * sentinel descriptor to prevent premature writeback.
1993 	 */
1994 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1995 		if (nsegs == 1)
1996 			tso_desc = TRUE;
1997 		txr->tx_tso = FALSE;
1998 	}
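	/*
	 * (For clarity: the sentinel is realized further down by splitting
	 * the packet's last DMA segment so that the final descriptor covers
	 * only its last 4 bytes, preventing a premature DONE writeback.)
	 */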
1999 
2000 	if (nsegs > (txr->tx_avail - 2)) {
2001 		txr->no_desc_avail++;
2002 		bus_dmamap_unload(txr->txtag, map);
2003 		return (ENOBUFS);
2004 	}
2005 	m_head = *m_headp;
2006 
2007 	/* Do hardware assists */
2008 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2009 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2010 		    &txd_upper, &txd_lower);
2011 		/* we need to make a final sentinel transmit desc */
2012 		tso_desc = TRUE;
2013 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2014 		em_transmit_checksum_setup(txr, m_head,
2015 		    ip_off, ip, &txd_upper, &txd_lower);
2016 
2017 	if (m_head->m_flags & M_VLANTAG) {
2018 		/* Set the vlan id. */
2019 		txd_upper |=
2020 		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2021 		/* Tell hardware to add tag */
2022 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2023 	}
2024 
2025 	i = txr->next_avail_desc;
2026 
2027 	/* Set up our transmit descriptors */
2028 	for (j = 0; j < nsegs; j++) {
2029 		bus_size_t seg_len;
2030 		bus_addr_t seg_addr;
2031 
2032 		tx_buffer = &txr->tx_buffers[i];
2033 		ctxd = &txr->tx_base[i];
2034 		seg_addr = segs[j].ds_addr;
2035 		seg_len  = segs[j].ds_len;
2036 		/*
2037 		** TSO Workaround:
2038 		** If this is the last descriptor, we want to
2039 		** split it so we have a small final sentinel
2040 		*/
2041 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2042 			seg_len -= 4;
2043 			ctxd->buffer_addr = htole64(seg_addr);
2044 			ctxd->lower.data = htole32(
2045 			adapter->txd_cmd | txd_lower | seg_len);
2046 			ctxd->upper.data =
2047 			    htole32(txd_upper);
2048 			if (++i == adapter->num_tx_desc)
2049 				i = 0;
2050 			/* Now make the sentinel */
2051 			++txd_used; /* using an extra txd */
2052 			ctxd = &txr->tx_base[i];
2053 			tx_buffer = &txr->tx_buffers[i];
2054 			ctxd->buffer_addr =
2055 			    htole64(seg_addr + seg_len);
2056 			ctxd->lower.data = htole32(
2057 			adapter->txd_cmd | txd_lower | 4);
2058 			ctxd->upper.data =
2059 			    htole32(txd_upper);
2060 			last = i;
2061 			if (++i == adapter->num_tx_desc)
2062 				i = 0;
2063 		} else {
2064 			ctxd->buffer_addr = htole64(seg_addr);
2065 			ctxd->lower.data = htole32(
2066 			adapter->txd_cmd | txd_lower | seg_len);
2067 			ctxd->upper.data =
2068 			    htole32(txd_upper);
2069 			last = i;
2070 			if (++i == adapter->num_tx_desc)
2071 				i = 0;
2072 		}
2073 		tx_buffer->m_head = NULL;
2074 		tx_buffer->next_eop = -1;
2075 	}
2076 
2077 	txr->next_avail_desc = i;
2078 	txr->tx_avail -= nsegs;
2079 	if (tso_desc) /* TSO used an extra for sentinel */
2080 		txr->tx_avail -= txd_used;
2081 
2082 	tx_buffer->m_head = m_head;
2083 	/*
2084 	** Here we swap the maps so that the last descriptor,
2085 	** which gets the completion interrupt, has the
2086 	** real map, and the first descriptor gets the
2087 	** unused map from this descriptor.
2088 	*/
2089 	tx_buffer_mapped->map = tx_buffer->map;
2090 	tx_buffer->map = map;
2091 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2092 
2093 	/*
2094 	 * Last Descriptor of Packet
2095 	 * needs End Of Packet (EOP)
2096 	 * and Report Status (RS)
2097 	 */
2098 	ctxd->lower.data |=
2099 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2100 	/*
2101 	 * Keep track in the first buffer which
2102 	 * descriptor will be written back
2103 	 */
2104 	tx_buffer = &txr->tx_buffers[first];
2105 	tx_buffer->next_eop = last;
2106 	/* Update the watchdog time early and often */
2107 	txr->watchdog_time = ticks;
2108 
2109 	/*
2110 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2111 	 * that this frame is available to transmit.
2112 	 */
2113 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2114 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2115 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2116 
2117 	return (0);
2118 }
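/*
 * Illustrative sketch (example only): the EFBIG handling in em_xmit()
 * above is the usual busdma pattern of defragmenting an over-long mbuf
 * chain exactly once before giving up; em_load_one() below is a
 * hypothetical stand-in for the bus_dmamap_load_mbuf_sg() call.
 */
#if 0
static int em_load_one(struct mbuf *m);	/* hypothetical */

static int
em_load_with_defrag(struct mbuf **mp)
{
	int error = em_load_one(*mp);

	if (error == EFBIG) {			/* too many segments */
		struct mbuf *m = m_defrag(*mp, M_NOWAIT);
		if (m == NULL) {
			m_freem(*mp);
			*mp = NULL;
			return (ENOBUFS);	/* drop the packet */
		}
		*mp = m;
		error = em_load_one(*mp);	/* one retry only */
	}
	return (error);		/* ENOMEM: caller defers and retries later */
}
#endif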
2119 
2120 static void
2121 em_set_promisc(struct adapter *adapter)
2122 {
2123 	struct ifnet	*ifp = adapter->ifp;
2124 	u32		reg_rctl;
2125 
2126 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2127 
2128 	if (ifp->if_flags & IFF_PROMISC) {
2129 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2130 		/* Turn this on if you want to see bad packets */
2131 		if (em_debug_sbp)
2132 			reg_rctl |= E1000_RCTL_SBP;
2133 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2134 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2135 		reg_rctl |= E1000_RCTL_MPE;
2136 		reg_rctl &= ~E1000_RCTL_UPE;
2137 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2138 	}
2139 }
2140 
2141 static void
2142 em_disable_promisc(struct adapter *adapter)
2143 {
2144 	struct ifnet	*ifp = adapter->ifp;
2145 	u32		reg_rctl;
2146 	int		mcnt = 0;
2147 
2148 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2149 	reg_rctl &=  (~E1000_RCTL_UPE);
2150 	if (ifp->if_flags & IFF_ALLMULTI)
2151 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2152 	else {
2153 		struct  ifmultiaddr *ifma;
2154 #if __FreeBSD_version < 800000
2155 		IF_ADDR_LOCK(ifp);
2156 #else
2157 		if_maddr_rlock(ifp);
2158 #endif
2159 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2160 			if (ifma->ifma_addr->sa_family != AF_LINK)
2161 				continue;
2162 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2163 				break;
2164 			mcnt++;
2165 		}
2166 #if __FreeBSD_version < 800000
2167 		IF_ADDR_UNLOCK(ifp);
2168 #else
2169 		if_maddr_runlock(ifp);
2170 #endif
2171 	}
2172 	/* Don't disable if in MAX groups */
2173 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2174 		reg_rctl &=  (~E1000_RCTL_MPE);
2175 	reg_rctl &=  (~E1000_RCTL_SBP);
2176 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2177 }
2178 
2179 
2180 /*********************************************************************
2181  *  Multicast Update
2182  *
2183  *  This routine is called whenever multicast address list is updated.
2184  *
2185  **********************************************************************/
2186 
2187 static void
2188 em_set_multi(struct adapter *adapter)
2189 {
2190 	struct ifnet	*ifp = adapter->ifp;
2191 	struct ifmultiaddr *ifma;
2192 	u32 reg_rctl = 0;
2193 	u8  *mta; /* Multicast array memory */
2194 	int mcnt = 0;
2195 
2196 	IOCTL_DEBUGOUT("em_set_multi: begin");
2197 
2198 	mta = adapter->mta;
2199 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2200 
2201 	if (adapter->hw.mac.type == e1000_82542 &&
2202 	    adapter->hw.revision_id == E1000_REVISION_2) {
2203 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2204 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2205 			e1000_pci_clear_mwi(&adapter->hw);
2206 		reg_rctl |= E1000_RCTL_RST;
2207 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2208 		msec_delay(5);
2209 	}
2210 
2211 #if __FreeBSD_version < 800000
2212 	IF_ADDR_LOCK(ifp);
2213 #else
2214 	if_maddr_rlock(ifp);
2215 #endif
2216 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2217 		if (ifma->ifma_addr->sa_family != AF_LINK)
2218 			continue;
2219 
2220 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2221 			break;
2222 
2223 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2224 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2225 		mcnt++;
2226 	}
2227 #if __FreeBSD_version < 800000
2228 	IF_ADDR_UNLOCK(ifp);
2229 #else
2230 	if_maddr_runlock(ifp);
2231 #endif
2232 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2233 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2234 		reg_rctl |= E1000_RCTL_MPE;
2235 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2236 	} else
2237 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2238 
2239 	if (adapter->hw.mac.type == e1000_82542 &&
2240 	    adapter->hw.revision_id == E1000_REVISION_2) {
2241 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2242 		reg_rctl &= ~E1000_RCTL_RST;
2243 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2244 		msec_delay(5);
2245 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2246 			e1000_pci_set_mwi(&adapter->hw);
2247 	}
2248 }
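/*
 * (Layout note, for illustration: the table passed to
 * e1000_update_mc_addr_list() above is a flat array of 6-byte entries,
 * so multicast address i lives at &mta[i * ETH_ADDR_LEN]:
 *
 *	mta: [addr0][addr1]...[addr(mcnt-1)], each ETH_ADDR_LEN bytes.)
 */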
2249 
2250 
2251 /*********************************************************************
2252  *  Timer routine
2253  *
2254  *  This routine checks for link status and updates statistics.
2255  *
2256  **********************************************************************/
2257 
2258 static void
2259 em_local_timer(void *arg)
2260 {
2261 	struct adapter	*adapter = arg;
2262 	struct ifnet	*ifp = adapter->ifp;
2263 	struct tx_ring	*txr = adapter->tx_rings;
2264 	struct rx_ring	*rxr = adapter->rx_rings;
2265 	u32		trigger;
2266 
2267 	EM_CORE_LOCK_ASSERT(adapter);
2268 
2269 	em_update_link_status(adapter);
2270 	em_update_stats_counters(adapter);
2271 
2272 	/* Reset LAA into RAR[0] on 82571 */
2273 	if ((adapter->hw.mac.type == e1000_82571) &&
2274 	    e1000_get_laa_state_82571(&adapter->hw))
2275 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2276 
2277 	/* Mask to use in the irq trigger */
2278 	if (adapter->msix_mem)
2279 		trigger = rxr->ims; /* RX for 82574 */
2280 	else
2281 		trigger = E1000_ICS_RXDMT0;
2282 
2283 	/*
2284 	** Check on the state of the TX queue(s), this
2285 	** can be done without the lock because its RO
2286 	** and the HUNG state will be static if set.
2287 	*/
2288 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2289 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2290 		    (adapter->pause_frames == 0))
2291 			goto hung;
2292 		/* Schedule a TX tasklet if needed */
2293 		if (txr->tx_avail <= EM_MAX_SCATTER)
2294 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2295 	}
2296 
2297 	adapter->pause_frames = 0;
2298 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2299 #ifndef DEVICE_POLLING
2300 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2301 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2302 #endif
2303 	return;
2304 hung:
2305 	/* Looks like we're hung */
2306 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2307 	device_printf(adapter->dev,
2308 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2309 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2310 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2311 	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2312 	    "Next TX to Clean = %d\n",
2313 	    txr->me, txr->tx_avail, txr->next_to_clean);
2314 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2315 	adapter->watchdog_events++;
2316 	adapter->pause_frames = 0;
2317 	em_init_locked(adapter);
2318 }
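/*
 * Illustrative sketch (example only, hypothetical helper): the hang test
 * above only fires when the TX cleanup path has already marked a queue
 * EM_QUEUE_HUNG *and* no pause frames were seen, since flow control can
 * legitimately stall transmission without the queue being wedged:
 */
#if 0
static inline bool
em_queue_looks_hung(const struct tx_ring *txr, u32 pause_frames)
{
	return ((txr->queue_status == EM_QUEUE_HUNG) && (pause_frames == 0));
}
#endif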
2319 
2320 
2321 static void
2322 em_update_link_status(struct adapter *adapter)
2323 {
2324 	struct e1000_hw *hw = &adapter->hw;
2325 	struct ifnet *ifp = adapter->ifp;
2326 	device_t dev = adapter->dev;
2327 	struct tx_ring *txr = adapter->tx_rings;
2328 	u32 link_check = 0;
2329 
2330 	/* Get the cached link value or read phy for real */
2331 	switch (hw->phy.media_type) {
2332 	case e1000_media_type_copper:
2333 		if (hw->mac.get_link_status) {
2334 			/* Do the work to read phy */
2335 			e1000_check_for_link(hw);
2336 			link_check = !hw->mac.get_link_status;
2337 			if (link_check) /* ESB2 fix */
2338 				e1000_cfg_on_link_up(hw);
2339 		} else
2340 			link_check = TRUE;
2341 		break;
2342 	case e1000_media_type_fiber:
2343 		e1000_check_for_link(hw);
2344 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2345 		    E1000_STATUS_LU);
2346 		break;
2347 	case e1000_media_type_internal_serdes:
2348 		e1000_check_for_link(hw);
2349 		link_check = adapter->hw.mac.serdes_has_link;
2350 		break;
2351 	default:
2352 	case e1000_media_type_unknown:
2353 		break;
2354 	}
2355 
2356 	/* Now check for a transition */
2357 	if (link_check && (adapter->link_active == 0)) {
2358 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2359 		    &adapter->link_duplex);
2360 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2361 		if ((adapter->link_speed != SPEED_1000) &&
2362 		    ((hw->mac.type == e1000_82571) ||
2363 		    (hw->mac.type == e1000_82572))) {
2364 			int tarc0;
2365 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2366 			tarc0 &= ~SPEED_MODE_BIT;
2367 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2368 		}
2369 		if (bootverbose)
2370 			device_printf(dev, "Link is up %d Mbps %s\n",
2371 			    adapter->link_speed,
2372 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2373 			    "Full Duplex" : "Half Duplex"));
2374 		adapter->link_active = 1;
2375 		adapter->smartspeed = 0;
2376 		ifp->if_baudrate = adapter->link_speed * 1000000;
2377 		if_link_state_change(ifp, LINK_STATE_UP);
2378 	} else if (!link_check && (adapter->link_active == 1)) {
2379 		ifp->if_baudrate = adapter->link_speed = 0;
2380 		adapter->link_duplex = 0;
2381 		if (bootverbose)
2382 			device_printf(dev, "Link is Down\n");
2383 		adapter->link_active = 0;
2384 		/* Link down, disable watchdog */
2385 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2386 			txr->queue_status = EM_QUEUE_IDLE;
2387 		if_link_state_change(ifp, LINK_STATE_DOWN);
2388 	}
2389 }
2390 
2391 /*********************************************************************
2392  *
2393  *  This routine disables all traffic on the adapter by issuing a
2394  *  global reset on the MAC and deallocates TX/RX buffers.
2395  *
2396  *  This routine should always be called with BOTH the CORE
2397  *  and TX locks.
2398  **********************************************************************/
2399 
2400 static void
2401 em_stop(void *arg)
2402 {
2403 	struct adapter	*adapter = arg;
2404 	struct ifnet	*ifp = adapter->ifp;
2405 	struct tx_ring	*txr = adapter->tx_rings;
2406 
2407 	EM_CORE_LOCK_ASSERT(adapter);
2408 
2409 	INIT_DEBUGOUT("em_stop: begin");
2410 
2411 	em_disable_intr(adapter);
2412 	callout_stop(&adapter->timer);
2413 
2414 	/* Tell the stack that the interface is no longer active */
2415 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2416 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2417 
2418 	/* Unarm watchdog timer. */
2419 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2420 		EM_TX_LOCK(txr);
2421 		txr->queue_status = EM_QUEUE_IDLE;
2422 		EM_TX_UNLOCK(txr);
2423 	}
2424 
2425 	e1000_reset_hw(&adapter->hw);
2426 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2427 
2428 	e1000_led_off(&adapter->hw);
2429 	e1000_cleanup_led(&adapter->hw);
2430 }
2431 
2432 
2433 /*********************************************************************
2434  *
2435  *  Determine hardware revision.
2436  *
2437  **********************************************************************/
2438 static void
2439 em_identify_hardware(struct adapter *adapter)
2440 {
2441 	device_t dev = adapter->dev;
2442 
2443 	/* Make sure our PCI config space has the necessary stuff set */
2444 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2445 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2446 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2447 		device_printf(dev, "Memory Access and/or Bus Master bits "
2448 		    "were not set!\n");
2449 		adapter->hw.bus.pci_cmd_word |=
2450 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2451 		pci_write_config(dev, PCIR_COMMAND,
2452 		    adapter->hw.bus.pci_cmd_word, 2);
2453 	}
2454 
2455 	/* Save off the information about this board */
2456 	adapter->hw.vendor_id = pci_get_vendor(dev);
2457 	adapter->hw.device_id = pci_get_device(dev);
2458 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2459 	adapter->hw.subsystem_vendor_id =
2460 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2461 	adapter->hw.subsystem_device_id =
2462 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2463 
2464 	/* Do Shared Code Init and Setup */
2465 	if (e1000_set_mac_type(&adapter->hw)) {
2466 		device_printf(dev, "Setup init failure\n");
2467 		return;
2468 	}
2469 }
2470 
2471 static int
2472 em_allocate_pci_resources(struct adapter *adapter)
2473 {
2474 	device_t	dev = adapter->dev;
2475 	int		rid;
2476 
2477 	rid = PCIR_BAR(0);
2478 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2479 	    &rid, RF_ACTIVE);
2480 	if (adapter->memory == NULL) {
2481 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2482 		return (ENXIO);
2483 	}
2484 	adapter->osdep.mem_bus_space_tag =
2485 	    rman_get_bustag(adapter->memory);
2486 	adapter->osdep.mem_bus_space_handle =
2487 	    rman_get_bushandle(adapter->memory);
2488 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2489 
2490 	/* Default to a single queue */
2491 	adapter->num_queues = 1;
2492 
2493 	/*
2494 	 * Setup MSI/X or MSI if PCI Express
2495 	 */
2496 	adapter->msix = em_setup_msix(adapter);
2497 
2498 	adapter->hw.back = &adapter->osdep;
2499 
2500 	return (0);
2501 }
2502 
2503 /*********************************************************************
2504  *
2505  *  Setup the Legacy or MSI Interrupt handler
2506  *
2507  **********************************************************************/
2508 int
2509 em_allocate_legacy(struct adapter *adapter)
2510 {
2511 	device_t dev = adapter->dev;
2512 	struct tx_ring	*txr = adapter->tx_rings;
2513 	int error, rid = 0;
2514 
2515 	/* Manually turn off all interrupts */
2516 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2517 
2518 	if (adapter->msix == 1) /* using MSI */
2519 		rid = 1;
2520 	/* We allocate a single interrupt resource */
2521 	adapter->res = bus_alloc_resource_any(dev,
2522 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2523 	if (adapter->res == NULL) {
2524 		device_printf(dev, "Unable to allocate bus resource: "
2525 		    "interrupt\n");
2526 		return (ENXIO);
2527 	}
2528 
2529 	/*
2530 	 * Allocate a fast interrupt and the associated
2531 	 * deferred processing contexts.
2532 	 */
2533 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2534 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2535 	    taskqueue_thread_enqueue, &adapter->tq);
2536 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2537 	    device_get_nameunit(adapter->dev));
2538 	/* Use a TX only tasklet for local timer */
2539 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2540 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2541 	    taskqueue_thread_enqueue, &txr->tq);
2542 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2543 	    device_get_nameunit(adapter->dev));
2544 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2545 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2546 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2547 		device_printf(dev, "Failed to register fast interrupt "
2548 			    "handler: %d\n", error);
2549 		taskqueue_free(adapter->tq);
2550 		adapter->tq = NULL;
2551 		return (error);
2552 	}
2553 
2554 	return (0);
2555 }
2556 
2557 /*********************************************************************
2558  *
2559  *  Setup the MSIX Interrupt handlers
2560  *   This is not really multiqueue; rather,
2561  *   it is just separate interrupt vectors
2562  *   for TX, RX, and Link.
2563  *
2564  **********************************************************************/
2565 int
2566 em_allocate_msix(struct adapter *adapter)
2567 {
2568 	device_t	dev = adapter->dev;
2569 	struct		tx_ring *txr = adapter->tx_rings;
2570 	struct		rx_ring *rxr = adapter->rx_rings;
2571 	int		error, rid, vector = 0;
2572 
2573 
2574 	/* Make sure all interrupts are disabled */
2575 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2576 
2577 	/* First set up ring resources */
2578 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2579 
2580 		/* RX ring */
2581 		rid = vector + 1;
2582 
2583 		rxr->res = bus_alloc_resource_any(dev,
2584 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2585 		if (rxr->res == NULL) {
2586 			device_printf(dev,
2587 			    "Unable to allocate bus resource: "
2588 			    "RX MSIX Interrupt %d\n", i);
2589 			return (ENXIO);
2590 		}
2591 		if ((error = bus_setup_intr(dev, rxr->res,
2592 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2593 		    rxr, &rxr->tag)) != 0) {
2594 			device_printf(dev, "Failed to register RX handler");
2595 			return (error);
2596 		}
2597 #if __FreeBSD_version >= 800504
2598 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2599 #endif
2600 		rxr->msix = vector++; /* NOTE increment vector for TX */
2601 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2602 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2603 		    taskqueue_thread_enqueue, &rxr->tq);
2604 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2605 		    device_get_nameunit(adapter->dev));
2606 		/*
2607 		** Set the bit to enable interrupt
2608 		** in E1000_IMS -- bits 20 and 21
2609 		** are for RX0 and RX1, note this has
2610 		** NOTHING to do with the MSIX vector
2611 		*/
2612 		rxr->ims = 1 << (20 + i);
2613 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2614 
2615 		/* TX ring */
2616 		rid = vector + 1;
2617 		txr->res = bus_alloc_resource_any(dev,
2618 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2619 		if (txr->res == NULL) {
2620 			device_printf(dev,
2621 			    "Unable to allocate bus resource: "
2622 			    "TX MSIX Interrupt %d\n", i);
2623 			return (ENXIO);
2624 		}
2625 		if ((error = bus_setup_intr(dev, txr->res,
2626 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2627 		    txr, &txr->tag)) != 0) {
2628 			device_printf(dev, "Failed to register TX handler");
2629 			return (error);
2630 		}
2631 #if __FreeBSD_version >= 800504
2632 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2633 #endif
2634 		txr->msix = vector++; /* Increment vector for next pass */
2635 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2636 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2637 		    taskqueue_thread_enqueue, &txr->tq);
2638 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2639 		    device_get_nameunit(adapter->dev));
2640 		/*
2641 		** Set the bit to enable interrupt
2642 		** in E1000_IMS -- bits 22 and 23
2643 		** are for TX0 and TX1, note this has
2644 		** NOTHING to do with the MSIX vector
2645 		*/
2646 		txr->ims = 1 << (22 + i);
2647 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2648 	}
2649 
2650 	/* Link interrupt */
2651 	++rid;
2652 	adapter->res = bus_alloc_resource_any(dev,
2653 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2654 	if (!adapter->res) {
2655 		device_printf(dev,"Unable to allocate "
2656 		    "bus resource: Link interrupt [%d]\n", rid);
2657 		return (ENXIO);
2658 	}
2659 	/* Set the link handler function */
2660 	error = bus_setup_intr(dev, adapter->res,
2661 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2662 	    em_msix_link, adapter, &adapter->tag);
2663 	if (error) {
2664 		adapter->res = NULL;
2665 		device_printf(dev, "Failed to register LINK handler");
2666 		return (error);
2667 	}
2668 #if __FreeBSD_version >= 800504
2669 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2670 #endif
2671 	adapter->linkvec = vector;
2672 	adapter->ivars |=  (8 | vector) << 16;
2673 	adapter->ivars |= 0x80000000;
2674 
2675 	return (0);
2676 }
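/*
 * Illustrative sketch (example only): on the 82574 the IMS bits and IVAR
 * fields programmed above follow a fixed layout -- RX queues own IMS bits
 * 20/21, TX queues bits 22/23, and each 4-bit IVAR field holds the vector
 * number with bit 3 (0x8) marking it valid.  Hypothetical helpers:
 */
#if 0
static inline u32 em_ims_rx_bit(int queue) { return (1u << (20 + queue)); }
static inline u32 em_ims_tx_bit(int queue) { return (1u << (22 + queue)); }
static inline u32
em_ivar_entry(int vector, int shift)	/* shift: 0=RX0, 4=RX1, 8=TX0 ... */
{
	return ((u32)(8 | vector) << shift);
}
#endif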
2677 
2678 
2679 static void
2680 em_free_pci_resources(struct adapter *adapter)
2681 {
2682 	device_t	dev = adapter->dev;
2683 	struct tx_ring	*txr;
2684 	struct rx_ring	*rxr;
2685 	int		rid;
2686 
2687 
2688 	/*
2689 	** Release all the queue interrupt resources:
2690 	*/
2691 	for (int i = 0; i < adapter->num_queues; i++) {
2692 		txr = &adapter->tx_rings[i];
2693 		rxr = &adapter->rx_rings[i];
2694 		/* an early abort? */
2695 		if ((txr == NULL) || (rxr == NULL))
2696 			break;
2697 		rid = txr->msix +1;
2698 		if (txr->tag != NULL) {
2699 			bus_teardown_intr(dev, txr->res, txr->tag);
2700 			txr->tag = NULL;
2701 		}
2702 		if (txr->res != NULL)
2703 			bus_release_resource(dev, SYS_RES_IRQ,
2704 			    rid, txr->res);
2705 		rid = rxr->msix +1;
2706 		if (rxr->tag != NULL) {
2707 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2708 			rxr->tag = NULL;
2709 		}
2710 		if (rxr->res != NULL)
2711 			bus_release_resource(dev, SYS_RES_IRQ,
2712 			    rid, rxr->res);
2713 	}
2714 
2715 	if (adapter->linkvec) /* we are doing MSIX */
2716 		rid = adapter->linkvec + 1;
2717 	else
2718 		rid = (adapter->msix != 0) ? 1 : 0;
2719 
2720 	if (adapter->tag != NULL) {
2721 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2722 		adapter->tag = NULL;
2723 	}
2724 
2725 	if (adapter->res != NULL)
2726 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2727 
2728 
2729 	if (adapter->msix)
2730 		pci_release_msi(dev);
2731 
2732 	if (adapter->msix_mem != NULL)
2733 		bus_release_resource(dev, SYS_RES_MEMORY,
2734 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2735 
2736 	if (adapter->memory != NULL)
2737 		bus_release_resource(dev, SYS_RES_MEMORY,
2738 		    PCIR_BAR(0), adapter->memory);
2739 
2740 	if (adapter->flash != NULL)
2741 		bus_release_resource(dev, SYS_RES_MEMORY,
2742 		    EM_FLASH, adapter->flash);
2743 }
2744 
2745 /*
2746  * Setup MSI or MSI/X
2747  */
2748 static int
2749 em_setup_msix(struct adapter *adapter)
2750 {
2751 	device_t dev = adapter->dev;
2752 	int val = 0;
2753 
2754 	/*
2755 	** Setup MSI/X for Hartwell: tests have shown
2756 	** use of two queues to be unstable, and to
2757 	** provide no great gain anyway, so we simply
2758 	** separate the interrupts and use a single queue.
2759 	*/
2760 	if ((adapter->hw.mac.type == e1000_82574) &&
2761 	    (em_enable_msix == TRUE)) {
2762 		/* Map the MSIX BAR */
2763 		int rid = PCIR_BAR(EM_MSIX_BAR);
2764 		adapter->msix_mem = bus_alloc_resource_any(dev,
2765 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2766 		if (!adapter->msix_mem) {
2767 			/* May not be enabled */
2768 			device_printf(adapter->dev,
2769 			    "Unable to map MSIX table\n");
2770 			goto msi;
2771 		}
2772 		val = pci_msix_count(dev);
2773 		/* We only need 3 vectors */
2774 		if (val > 3)
2775 			val = 3;
2776 		if ((val != 3) && (val != 5)) {
2777 			bus_release_resource(dev, SYS_RES_MEMORY,
2778 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2779 			adapter->msix_mem = NULL;
2780 			device_printf(adapter->dev,
2781 			    "MSIX: incorrect vectors, using MSI\n");
2782 			goto msi;
2783 		}
2784 
2785 		if (pci_alloc_msix(dev, &val) == 0) {
2786 			device_printf(adapter->dev,
2787 			    "Using MSIX interrupts "
2788 			    "with %d vectors\n", val);
2789 		}
2790 
2791 		return (val);
2792 	}
2793 msi:
2794 	val = pci_msi_count(dev);
2795 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2796 		adapter->msix = 1;
2797 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2798 		return (val);
2799 	}
2800 	/* Should only happen due to manual configuration */
2801 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2802 	return (0);
2803 }
2804 
2805 
2806 /*********************************************************************
2807  *
2808  *  Initialize the hardware to a configuration
2809  *  as specified by the adapter structure.
2810  *
2811  **********************************************************************/
2812 static void
2813 em_reset(struct adapter *adapter)
2814 {
2815 	device_t	dev = adapter->dev;
2816 	struct ifnet	*ifp = adapter->ifp;
2817 	struct e1000_hw	*hw = &adapter->hw;
2818 	u16		rx_buffer_size;
2819 	u32		pba;
2820 
2821 	INIT_DEBUGOUT("em_reset: begin");
2822 
2823 	/* Set up smart power down as default off on newer adapters. */
2824 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2825 	    hw->mac.type == e1000_82572)) {
2826 		u16 phy_tmp = 0;
2827 
2828 		/* Speed up time to link by disabling smart power down. */
2829 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2830 		phy_tmp &= ~IGP02E1000_PM_SPD;
2831 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2832 	}
2833 
2834 	/*
2835 	 * Packet Buffer Allocation (PBA)
2836 	 * Writing PBA sets the receive portion of the buffer;
2837 	 * the remainder is used for the transmit buffer.
2838 	 */
2839 	switch (hw->mac.type) {
2840 	/* Total Packet Buffer on these is 48K */
2841 	case e1000_82571:
2842 	case e1000_82572:
2843 	case e1000_80003es2lan:
2844 			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2845 		break;
2846 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2847 			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2848 		break;
2849 	case e1000_82574:
2850 	case e1000_82583:
2851 			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2852 		break;
2853 	case e1000_ich8lan:
2854 		pba = E1000_PBA_8K;
2855 		break;
2856 	case e1000_ich9lan:
2857 	case e1000_ich10lan:
2858 		/* Boost Receive side for jumbo frames */
2859 		if (adapter->hw.mac.max_frame_size > 4096)
2860 			pba = E1000_PBA_14K;
2861 		else
2862 			pba = E1000_PBA_10K;
2863 		break;
2864 	case e1000_pchlan:
2865 	case e1000_pch2lan:
2866 	case e1000_pch_lpt:
2867 		pba = E1000_PBA_26K;
2868 		break;
2869 	default:
2870 		if (adapter->hw.mac.max_frame_size > 8192)
2871 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2872 		else
2873 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2874 	}
2875 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2876 
2877 	/*
2878 	 * These parameters control the automatic generation (Tx) and
2879 	 * response (Rx) to Ethernet PAUSE frames.
2880 	 * - High water mark should allow for at least two frames to be
2881 	 *   received after sending an XOFF.
2882 	 * - Low water mark works best when it is very near the high water mark.
2883 	 *   This allows the receiver to restart by sending XON when it has
2884 	 *   drained a bit. Here we use an arbitrary value of 1500, which will
2885 	 *   restart after one full frame is pulled from the buffer. There
2886 	 *   could be several smaller frames in the buffer and if so they will
2887 	 *   not trigger the XON until their total number reduces the buffer
2888 	 *   by 1500.
2889 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2890 	 */
2891 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2892 	hw->fc.high_water = rx_buffer_size -
2893 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2894 	hw->fc.low_water = hw->fc.high_water - 1500;
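	/*
	 * (Worked example, illustrative: assuming a 32KB RX allocation and
	 * a standard 1522-byte max frame, rx_buffer_size = 32 << 10 = 32768
	 * and roundup2(1522, 1024) = 2048, giving high_water = 30720 and
	 * low_water = 29220.)
	 */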
2895 
2896 	if (adapter->fc) /* locally set flow control value? */
2897 		hw->fc.requested_mode = adapter->fc;
2898 	else
2899 		hw->fc.requested_mode = e1000_fc_full;
2900 
2901 	if (hw->mac.type == e1000_80003es2lan)
2902 		hw->fc.pause_time = 0xFFFF;
2903 	else
2904 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2905 
2906 	hw->fc.send_xon = TRUE;
2907 
2908 	/* Device specific overrides/settings */
2909 	switch (hw->mac.type) {
2910 	case e1000_pchlan:
2911 		/* Workaround: no TX flow ctrl for PCH */
2912 		hw->fc.requested_mode = e1000_fc_rx_pause;
2913 		hw->fc.pause_time = 0xFFFF; /* override */
2914 		if (ifp->if_mtu > ETHERMTU) {
2915 			hw->fc.high_water = 0x3500;
2916 			hw->fc.low_water = 0x1500;
2917 		} else {
2918 			hw->fc.high_water = 0x5000;
2919 			hw->fc.low_water = 0x3000;
2920 		}
2921 		hw->fc.refresh_time = 0x1000;
2922 		break;
2923 	case e1000_pch2lan:
2924 	case e1000_pch_lpt:
2925 		hw->fc.high_water = 0x5C20;
2926 		hw->fc.low_water = 0x5048;
2927 		hw->fc.pause_time = 0x0650;
2928 		hw->fc.refresh_time = 0x0400;
2929 		/* Jumbos need adjusted PBA */
2930 		if (ifp->if_mtu > ETHERMTU)
2931 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2932 		else
2933 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2934 		break;
2935 	case e1000_ich9lan:
2936 	case e1000_ich10lan:
2937 		if (ifp->if_mtu > ETHERMTU) {
2938 			hw->fc.high_water = 0x2800;
2939 			hw->fc.low_water = hw->fc.high_water - 8;
2940 			break;
2941 		}
2942 		/* else fall thru */
2943 	default:
2944 		if (hw->mac.type == e1000_80003es2lan)
2945 			hw->fc.pause_time = 0xFFFF;
2946 		break;
2947 	}
2948 
2949 	/* Issue a global reset */
2950 	e1000_reset_hw(hw);
2951 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2952 	em_disable_aspm(adapter);
2953 	/* and a re-init */
2954 	if (e1000_init_hw(hw) < 0) {
2955 		device_printf(dev, "Hardware Initialization Failed\n");
2956 		return;
2957 	}
2958 
2959 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2960 	e1000_get_phy_info(hw);
2961 	e1000_check_for_link(hw);
2962 	return;
2963 }
2964 
2965 /*********************************************************************
2966  *
2967  *  Setup networking device structure and register an interface.
2968  *
2969  **********************************************************************/
2970 static int
2971 em_setup_interface(device_t dev, struct adapter *adapter)
2972 {
2973 	struct ifnet   *ifp;
2974 
2975 	INIT_DEBUGOUT("em_setup_interface: begin");
2976 
2977 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2978 	if (ifp == NULL) {
2979 		device_printf(dev, "can not allocate ifnet structure\n");
2980 		return (-1);
2981 	}
2982 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2983 	ifp->if_init =  em_init;
2984 	ifp->if_softc = adapter;
2985 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2986 	ifp->if_ioctl = em_ioctl;
2987 #ifdef EM_MULTIQUEUE
2988 	/* Multiqueue stack interface */
2989 	ifp->if_transmit = em_mq_start;
2990 	ifp->if_qflush = em_qflush;
2991 #else
2992 	ifp->if_start = em_start;
2993 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2994 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2995 	IFQ_SET_READY(&ifp->if_snd);
2996 #endif
2997 
2998 	ether_ifattach(ifp, adapter->hw.mac.addr);
2999 
3000 	ifp->if_capabilities = ifp->if_capenable = 0;
3001 
3002 
3003 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3004 	ifp->if_capabilities |= IFCAP_TSO4;
3005 	/*
3006 	 * Tell the upper layer(s) we
3007 	 * support full VLAN capability
3008 	 */
3009 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3010 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3011 			     |  IFCAP_VLAN_HWTSO
3012 			     |  IFCAP_VLAN_MTU;
3013 	ifp->if_capenable = ifp->if_capabilities;
3014 
3015 	/*
3016 	** Don't turn this on by default: if vlans are
3017 	** created on another pseudo device (e.g. lagg),
3018 	** vlan events are not passed through, breaking
3019 	** operation, whereas with HW FILTER off it works. If
3020 	** you use vlans directly on the em driver you can
3021 	** enable this and get full hardware tag filtering.
3022 	*/
3023 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3024 
3025 #ifdef DEVICE_POLLING
3026 	ifp->if_capabilities |= IFCAP_POLLING;
3027 #endif
3028 
3029 	/* Enable only WOL MAGIC by default */
3030 	if (adapter->wol) {
3031 		ifp->if_capabilities |= IFCAP_WOL;
3032 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3033 	}
3034 
3035 	/*
3036 	 * Specify the media types supported by this adapter and register
3037 	 * callbacks to update media and link information
3038 	 */
3039 	ifmedia_init(&adapter->media, IFM_IMASK,
3040 	    em_media_change, em_media_status);
3041 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3042 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3043 		u_char fiber_type = IFM_1000_SX;	/* default type */
3044 
3045 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3046 			    0, NULL);
3047 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3048 	} else {
3049 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3050 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3051 			    0, NULL);
3052 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3053 			    0, NULL);
3054 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3055 			    0, NULL);
3056 		if (adapter->hw.phy.type != e1000_phy_ife) {
3057 			ifmedia_add(&adapter->media,
3058 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3059 			ifmedia_add(&adapter->media,
3060 				IFM_ETHER | IFM_1000_T, 0, NULL);
3061 		}
3062 	}
3063 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3064 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3065 	return (0);
3066 }
3067 
3068 
3069 /*
3070  * Manage DMA'able memory.
3071  */
3072 static void
3073 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3074 {
3075 	if (error)
3076 		return;
3077 	*(bus_addr_t *) arg = segs[0].ds_addr;
3078 }
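/*
 * (The callback interface exists because bus_dmamap_load() may complete
 * asynchronously; since the load in em_dma_malloc() below is issued with
 * BUS_DMA_NOWAIT, em_dmamap_cb() runs synchronously and simply records
 * the single segment's physical address for the caller.)
 */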
3079 
3080 static int
3081 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3082         struct em_dma_alloc *dma, int mapflags)
3083 {
3084 	int error;
3085 
3086 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3087 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3088 				BUS_SPACE_MAXADDR,	/* lowaddr */
3089 				BUS_SPACE_MAXADDR,	/* highaddr */
3090 				NULL, NULL,		/* filter, filterarg */
3091 				size,			/* maxsize */
3092 				1,			/* nsegments */
3093 				size,			/* maxsegsize */
3094 				0,			/* flags */
3095 				NULL,			/* lockfunc */
3096 				NULL,			/* lockarg */
3097 				&dma->dma_tag);
3098 	if (error) {
3099 		device_printf(adapter->dev,
3100 		    "%s: bus_dma_tag_create failed: %d\n",
3101 		    __func__, error);
3102 		goto fail_0;
3103 	}
3104 
3105 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3106 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3107 	if (error) {
3108 		device_printf(adapter->dev,
3109 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3110 		    __func__, (uintmax_t)size, error);
3111 		goto fail_2;
3112 	}
3113 
3114 	dma->dma_paddr = 0;
3115 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3116 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3117 	if (error || dma->dma_paddr == 0) {
3118 		device_printf(adapter->dev,
3119 		    "%s: bus_dmamap_load failed: %d\n",
3120 		    __func__, error);
3121 		goto fail_3;
3122 	}
3123 
3124 	return (0);
3125 
3126 fail_3:
3127 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3128 fail_2:
3129 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3130 	bus_dma_tag_destroy(dma->dma_tag);
3131 fail_0:
3132 	dma->dma_map = NULL;
3133 	dma->dma_tag = NULL;
3134 
3135 	return (error);
3136 }
3137 
3138 static void
3139 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3140 {
3141 	if (dma->dma_tag == NULL)
3142 		return;
3143 	if (dma->dma_map != NULL) {
3144 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3145 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3146 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3147 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3148 		dma->dma_map = NULL;
3149 	}
3150 	bus_dma_tag_destroy(dma->dma_tag);
3151 	dma->dma_tag = NULL;
3152 }
3153 
3154 
3155 /*********************************************************************
3156  *
3157  *  Allocate memory for the transmit and receive rings, and then
3158  *  the descriptors associated with each, called only once at attach.
3159  *
3160  **********************************************************************/
3161 static int
3162 em_allocate_queues(struct adapter *adapter)
3163 {
3164 	device_t		dev = adapter->dev;
3165 	struct tx_ring		*txr = NULL;
3166 	struct rx_ring		*rxr = NULL;
3167 	int rsize, tsize, error = E1000_SUCCESS;
3168 	int txconf = 0, rxconf = 0;
3169 
3170 
3171 	/* Allocate the TX ring struct memory */
3172 	if (!(adapter->tx_rings =
3173 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3174 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3175 		device_printf(dev, "Unable to allocate TX ring memory\n");
3176 		error = ENOMEM;
3177 		goto fail;
3178 	}
3179 
3180 	/* Now allocate the RX */
3181 	if (!(adapter->rx_rings =
3182 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3183 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3184 		device_printf(dev, "Unable to allocate RX ring memory\n");
3185 		error = ENOMEM;
3186 		goto rx_fail;
3187 	}
3188 
3189 	tsize = roundup2(adapter->num_tx_desc *
3190 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3191 	/*
3192 	 * Now set up the TX queues, txconf is needed to handle the
3193 	 * possibility that things fail midcourse and we need to
3194 	 * undo memory gracefully
3195 	 */
3196 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3197 		/* Set up some basics */
3198 		txr = &adapter->tx_rings[i];
3199 		txr->adapter = adapter;
3200 		txr->me = i;
3201 
3202 		/* Initialize the TX lock */
3203 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3204 		    device_get_nameunit(dev), txr->me);
3205 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3206 
3207 		if (em_dma_malloc(adapter, tsize,
3208 			&txr->txdma, BUS_DMA_NOWAIT)) {
3209 			device_printf(dev,
3210 			    "Unable to allocate TX Descriptor memory\n");
3211 			error = ENOMEM;
3212 			goto err_tx_desc;
3213 		}
3214 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3215 		bzero((void *)txr->tx_base, tsize);
3216 
3217 		if (em_allocate_transmit_buffers(txr)) {
3218 			device_printf(dev,
3219 			    "Critical Failure setting up transmit buffers\n");
3220 			error = ENOMEM;
3221 			goto err_tx_desc;
3222 		}
3223 #if __FreeBSD_version >= 800000
3224 		/* Allocate a buf ring */
3225 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3226 		    M_WAITOK, &txr->tx_mtx);
3227 #endif
3228 	}
3229 
3230 	/*
3231 	 * Next the RX queues...
3232 	 */
3233 	rsize = roundup2(adapter->num_rx_desc *
3234 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3235 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3236 		rxr = &adapter->rx_rings[i];
3237 		rxr->adapter = adapter;
3238 		rxr->me = i;
3239 
3240 		/* Initialize the RX lock */
3241 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3242 		    device_get_nameunit(dev), rxr->me);
3243 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3244 
3245 		if (em_dma_malloc(adapter, rsize,
3246 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3247 			device_printf(dev,
3248 			    "Unable to allocate RxDescriptor memory\n");
3249 			error = ENOMEM;
3250 			goto err_rx_desc;
3251 		}
3252 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3253 		bzero((void *)rxr->rx_base, rsize);
3254 
3255 		/* Allocate receive buffers for the ring */
3256 		if (em_allocate_receive_buffers(rxr)) {
3257 			device_printf(dev,
3258 			    "Critical Failure setting up receive buffers\n");
3259 			error = ENOMEM;
3260 			goto err_rx_desc;
3261 		}
3262 	}
3263 
3264 	return (0);
3265 
3266 err_rx_desc:
3267 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3268 		em_dma_free(adapter, &rxr->rxdma);
3269 err_tx_desc:
3270 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3271 		em_dma_free(adapter, &txr->txdma);
3272 	free(adapter->rx_rings, M_DEVBUF);
3273 rx_fail:
3274 #if __FreeBSD_version >= 800000
3275 	buf_ring_free(txr->br, M_DEVBUF);
3276 #endif
3277 	free(adapter->tx_rings, M_DEVBUF);
3278 fail:
3279 	return (error);
3280 }
3281 
3282 
3283 /*********************************************************************
3284  *
3285  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3286  *  the information needed to transmit a packet on the wire. This is
3287  *  called only once at attach, setup is done every reset.
3288  *
3289  **********************************************************************/
3290 static int
3291 em_allocate_transmit_buffers(struct tx_ring *txr)
3292 {
3293 	struct adapter *adapter = txr->adapter;
3294 	device_t dev = adapter->dev;
3295 	struct em_buffer *txbuf;
3296 	int error, i;
3297 
3298 	/*
3299 	 * Setup DMA descriptor areas.
3300 	 */
3301 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3302 			       1, 0,			/* alignment, bounds */
3303 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3304 			       BUS_SPACE_MAXADDR,	/* highaddr */
3305 			       NULL, NULL,		/* filter, filterarg */
3306 			       EM_TSO_SIZE,		/* maxsize */
3307 			       EM_MAX_SCATTER,		/* nsegments */
3308 			       PAGE_SIZE,		/* maxsegsize */
3309 			       0,			/* flags */
3310 			       NULL,			/* lockfunc */
3311 			       NULL,			/* lockfuncarg */
3312 			       &txr->txtag))) {
3313 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3314 		goto fail;
3315 	}
3316 
3317 	if (!(txr->tx_buffers =
3318 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3319 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3320 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3321 		error = ENOMEM;
3322 		goto fail;
3323 	}
3324 
3325 	/* Create the descriptor buffer DMA maps */
3326 	txbuf = txr->tx_buffers;
3327 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3328 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3329 		if (error != 0) {
3330 			device_printf(dev, "Unable to create TX DMA map\n");
3331 			goto fail;
3332 		}
3333 	}
3334 
3335 	return (0);
3336 fail:
3337 	/* We free all, it handles case where we are in the middle */
3338 	em_free_transmit_structures(adapter);
3339 	return (error);
3340 }
3341 
3342 /*********************************************************************
3343  *
3344  *  Initialize a transmit ring.
3345  *
3346  **********************************************************************/
3347 static void
3348 em_setup_transmit_ring(struct tx_ring *txr)
3349 {
3350 	struct adapter *adapter = txr->adapter;
3351 	struct em_buffer *txbuf;
3352 	int i;
3353 #ifdef DEV_NETMAP
3354 	struct netmap_adapter *na = NA(adapter->ifp);
3355 	struct netmap_slot *slot;
3356 #endif /* DEV_NETMAP */
3357 
3358 	/* Clear the old descriptor contents */
3359 	EM_TX_LOCK(txr);
3360 #ifdef DEV_NETMAP
3361 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3362 #endif /* DEV_NETMAP */
3363 
3364 	bzero((void *)txr->tx_base,
3365 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3366 	/* Reset indices */
3367 	txr->next_avail_desc = 0;
3368 	txr->next_to_clean = 0;
3369 
3370 	/* Free any existing tx buffers. */
3371 	txbuf = txr->tx_buffers;
3372 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3373 		if (txbuf->m_head != NULL) {
3374 			bus_dmamap_sync(txr->txtag, txbuf->map,
3375 			    BUS_DMASYNC_POSTWRITE);
3376 			bus_dmamap_unload(txr->txtag, txbuf->map);
3377 			m_freem(txbuf->m_head);
3378 			txbuf->m_head = NULL;
3379 		}
3380 #ifdef DEV_NETMAP
3381 		if (slot) {
3382 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3383 			uint64_t paddr;
3384 			void *addr;
3385 
3386 			addr = PNMB(slot + si, &paddr);
3387 			txr->tx_base[i].buffer_addr = htole64(paddr);
3388 			/* reload the map for netmap mode */
3389 			netmap_load_map(txr->txtag, txbuf->map, addr);
3390 		}
3391 #endif /* DEV_NETMAP */
3392 
3393 		/* clear the watch index */
3394 		txbuf->next_eop = -1;
3395         }
3396 
3397 	/* Set number of descriptors available */
3398 	txr->tx_avail = adapter->num_tx_desc;
3399 	txr->queue_status = EM_QUEUE_IDLE;
3400 
3401 	/* Clear checksum offload context. */
3402 	txr->last_hw_offload = 0;
3403 	txr->last_hw_ipcss = 0;
3404 	txr->last_hw_ipcso = 0;
3405 	txr->last_hw_tucss = 0;
3406 	txr->last_hw_tucso = 0;
3407 
3408 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3409 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3410 	EM_TX_UNLOCK(txr);
3411 }
3412 
3413 /*********************************************************************
3414  *
3415  *  Initialize all transmit rings.
3416  *
3417  **********************************************************************/
3418 static void
3419 em_setup_transmit_structures(struct adapter *adapter)
3420 {
3421 	struct tx_ring *txr = adapter->tx_rings;
3422 
3423 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3424 		em_setup_transmit_ring(txr);
3425 
3426 	return;
3427 }
3428 
3429 /*********************************************************************
3430  *
3431  *  Enable transmit unit.
3432  *
3433  **********************************************************************/
3434 static void
3435 em_initialize_transmit_unit(struct adapter *adapter)
3436 {
3437 	struct tx_ring	*txr = adapter->tx_rings;
3438 	struct e1000_hw	*hw = &adapter->hw;
3439 	u32	tctl, tarc, tipg = 0;
3440 
3441 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3442 
3443 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3444 		u64 bus_addr = txr->txdma.dma_paddr;
3445 		/* Base and Len of TX Ring */
3446 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3447 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3448 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3449 	    	    (u32)(bus_addr >> 32));
3450 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3451 	    	    (u32)bus_addr);
3452 		/* Init the HEAD/TAIL indices */
3453 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3454 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3455 
3456 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3457 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3458 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3459 
3460 		txr->queue_status = EM_QUEUE_IDLE;
3461 	}
3462 
3463 	/* Set the default values for the Tx Inter Packet Gap timer */
3464 	switch (adapter->hw.mac.type) {
3465 	case e1000_80003es2lan:
3466 		tipg = DEFAULT_82543_TIPG_IPGR1;
3467 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3468 		    E1000_TIPG_IPGR2_SHIFT;
3469 		break;
3470 	default:
3471 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3472 		    (adapter->hw.phy.media_type ==
3473 		    e1000_media_type_internal_serdes))
3474 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3475 		else
3476 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3477 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3478 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3479 	}
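
	/*
	 * Sketch of the packing above (assuming the usual shift values,
	 * IPGR1 at bit 10 and IPGR2 at bit 20):
	 *
	 *	tipg = IPGT | (IPGR1 << 10) | (IPGR2 << 20);
	 *
	 * e.g. copper defaults of 8/8/6 would combine to 0x602008.
	 */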
3480 
3481 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3482 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3483 
3484 	if (adapter->hw.mac.type >= e1000_82540)
3485 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3486 		    adapter->tx_abs_int_delay.value);
3487 
3488 	if ((adapter->hw.mac.type == e1000_82571) ||
3489 	    (adapter->hw.mac.type == e1000_82572)) {
3490 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3491 		tarc |= SPEED_MODE_BIT;
3492 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3493 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3494 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3495 		tarc |= 1;
3496 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3497 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3498 		tarc |= 1;
3499 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3500 	}
3501 
3502 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3503 	if (adapter->tx_int_delay.value > 0)
3504 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3505 
3506 	/* Program the Transmit Control Register */
3507 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3508 	tctl &= ~E1000_TCTL_CT;
3509 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3510 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3511 
3512 	if (adapter->hw.mac.type >= e1000_82571)
3513 		tctl |= E1000_TCTL_MULR;
3514 
3515 	/* This write will effectively turn on the transmit unit. */
3516 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3517 
3518 }
3519 
3520 
3521 /*********************************************************************
3522  *
3523  *  Free all transmit rings.
3524  *
3525  **********************************************************************/
3526 static void
3527 em_free_transmit_structures(struct adapter *adapter)
3528 {
3529 	struct tx_ring *txr = adapter->tx_rings;
3530 
3531 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3532 		EM_TX_LOCK(txr);
3533 		em_free_transmit_buffers(txr);
3534 		em_dma_free(adapter, &txr->txdma);
3535 		EM_TX_UNLOCK(txr);
3536 		EM_TX_LOCK_DESTROY(txr);
3537 	}
3538 
3539 	free(adapter->tx_rings, M_DEVBUF);
3540 }
3541 
3542 /*********************************************************************
3543  *
3544  *  Free transmit ring related data structures.
3545  *
3546  **********************************************************************/
3547 static void
3548 em_free_transmit_buffers(struct tx_ring *txr)
3549 {
3550 	struct adapter		*adapter = txr->adapter;
3551 	struct em_buffer	*txbuf;
3552 
3553 	INIT_DEBUGOUT("free_transmit_ring: begin");
3554 
3555 	if (txr->tx_buffers == NULL)
3556 		return;
3557 
3558 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3559 		txbuf = &txr->tx_buffers[i];
3560 		if (txbuf->m_head != NULL) {
3561 			bus_dmamap_sync(txr->txtag, txbuf->map,
3562 			    BUS_DMASYNC_POSTWRITE);
3563 			bus_dmamap_unload(txr->txtag,
3564 			    txbuf->map);
3565 			m_freem(txbuf->m_head);
3566 			txbuf->m_head = NULL;
3567 			if (txbuf->map != NULL) {
3568 				bus_dmamap_destroy(txr->txtag,
3569 				    txbuf->map);
3570 				txbuf->map = NULL;
3571 			}
3572 		} else if (txbuf->map != NULL) {
3573 			bus_dmamap_unload(txr->txtag,
3574 			    txbuf->map);
3575 			bus_dmamap_destroy(txr->txtag,
3576 			    txbuf->map);
3577 			txbuf->map = NULL;
3578 		}
3579 	}
3580 #if __FreeBSD_version >= 800000
3581 	if (txr->br != NULL)
3582 		buf_ring_free(txr->br, M_DEVBUF);
3583 #endif
3584 	if (txr->tx_buffers != NULL) {
3585 		free(txr->tx_buffers, M_DEVBUF);
3586 		txr->tx_buffers = NULL;
3587 	}
3588 	if (txr->txtag != NULL) {
3589 		bus_dma_tag_destroy(txr->txtag);
3590 		txr->txtag = NULL;
3591 	}
3592 	return;
3593 }
3594 
3595 
3596 /*********************************************************************
3597  *  The offload context is protocol specific (TCP/UDP) and thus
3598  *  only needs to be set when the protocol changes. Changing
3599  *  contexts can be a performance detriment and might be better
3600  *  just avoided. The reason arises in the way in which the
3601  *  controller supports pipelined requests from the Tx data DMA.
3602  *  Up to four requests can be pipelined, and they may belong to
3603  *  the same packet or to multiple packets. However, all requests
3604  *  for one packet are issued before a request is issued for a
3605  *  subsequent packet, and if a request for the next packet
3606  *  requires a context change that request will be stalled until
3607  *  the previous request completes. This means setting up a new
3608  *  context effectively disables pipelined Tx data DMA, which in
3609  *  turn greatly slows down performance when sending small
3610  *  frames.
3611  **********************************************************************/
3612 static void
3613 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3614     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3615 {
3616 	struct adapter			*adapter = txr->adapter;
3617 	struct e1000_context_desc	*TXD = NULL;
3618 	struct em_buffer		*tx_buffer;
3619 	int				cur, hdr_len;
3620 	u32				cmd = 0;
3621 	u16				offload = 0;
3622 	u8				ipcso, ipcss, tucso, tucss;
3623 
3624 	ipcss = ipcso = tucss = tucso = 0;
3625 	hdr_len = ip_off + (ip->ip_hl << 2);
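	/*
	 * Worked example (hypothetical frame): with a 14-byte Ethernet
	 * header (ip_off = 14) and a 20-byte IPv4 header (ip_hl = 5),
	 * hdr_len = 34.  Below, ipcso then becomes 14 +
	 * offsetof(struct ip, ip_sum) = 24, and for TCP tucso becomes
	 * 34 + offsetof(struct tcphdr, th_sum) = 50.
	 */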
3626 	cur = txr->next_avail_desc;
3627 
3628 	/* Setup of IP header checksum. */
3629 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3630 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3631 		offload |= CSUM_IP;
3632 		ipcss = ip_off;
3633 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3634 		/*
3635 		 * Start offset for header checksum calculation.
3636 		 * End offset for header checksum calculation.
3637 		 * Offset of place to put the checksum.
3638 		 */
3639 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3640 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3641 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3642 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3643 		cmd |= E1000_TXD_CMD_IP;
3644 	}
3645 
3646 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3647  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3648  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3649  		offload |= CSUM_TCP;
3650  		tucss = hdr_len;
3651  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3652  		/*
3653  		 * Setting up a new checksum offload context for every
3654  		 * frame takes a lot of processing time for the hardware.
3655  		 * This also reduces performance a lot for small frames,
3656  		 * so avoid it if the driver can reuse a previously
3657  		 * configured checksum offload context.
3658  		 */
3659  		if (txr->last_hw_offload == offload) {
3660  			if (offload & CSUM_IP) {
3661  				if (txr->last_hw_ipcss == ipcss &&
3662  				    txr->last_hw_ipcso == ipcso &&
3663  				    txr->last_hw_tucss == tucss &&
3664  				    txr->last_hw_tucso == tucso)
3665  					return;
3666  			} else {
3667  				if (txr->last_hw_tucss == tucss &&
3668  				    txr->last_hw_tucso == tucso)
3669  					return;
3670  			}
3671   		}
3672  		txr->last_hw_offload = offload;
3673  		txr->last_hw_tucss = tucss;
3674  		txr->last_hw_tucso = tucso;
3675  		/*
3676  		 * Start offset for payload checksum calculation.
3677  		 * End offset for payload checksum calculation.
3678  		 * Offset of place to put the checksum.
3679  		 */
3680 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3681  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3682  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3683  		TXD->upper_setup.tcp_fields.tucso = tucso;
3684  		cmd |= E1000_TXD_CMD_TCP;
3685  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3686  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3687  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3688  		tucss = hdr_len;
3689  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
 		offload |= CSUM_UDP;
3690  		/*
3691  		 * Setting up a new checksum offload context for every
3692  		 * frame takes a lot of processing time for the hardware.
3693  		 * This also reduces performance a lot for small frames,
3694  		 * so avoid it if the driver can reuse a previously
3695  		 * configured checksum offload context.
3696  		 */
3697  		if (txr->last_hw_offload == offload) {
3698  			if (offload & CSUM_IP) {
3699  				if (txr->last_hw_ipcss == ipcss &&
3700  				    txr->last_hw_ipcso == ipcso &&
3701  				    txr->last_hw_tucss == tucss &&
3702  				    txr->last_hw_tucso == tucso)
3703  					return;
3704  			} else {
3705  				if (txr->last_hw_tucss == tucss &&
3706  				    txr->last_hw_tucso == tucso)
3707  					return;
3708  			}
3709  		}
3710  		txr->last_hw_offload = offload;
3711  		txr->last_hw_tucss = tucss;
3712  		txr->last_hw_tucso = tucso;
3713  		/*
3714  		 * Start offset for header checksum calculation.
3715  		 * End offset for header checksum calculation.
3716  		 * Offset of place to put the checksum.
3717  		 */
3718 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3719  		TXD->upper_setup.tcp_fields.tucss = tucss;
3720  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3721  		TXD->upper_setup.tcp_fields.tucso = tucso;
3722   	}
3723 
3724  	if (offload & CSUM_IP) {
3725  		txr->last_hw_ipcss = ipcss;
3726  		txr->last_hw_ipcso = ipcso;
3727   	}
3728 
3729 	TXD->tcp_seg_setup.data = htole32(0);
3730 	TXD->cmd_and_length =
3731 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3732 	tx_buffer = &txr->tx_buffers[cur];
3733 	tx_buffer->m_head = NULL;
3734 	tx_buffer->next_eop = -1;
3735 
3736 	if (++cur == adapter->num_tx_desc)
3737 		cur = 0;
3738 
3739 	txr->tx_avail--;
3740 	txr->next_avail_desc = cur;
3741 }
3742 
3743 
3744 /**********************************************************************
3745  *
3746  *  Setup work for hardware segmentation offload (TSO)
3747  *
3748  **********************************************************************/
3749 static void
3750 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3751     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3752 {
3753 	struct adapter			*adapter = txr->adapter;
3754 	struct e1000_context_desc	*TXD;
3755 	struct em_buffer		*tx_buffer;
3756 	int cur, hdr_len;
3757 
3758 	/*
3759 	 * In theory we can reuse the same TSO context if and only if
3760 	 * the frame is the same type (IP/TCP) and has the same MSS.
3761 	 * However, checking whether a frame has the same IP/TCP
3762 	 * structure is a hard thing, so just ignore that and always
3763 	 * re-establish a new TSO context.
3764 	 */
3765 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
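	/*
	 * e.g. (hypothetical frame) ip_off = 14, a 20-byte IP header
	 * (ip_hl = 5) and a 32-byte TCP header with options (th_off = 8)
	 * give hdr_len = 14 + 20 + 32 = 66.
	 */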
3766 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3767 		      E1000_TXD_DTYP_D |	/* Data descr type */
3768 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3769 
3770 	/* IP and/or TCP header checksum calculation and insertion. */
3771 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3772 
3773 	cur = txr->next_avail_desc;
3774 	tx_buffer = &txr->tx_buffers[cur];
3775 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3776 
3777 	/*
3778 	 * Start offset for header checksum calculation.
3779 	 * End offset for header checksum calculation.
3780 	 * Offset of place to put the checksum.
3781 	 */
3782 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3783 	TXD->lower_setup.ip_fields.ipcse =
3784 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3785 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3786 	/*
3787 	 * Start offset for payload checksum calculation.
3788 	 * End offset for payload checksum calculation.
3789 	 * Offset of place to put the checksum.
3790 	 */
3791 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3792 	TXD->upper_setup.tcp_fields.tucse = 0;
3793 	TXD->upper_setup.tcp_fields.tucso =
3794 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3795 	/*
3796 	 * Payload size per packet w/o any headers.
3797 	 * Length of all headers up to payload.
3798 	 */
3799 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3800 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3801 
3802 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3803 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3804 				E1000_TXD_CMD_TSE |	/* TSE context */
3805 				E1000_TXD_CMD_IP |	/* Do IP csum */
3806 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3807 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3808 
3809 	tx_buffer->m_head = NULL;
3810 	tx_buffer->next_eop = -1;
3811 
3812 	if (++cur == adapter->num_tx_desc)
3813 		cur = 0;
3814 
3815 	txr->tx_avail--;
3816 	txr->next_avail_desc = cur;
3817 	txr->tx_tso = TRUE;
3818 }
3819 
3820 
3821 /**********************************************************************
3822  *
3823  *  Examine each tx_buffer in the used queue. If the hardware is done
3824  *  processing the packet then free associated resources. The
3825  *  tx_buffer is put back on the free queue.
3826  *
3827  **********************************************************************/
3828 static void
3829 em_txeof(struct tx_ring *txr)
3830 {
3831 	struct adapter	*adapter = txr->adapter;
3832         int first, last, done, processed;
3833         struct em_buffer *tx_buffer;
3834         struct e1000_tx_desc   *tx_desc, *eop_desc;
3835 	struct ifnet   *ifp = adapter->ifp;
3836 
3837 	EM_TX_LOCK_ASSERT(txr);
3838 #ifdef DEV_NETMAP
3839 	if (netmap_tx_irq(ifp, txr->me |
3840 	    (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
3841 		return;
3842 #endif /* DEV_NETMAP */
3843 
3844 	/* No work, make sure watchdog is off */
3845         if (txr->tx_avail == adapter->num_tx_desc) {
3846 		txr->queue_status = EM_QUEUE_IDLE;
3847                 return;
3848 	}
3849 
3850 	processed = 0;
3851         first = txr->next_to_clean;
3852         tx_desc = &txr->tx_base[first];
3853         tx_buffer = &txr->tx_buffers[first];
3854 	last = tx_buffer->next_eop;
3855         eop_desc = &txr->tx_base[last];
3856 
3857 	/*
3858 	 * Get the index of the first descriptor
3859 	 * AFTER the EOP of the first packet, so
3860 	 * that we can do a simple comparison in
3861 	 * the inner while loop.
3862 	 */
3863 	if (++last == adapter->num_tx_desc)
3864  		last = 0;
3865 	done = last;
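	/*
	 * e.g. with 1024 descriptors, first = 1020 and an EOP at 1023:
	 * last wraps to 0, so done = 0 and the inner loop below cleans
	 * descriptors 1020..1023, stopping once 'first' wraps to 0.
	 */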
3866 
3867         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3868             BUS_DMASYNC_POSTREAD);
3869 
3870         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3871 		/* We clean the range of the packet */
3872 		while (first != done) {
3873                 	tx_desc->upper.data = 0;
3874                 	tx_desc->lower.data = 0;
3875                 	tx_desc->buffer_addr = 0;
3876                 	++txr->tx_avail;
3877 			++processed;
3878 
3879 			if (tx_buffer->m_head) {
3880 				bus_dmamap_sync(txr->txtag,
3881 				    tx_buffer->map,
3882 				    BUS_DMASYNC_POSTWRITE);
3883 				bus_dmamap_unload(txr->txtag,
3884 				    tx_buffer->map);
3885                         	m_freem(tx_buffer->m_head);
3886                         	tx_buffer->m_head = NULL;
3887                 	}
3888 			tx_buffer->next_eop = -1;
3889 			txr->watchdog_time = ticks;
3890 
3891 	                if (++first == adapter->num_tx_desc)
3892 				first = 0;
3893 
3894 	                tx_buffer = &txr->tx_buffers[first];
3895 			tx_desc = &txr->tx_base[first];
3896 		}
3897 		++ifp->if_opackets;
3898 		/* See if we can continue to the next packet */
3899 		last = tx_buffer->next_eop;
3900 		if (last != -1) {
3901         		eop_desc = &txr->tx_base[last];
3902 			/* Get new done point */
3903 			if (++last == adapter->num_tx_desc)
				last = 0;
3904 			done = last;
3905 		} else
3906 			break;
3907         }
3908         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3909             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3910 
3911         txr->next_to_clean = first;
3912 
3913 	/*
3914 	** Watchdog calculation: we know there's work
3915 	** outstanding, or the first return above would
3916 	** have been taken, so nothing processed for too
3917 	** long indicates a hang. The local timer will
3918 	** examine this and do a reset if needed.
3919 	*/
3920 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3921 		txr->queue_status = EM_QUEUE_HUNG;
3922 
3923         /*
3924          * If we have a minimum free, clear IFF_DRV_OACTIVE
3925          * to tell the stack that it is OK to send packets.
3926 	 * Notice that all writes of OACTIVE happen under the
3927 	 * TX lock which, with a single queue, guarantees
3928 	 * sanity.
3929          */
3930         if (txr->tx_avail >= EM_MAX_SCATTER)
3931 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3932 
3933 	/* Disable watchdog if all clean */
3934 	if (txr->tx_avail == adapter->num_tx_desc) {
3935 		txr->queue_status = EM_QUEUE_IDLE;
3936 	}
3937 }
3938 
3939 
3940 /*********************************************************************
3941  *
3942  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3943  *
3944  **********************************************************************/
3945 static void
3946 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3947 {
3948 	struct adapter		*adapter = rxr->adapter;
3949 	struct mbuf		*m;
3950 	bus_dma_segment_t	segs[1];
3951 	struct em_buffer	*rxbuf;
3952 	int			i, j, error, nsegs;
3953 	bool			cleaned = FALSE;
3954 
3955 	i = j = rxr->next_to_refresh;
3956 	/*
3957 	** Get one descriptor beyond
3958 	** our work mark to control
3959 	** the loop.
3960 	*/
3961 	if (++j == adapter->num_rx_desc)
3962 		j = 0;
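	/*
	 * e.g. with 256 descriptors and next_to_refresh = 255, i starts
	 * at 255 and j at 0; the loop refreshes 255, 0, 1, ... until j
	 * reaches 'limit'.
	 */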
3963 
3964 	while (j != limit) {
3965 		rxbuf = &rxr->rx_buffers[i];
3966 		if (rxbuf->m_head == NULL) {
3967 			m = m_getjcl(M_NOWAIT, MT_DATA,
3968 			    M_PKTHDR, adapter->rx_mbuf_sz);
3969 			/*
3970 			** If we have a temporary resource shortage
3971 			** that causes a failure, just abort the refresh
3972 			** for now; we will return to this point when
3973 			** reinvoked from em_rxeof.
3974 			*/
3975 			if (m == NULL)
3976 				goto update;
3977 		} else
3978 			m = rxbuf->m_head;
3979 
3980 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3981 		m->m_flags |= M_PKTHDR;
3982 		m->m_data = m->m_ext.ext_buf;
3983 
3984 		/* Use bus_dma machinery to setup the memory mapping  */
3985 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3986 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3987 		if (error != 0) {
3988 			printf("Refresh mbufs: hdr dmamap load"
3989 			    " failure - %d\n", error);
3990 			m_free(m);
3991 			rxbuf->m_head = NULL;
3992 			goto update;
3993 		}
3994 		rxbuf->m_head = m;
3995 		bus_dmamap_sync(rxr->rxtag,
3996 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3997 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3998 		cleaned = TRUE;
3999 
4000 		i = j; /* Next is precalculated for us */
4001 		rxr->next_to_refresh = i;
4002 		/* Calculate next controlling index */
4003 		if (++j == adapter->num_rx_desc)
4004 			j = 0;
4005 	}
4006 update:
4007 	/*
4008 	** Update the tail pointer only if we refreshed,
4009 	** and only as far as we have refreshed.
4010 	*/
4011 	if (cleaned)
4012 		E1000_WRITE_REG(&adapter->hw,
4013 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4014 
4015 	return;
4016 }
4017 
4018 
4019 /*********************************************************************
4020  *
4021  *  Allocate memory for rx_buffer structures. Since we use one
4022  *  rx_buffer per received packet, the maximum number of rx_buffer's
4023  *  that we'll need is equal to the number of receive descriptors
4024  *  that we've allocated.
4025  *
4026  **********************************************************************/
4027 static int
4028 em_allocate_receive_buffers(struct rx_ring *rxr)
4029 {
4030 	struct adapter		*adapter = rxr->adapter;
4031 	device_t		dev = adapter->dev;
4032 	struct em_buffer	*rxbuf;
4033 	int			error;
4034 
4035 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4036 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4037 	if (rxr->rx_buffers == NULL) {
4038 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4039 		return (ENOMEM);
4040 	}
4041 
4042 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4043 				1, 0,			/* alignment, bounds */
4044 				BUS_SPACE_MAXADDR,	/* lowaddr */
4045 				BUS_SPACE_MAXADDR,	/* highaddr */
4046 				NULL, NULL,		/* filter, filterarg */
4047 				MJUM9BYTES,		/* maxsize */
4048 				1,			/* nsegments */
4049 				MJUM9BYTES,		/* maxsegsize */
4050 				0,			/* flags */
4051 				NULL,			/* lockfunc */
4052 				NULL,			/* lockarg */
4053 				&rxr->rxtag);
4054 	if (error) {
4055 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4056 		    __func__, error);
4057 		goto fail;
4058 	}
4059 
4060 	rxbuf = rxr->rx_buffers;
4061 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4063 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4064 		    &rxbuf->map);
4065 		if (error) {
4066 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4067 			    __func__, error);
4068 			goto fail;
4069 		}
4070 	}
4071 
4072 	return (0);
4073 
4074 fail:
4075 	em_free_receive_structures(adapter);
4076 	return (error);
4077 }
4078 
4079 
4080 /*********************************************************************
4081  *
4082  *  Initialize a receive ring and its buffers.
4083  *
4084  **********************************************************************/
4085 static int
4086 em_setup_receive_ring(struct rx_ring *rxr)
4087 {
4088 	struct	adapter 	*adapter = rxr->adapter;
4089 	struct em_buffer	*rxbuf;
4090 	bus_dma_segment_t	seg[1];
4091 	int			rsize, nsegs, error = 0;
4092 #ifdef DEV_NETMAP
4093 	struct netmap_adapter *na = NA(adapter->ifp);
4094 	struct netmap_slot *slot;
4095 #endif
4096 
4097 
4098 	/* Clear the ring contents */
4099 	EM_RX_LOCK(rxr);
4100 	rsize = roundup2(adapter->num_rx_desc *
4101 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4102 	bzero((void *)rxr->rx_base, rsize);
4103 #ifdef DEV_NETMAP
4104 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4105 #endif
4106 
4107 	/*
4108 	** Free current RX buffer structs and their mbufs
4109 	*/
4110 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4111 		rxbuf = &rxr->rx_buffers[i];
4112 		if (rxbuf->m_head != NULL) {
4113 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4114 			    BUS_DMASYNC_POSTREAD);
4115 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4116 			m_freem(rxbuf->m_head);
4117 			rxbuf->m_head = NULL; /* mark as freed */
4118 		}
4119 	}
4120 
4121 	/* Now replenish the mbufs */
4122         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4123 		rxbuf = &rxr->rx_buffers[j];
4124 #ifdef DEV_NETMAP
4125 		if (slot) {
4126 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4127 			uint64_t paddr;
4128 			void *addr;
4129 
4130 			addr = PNMB(slot + si, &paddr);
4131 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4132 			/* Update descriptor */
4133 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4134 			continue;
4135 		}
4136 #endif /* DEV_NETMAP */
4137 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4138 		    M_PKTHDR, adapter->rx_mbuf_sz);
4139 		if (rxbuf->m_head == NULL) {
4140 			error = ENOBUFS;
4141 			goto fail;
4142 		}
4143 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4144 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4145 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4146 
4147 		/* Get the memory mapping */
4148 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4149 		    rxbuf->map, rxbuf->m_head, seg,
4150 		    &nsegs, BUS_DMA_NOWAIT);
4151 		if (error != 0) {
4152 			m_freem(rxbuf->m_head);
4153 			rxbuf->m_head = NULL;
4154 			goto fail;
4155 		}
4156 		bus_dmamap_sync(rxr->rxtag,
4157 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4158 
4159 		/* Update descriptor */
4160 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4161 	}
4162 	rxr->next_to_check = 0;
4163 	rxr->next_to_refresh = 0;
4164 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4165 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4166 
4167 fail:
4168 	EM_RX_UNLOCK(rxr);
4169 	return (error);
4170 }
4171 
4172 /*********************************************************************
4173  *
4174  *  Initialize all receive rings.
4175  *
4176  **********************************************************************/
4177 static int
4178 em_setup_receive_structures(struct adapter *adapter)
4179 {
4180 	struct rx_ring *rxr = adapter->rx_rings;
4181 	int q;
4182 
4183 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4184 		if (em_setup_receive_ring(rxr))
4185 			goto fail;
4186 
4187 	return (0);
4188 fail:
4189 	/*
4190 	 * Free RX buffers allocated so far; we will only handle
4191 	 * the rings that completed, since the failing case will have
4192 	 * cleaned up for itself. 'q' failed, so it's the terminus.
4193 	 */
4194 	for (int i = 0; i < q; ++i) {
4195 		rxr = &adapter->rx_rings[i];
4196 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4197 			struct em_buffer *rxbuf;
4198 			rxbuf = &rxr->rx_buffers[n];
4199 			if (rxbuf->m_head != NULL) {
4200 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4201 			  	  BUS_DMASYNC_POSTREAD);
4202 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4203 				m_freem(rxbuf->m_head);
4204 				rxbuf->m_head = NULL;
4205 			}
4206 		}
4207 		rxr->next_to_check = 0;
4208 		rxr->next_to_refresh = 0;
4209 	}
4210 
4211 	return (ENOBUFS);
4212 }
4213 
4214 /*********************************************************************
4215  *
4216  *  Free all receive rings.
4217  *
4218  **********************************************************************/
4219 static void
4220 em_free_receive_structures(struct adapter *adapter)
4221 {
4222 	struct rx_ring *rxr = adapter->rx_rings;
4223 
4224 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4225 		em_free_receive_buffers(rxr);
4226 		/* Free the ring memory as well */
4227 		em_dma_free(adapter, &rxr->rxdma);
4228 		EM_RX_LOCK_DESTROY(rxr);
4229 	}
4230 
4231 	free(adapter->rx_rings, M_DEVBUF);
4232 }
4233 
4234 
4235 /*********************************************************************
4236  *
4237  *  Free receive ring data structures
4238  *
4239  **********************************************************************/
4240 static void
4241 em_free_receive_buffers(struct rx_ring *rxr)
4242 {
4243 	struct adapter		*adapter = rxr->adapter;
4244 	struct em_buffer	*rxbuf = NULL;
4245 
4246 	INIT_DEBUGOUT("free_receive_buffers: begin");
4247 
4248 	if (rxr->rx_buffers != NULL) {
4249 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4250 			rxbuf = &rxr->rx_buffers[i];
4251 			if (rxbuf->map != NULL) {
4252 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4253 				    BUS_DMASYNC_POSTREAD);
4254 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4255 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4256 			}
4257 			if (rxbuf->m_head != NULL) {
4258 				m_freem(rxbuf->m_head);
4259 				rxbuf->m_head = NULL;
4260 			}
4261 		}
4262 		free(rxr->rx_buffers, M_DEVBUF);
4263 		rxr->rx_buffers = NULL;
4264 		rxr->next_to_check = 0;
4265 		rxr->next_to_refresh = 0;
4266 	}
4267 
4268 	if (rxr->rxtag != NULL) {
4269 		bus_dma_tag_destroy(rxr->rxtag);
4270 		rxr->rxtag = NULL;
4271 	}
4272 
4273 	return;
4274 }
4275 
4276 
4277 /*********************************************************************
4278  *
4279  *  Enable receive unit.
4280  *
4281  **********************************************************************/
4282 
4283 static void
4284 em_initialize_receive_unit(struct adapter *adapter)
4285 {
4286 	struct rx_ring	*rxr = adapter->rx_rings;
4287 	struct ifnet	*ifp = adapter->ifp;
4288 	struct e1000_hw	*hw = &adapter->hw;
4289 	u64	bus_addr;
4290 	u32	rctl, rxcsum;
4291 
4292 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4293 
4294 	/*
4295 	 * Make sure receives are disabled while setting
4296 	 * up the descriptor ring
4297 	 */
4298 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4299 	/* Do not disable if ever enabled on this hardware */
4300 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4301 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4302 
4303 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4304 	    adapter->rx_abs_int_delay.value);
4305 	/*
4306 	 * Set the interrupt throttling rate. Value is calculated
4307 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4308 	 */
4309 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
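	/*
	 * e.g. assuming the usual if_em.h setting of MAX_INTS_PER_SEC =
	 * 8000, DEFAULT_ITR = 1000000000/(8000 * 256) = 488 units of
	 * 256ns, i.e. roughly 8000 interrupts per second at most.
	 */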
4310 
4311 	/*
4312 	** When using MSIX interrupts we need to throttle
4313 	** using the EITR register (82574 only)
4314 	*/
4315 	if (hw->mac.type == e1000_82574) {
4316 		for (int i = 0; i < 4; i++)
4317 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4318 			    DEFAULT_ITR);
4319 		/* Disable accelerated acknowledge */
4320 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4321 	}
4322 
4323 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4324 	if (ifp->if_capenable & IFCAP_RXCSUM)
4325 		rxcsum |= E1000_RXCSUM_TUOFL;
4326 	else
4327 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4328 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4329 
4330 	/*
4331 	** XXX TEMPORARY WORKAROUND: on some systems with 82573,
4332 	** long latencies are observed, e.g. on the Lenovo X60. This
4333 	** change eliminates the problem, but since having positive
4334 	** values in RDTR is a known source of problems on other
4335 	** platforms, another solution is being sought.
4336 	*/
4337 	if (hw->mac.type == e1000_82573)
4338 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4339 
4340 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4341 		/* Setup the Base and Length of the Rx Descriptor Ring */
4342 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4343 
4344 		bus_addr = rxr->rxdma.dma_paddr;
4345 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4346 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4347 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4348 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4349 		/* Setup the Head and Tail Descriptor Pointers */
4350 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4351 #ifdef DEV_NETMAP
4352 		/*
4353 		 * an init() while a netmap client is active must
4354 		 * preserve the rx buffers passed to userspace.
4355 		 */
4356 		if (ifp->if_capenable & IFCAP_NETMAP)
4357 			rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
4358 #endif /* DEV_NETMAP */
4359 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4360 	}
4361 
4362 	/* Set PTHRESH for improved jumbo performance */
4363 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4364 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4365 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4366 	    (ifp->if_mtu > ETHERMTU)) {
4367 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4368 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4369 	}
4370 
4371 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4372 		if (ifp->if_mtu > ETHERMTU)
4373 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4374 		else
4375 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4376 	}
4377 
4378 	/* Setup the Receive Control Register */
4379 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4380 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4381 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4382 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4383 
4384         /* Strip the CRC */
4385         rctl |= E1000_RCTL_SECRC;
4386 
4387         /* Make sure VLAN Filters are off */
4388         rctl &= ~E1000_RCTL_VFE;
4389 	rctl &= ~E1000_RCTL_SBP;
4390 
4391 	if (adapter->rx_mbuf_sz == MCLBYTES)
4392 		rctl |= E1000_RCTL_SZ_2048;
4393 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4394 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4395 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4396 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4397 
4398 	if (ifp->if_mtu > ETHERMTU)
4399 		rctl |= E1000_RCTL_LPE;
4400 	else
4401 		rctl &= ~E1000_RCTL_LPE;
4402 
4403 	/* Write out the settings */
4404 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4405 
4406 	return;
4407 }
4408 
4409 
4410 /*********************************************************************
4411  *
4412  *  This routine executes in interrupt context. It replenishes
4413  *  the mbufs in the descriptor ring and sends data which has
4414  *  been DMA'ed into host memory up to the upper layer.
4415  *
4416  *  We loop at most count times if count is > 0, or until done if
4417  *  count < 0.
4418  *
4419  *  For polling we also now return the number of cleaned packets
4420  *********************************************************************/
4421 static bool
4422 em_rxeof(struct rx_ring *rxr, int count, int *done)
4423 {
4424 	struct adapter		*adapter = rxr->adapter;
4425 	struct ifnet		*ifp = adapter->ifp;
4426 	struct mbuf		*mp, *sendmp;
4427 	u8			status = 0;
4428 	u16 			len;
4429 	int			i, processed, rxdone = 0;
4430 	bool			eop;
4431 	struct e1000_rx_desc	*cur;
4432 
4433 	EM_RX_LOCK(rxr);
4434 
4435 #ifdef DEV_NETMAP
4436 	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4437 		return (FALSE);
4438 #endif /* DEV_NETMAP */
4439 
4440 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4441 
4442 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4443 			break;
4444 
4445 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4446 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4447 
4448 		cur = &rxr->rx_base[i];
4449 		status = cur->status;
4450 		mp = sendmp = NULL;
4451 
4452 		if ((status & E1000_RXD_STAT_DD) == 0)
4453 			break;
4454 
4455 		len = le16toh(cur->length);
4456 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4457 
4458 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4459 		    (rxr->discard == TRUE)) {
4460 			adapter->dropped_pkts++;
4461 			++rxr->rx_discarded;
4462 			if (!eop) /* Catch subsequent segs */
4463 				rxr->discard = TRUE;
4464 			else
4465 				rxr->discard = FALSE;
4466 			em_rx_discard(rxr, i);
4467 			goto next_desc;
4468 		}
4469 
4470 		/* Assign correct length to the current fragment */
4471 		mp = rxr->rx_buffers[i].m_head;
4472 		mp->m_len = len;
4473 
4474 		/* Trigger for refresh */
4475 		rxr->rx_buffers[i].m_head = NULL;
4476 
4477 		/* First segment? */
4478 		if (rxr->fmp == NULL) {
4479 			mp->m_pkthdr.len = len;
4480 			rxr->fmp = rxr->lmp = mp;
4481 		} else {
4482 			/* Chain mbuf's together */
4483 			mp->m_flags &= ~M_PKTHDR;
4484 			rxr->lmp->m_next = mp;
4485 			rxr->lmp = mp;
4486 			rxr->fmp->m_pkthdr.len += len;
4487 		}
4488 
4489 		if (eop) {
4490 			--count;
4491 			sendmp = rxr->fmp;
4492 			sendmp->m_pkthdr.rcvif = ifp;
4493 			ifp->if_ipackets++;
4494 			em_receive_checksum(cur, sendmp);
4495 #ifndef __NO_STRICT_ALIGNMENT
4496 			if (adapter->hw.mac.max_frame_size >
4497 			    (MCLBYTES - ETHER_ALIGN) &&
4498 			    em_fixup_rx(rxr) != 0)
4499 				goto skip;
4500 #endif
4501 			if (status & E1000_RXD_STAT_VP) {
4502 				sendmp->m_pkthdr.ether_vtag =
4503 				    le16toh(cur->special);
4504 				sendmp->m_flags |= M_VLANTAG;
4505 			}
4506 #ifndef __NO_STRICT_ALIGNMENT
4507 skip:
4508 #endif
4509 			rxr->fmp = rxr->lmp = NULL;
4510 		}
4511 next_desc:
4512 		/* Zero out the receive descriptors status. */
4513 		cur->status = 0;
4514 		++rxdone;	/* cumulative for POLL */
4515 		++processed;
4516 
4517 		/* Advance our pointers to the next descriptor. */
4518 		if (++i == adapter->num_rx_desc)
4519 			i = 0;
4520 
4521 		/* Send to the stack */
4522 		if (sendmp != NULL) {
4523 			rxr->next_to_check = i;
4524 			EM_RX_UNLOCK(rxr);
4525 			(*ifp->if_input)(ifp, sendmp);
4526 			EM_RX_LOCK(rxr);
4527 			i = rxr->next_to_check;
4528 		}
4529 
4530 		/* Only refresh mbufs every 8 descriptors */
4531 		if (processed == 8) {
4532 			em_refresh_mbufs(rxr, i);
4533 			processed = 0;
4534 		}
4535 	}
4536 
4537 	/* Catch any remaining refresh work */
4538 	if (e1000_rx_unrefreshed(rxr))
4539 		em_refresh_mbufs(rxr, i);
4540 
4541 	rxr->next_to_check = i;
4542 	if (done != NULL)
4543 		*done = rxdone;
4544 	EM_RX_UNLOCK(rxr);
4545 
4546 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4547 }
4548 
4549 static __inline void
4550 em_rx_discard(struct rx_ring *rxr, int i)
4551 {
4552 	struct em_buffer	*rbuf;
4553 
4554 	rbuf = &rxr->rx_buffers[i];
4555 	/* Free any previous pieces */
4556 	if (rxr->fmp != NULL) {
4557 		rxr->fmp->m_flags |= M_PKTHDR;
4558 		m_freem(rxr->fmp);
4559 		rxr->fmp = NULL;
4560 		rxr->lmp = NULL;
4561 	}
4562 	/*
4563 	** Free the buffer and allow em_refresh_mbufs()
4564 	** to clean up and recharge it.
4565 	*/
4566 	if (rbuf->m_head) {
4567 		m_free(rbuf->m_head);
4568 		rbuf->m_head = NULL;
4569 	}
4570 	return;
4571 }
4572 
4573 #ifndef __NO_STRICT_ALIGNMENT
4574 /*
4575  * When jumbo frames are enabled we should realign the entire payload on
4576  * architectures with strict alignment. This is a serious design mistake
4577  * of the 8254x, as it nullifies the benefit of DMA operations. The 8254x
4578  * only allows the RX buffer size to be 2048/4096/8192/16384; what we
4579  * really want is 2048 - ETHER_ALIGN, to align the payload. On
4580  * architectures without strict alignment restrictions the 8254x still
4581  * performs unaligned memory accesses, which reduce performance as well.
4582  * To avoid copying over an entire frame to align it, we allocate a new
4583  * mbuf and copy the ethernet header into it. The new mbuf is prepended
4584  * onto the existing mbuf chain.
4585  *
4586  * Be aware, the best performance of the 8254x is achieved only when
 * jumbo frames are not used at all on architectures with strict alignment.
4587  */
4588 static int
4589 em_fixup_rx(struct rx_ring *rxr)
4590 {
4591 	struct adapter *adapter = rxr->adapter;
4592 	struct mbuf *m, *n;
4593 	int error;
4594 
4595 	error = 0;
4596 	m = rxr->fmp;
4597 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4598 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4599 		m->m_data += ETHER_HDR_LEN;
4600 	} else {
4601 		MGETHDR(n, M_NOWAIT, MT_DATA);
4602 		if (n != NULL) {
4603 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4604 			m->m_data += ETHER_HDR_LEN;
4605 			m->m_len -= ETHER_HDR_LEN;
4606 			n->m_len = ETHER_HDR_LEN;
4607 			M_MOVE_PKTHDR(n, m);
4608 			n->m_next = m;
4609 			rxr->fmp = n;
4610 		} else {
4611 			adapter->dropped_pkts++;
4612 			m_freem(rxr->fmp);
4613 			rxr->fmp = NULL;
4614 			error = ENOMEM;
4615 		}
4616 	}
4617 
4618 	return (error);
4619 }
4620 #endif
4621 
4622 /*********************************************************************
4623  *
4624  *  Verify that the hardware indicated that the checksum is valid.
4625  *  Inform the stack about the status of checksum so that stack
4626  *  doesn't spend time verifying the checksum.
4627  *
4628  *********************************************************************/
4629 static void
4630 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4631 {
4632 	mp->m_pkthdr.csum_flags = 0;
4633 
4634 	/* Ignore Checksum bit is set */
4635 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4636 		return;
4637 
4638 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4639 		return;
4640 
4641 	/* IP Checksum Good? */
4642 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4643 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4644 
4645 	/* TCP or UDP checksum */
4646 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4647 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4648 		mp->m_pkthdr.csum_data = htons(0xffff);
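		/*
		 * csum_data = 0xffff together with CSUM_DATA_VALID |
		 * CSUM_PSEUDO_HDR tells the stack the full pseudo-header
		 * checksum verified correctly, so software verification
		 * is skipped.
		 */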
4649 	}
4650 }
4651 
4652 /*
4653  * This routine is run via a vlan
4654  * config EVENT
4655  */
4656 static void
4657 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4658 {
4659 	struct adapter	*adapter = ifp->if_softc;
4660 	u32		index, bit;
4661 
4662 	if (ifp->if_softc !=  arg)   /* Not our event */
4663 		return;
4664 
4665 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4666                 return;
4667 
4668 	EM_CORE_LOCK(adapter);
4669 	index = (vtag >> 5) & 0x7F;
4670 	bit = vtag & 0x1F;
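	/*
	 * e.g. vtag 100: index = 3, bit = 4.  The shadow VFTA mirrors the
	 * hardware's 128 32-bit filter words, covering all 4096 VLAN IDs.
	 */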
4671 	adapter->shadow_vfta[index] |= (1 << bit);
4672 	++adapter->num_vlans;
4673 	/* Re-init to load the changes */
4674 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4675 		em_init_locked(adapter);
4676 	EM_CORE_UNLOCK(adapter);
4677 }
4678 
4679 /*
4680  * This routine is run via a vlan
4681  * unconfig EVENT
4682  */
4683 static void
4684 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4685 {
4686 	struct adapter	*adapter = ifp->if_softc;
4687 	u32		index, bit;
4688 
4689 	if (ifp->if_softc !=  arg)
4690 		return;
4691 
4692 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4693                 return;
4694 
4695 	EM_CORE_LOCK(adapter);
4696 	index = (vtag >> 5) & 0x7F;
4697 	bit = vtag & 0x1F;
4698 	adapter->shadow_vfta[index] &= ~(1 << bit);
4699 	--adapter->num_vlans;
4700 	/* Re-init to load the changes */
4701 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4702 		em_init_locked(adapter);
4703 	EM_CORE_UNLOCK(adapter);
4704 }
4705 
4706 static void
4707 em_setup_vlan_hw_support(struct adapter *adapter)
4708 {
4709 	struct e1000_hw *hw = &adapter->hw;
4710 	u32             reg;
4711 
4712 	/*
4713 	** We get here thru init_locked, meaning
4714 	** a soft reset; this has already cleared
4715 	** the VFTA and other state, so if no
4716 	** vlans have been registered, do nothing.
4717 	*/
4718 	if (adapter->num_vlans == 0)
4719                 return;
4720 
4721 	/*
4722 	** A soft reset zeroes out the VFTA, so
4723 	** we need to repopulate it now.
4724 	*/
4725 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4726                 if (adapter->shadow_vfta[i] != 0)
4727 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4728                             i, adapter->shadow_vfta[i]);
4729 
4730 	reg = E1000_READ_REG(hw, E1000_CTRL);
4731 	reg |= E1000_CTRL_VME;
4732 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4733 
4734 	/* Enable the Filter Table */
4735 	reg = E1000_READ_REG(hw, E1000_RCTL);
4736 	reg &= ~E1000_RCTL_CFIEN;
4737 	reg |= E1000_RCTL_VFE;
4738 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4739 }
4740 
4741 static void
4742 em_enable_intr(struct adapter *adapter)
4743 {
4744 	struct e1000_hw *hw = &adapter->hw;
4745 	u32 ims_mask = IMS_ENABLE_MASK;
4746 
4747 	if (hw->mac.type == e1000_82574) {
4748 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4749 		ims_mask |= EM_MSIX_MASK;
4750 	}
4751 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4752 }
4753 
4754 static void
4755 em_disable_intr(struct adapter *adapter)
4756 {
4757 	struct e1000_hw *hw = &adapter->hw;
4758 
4759 	if (hw->mac.type == e1000_82574)
4760 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4761 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4762 }
4763 
4764 /*
4765  * Bit of a misnomer: what this really means is
4766  * to enable OS management of the system, i.e.
4767  * to disable special hardware management features.
4768  */
4769 static void
4770 em_init_manageability(struct adapter *adapter)
4771 {
4772 	/* A shared code workaround */
4773 #define E1000_82542_MANC2H E1000_MANC2H
4774 	if (adapter->has_manage) {
4775 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4776 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4777 
4778 		/* disable hardware interception of ARP */
4779 		manc &= ~(E1000_MANC_ARP_EN);
4780 
4781                 /* enable receiving management packets to the host */
4782 		manc |= E1000_MANC_EN_MNG2HOST;
4783 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4784 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4785 		manc2h |= E1000_MNG2HOST_PORT_623;
4786 		manc2h |= E1000_MNG2HOST_PORT_664;
4787 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4788 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4789 	}
4790 }
4791 
4792 /*
4793  * Give control back to hardware management
4794  * controller if there is one.
4795  */
4796 static void
4797 em_release_manageability(struct adapter *adapter)
4798 {
4799 	if (adapter->has_manage) {
4800 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4801 
4802 		/* re-enable hardware interception of ARP */
4803 		manc |= E1000_MANC_ARP_EN;
4804 		manc &= ~E1000_MANC_EN_MNG2HOST;
4805 
4806 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4807 	}
4808 }
4809 
4810 /*
4811  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4812  * For ASF and Pass Through versions of f/w this means
4813  * that the driver is loaded. For AMT version type f/w
4814  * this means that the network i/f is open.
4815  */
4816 static void
4817 em_get_hw_control(struct adapter *adapter)
4818 {
4819 	u32 ctrl_ext, swsm;
4820 
4821 	if (adapter->hw.mac.type == e1000_82573) {
4822 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4823 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4824 		    swsm | E1000_SWSM_DRV_LOAD);
4825 		return;
4826 	}
4827 	/* else */
4828 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4829 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4830 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4831 	return;
4832 }
4833 
4834 /*
4835  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4836  * For ASF and Pass Through versions of f/w this means that
4837  * the driver is no longer loaded. For AMT versions of the
4838  * f/w this means that the network i/f is closed.
4839  */
4840 static void
4841 em_release_hw_control(struct adapter *adapter)
4842 {
4843 	u32 ctrl_ext, swsm;
4844 
4845 	if (!adapter->has_manage)
4846 		return;
4847 
4848 	if (adapter->hw.mac.type == e1000_82573) {
4849 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4850 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4851 		    swsm & ~E1000_SWSM_DRV_LOAD);
4852 		return;
4853 	}
4854 	/* else */
4855 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4856 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4857 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4858 	return;
4859 }
4860 
4861 static int
4862 em_is_valid_ether_addr(u8 *addr)
4863 {
4864 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4865 
4866 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4867 		return (FALSE);
4868 	}
4869 
4870 	return (TRUE);
4871 }
4872 
4873 /*
4874 ** Parse the interface capabilities with regard
4875 ** to both system management and wake-on-lan for
4876 ** later use.
4877 */
4878 static void
4879 em_get_wakeup(device_t dev)
4880 {
4881 	struct adapter	*adapter = device_get_softc(dev);
4882 	u16		eeprom_data = 0, device_id, apme_mask;
4883 
4884 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4885 	apme_mask = EM_EEPROM_APME;
4886 
4887 	switch (adapter->hw.mac.type) {
4888 	case e1000_82573:
4889 	case e1000_82583:
4890 		adapter->has_amt = TRUE;
4891 		/* Falls thru */
4892 	case e1000_82571:
4893 	case e1000_82572:
4894 	case e1000_80003es2lan:
4895 		if (adapter->hw.bus.func == 1) {
4896 			e1000_read_nvm(&adapter->hw,
4897 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4898 			break;
4899 		} else
4900 			e1000_read_nvm(&adapter->hw,
4901 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4902 		break;
4903 	case e1000_ich8lan:
4904 	case e1000_ich9lan:
4905 	case e1000_ich10lan:
4906 	case e1000_pchlan:
4907 	case e1000_pch2lan:
4908 		apme_mask = E1000_WUC_APME;
4909 		adapter->has_amt = TRUE;
4910 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4911 		break;
4912 	default:
4913 		e1000_read_nvm(&adapter->hw,
4914 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4915 		break;
4916 	}
4917 	if (eeprom_data & apme_mask)
4918 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4919 	/*
4920          * We have the eeprom settings, now apply the special cases
4921          * where the eeprom may be wrong or the board won't support
4922          * wake on lan on a particular port
4923 	 */
4924 	device_id = pci_get_device(dev);
4925         switch (device_id) {
4926 	case E1000_DEV_ID_82571EB_FIBER:
4927 		/* Wake events only supported on port A for dual fiber
4928 		 * regardless of eeprom setting */
4929 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4930 		    E1000_STATUS_FUNC_1)
4931 			adapter->wol = 0;
4932 		break;
4933 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4934 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4935 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4936                 /* if quad port adapter, disable WoL on all but port A */
4937 		if (global_quad_port_a != 0)
4938 			adapter->wol = 0;
4939 		/* Reset for multiple quad port adapters */
4940 		if (++global_quad_port_a == 4)
4941 			global_quad_port_a = 0;
4942                 break;
4943 	}
4944 	return;
4945 }
4946 
4947 
4948 /*
4949  * Enable PCI Wake On Lan capability
4950  */
4951 static void
4952 em_enable_wakeup(device_t dev)
4953 {
4954 	struct adapter	*adapter = device_get_softc(dev);
4955 	struct ifnet	*ifp = adapter->ifp;
4956 	u32		pmc, ctrl, ctrl_ext, rctl;
4957 	u16     	status;
4958 
4959 	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
4960 		return;
4961 
4962 	/* Advertise the wakeup capability */
4963 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4964 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4965 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4966 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4967 
4968 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4969 	    (adapter->hw.mac.type == e1000_pchlan) ||
4970 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4971 	    (adapter->hw.mac.type == e1000_ich10lan))
4972 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4973 
4974 	/* Keep the laser running on Fiber adapters */
4975 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4976 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4977 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4978 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4979 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4980 	}
4981 
4982 	/*
4983 	** Determine type of Wakeup: note that wol
4984 	** is set with all bits on by default.
4985 	*/
4986 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4987 		adapter->wol &= ~E1000_WUFC_MAG;
4988 
4989 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4990 		adapter->wol &= ~E1000_WUFC_MC;
4991 	else {
4992 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4993 		rctl |= E1000_RCTL_MPE;
4994 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4995 	}
4996 
4997 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4998 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4999 		if (em_enable_phy_wakeup(adapter))
5000 			return;
5001 	} else {
5002 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5003 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5004 	}
5005 
5006 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5007 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5008 
5009         /* Request PME */
5010         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5011 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5012 	if (ifp->if_capenable & IFCAP_WOL)
5013 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5014         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5015 
5016 	return;
5017 }
5018 
5019 /*
5020 ** WOL in the newer chipset interfaces (pchlan)
5021 ** requires things to be copied into the PHY
5022 */
5023 static int
5024 em_enable_phy_wakeup(struct adapter *adapter)
5025 {
5026 	struct e1000_hw *hw = &adapter->hw;
5027 	u32 mreg, ret = 0;
5028 	u16 preg;
5029 
5030 	/* copy MAC RARs to PHY RARs */
5031 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5032 
5033 	/* copy MAC MTA to PHY MTA */
5034 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5035 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5036 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5037 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5038 		    (u16)((mreg >> 16) & 0xFFFF));
5039 	}
5040 
5041 	/* configure PHY Rx Control register */
5042 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5043 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5044 	if (mreg & E1000_RCTL_UPE)
5045 		preg |= BM_RCTL_UPE;
5046 	if (mreg & E1000_RCTL_MPE)
5047 		preg |= BM_RCTL_MPE;
5048 	preg &= ~(BM_RCTL_MO_MASK);
5049 	if (mreg & E1000_RCTL_MO_3)
5050 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5051 				<< BM_RCTL_MO_SHIFT);
5052 	if (mreg & E1000_RCTL_BAM)
5053 		preg |= BM_RCTL_BAM;
5054 	if (mreg & E1000_RCTL_PMCF)
5055 		preg |= BM_RCTL_PMCF;
5056 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5057 	if (mreg & E1000_CTRL_RFCE)
5058 		preg |= BM_RCTL_RFCE;
5059 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5060 
5061 	/* enable PHY wakeup in MAC register */
5062 	E1000_WRITE_REG(hw, E1000_WUC,
5063 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5064 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5065 
5066 	/* configure and enable PHY wakeup in PHY registers */
5067 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5068 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5069 
5070 	/* activate PHY wakeup */
5071 	ret = hw->phy.ops.acquire(hw);
5072 	if (ret) {
5073 		printf("Could not acquire PHY\n");
5074 		return ret;
5075 	}
5076 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5077 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5078 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5079 	if (ret) {
5080 		printf("Could not read PHY page 769\n");
5081 		goto out;
5082 	}
5083 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5084 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5085 	if (ret)
5086 		printf("Could not set PHY Host Wakeup bit\n");
5087 out:
5088 	hw->phy.ops.release(hw);
5089 
5090 	return ret;
5091 }
5092 
5093 static void
5094 em_led_func(void *arg, int onoff)
5095 {
5096 	struct adapter	*adapter = arg;
5097 
5098 	EM_CORE_LOCK(adapter);
5099 	if (onoff) {
5100 		e1000_setup_led(&adapter->hw);
5101 		e1000_led_on(&adapter->hw);
5102 	} else {
5103 		e1000_led_off(&adapter->hw);
5104 		e1000_cleanup_led(&adapter->hw);
5105 	}
5106 	EM_CORE_UNLOCK(adapter);
5107 }
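/*
** Illustrative only: this callback is the sort of function handed to
** led_create(9) at attach time, e.g.
**	adapter->led_dev = led_create(em_led_func, adapter,
**	    device_get_nameunit(dev));
*/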
5108 
5109 /*
5110 ** Disable the L0s and L1 ASPM link states
5111 */
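/*
** Only the 82573/82574/82583 parts are touched; everything else
** returns early. Clearing PCIEM_LINK_CTL_ASPMC in the PCIe link
** control register turns off both the L0s and L1 states.
*/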
5112 static void
5113 em_disable_aspm(struct adapter *adapter)
5114 {
5115 	int		base, reg;
5116 	u16		link_cap, link_ctrl;
5117 	device_t	dev = adapter->dev;
5118 
5119 	switch (adapter->hw.mac.type) {
5120 		case e1000_82573:
5121 		case e1000_82574:
5122 		case e1000_82583:
5123 			break;
5124 		default:
5125 			return;
5126 	}
5127 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5128 		return;
5129 	reg = base + PCIER_LINK_CAP;
5130 	link_cap = pci_read_config(dev, reg, 2);
5131 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5132 		return;
5133 	reg = base + PCIER_LINK_CTL;
5134 	link_ctrl = pci_read_config(dev, reg, 2);
5135 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5136 	pci_write_config(dev, reg, link_ctrl, 2);
5137 	return;
5138 }
5139 
5140 /**********************************************************************
5141  *
5142  *  Update the board statistics counters.
5143  *
5144  **********************************************************************/
5145 static void
5146 em_update_stats_counters(struct adapter *adapter)
5147 {
5148 	struct ifnet   *ifp;
5149 
5150 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5151 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5152 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5153 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5154 	}
5155 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5156 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5157 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5158 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5159 
5160 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5161 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5162 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5163 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5164 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5165 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5166 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5167 	/*
5168 	** For watchdog management we need to know if we have been
5169 	** paused during the last interval, so capture that here.
5170 	*/
5171 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5172 	adapter->stats.xoffrxc += adapter->pause_frames;
5173 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5174 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5175 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5176 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5177 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5178 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5179 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5180 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5181 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5182 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5183 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5184 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5185 
5186 	/* For the 64-bit byte counters the low dword must be read first. */
5187 	/* Both registers clear on the read of the high dword */
5188 
5189 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5190 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5191 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5192 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5193 
5194 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5195 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5196 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5197 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5198 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5199 
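	/* TOR/TOT are 64-bit counters as well; read the low dword
	 * first (see GORC/GOTC above). */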
5200 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5201 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5202 
5203 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5204 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5205 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5206 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5207 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5208 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5209 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5210 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5211 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5212 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5213 
5214 	/* Interrupt Counts */
5215 
5216 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5217 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5218 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5219 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5220 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5221 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5222 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5223 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5224 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5225 
5226 	if (adapter->hw.mac.type >= e1000_82543) {
5227 		adapter->stats.algnerrc +=
5228 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5229 		adapter->stats.rxerrc +=
5230 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5231 		adapter->stats.tncrs +=
5232 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5233 		adapter->stats.cexterr +=
5234 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5235 		adapter->stats.tsctc +=
5236 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5237 		adapter->stats.tsctfc +=
5238 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5239 	}
5240 	ifp = adapter->ifp;
5241 
5242 	ifp->if_collisions = adapter->stats.colc;
5243 
5244 	/* Rx Errors */
5245 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5246 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5247 	    adapter->stats.ruc + adapter->stats.roc +
5248 	    adapter->stats.mpc + adapter->stats.cexterr;
5249 
5250 	/* Tx Errors */
5251 	ifp->if_oerrors = adapter->stats.ecol +
5252 	    adapter->stats.latecol + adapter->watchdog_events;
5253 }
5254 
5255 /* Export a single 32-bit register via a read-only sysctl. */
5256 static int
5257 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5258 {
5259 	struct adapter *adapter;
5260 	u_int val;
5261 
5262 	adapter = oidp->oid_arg1;
5263 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5264 	return (sysctl_handle_int(oidp, &val, 0, req));
5265 }
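/*
** The register offset to read arrives via oid_arg2; see the
** SYSCTL_ADD_PROC() calls in em_add_hw_stats() below for how handlers
** are wired up (e.g. "device_control" -> E1000_CTRL).
*/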
5266 
5267 /*
5268  * Add sysctl variables, one per statistic, to the system.
5269  */
5270 static void
5271 em_add_hw_stats(struct adapter *adapter)
5272 {
5273 	device_t dev = adapter->dev;
5274 
5275 	struct tx_ring *txr = adapter->tx_rings;
5276 	struct rx_ring *rxr = adapter->rx_rings;
5277 
5278 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5279 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5280 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5281 	struct e1000_hw_stats *stats = &adapter->stats;
5282 
5283 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5284 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5285 
5286 #define QUEUE_NAME_LEN 32
5287 	char namebuf[QUEUE_NAME_LEN];
5288 
5289 	/* Driver Statistics */
5290 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5291 			CTLFLAG_RD, &adapter->link_irq,
5292 			"Link MSI-X IRQ Handled");
5293 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5294 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5295 			 "Std mbuf failed");
5296 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5297 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5298 			 "Std mbuf cluster failed");
5299 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5300 			CTLFLAG_RD, &adapter->dropped_pkts,
5301 			"Driver dropped packets");
5302 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5303 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5304 			"Driver tx dma failure in xmit");
5305 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5306 			CTLFLAG_RD, &adapter->rx_overruns,
5307 			"RX overruns");
5308 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5309 			CTLFLAG_RD, &adapter->watchdog_events,
5310 			"Watchdog timeouts");
5311 
5312 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5313 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5314 			em_sysctl_reg_handler, "IU",
5315 			"Device Control Register");
5316 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5317 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5318 			em_sysctl_reg_handler, "IU",
5319 			"Receiver Control Register");
5320 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5321 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5322 			"Flow Control High Watermark");
5323 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5324 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5325 			"Flow Control Low Watermark");
5326 
5327 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5328 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5329 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5330 					    CTLFLAG_RD, NULL, "Queue Name");
5331 		queue_list = SYSCTL_CHILDREN(queue_node);
5332 
5333 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5334 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5335 				E1000_TDH(txr->me),
5336 				em_sysctl_reg_handler, "IU",
5337 				"Transmit Descriptor Head");
5338 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5339 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5340 				E1000_TDT(txr->me),
5341 				em_sysctl_reg_handler, "IU",
5342 				"Transmit Descriptor Tail");
5343 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5344 				CTLFLAG_RD, &txr->tx_irq,
5345 				"Queue MSI-X Transmit Interrupts");
5346 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5347 				CTLFLAG_RD, &txr->no_desc_avail,
5348 				"Queue No Descriptor Available");
5349 
5350 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5351 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5352 				E1000_RDH(rxr->me),
5353 				em_sysctl_reg_handler, "IU",
5354 				"Receive Descriptor Head");
5355 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5356 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5357 				E1000_RDT(rxr->me),
5358 				em_sysctl_reg_handler, "IU",
5359 				"Receive Descriptor Tail");
5360 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5361 				CTLFLAG_RD, &rxr->rx_irq,
5362 				"Queue MSI-X Receive Interrupts");
5363 	}
5364 
5365 	/* MAC stats get their own sub node */
5366 
5367 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5368 				    CTLFLAG_RD, NULL, "Statistics");
5369 	stat_list = SYSCTL_CHILDREN(stat_node);
5370 
5371 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5372 			CTLFLAG_RD, &stats->ecol,
5373 			"Excessive collisions");
5374 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5375 			CTLFLAG_RD, &stats->scc,
5376 			"Single collisions");
5377 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5378 			CTLFLAG_RD, &stats->mcc,
5379 			"Multiple collisions");
5380 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5381 			CTLFLAG_RD, &stats->latecol,
5382 			"Late collisions");
5383 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5384 			CTLFLAG_RD, &stats->colc,
5385 			"Collision Count");
5386 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5387 			CTLFLAG_RD, &adapter->stats.symerrs,
5388 			"Symbol Errors");
5389 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5390 			CTLFLAG_RD, &adapter->stats.sec,
5391 			"Sequence Errors");
5392 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5393 			CTLFLAG_RD, &adapter->stats.dc,
5394 			"Defer Count");
5395 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5396 			CTLFLAG_RD, &adapter->stats.mpc,
5397 			"Missed Packets");
5398 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5399 			CTLFLAG_RD, &adapter->stats.rnbc,
5400 			"Receive No Buffers");
5401 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5402 			CTLFLAG_RD, &adapter->stats.ruc,
5403 			"Receive Undersize");
5404 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5405 			CTLFLAG_RD, &adapter->stats.rfc,
5406 			"Fragmented Packets Received");
5407 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5408 			CTLFLAG_RD, &adapter->stats.roc,
5409 			"Oversized Packets Received");
5410 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5411 			CTLFLAG_RD, &adapter->stats.rjc,
5412 			"Received Jabber");
5413 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5414 			CTLFLAG_RD, &adapter->stats.rxerrc,
5415 			"Receive Errors");
5416 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5417 			CTLFLAG_RD, &adapter->stats.crcerrs,
5418 			"CRC errors");
5419 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5420 			CTLFLAG_RD, &adapter->stats.algnerrc,
5421 			"Alignment Errors");
5422 	/* On 82575 these are collision counts */
5423 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5424 			CTLFLAG_RD, &adapter->stats.cexterr,
5425 			"Collision/Carrier extension errors");
5426 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5427 			CTLFLAG_RD, &adapter->stats.xonrxc,
5428 			"XON Received");
5429 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5430 			CTLFLAG_RD, &adapter->stats.xontxc,
5431 			"XON Transmitted");
5432 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5433 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5434 			"XOFF Received");
5435 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5436 			CTLFLAG_RD, &adapter->stats.xofftxc,
5437 			"XOFF Transmitted");
5438 
5439 	/* Packet Reception Stats */
5440 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5441 			CTLFLAG_RD, &adapter->stats.tpr,
5442 			"Total Packets Received");
5443 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5444 			CTLFLAG_RD, &adapter->stats.gprc,
5445 			"Good Packets Received");
5446 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5447 			CTLFLAG_RD, &adapter->stats.bprc,
5448 			"Broadcast Packets Received");
5449 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5450 			CTLFLAG_RD, &adapter->stats.mprc,
5451 			"Multicast Packets Received");
5452 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5453 			CTLFLAG_RD, &adapter->stats.prc64,
5454 			"64 byte frames received");
5455 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5456 			CTLFLAG_RD, &adapter->stats.prc127,
5457 			"65-127 byte frames received");
5458 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5459 			CTLFLAG_RD, &adapter->stats.prc255,
5460 			"128-255 byte frames received");
5461 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5462 			CTLFLAG_RD, &adapter->stats.prc511,
5463 			"256-511 byte frames received");
5464 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5465 			CTLFLAG_RD, &adapter->stats.prc1023,
5466 			"512-1023 byte frames received");
5467 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5468 			CTLFLAG_RD, &adapter->stats.prc1522,
5469 			"1024-1522 byte frames received");
5470 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5471 			CTLFLAG_RD, &adapter->stats.gorc,
5472 			"Good Octets Received");
5473 
5474 	/* Packet Transmission Stats */
5475 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5476 			CTLFLAG_RD, &adapter->stats.gotc,
5477 			"Good Octets Transmitted");
5478 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5479 			CTLFLAG_RD, &adapter->stats.tpt,
5480 			"Total Packets Transmitted");
5481 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5482 			CTLFLAG_RD, &adapter->stats.gptc,
5483 			"Good Packets Transmitted");
5484 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5485 			CTLFLAG_RD, &adapter->stats.bptc,
5486 			"Broadcast Packets Transmitted");
5487 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5488 			CTLFLAG_RD, &adapter->stats.mptc,
5489 			"Multicast Packets Transmitted");
5490 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5491 			CTLFLAG_RD, &adapter->stats.ptc64,
5492 			"64 byte frames transmitted");
5493 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5494 			CTLFLAG_RD, &adapter->stats.ptc127,
5495 			"65-127 byte frames transmitted");
5496 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5497 			CTLFLAG_RD, &adapter->stats.ptc255,
5498 			"128-255 byte frames transmitted");
5499 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5500 			CTLFLAG_RD, &adapter->stats.ptc511,
5501 			"256-511 byte frames transmitted");
5502 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5503 			CTLFLAG_RD, &adapter->stats.ptc1023,
5504 			"512-1023 byte frames transmitted");
5505 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5506 			CTLFLAG_RD, &adapter->stats.ptc1522,
5507 			"1024-1522 byte frames transmitted");
5508 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5509 			CTLFLAG_RD, &adapter->stats.tsctc,
5510 			"TSO Contexts Transmitted");
5511 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5512 			CTLFLAG_RD, &adapter->stats.tsctfc,
5513 			"TSO Contexts Failed");
5514 
5515 
5516 	/* Interrupt Stats */
5517 
5518 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5519 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5520 	int_list = SYSCTL_CHILDREN(int_node);
5521 
5522 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5523 			CTLFLAG_RD, &adapter->stats.iac,
5524 			"Interrupt Assertion Count");
5525 
5526 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5527 			CTLFLAG_RD, &adapter->stats.icrxptc,
5528 			"Interrupt Cause Rx Pkt Timer Expire Count");
5529 
5530 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5531 			CTLFLAG_RD, &adapter->stats.icrxatc,
5532 			"Interrupt Cause Rx Abs Timer Expire Count");
5533 
5534 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5535 			CTLFLAG_RD, &adapter->stats.ictxptc,
5536 			"Interrupt Cause Tx Pkt Timer Expire Count");
5537 
5538 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5539 			CTLFLAG_RD, &adapter->stats.ictxatc,
5540 			"Interrupt Cause Tx Abs Timer Expire Count");
5541 
5542 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5543 			CTLFLAG_RD, &adapter->stats.ictxqec,
5544 			"Interrupt Cause Tx Queue Empty Count");
5545 
5546 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5547 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5548 			"Interrupt Cause Tx Queue Min Thresh Count");
5549 
5550 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5551 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5552 			"Interrupt Cause Rx Desc Min Thresh Count");
5553 
5554 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5555 			CTLFLAG_RD, &adapter->stats.icrxoc,
5556 			"Interrupt Cause Receiver Overrun Count");
5557 }
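/*
** The resulting tree looks roughly like (unit number illustrative):
**	dev.em.0.mac_stats.good_pkts_recvd
**	dev.em.0.queue0.txd_head
**	dev.em.0.interrupts.asserts
*/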
5558 
5559 /**********************************************************************
5560  *
5561  *  This routine provides a way to dump out the adapter eeprom,
5562  *  often a useful debug/service tool. This only dumps the first
5563  *  often a useful debug/service tool. Only the first 32 words are
5564  *  dumped; the data that matters lives in that range.
5565  **********************************************************************/
5566 static int
5567 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5568 {
5569 	struct adapter *adapter = (struct adapter *)arg1;
5570 	int error;
5571 	int result;
5572 
5573 	result = -1;
5574 	error = sysctl_handle_int(oidp, &result, 0, req);
5575 
5576 	if (error || !req->newptr)
5577 		return (error);
5578 
5579 	/*
5580 	 * This value will cause a hex dump of the
5581 	 * first 32 16-bit words of the EEPROM to
5582 	 * the screen.
5583 	 */
5584 	if (result == 1)
5585 		em_print_nvm_info(adapter);
5586 
5587 	return (error);
5588 }
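/*
** Illustrative use, assuming the handler is attached under an "nvm"
** oid as elsewhere in this driver:
**	sysctl dev.em.0.nvm=1	# hex-dump the first 32 EEPROM words
*/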
5589 
5590 static void
5591 em_print_nvm_info(struct adapter *adapter)
5592 {
5593 	u16	eeprom_data;
5594 	int	i, j, row = 0;
5595 
5596 	/* It's a bit crude, but it gets the job done */
5597 	printf("\nInterface EEPROM Dump:\n");
5598 	printf("Offset\n0x0000  ");
5599 	for (i = 0, j = 0; i < 32; i++, j++) {
5600 		if (j == 8) { /* Make the offset block */
5601 			j = 0; ++row;
5602 			printf("\n0x00%x0  ", row);
5603 		}
5604 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5605 		printf("%04x ", eeprom_data);
5606 	}
5607 	printf("\n");
5608 }
5609 
5610 static int
5611 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5612 {
5613 	struct em_int_delay_info *info;
5614 	struct adapter *adapter;
5615 	u32 regval;
5616 	int error, usecs, ticks;
5617 
5618 	info = (struct em_int_delay_info *)arg1;
5619 	usecs = info->value;
5620 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5621 	if (error != 0 || req->newptr == NULL)
5622 		return (error);
5623 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5624 		return (EINVAL);
5625 	info->value = usecs;
5626 	ticks = EM_USECS_TO_TICKS(usecs);
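	/*
	 * EM_USECS_TO_TICKS() yields ticks of 1.024us; the ITR register
	 * instead counts in 256ns units, hence the x4 below
	 * (1.024us / 256ns = 4).
	 */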
5627 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5628 		ticks *= 4;
5629 
5630 	adapter = info->adapter;
5631 
5632 	EM_CORE_LOCK(adapter);
5633 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5634 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5635 	/* Handle a few special cases. */
5636 	switch (info->offset) {
5637 	case E1000_RDTR:
5638 		break;
5639 	case E1000_TIDV:
5640 		if (ticks == 0) {
5641 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5642 			/* Don't write 0 into the TIDV register. */
5643 			regval++;
5644 		} else
5645 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5646 		break;
5647 	}
5648 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5649 	EM_CORE_UNLOCK(adapter);
5650 	return (0);
5651 }
5652 
5653 static void
5654 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5655 	const char *description, struct em_int_delay_info *info,
5656 	int offset, int value)
5657 {
5658 	info->adapter = adapter;
5659 	info->offset = offset;
5660 	info->value = value;
5661 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5662 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5663 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5664 	    info, 0, em_sysctl_int_delay, "I", description);
5665 }
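/*
** Typical attach-time usage (a sketch; names as used elsewhere in
** this driver):
**	em_add_int_delay_sysctl(adapter, "rx_int_delay",
**	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
**	    E1000_REGISTER(&adapter->hw, E1000_RDTR),
**	    em_rx_int_delay_dflt);
*/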
5666 
5667 static void
5668 em_set_sysctl_value(struct adapter *adapter, const char *name,
5669 	const char *description, int *limit, int value)
5670 {
5671 	*limit = value;
5672 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5673 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5674 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5675 }
5676 
5677 
5678 /*
5679 ** Set flow control using sysctl:
5680 ** Flow control values:
5681 **      0 - off
5682 **      1 - rx pause
5683 **      2 - tx pause
5684 **      3 - full
5685 */
5686 static int
5687 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5688 {
5689 	struct adapter	*adapter = (struct adapter *) arg1;
5690 	int		error;
5691 	int		input = adapter->fc; /* report the current setting */
5692 
5693 	error = sysctl_handle_int(oidp, &input, 0, req);
5694 
5695 	if ((error) || (req->newptr == NULL))
5696 		return (error);
5697 
5698 	if (input == adapter->fc) /* no change? */
5699 		return (error);
5700 
5701 	switch (input) {
5702 	case e1000_fc_rx_pause:
5703 	case e1000_fc_tx_pause:
5704 	case e1000_fc_full:
5705 	case e1000_fc_none:
5706 		adapter->hw.fc.requested_mode = input;
5707 		adapter->fc = input;
5708 		break;
5709 	default:
5710 		/* Do nothing */
5711 		return (error);
5712 	}
5713 
5714 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5715 	e1000_force_mac_fc(&adapter->hw);
5716 	return (error);
5717 }
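/*
** Illustrative use, assuming the handler is attached under an "fc"
** oid:
**	sysctl dev.em.0.fc=3	# request full flow control
*/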
5718 
5719 /*
5720 ** Manage Energy Efficient Ethernet:
5721 ** Control values:
5722 **     0 - EEE enabled, 1 - EEE disabled (stored as eee_disable)
5723 */
5724 static int
5725 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5726 {
5727 	struct adapter *adapter = (struct adapter *) arg1;
5728 	int		error, value;
5729 
5730 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5731 	error = sysctl_handle_int(oidp, &value, 0, req);
5732 	if (error || req->newptr == NULL)
5733 		return (error);
5734 	EM_CORE_LOCK(adapter);
5735 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5736 	em_init_locked(adapter);
5737 	EM_CORE_UNLOCK(adapter);
5738 	return (0);
5739 }
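/*
** Illustrative use, assuming an "eee_control" oid:
**	sysctl dev.em.0.eee_control=1	# disable EEE
*/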
5740 
5741 static int
5742 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5743 {
5744 	struct adapter *adapter;
5745 	int error;
5746 	int result;
5747 
5748 	result = -1;
5749 	error = sysctl_handle_int(oidp, &result, 0, req);
5750 
5751 	if (error || !req->newptr)
5752 		return (error);
5753 
5754 	if (result == 1) {
5755 		adapter = (struct adapter *)arg1;
5756 		em_print_debug_info(adapter);
5757 	}
5758 
5759 	return (error);
5760 }
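/*
** Illustrative use, assuming a "debug" oid:
**	sysctl dev.em.0.debug=1		# dump state to the console
*/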
5761 
5762 /*
5763 ** This routine is meant to be fluid, add whatever is
5764 ** needed for debugging a problem.  -jfv
5765 */
5766 static void
5767 em_print_debug_info(struct adapter *adapter)
5768 {
5769 	device_t dev = adapter->dev;
5770 	struct tx_ring *txr = adapter->tx_rings;
5771 	struct rx_ring *rxr = adapter->rx_rings;
5772 
5773 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) {
5774 		printf("Interface is RUNNING ");
5775 		if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5776 			printf("and INACTIVE\n");
5777 		else
5778 			printf("and ACTIVE\n");
5779 	} else {
5780 		printf("Interface is NOT RUNNING\n");
5781 	}
5782 
5783 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5784 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5785 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5786 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5787 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5788 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5789 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5790 	device_printf(dev, "TX descriptors avail = %d\n",
5791 	    txr->tx_avail);
5792 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5793 	    txr->no_desc_avail);
5794 	device_printf(dev, "RX discarded packets = %ld\n",
5795 	    rxr->rx_discarded);
5796 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5797 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5798 }
5799