/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.8";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
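/*
 * Note: PCI_ANY_ID in the subvendor/subdevice columns acts as a wildcard,
 * and em_probe() below walks this table until it reaches the all-zero
 * terminator entry.
 */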

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
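
/*
 * A worked example of the units involved: the hardware delay timers
 * written via EM_USECS_TO_TICKS() tick in 1.024 usec increments, which
 * is why the two macros above scale by 1024/1000 with rounding.  The
 * ITR register instead counts 256 nsec increments, so DEFAULT_ITR
 * works out to 1000000000 / (8000 * 256) = 488, i.e. roughly
 * MAX_INTS_PER_SEC interrupts per second.
 */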

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");
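
/*
 * These are boot-time tunables (CTLFLAG_RDTUN).  As a hypothetical
 * example, /boot/loader.conf could enlarge the descriptor rings with:
 *
 *	hw.em.rxd="2048"
 *	hw.em.txd="2048"
 *
 * subject to the EM_MIN/EM_MAX bounds and the alignment check
 * performed in em_attach() below.
 */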

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet (1 disables EEE)");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter based on the PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  It
	 * must not exceed the hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
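	/*
	 * Concretely: the legacy descriptors used here are 16 bytes each,
	 * so with EM_DBA_ALIGN at 128 the check below reduces to requiring
	 * a descriptor count that is a multiple of 8 (128 / 16 == 8).
	 */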
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is
	** important for reading the nvm and
	** mac address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the request
 *  rather than doing an immediate send.  It is this queueing, rather
 *  than multiple hardware tx queues, that is the advantage in this
 *  driver.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m != NULL) {
		err = drbr_enqueue(ifp, txr->br, m);
		if (err)
			return (err);
	}

	/* Process the queue */
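	/*
	 * drbr_peek() returns the head of the ring without removing it;
	 * a successfully transmitted packet is then consumed with
	 * drbr_advance(), while on failure the (possibly modified) mbuf
	 * is returned to the ring with drbr_putback(), unless em_xmit()
	 * freed it, in which case we advance past the empty slot.
	 */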
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		ifp->if_obytes += next->m_pkthdr.len;
		if (next->m_flags & M_MCAST)
			ifp->if_omcasts++;
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
		/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
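		/*
		 * E.g. with a 9234 byte frame limit, the largest MTU
		 * accepted below is 9234 - ETHER_HDR_LEN (14) -
		 * ETHER_CRC_LEN (4) = 9216 bytes.
		 */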
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: the stack calls it as the
 *  init entry point in the network interface structure, and the
 *  driver uses it as a hw/sw initialization routine to get to a
 *  consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address; the user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this ensures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
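	/*
	 * For reference: MCLBYTES is 2048, MJUMPAGESIZE is one page
	 * (4096 bytes on most platforms) and MJUM9BYTES is 9216, so
	 * each cluster tier holds the matching max_frame_size range
	 * selected above.
	 */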

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
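/*
 * This runs as an interrupt filter, so it must not sleep: it only
 * reads (and thereby acks) ICR, masks further interrupts, and defers
 * the actual RX/TX work to the que taskqueue (and link handling to
 * the link task).
 */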
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}

/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
1750  *  media/mediaopt options with ifconfig.
1751  *
1752  **********************************************************************/
1753 static int
1754 em_media_change(struct ifnet *ifp)
1755 {
1756 	struct adapter *adapter = ifp->if_softc;
1757 	struct ifmedia  *ifm = &adapter->media;
1758 
1759 	INIT_DEBUGOUT("em_media_change: begin");
1760 
1761 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1762 		return (EINVAL);
1763 
1764 	EM_CORE_LOCK(adapter);
1765 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1766 	case IFM_AUTO:
1767 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1768 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1769 		break;
1770 	case IFM_1000_LX:
1771 	case IFM_1000_SX:
1772 	case IFM_1000_T:
1773 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1774 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1775 		break;
1776 	case IFM_100_TX:
1777 		adapter->hw.mac.autoneg = FALSE;
1778 		adapter->hw.phy.autoneg_advertised = 0;
1779 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1780 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1781 		else
1782 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1783 		break;
1784 	case IFM_10_T:
1785 		adapter->hw.mac.autoneg = FALSE;
1786 		adapter->hw.phy.autoneg_advertised = 0;
1787 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1788 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1789 		else
1790 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1791 		break;
1792 	default:
1793 		device_printf(adapter->dev, "Unsupported media type\n");
1794 	}
1795 
1796 	em_init_locked(adapter);
1797 	EM_CORE_UNLOCK(adapter);
1798 
1799 	return (0);
1800 }
1801 
1802 /*********************************************************************
1803  *
1804  *  This routine maps the mbufs to tx descriptors.
1805  *
1806  *  return 0 on success, positive on failure
1807  **********************************************************************/
1808 
1809 static int
1810 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1811 {
1812 	struct adapter		*adapter = txr->adapter;
1813 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1814 	bus_dmamap_t		map;
1815 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1816 	struct e1000_tx_desc	*ctxd = NULL;
1817 	struct mbuf		*m_head;
1818 	struct ether_header	*eh;
1819 	struct ip		*ip = NULL;
1820 	struct tcphdr		*tp = NULL;
1821 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1822 	int			ip_off, poff;
1823 	int			nsegs, i, j, first, last = 0;
1824 	int			error, do_tso, tso_desc = 0, remap = 1;
1825 
1826 retry:
1827 	m_head = *m_headp;
1828 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1829 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1830 	ip_off = poff = 0;
1831 
1832 	/*
1833 	 * Intel recommends that the entire IP/TCP header reside in a
1834 	 * single buffer. If multiple descriptors are used to describe
1835 	 * the IP and TCP header, each descriptor should describe one or
1836 	 * more complete headers; descriptors referencing only parts of
1837 	 * headers are not supported. If all layer headers are not
1838 	 * coalesced into a single buffer, each buffer should not cross
1839 	 * a 4KB boundary, or be larger than the maximum read request size.
1840 	 * The controller also requires modifying the IP/TCP header to
1841 	 * make TSO work, so we first get a writable mbuf chain, then
1842 	 * coalesce the ethernet/IP/TCP header into a single buffer to
1843 	 * meet the controller's requirement. This also simplifies
1844 	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1845 	 */
1846 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1847 		if (do_tso || (m_head->m_next != NULL &&
1848 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1849 			if (M_WRITABLE(*m_headp) == 0) {
1850 				m_head = m_dup(*m_headp, M_NOWAIT);
1851 				m_freem(*m_headp);
1852 				if (m_head == NULL) {
1853 					*m_headp = NULL;
1854 					return (ENOBUFS);
1855 				}
1856 				*m_headp = m_head;
1857 			}
1858 		}
1859 		/*
1860 		 * XXX
1861 		 * Assume IPv4, we don't have TSO/checksum offload support
1862 		 * for IPv6 yet.
1863 		 */
1864 		ip_off = sizeof(struct ether_header);
1865 		m_head = m_pullup(m_head, ip_off);
1866 		if (m_head == NULL) {
1867 			*m_headp = NULL;
1868 			return (ENOBUFS);
1869 		}
1870 		eh = mtod(m_head, struct ether_header *);
1871 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1872 			ip_off = sizeof(struct ether_vlan_header);
1873 			m_head = m_pullup(m_head, ip_off);
1874 			if (m_head == NULL) {
1875 				*m_headp = NULL;
1876 				return (ENOBUFS);
1877 			}
1878 		}
1879 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1880 		if (m_head == NULL) {
1881 			*m_headp = NULL;
1882 			return (ENOBUFS);
1883 		}
1884 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1885 		poff = ip_off + (ip->ip_hl << 2);
1886 		if (do_tso) {
1887 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1888 			if (m_head == NULL) {
1889 				*m_headp = NULL;
1890 				return (ENOBUFS);
1891 			}
1892 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1893 			/*
1894 			 * TSO workaround:
1895 			 *   pull 4 more bytes of data into it.
1896 			 */
1897 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1898 			if (m_head == NULL) {
1899 				*m_headp = NULL;
1900 				return (ENOBUFS);
1901 			}
1902 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1903 			ip->ip_len = 0;
1904 			ip->ip_sum = 0;
1905 			/*
1906 			 * The TCP pseudo checksum must not include the TCP
1907 			 * payload length, so the driver recomputes it here in
1908 			 * the form the hardware expects, in adherence to
1909 			 * Microsoft's Large Send specification.
1910 			 */
1911 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1913 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1914 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1915 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1916 			if (m_head == NULL) {
1917 				*m_headp = NULL;
1918 				return (ENOBUFS);
1919 			}
1920 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1921 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1922 			if (m_head == NULL) {
1923 				*m_headp = NULL;
1924 				return (ENOBUFS);
1925 			}
1926 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1927 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1928 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1929 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1930 			if (m_head == NULL) {
1931 				*m_headp = NULL;
1932 				return (ENOBUFS);
1933 			}
1934 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1935 		}
1936 		*m_headp = m_head;
1937 	}
1938 
1939 	/*
1940 	 * Map the packet for DMA
1941 	 *
1942 	 * Capture the first descriptor index,
1943 	 * this descriptor will have the index
1944 	 * of the EOP which is the only one that
1945 	 * now gets a DONE bit writeback.
1946 	 */
1947 	first = txr->next_avail_desc;
1948 	tx_buffer = &txr->tx_buffers[first];
1949 	tx_buffer_mapped = tx_buffer;
1950 	map = tx_buffer->map;
1951 
1952 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1953 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1954 
1955 	/*
1956 	 * There are two types of errors we can (try) to handle:
1957 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1958 	 *   out of segments.  Defragment the mbuf chain and try again.
1959 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1960 	 *   at this point in time.  Defer sending and try again later.
1961 	 * All other errors, in particular EINVAL, are fatal and prevent the
1962 	 * mbuf chain from ever going through.  Drop it and report error.
1963 	 */
1964 	if (error == EFBIG && remap) {
1965 		struct mbuf *m;
1966 
1967 		m = m_defrag(*m_headp, M_NOWAIT);
1968 		if (m == NULL) {
1969 			adapter->mbuf_alloc_failed++;
1970 			m_freem(*m_headp);
1971 			*m_headp = NULL;
1972 			return (ENOBUFS);
1973 		}
1974 		*m_headp = m;
1975 
1976 		/* Try it again, but only once */
1977 		remap = 0;
1978 		goto retry;
1979 	} else if (error == ENOMEM) {
1980 		adapter->no_tx_dma_setup++;
1981 		return (error);
1982 	} else if (error != 0) {
1983 		adapter->no_tx_dma_setup++;
1984 		m_freem(*m_headp);
1985 		*m_headp = NULL;
1986 		return (error);
1987 	}
1988 
1989 	/*
1990 	 * TSO Hardware workaround, if this packet is not
1991 	 * TSO, and is only a single descriptor long, and
1992 	 * it follows a TSO burst, then we need to add a
1993 	 * sentinel descriptor to prevent premature writeback.
1994 	 */
1995 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1996 		if (nsegs == 1)
1997 			tso_desc = TRUE;
1998 		txr->tx_tso = FALSE;
1999 	}
2000 
2001 	if (nsegs > (txr->tx_avail - 2)) {
2002 		txr->no_desc_avail++;
2003 		bus_dmamap_unload(txr->txtag, map);
2004 		return (ENOBUFS);
2005 	}
2006 	m_head = *m_headp;
2007 
2008 	/* Do hardware assists */
2009 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2010 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2011 		    &txd_upper, &txd_lower);
2012 		/* we need to make a final sentinel transmit desc */
2013 		tso_desc = TRUE;
2014 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2015 		em_transmit_checksum_setup(txr, m_head,
2016 		    ip_off, ip, &txd_upper, &txd_lower);
2017 
2018 	if (m_head->m_flags & M_VLANTAG) {
2019 		/* Set the vlan id. */
2020 		txd_upper |=
2021 		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2022 		/* Tell hardware to add tag */
2023 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2024 	}
2025 
2026 	i = txr->next_avail_desc;
2027 
2028 	/* Set up our transmit descriptors */
2029 	for (j = 0; j < nsegs; j++) {
2030 		bus_size_t seg_len;
2031 		bus_addr_t seg_addr;
2032 
2033 		tx_buffer = &txr->tx_buffers[i];
2034 		ctxd = &txr->tx_base[i];
2035 		seg_addr = segs[j].ds_addr;
2036 		seg_len  = segs[j].ds_len;
2037 		/*
2038 		** TSO Workaround:
2039 		** If this is the last descriptor, we want to
2040 		** split it so we have a small final sentinel
2041 		*/
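		/*
		** For example, a final 60-byte segment is emitted as a
		** 56-byte data descriptor plus a 4-byte sentinel at
		** (seg_addr + 56); EOP/RS then lands on the sentinel,
		** so the DD writeback targets the tiny final descriptor.
		*/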
2042 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2043 			seg_len -= 4;
2044 			ctxd->buffer_addr = htole64(seg_addr);
2045 			ctxd->lower.data = htole32(
2046 			adapter->txd_cmd | txd_lower | seg_len);
2047 			ctxd->upper.data =
2048 			    htole32(txd_upper);
2049 			if (++i == adapter->num_tx_desc)
2050 				i = 0;
2051 			/* Now make the sentinel */
2052 			++txd_used; /* using an extra txd */
2053 			ctxd = &txr->tx_base[i];
2054 			tx_buffer = &txr->tx_buffers[i];
2055 			ctxd->buffer_addr =
2056 			    htole64(seg_addr + seg_len);
2057 			ctxd->lower.data = htole32(
2058 			adapter->txd_cmd | txd_lower | 4);
2059 			ctxd->upper.data =
2060 			    htole32(txd_upper);
2061 			last = i;
2062 			if (++i == adapter->num_tx_desc)
2063 				i = 0;
2064 		} else {
2065 			ctxd->buffer_addr = htole64(seg_addr);
2066 			ctxd->lower.data = htole32(
2067 			adapter->txd_cmd | txd_lower | seg_len);
2068 			ctxd->upper.data =
2069 			    htole32(txd_upper);
2070 			last = i;
2071 			if (++i == adapter->num_tx_desc)
2072 				i = 0;
2073 		}
2074 		tx_buffer->m_head = NULL;
2075 		tx_buffer->next_eop = -1;
2076 	}
2077 
2078 	txr->next_avail_desc = i;
2079 	txr->tx_avail -= nsegs;
2080 	if (tso_desc) /* TSO used an extra for sentinel */
2081 		txr->tx_avail -= txd_used;
2082 
2083 	tx_buffer->m_head = m_head;
2084 	/*
2085 	** Here we swap the map so the last descriptor,
2086 	** which gets the completion interrupt has the
2087 	** real map, and the first descriptor gets the
2088 	** unused map from this descriptor.
2089 	*/
2090 	tx_buffer_mapped->map = tx_buffer->map;
2091 	tx_buffer->map = map;
2092 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2093 
2094 	/*
2095 	 * Last Descriptor of Packet
2096 	 * needs End Of Packet (EOP)
2097 	 * and Report Status (RS)
2098 	 */
2099 	ctxd->lower.data |=
2100 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2101 	/*
2102 	 * Keep track in the first buffer which
2103 	 * descriptor will be written back
2104 	 */
2105 	tx_buffer = &txr->tx_buffers[first];
2106 	tx_buffer->next_eop = last;
2107 	/* Update the watchdog time early and often */
2108 	txr->watchdog_time = ticks;
2109 
2110 	/*
2111 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2112 	 * that this frame is available to transmit.
2113 	 */
2114 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2115 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2116 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2117 
2118 	return (0);
2119 }
2120 
2121 static void
2122 em_set_promisc(struct adapter *adapter)
2123 {
2124 	struct ifnet	*ifp = adapter->ifp;
2125 	u32		reg_rctl;
2126 
2127 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2128 
2129 	if (ifp->if_flags & IFF_PROMISC) {
2130 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2131 		/* Turn this on if you want to see bad packets */
2132 		if (em_debug_sbp)
2133 			reg_rctl |= E1000_RCTL_SBP;
2134 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2135 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2136 		reg_rctl |= E1000_RCTL_MPE;
2137 		reg_rctl &= ~E1000_RCTL_UPE;
2138 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2139 	}
2140 }
2141 
2142 static void
2143 em_disable_promisc(struct adapter *adapter)
2144 {
2145 	struct ifnet	*ifp = adapter->ifp;
2146 	u32		reg_rctl;
2147 	int		mcnt = 0;
2148 
2149 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2150 	reg_rctl &= (~E1000_RCTL_UPE);
2151 	if (ifp->if_flags & IFF_ALLMULTI)
2152 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2153 	else {
2154 		struct  ifmultiaddr *ifma;
2155 #if __FreeBSD_version < 800000
2156 		IF_ADDR_LOCK(ifp);
2157 #else
2158 		if_maddr_rlock(ifp);
2159 #endif
2160 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2161 			if (ifma->ifma_addr->sa_family != AF_LINK)
2162 				continue;
2163 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2164 				break;
2165 			mcnt++;
2166 		}
2167 #if __FreeBSD_version < 800000
2168 		IF_ADDR_UNLOCK(ifp);
2169 #else
2170 		if_maddr_runlock(ifp);
2171 #endif
2172 	}
2173 	/* Don't disable if in MAX groups */
2174 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2175 		reg_rctl &= (~E1000_RCTL_MPE);
2176 	reg_rctl &= (~E1000_RCTL_SBP);
2177 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2178 }
2179 
2180 
2181 /*********************************************************************
2182  *  Multicast Update
2183  *
2184  *  This routine is called whenever multicast address list is updated.
2185  *
2186  **********************************************************************/
2187 
2188 static void
2189 em_set_multi(struct adapter *adapter)
2190 {
2191 	struct ifnet	*ifp = adapter->ifp;
2192 	struct ifmultiaddr *ifma;
2193 	u32 reg_rctl = 0;
2194 	u8  *mta; /* Multicast array memory */
2195 	int mcnt = 0;
2196 
2197 	IOCTL_DEBUGOUT("em_set_multi: begin");
2198 
2199 	mta = adapter->mta;
2200 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2201 
2202 	if (adapter->hw.mac.type == e1000_82542 &&
2203 	    adapter->hw.revision_id == E1000_REVISION_2) {
2204 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2205 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2206 			e1000_pci_clear_mwi(&adapter->hw);
2207 		reg_rctl |= E1000_RCTL_RST;
2208 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2209 		msec_delay(5);
2210 	}
2211 
2212 #if __FreeBSD_version < 800000
2213 	IF_ADDR_LOCK(ifp);
2214 #else
2215 	if_maddr_rlock(ifp);
2216 #endif
2217 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2218 		if (ifma->ifma_addr->sa_family != AF_LINK)
2219 			continue;
2220 
2221 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2222 			break;
2223 
2224 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2225 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2226 		mcnt++;
2227 	}
2228 #if __FreeBSD_version < 800000
2229 	IF_ADDR_UNLOCK(ifp);
2230 #else
2231 	if_maddr_runlock(ifp);
2232 #endif
2233 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2234 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2235 		reg_rctl |= E1000_RCTL_MPE;
2236 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2237 	} else
2238 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2239 
2240 	if (adapter->hw.mac.type == e1000_82542 &&
2241 	    adapter->hw.revision_id == E1000_REVISION_2) {
2242 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2243 		reg_rctl &= ~E1000_RCTL_RST;
2244 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2245 		msec_delay(5);
2246 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2247 			e1000_pci_set_mwi(&adapter->hw);
2248 	}
2249 }
2250 
2251 
2252 /*********************************************************************
2253  *  Timer routine
2254  *
2255  *  This routine checks for link status and updates statistics.
2256  *
2257  **********************************************************************/
2258 
2259 static void
2260 em_local_timer(void *arg)
2261 {
2262 	struct adapter	*adapter = arg;
2263 	struct ifnet	*ifp = adapter->ifp;
2264 	struct tx_ring	*txr = adapter->tx_rings;
2265 	struct rx_ring	*rxr = adapter->rx_rings;
2266 	u32		trigger;
2267 
2268 	EM_CORE_LOCK_ASSERT(adapter);
2269 
2270 	em_update_link_status(adapter);
2271 	em_update_stats_counters(adapter);
2272 
2273 	/* Reset LAA into RAR[0] on 82571 */
2274 	if ((adapter->hw.mac.type == e1000_82571) &&
2275 	    e1000_get_laa_state_82571(&adapter->hw))
2276 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2277 
2278 	/* Mask to use in the irq trigger */
2279 	if (adapter->msix_mem)
2280 		trigger = rxr->ims; /* RX for 82574 */
2281 	else
2282 		trigger = E1000_ICS_RXDMT0;
2283 
2284 	/*
2285 	** Check on the state of the TX queue(s); this
2286 	** can be done without the lock because it's RO
2287 	** and the HUNG state will be static if set.
2288 	*/
2289 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2290 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2291 		    (adapter->pause_frames == 0))
2292 			goto hung;
2293 		/* Schedule a TX tasklet if needed */
2294 		if (txr->tx_avail <= EM_MAX_SCATTER)
2295 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2296 	}
2297 
2298 	adapter->pause_frames = 0;
2299 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2300 #ifndef DEVICE_POLLING
2301 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2302 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2303 #endif
2304 	return;
2305 hung:
2306 	/* Looks like we're hung */
2307 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2308 	device_printf(adapter->dev,
2309 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2310 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2311 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2312 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2313 	    "Next TX to Clean = %d\n",
2314 	    txr->me, txr->tx_avail, txr->next_to_clean);
2315 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2316 	adapter->watchdog_events++;
2317 	adapter->pause_frames = 0;
2318 	em_init_locked(adapter);
2319 }
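
/*
 * Note on the hung test above: a queue is only declared hung when its
 * status has reached EM_QUEUE_HUNG and no pause frames were counted in
 * the interval; received flow control frames legitimately stall
 * transmit, so pause_frames suppresses false watchdog resets.
 */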
2320 
2321 
2322 static void
2323 em_update_link_status(struct adapter *adapter)
2324 {
2325 	struct e1000_hw *hw = &adapter->hw;
2326 	struct ifnet *ifp = adapter->ifp;
2327 	device_t dev = adapter->dev;
2328 	struct tx_ring *txr = adapter->tx_rings;
2329 	u32 link_check = 0;
2330 
2331 	/* Get the cached link value or read phy for real */
2332 	switch (hw->phy.media_type) {
2333 	case e1000_media_type_copper:
2334 		if (hw->mac.get_link_status) {
2335 			/* Do the work to read phy */
2336 			e1000_check_for_link(hw);
2337 			link_check = !hw->mac.get_link_status;
2338 			if (link_check) /* ESB2 fix */
2339 				e1000_cfg_on_link_up(hw);
2340 		} else
2341 			link_check = TRUE;
2342 		break;
2343 	case e1000_media_type_fiber:
2344 		e1000_check_for_link(hw);
2345 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2346 		    E1000_STATUS_LU);
2347 		break;
2348 	case e1000_media_type_internal_serdes:
2349 		e1000_check_for_link(hw);
2350 		link_check = adapter->hw.mac.serdes_has_link;
2351 		break;
2352 	default:
2353 	case e1000_media_type_unknown:
2354 		break;
2355 	}
2356 
2357 	/* Now check for a transition */
2358 	if (link_check && (adapter->link_active == 0)) {
2359 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2360 		    &adapter->link_duplex);
2361 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2362 		if ((adapter->link_speed != SPEED_1000) &&
2363 		    ((hw->mac.type == e1000_82571) ||
2364 		    (hw->mac.type == e1000_82572))) {
2365 			int tarc0;
2366 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2367 			tarc0 &= ~SPEED_MODE_BIT;
2368 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2369 		}
2370 		if (bootverbose)
2371 			device_printf(dev, "Link is up %d Mbps %s\n",
2372 			    adapter->link_speed,
2373 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2374 			    "Full Duplex" : "Half Duplex"));
2375 		adapter->link_active = 1;
2376 		adapter->smartspeed = 0;
2377 		ifp->if_baudrate = adapter->link_speed * 1000000;
2378 		if_link_state_change(ifp, LINK_STATE_UP);
2379 	} else if (!link_check && (adapter->link_active == 1)) {
2380 		ifp->if_baudrate = adapter->link_speed = 0;
2381 		adapter->link_duplex = 0;
2382 		if (bootverbose)
2383 			device_printf(dev, "Link is Down\n");
2384 		adapter->link_active = 0;
2385 		/* Link down, disable watchdog */
2386 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2387 			txr->queue_status = EM_QUEUE_IDLE;
2388 		if_link_state_change(ifp, LINK_STATE_DOWN);
2389 	}
2390 }
2391 
2392 /*********************************************************************
2393  *
2394  *  This routine disables all traffic on the adapter by issuing a
2395  *  global reset on the MAC and deallocates TX/RX buffers.
2396  *
2397  *  This routine should always be called with BOTH the CORE
2398  *  and TX locks.
2399  **********************************************************************/
2400 
2401 static void
2402 em_stop(void *arg)
2403 {
2404 	struct adapter	*adapter = arg;
2405 	struct ifnet	*ifp = adapter->ifp;
2406 	struct tx_ring	*txr = adapter->tx_rings;
2407 
2408 	EM_CORE_LOCK_ASSERT(adapter);
2409 
2410 	INIT_DEBUGOUT("em_stop: begin");
2411 
2412 	em_disable_intr(adapter);
2413 	callout_stop(&adapter->timer);
2414 
2415 	/* Tell the stack that the interface is no longer active */
2416 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2417 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2418 
2419 	/* Unarm watchdog timer. */
2420 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2421 		EM_TX_LOCK(txr);
2422 		txr->queue_status = EM_QUEUE_IDLE;
2423 		EM_TX_UNLOCK(txr);
2424 	}
2425 
2426 	e1000_reset_hw(&adapter->hw);
2427 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2428 
2429 	e1000_led_off(&adapter->hw);
2430 	e1000_cleanup_led(&adapter->hw);
2431 }
2432 
2433 
2434 /*********************************************************************
2435  *
2436  *  Determine hardware revision.
2437  *
2438  **********************************************************************/
2439 static void
2440 em_identify_hardware(struct adapter *adapter)
2441 {
2442 	device_t dev = adapter->dev;
2443 
2444 	/* Make sure our PCI config space has the necessary stuff set */
2445 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2446 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2447 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2448 		device_printf(dev, "Memory Access and/or Bus Master bits "
2449 		    "were not set!\n");
2450 		adapter->hw.bus.pci_cmd_word |=
2451 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2452 		pci_write_config(dev, PCIR_COMMAND,
2453 		    adapter->hw.bus.pci_cmd_word, 2);
2454 	}
2455 
2456 	/* Save off the information about this board */
2457 	adapter->hw.vendor_id = pci_get_vendor(dev);
2458 	adapter->hw.device_id = pci_get_device(dev);
2459 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2460 	adapter->hw.subsystem_vendor_id =
2461 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2462 	adapter->hw.subsystem_device_id =
2463 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2464 
2465 	/* Do Shared Code Init and Setup */
2466 	if (e1000_set_mac_type(&adapter->hw)) {
2467 		device_printf(dev, "Setup init failure\n");
2468 		return;
2469 	}
2470 }
2471 
2472 static int
2473 em_allocate_pci_resources(struct adapter *adapter)
2474 {
2475 	device_t	dev = adapter->dev;
2476 	int		rid;
2477 
2478 	rid = PCIR_BAR(0);
2479 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2480 	    &rid, RF_ACTIVE);
2481 	if (adapter->memory == NULL) {
2482 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2483 		return (ENXIO);
2484 	}
2485 	adapter->osdep.mem_bus_space_tag =
2486 	    rman_get_bustag(adapter->memory);
2487 	adapter->osdep.mem_bus_space_handle =
2488 	    rman_get_bushandle(adapter->memory);
2489 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2490 
2491 	/* Default to a single queue */
2492 	adapter->num_queues = 1;
2493 
2494 	/*
2495 	 * Setup MSI/X or MSI if PCI Express
2496 	 */
2497 	adapter->msix = em_setup_msix(adapter);
2498 
2499 	adapter->hw.back = &adapter->osdep;
2500 
2501 	return (0);
2502 }
2503 
2504 /*********************************************************************
2505  *
2506  *  Setup the Legacy or MSI Interrupt handler
2507  *
2508  **********************************************************************/
2509 int
2510 em_allocate_legacy(struct adapter *adapter)
2511 {
2512 	device_t dev = adapter->dev;
2513 	struct tx_ring	*txr = adapter->tx_rings;
2514 	int error, rid = 0;
2515 
2516 	/* Manually turn off all interrupts */
2517 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2518 
2519 	if (adapter->msix == 1) /* using MSI */
2520 		rid = 1;
2521 	/* We allocate a single interrupt resource */
2522 	adapter->res = bus_alloc_resource_any(dev,
2523 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2524 	if (adapter->res == NULL) {
2525 		device_printf(dev, "Unable to allocate bus resource: "
2526 		    "interrupt\n");
2527 		return (ENXIO);
2528 	}
2529 
2530 	/*
2531 	 * Allocate a fast interrupt and the associated
2532 	 * deferred processing contexts.
2533 	 */
2534 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2535 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2536 	    taskqueue_thread_enqueue, &adapter->tq);
2537 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2538 	    device_get_nameunit(adapter->dev));
2539 	/* Use a TX only tasklet for local timer */
2540 	/* A TX-only task, also kicked from the local timer */
2541 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2542 	    taskqueue_thread_enqueue, &txr->tq);
2543 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2544 	    device_get_nameunit(adapter->dev));
2545 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2546 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2547 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2548 		device_printf(dev, "Failed to register fast interrupt "
2549 			    "handler: %d\n", error);
2550 		taskqueue_free(adapter->tq);
2551 		adapter->tq = NULL;
2552 		return (error);
2553 	}
2554 
2555 	return (0);
2556 }
2557 
2558 /*********************************************************************
2559  *
2560  *  Setup the MSIX Interrupt handlers
2561  *   This is not really Multiqueue; rather,
2562  *   it's just separate interrupt vectors
2563  *   for TX, RX, and Link.
2564  *
2565  **********************************************************************/
2566 int
2567 em_allocate_msix(struct adapter *adapter)
2568 {
2569 	device_t	dev = adapter->dev;
2570 	struct		tx_ring *txr = adapter->tx_rings;
2571 	struct		rx_ring *rxr = adapter->rx_rings;
2572 	int		error, rid, vector = 0;
2573 
2574 
2575 	/* Make sure all interrupts are disabled */
2576 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2577 
2578 	/* First set up ring resources */
2579 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2580 
2581 		/* RX ring */
2582 		rid = vector + 1;
2583 
2584 		rxr->res = bus_alloc_resource_any(dev,
2585 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2586 		if (rxr->res == NULL) {
2587 			device_printf(dev,
2588 			    "Unable to allocate bus resource: "
2589 			    "RX MSIX Interrupt %d\n", i);
2590 			return (ENXIO);
2591 		}
2592 		if ((error = bus_setup_intr(dev, rxr->res,
2593 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2594 		    rxr, &rxr->tag)) != 0) {
2595 			device_printf(dev, "Failed to register RX handler");
2596 			return (error);
2597 		}
2598 #if __FreeBSD_version >= 800504
2599 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2600 #endif
2601 		rxr->msix = vector++; /* NOTE increment vector for TX */
2602 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2603 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2604 		    taskqueue_thread_enqueue, &rxr->tq);
2605 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2606 		    device_get_nameunit(adapter->dev));
2607 		/*
2608 		** Set the bit to enable interrupt
2609 		** in E1000_IMS -- bits 20 and 21
2610 		** are for RX0 and RX1, note this has
2611 		** NOTHING to do with the MSIX vector
2612 		*/
2613 		rxr->ims = 1 << (20 + i);
2614 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2615 
2616 		/* TX ring */
2617 		rid = vector + 1;
2618 		txr->res = bus_alloc_resource_any(dev,
2619 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2620 		if (txr->res == NULL) {
2621 			device_printf(dev,
2622 			    "Unable to allocate bus resource: "
2623 			    "TX MSIX Interrupt %d\n", i);
2624 			return (ENXIO);
2625 		}
2626 		if ((error = bus_setup_intr(dev, txr->res,
2627 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2628 		    txr, &txr->tag)) != 0) {
2629 			device_printf(dev, "Failed to register TX handler");
2630 			return (error);
2631 		}
2632 #if __FreeBSD_version >= 800504
2633 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2634 #endif
2635 		txr->msix = vector++; /* Increment vector for next pass */
2636 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2637 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2638 		    taskqueue_thread_enqueue, &txr->tq);
2639 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2640 		    device_get_nameunit(adapter->dev));
2641 		/*
2642 		** Set the bit to enable interrupt
2643 		** in E1000_IMS -- bits 22 and 23
2644 		** are for TX0 and TX1, note this has
2645 		** NOTHING to do with the MSIX vector
2646 		*/
2647 		txr->ims = 1 << (22 + i);
2648 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2649 	}
2650 
2651 	/* Link interrupt */
2652 	++rid;
2653 	adapter->res = bus_alloc_resource_any(dev,
2654 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2655 	if (!adapter->res) {
2656 		device_printf(dev, "Unable to allocate "
2657 		    "bus resource: Link interrupt [%d]\n", rid);
2658 		return (ENXIO);
2659 	}
2660 	/* Set the link handler function */
2661 	error = bus_setup_intr(dev, adapter->res,
2662 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2663 	    em_msix_link, adapter, &adapter->tag);
2664 	if (error) {
2665 		adapter->res = NULL;
2666 		device_printf(dev, "Failed to register LINK handler");
2667 		return (error);
2668 	}
2669 #if __FreeBSD_version >= 800504
2670 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2671 #endif
2672 	adapter->linkvec = vector;
2673 	adapter->ivars |=  (8 | vector) << 16;
2674 	adapter->ivars |= 0x80000000;
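	/*
	 * The accumulated ivars word maps causes to MSIX vectors in
	 * 4-bit fields: each field holds a vector number plus a valid
	 * bit (the OR'd 8). RX queue fields start at bit 0, TX queue
	 * fields at bit 8, and the link/other field at bit 16; the word
	 * is later written to the hardware IVAR register at init time.
	 */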
2675 
2676 	return (0);
2677 }
2678 
2679 
2680 static void
2681 em_free_pci_resources(struct adapter *adapter)
2682 {
2683 	device_t	dev = adapter->dev;
2684 	struct tx_ring	*txr;
2685 	struct rx_ring	*rxr;
2686 	int		rid;
2687 
2688 
2689 	/*
2690 	** Release all the queue interrupt resources:
2691 	*/
2692 	for (int i = 0; i < adapter->num_queues; i++) {
2693 		txr = &adapter->tx_rings[i];
2694 		rxr = &adapter->rx_rings[i];
2695 		/* an early abort? */
2696 		if ((txr == NULL) || (rxr == NULL))
2697 			break;
2698 		rid = txr->msix +1;
2699 		if (txr->tag != NULL) {
2700 			bus_teardown_intr(dev, txr->res, txr->tag);
2701 			txr->tag = NULL;
2702 		}
2703 		if (txr->res != NULL)
2704 			bus_release_resource(dev, SYS_RES_IRQ,
2705 			    rid, txr->res);
2706 		rid = rxr->msix +1;
2707 		if (rxr->tag != NULL) {
2708 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2709 			rxr->tag = NULL;
2710 		}
2711 		if (rxr->res != NULL)
2712 			bus_release_resource(dev, SYS_RES_IRQ,
2713 			    rid, rxr->res);
2714 	}
2715 
2716 	if (adapter->linkvec) /* we are doing MSIX */
2717 		rid = adapter->linkvec + 1;
2718 	else
2719 		rid = (adapter->msix != 0) ? 1 : 0;
2720 
2721 	if (adapter->tag != NULL) {
2722 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2723 		adapter->tag = NULL;
2724 	}
2725 
2726 	if (adapter->res != NULL)
2727 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2728 
2729 
2730 	if (adapter->msix)
2731 		pci_release_msi(dev);
2732 
2733 	if (adapter->msix_mem != NULL)
2734 		bus_release_resource(dev, SYS_RES_MEMORY,
2735 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2736 
2737 	if (adapter->memory != NULL)
2738 		bus_release_resource(dev, SYS_RES_MEMORY,
2739 		    PCIR_BAR(0), adapter->memory);
2740 
2741 	if (adapter->flash != NULL)
2742 		bus_release_resource(dev, SYS_RES_MEMORY,
2743 		    EM_FLASH, adapter->flash);
2744 }
2745 
2746 /*
2747  * Setup MSI or MSI/X
2748  */
2749 static int
2750 em_setup_msix(struct adapter *adapter)
2751 {
2752 	device_t dev = adapter->dev;
2753 	int val = 0;
2754 
2755 	/*
2756 	** Setup MSI/X for Hartwell: tests have shown
2757 	** use of two queues to be unstable, and to
2758 	** provide no great gain anyway, so we simply
2759 	** separate the interrupts and use a single queue.
2760 	*/
2761 	if ((adapter->hw.mac.type == e1000_82574) &&
2762 	    (em_enable_msix == TRUE)) {
2763 		/* Map the MSIX BAR */
2764 		int rid = PCIR_BAR(EM_MSIX_BAR);
2765 		adapter->msix_mem = bus_alloc_resource_any(dev,
2766 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2767 		if (!adapter->msix_mem) {
2768 			/* May not be enabled */
2769 			device_printf(adapter->dev,
2770 			    "Unable to map MSIX table\n");
2771 			goto msi;
2772 		}
2773 		val = pci_msix_count(dev);
2774 		/* We only need 3 vectors */
2775 		if (val > 3)
2776 			val = 3;
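		/*
		 * NB: val has just been clamped to at most 3, so the
		 * (val != 5) half of the test below can never trigger;
		 * only (val != 3) matters after the clamp.
		 */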
2777 		if ((val != 3) && (val != 5)) {
2778 			bus_release_resource(dev, SYS_RES_MEMORY,
2779 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2780 			adapter->msix_mem = NULL;
2781 			device_printf(adapter->dev,
2782 			    "MSIX: incorrect vectors, using MSI\n");
2783 			goto msi;
2784 		}
2785 
2786 		if (pci_alloc_msix(dev, &val) == 0) {
2787 			device_printf(adapter->dev,
2788 			    "Using MSIX interrupts "
2789 			    "with %d vectors\n", val);
2790 		}
2791 
2792 		return (val);
2793 	}
2794 msi:
2795 	val = pci_msi_count(dev);
2796 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2797 		adapter->msix = 1;
2798 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2799 		return (val);
2800 	}
2801 	/* Should only happen due to manual configuration */
2802 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2803 	return (0);
2804 }
2805 
2806 
2807 /*********************************************************************
2808  *
2809  *  Initialize the hardware to a configuration
2810  *  as specified by the adapter structure.
2811  *
2812  **********************************************************************/
2813 static void
2814 em_reset(struct adapter *adapter)
2815 {
2816 	device_t	dev = adapter->dev;
2817 	struct ifnet	*ifp = adapter->ifp;
2818 	struct e1000_hw	*hw = &adapter->hw;
2819 	u16		rx_buffer_size;
2820 	u32		pba;
2821 
2822 	INIT_DEBUGOUT("em_reset: begin");
2823 
2824 	/* Set up smart power down as default off on newer adapters. */
2825 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2826 	    hw->mac.type == e1000_82572)) {
2827 		u16 phy_tmp = 0;
2828 
2829 		/* Speed up time to link by disabling smart power down. */
2830 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2831 		phy_tmp &= ~IGP02E1000_PM_SPD;
2832 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2833 	}
2834 
2835 	/*
2836 	 * Packet Buffer Allocation (PBA)
2837 	 * Writing PBA sets the receive portion of the buffer;
2838 	 * the remainder is used for the transmit buffer.
2839 	 */
2840 	switch (hw->mac.type) {
2841 	/* Total Packet Buffer on these is 48K */
2842 	case e1000_82571:
2843 	case e1000_82572:
2844 	case e1000_80003es2lan:
2845 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2846 		break;
2847 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2848 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2849 		break;
2850 	case e1000_82574:
2851 	case e1000_82583:
2852 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2853 		break;
2854 	case e1000_ich8lan:
2855 		pba = E1000_PBA_8K;
2856 		break;
2857 	case e1000_ich9lan:
2858 	case e1000_ich10lan:
2859 		/* Boost Receive side for jumbo frames */
2860 		if (adapter->hw.mac.max_frame_size > 4096)
2861 			pba = E1000_PBA_14K;
2862 		else
2863 			pba = E1000_PBA_10K;
2864 		break;
2865 	case e1000_pchlan:
2866 	case e1000_pch2lan:
2867 	case e1000_pch_lpt:
2868 		pba = E1000_PBA_26K;
2869 		break;
2870 	default:
2871 		if (adapter->hw.mac.max_frame_size > 8192)
2872 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2873 		else
2874 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2875 	}
2876 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2877 
2878 	/*
2879 	 * These parameters control the automatic generation (Tx) and
2880 	 * response (Rx) to Ethernet PAUSE frames.
2881 	 * - High water mark should allow for at least two frames to be
2882 	 *   received after sending an XOFF.
2883 	 * - Low water mark works best when it is very near the high water mark.
2884 	 *   This allows the receiver to restart by sending XON when it has
2885 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2886 	 *   restart after one full frame is pulled from the buffer. There
2887 	 *   could be several smaller frames in the buffer and if so they will
2888 	 *   not trigger the XON until their total number reduces the buffer
2889 	 *   by 1500.
2890 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2891 	 */
2892 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2893 	hw->fc.high_water = rx_buffer_size -
2894 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2895 	hw->fc.low_water = hw->fc.high_water - 1500;
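	/*
	 * For example, with a 48K receive allocation and a standard
	 * 1518-byte max frame: rx_buffer_size = 48 << 10 = 49152,
	 * high_water = 49152 - roundup2(1518, 1024) = 47104, and
	 * low_water = 47104 - 1500 = 45604.
	 */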
2896 
2897 	if (adapter->fc) /* locally set flow control value? */
2898 		hw->fc.requested_mode = adapter->fc;
2899 	else
2900 		hw->fc.requested_mode = e1000_fc_full;
2901 
2902 	if (hw->mac.type == e1000_80003es2lan)
2903 		hw->fc.pause_time = 0xFFFF;
2904 	else
2905 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2906 
2907 	hw->fc.send_xon = TRUE;
2908 
2909 	/* Device specific overrides/settings */
2910 	switch (hw->mac.type) {
2911 	case e1000_pchlan:
2912 		/* Workaround: no TX flow ctrl for PCH */
2913 		hw->fc.requested_mode = e1000_fc_rx_pause;
2914 		hw->fc.pause_time = 0xFFFF; /* override */
2915 		if (ifp->if_mtu > ETHERMTU) {
2916 			hw->fc.high_water = 0x3500;
2917 			hw->fc.low_water = 0x1500;
2918 		} else {
2919 			hw->fc.high_water = 0x5000;
2920 			hw->fc.low_water = 0x3000;
2921 		}
2922 		hw->fc.refresh_time = 0x1000;
2923 		break;
2924 	case e1000_pch2lan:
2925 	case e1000_pch_lpt:
2926 		hw->fc.high_water = 0x5C20;
2927 		hw->fc.low_water = 0x5048;
2928 		hw->fc.pause_time = 0x0650;
2929 		hw->fc.refresh_time = 0x0400;
2930 		/* Jumbos need adjusted PBA */
2931 		if (ifp->if_mtu > ETHERMTU)
2932 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2933 		else
2934 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2935 		break;
2936 	case e1000_ich9lan:
2937 	case e1000_ich10lan:
2938 		if (ifp->if_mtu > ETHERMTU) {
2939 			hw->fc.high_water = 0x2800;
2940 			hw->fc.low_water = hw->fc.high_water - 8;
2941 			break;
2942 		}
2943 		/* else fall thru */
2944 	default:
2945 		if (hw->mac.type == e1000_80003es2lan)
2946 			hw->fc.pause_time = 0xFFFF;
2947 		break;
2948 	}
2949 
2950 	/* Issue a global reset */
2951 	e1000_reset_hw(hw);
2952 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2953 	em_disable_aspm(adapter);
2954 	/* and a re-init */
2955 	if (e1000_init_hw(hw) < 0) {
2956 		device_printf(dev, "Hardware Initialization Failed\n");
2957 		return;
2958 	}
2959 
2960 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2961 	e1000_get_phy_info(hw);
2962 	e1000_check_for_link(hw);
2963 	return;
2964 }
2965 
2966 /*********************************************************************
2967  *
2968  *  Setup networking device structure and register an interface.
2969  *
2970  **********************************************************************/
2971 static int
2972 em_setup_interface(device_t dev, struct adapter *adapter)
2973 {
2974 	struct ifnet   *ifp;
2975 
2976 	INIT_DEBUGOUT("em_setup_interface: begin");
2977 
2978 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2979 	if (ifp == NULL) {
2980 		device_printf(dev, "cannot allocate ifnet structure\n");
2981 		return (-1);
2982 	}
2983 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2984 	ifp->if_init =  em_init;
2985 	ifp->if_softc = adapter;
2986 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2987 	ifp->if_ioctl = em_ioctl;
2988 #ifdef EM_MULTIQUEUE
2989 	/* Multiqueue stack interface */
2990 	ifp->if_transmit = em_mq_start;
2991 	ifp->if_qflush = em_qflush;
2992 #else
2993 	ifp->if_start = em_start;
2994 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2995 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2996 	IFQ_SET_READY(&ifp->if_snd);
2997 #endif
2998 
2999 	ether_ifattach(ifp, adapter->hw.mac.addr);
3000 
3001 	ifp->if_capabilities = ifp->if_capenable = 0;
3002 
3003 
3004 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3005 	ifp->if_capabilities |= IFCAP_TSO4;
3006 	/*
3007 	 * Tell the upper layer(s) we
3008 	 * support full VLAN capability
3009 	 */
3010 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3011 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3012 			     |  IFCAP_VLAN_HWTSO
3013 			     |  IFCAP_VLAN_MTU;
3014 	ifp->if_capenable = ifp->if_capabilities;
3015 
3016 	/*
3017 	** Don't turn this on by default: if vlans are
3018 	** created on another pseudo device (e.g. lagg)
3019 	** then vlan events are not passed through, breaking
3020 	** operation, but with HW FILTER off it works. If
3021 	** using vlans directly on the em driver you can
3022 	** enable this and get full hardware tag filtering.
3023 	*/
3024 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3025 
3026 #ifdef DEVICE_POLLING
3027 	ifp->if_capabilities |= IFCAP_POLLING;
3028 #endif
3029 
3030 	/* Enable only WOL MAGIC by default */
3031 	if (adapter->wol) {
3032 		ifp->if_capabilities |= IFCAP_WOL;
3033 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3034 	}
3035 
3036 	/*
3037 	 * Specify the media types supported by this adapter and register
3038 	 * callbacks to update media and link information
3039 	 */
3040 	ifmedia_init(&adapter->media, IFM_IMASK,
3041 	    em_media_change, em_media_status);
3042 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3043 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3044 		u_char fiber_type = IFM_1000_SX;	/* default type */
3045 
3046 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3047 			    0, NULL);
3048 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3049 	} else {
3050 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3051 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3052 			    0, NULL);
3053 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3054 			    0, NULL);
3055 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3056 			    0, NULL);
3057 		if (adapter->hw.phy.type != e1000_phy_ife) {
3058 			ifmedia_add(&adapter->media,
3059 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3060 			ifmedia_add(&adapter->media,
3061 				IFM_ETHER | IFM_1000_T, 0, NULL);
3062 		}
3063 	}
3064 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3065 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3066 	return (0);
3067 }
3068 
3069 
3070 /*
3071  * Manage DMA'able memory.
3072  */
3073 static void
3074 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3075 {
3076 	if (error)
3077 		return;
3078 	*(bus_addr_t *) arg = segs[0].ds_addr;
3079 }
3080 
3081 static int
3082 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3083         struct em_dma_alloc *dma, int mapflags)
3084 {
3085 	int error;
3086 
3087 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3088 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3089 				BUS_SPACE_MAXADDR,	/* lowaddr */
3090 				BUS_SPACE_MAXADDR,	/* highaddr */
3091 				NULL, NULL,		/* filter, filterarg */
3092 				size,			/* maxsize */
3093 				1,			/* nsegments */
3094 				size,			/* maxsegsize */
3095 				0,			/* flags */
3096 				NULL,			/* lockfunc */
3097 				NULL,			/* lockarg */
3098 				&dma->dma_tag);
3099 	if (error) {
3100 		device_printf(adapter->dev,
3101 		    "%s: bus_dma_tag_create failed: %d\n",
3102 		    __func__, error);
3103 		goto fail_0;
3104 	}
3105 
3106 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3107 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3108 	if (error) {
3109 		device_printf(adapter->dev,
3110 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3111 		    __func__, (uintmax_t)size, error);
3112 		goto fail_2;
3113 	}
3114 
3115 	dma->dma_paddr = 0;
3116 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3117 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3118 	if (error || dma->dma_paddr == 0) {
3119 		device_printf(adapter->dev,
3120 		    "%s: bus_dmamap_load failed: %d\n",
3121 		    __func__, error);
3122 		goto fail_3;
3123 	}
3124 
3125 	return (0);
3126 
3127 fail_3:
3128 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3129 fail_2:
3130 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3131 	bus_dma_tag_destroy(dma->dma_tag);
3132 fail_0:
3133 	dma->dma_map = NULL;
3134 	dma->dma_tag = NULL;
3135 
3136 	return (error);
3137 }
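
/*
 * em_dma_malloc() follows the canonical busdma three-step: create a
 * tag describing the allocation constraints, allocate DMA-safe memory
 * against that tag, then load the map so that em_dmamap_cb() captures
 * the single segment's bus address into dma->dma_paddr.
 */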
3138 
3139 static void
3140 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3141 {
3142 	if (dma->dma_tag == NULL)
3143 		return;
3144 	if (dma->dma_map != NULL) {
3145 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3146 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3147 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3148 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3149 		dma->dma_map = NULL;
3150 	}
3151 	bus_dma_tag_destroy(dma->dma_tag);
3152 	dma->dma_tag = NULL;
3153 }
3154 
3155 
3156 /*********************************************************************
3157  *
3158  *  Allocate memory for the transmit and receive rings, and then
3159  *  the descriptors associated with each, called only once at attach.
3160  *
3161  **********************************************************************/
3162 static int
3163 em_allocate_queues(struct adapter *adapter)
3164 {
3165 	device_t		dev = adapter->dev;
3166 	struct tx_ring		*txr = NULL;
3167 	struct rx_ring		*rxr = NULL;
3168 	int rsize, tsize, error = E1000_SUCCESS;
3169 	int txconf = 0, rxconf = 0;
3170 
3171 
3172 	/* Allocate the TX ring struct memory */
3173 	if (!(adapter->tx_rings =
3174 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3175 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3176 		device_printf(dev, "Unable to allocate TX ring memory\n");
3177 		error = ENOMEM;
3178 		goto fail;
3179 	}
3180 
3181 	/* Now allocate the RX */
3182 	if (!(adapter->rx_rings =
3183 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3184 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3185 		device_printf(dev, "Unable to allocate RX ring memory\n");
3186 		error = ENOMEM;
3187 		goto rx_fail;
3188 	}
3189 
3190 	tsize = roundup2(adapter->num_tx_desc *
3191 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3192 	/*
3193 	 * Now set up the TX queues; txconf is needed to handle the
3194 	 * possibility that things fail midcourse and we need to
3195 	 * unwind the allocations gracefully
3196 	 */
3197 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3198 		/* Set up some basics */
3199 		txr = &adapter->tx_rings[i];
3200 		txr->adapter = adapter;
3201 		txr->me = i;
3202 
3203 		/* Initialize the TX lock */
3204 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3205 		    device_get_nameunit(dev), txr->me);
3206 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3207 
3208 		if (em_dma_malloc(adapter, tsize,
3209 			&txr->txdma, BUS_DMA_NOWAIT)) {
3210 			device_printf(dev,
3211 			    "Unable to allocate TX Descriptor memory\n");
3212 			error = ENOMEM;
3213 			goto err_tx_desc;
3214 		}
3215 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3216 		bzero((void *)txr->tx_base, tsize);
3217 
3218 		if (em_allocate_transmit_buffers(txr)) {
3219 			device_printf(dev,
3220 			    "Critical Failure setting up transmit buffers\n");
3221 			error = ENOMEM;
3222 			goto err_tx_desc;
3223 		}
3224 #if __FreeBSD_version >= 800000
3225 		/* Allocate a buf ring */
3226 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3227 		    M_WAITOK, &txr->tx_mtx);
3228 #endif
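		/*
		 * The buf_ring above backs the drbr_*() calls in the
		 * if_transmit (EM_MULTIQUEUE) path: 4096 entries,
		 * created with this ring's TX mutex.
		 */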
3229 	}
3230 
3231 	/*
3232 	 * Next the RX queues...
3233 	 */
3234 	rsize = roundup2(adapter->num_rx_desc *
3235 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3236 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3237 		rxr = &adapter->rx_rings[i];
3238 		rxr->adapter = adapter;
3239 		rxr->me = i;
3240 
3241 		/* Initialize the RX lock */
3242 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3243 		    device_get_nameunit(dev), rxr->me);
3244 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3245 
3246 		if (em_dma_malloc(adapter, rsize,
3247 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3248 			device_printf(dev,
3249 			    "Unable to allocate RxDescriptor memory\n");
3250 			error = ENOMEM;
3251 			goto err_rx_desc;
3252 		}
3253 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3254 		bzero((void *)rxr->rx_base, rsize);
3255 
3256 		/* Allocate receive buffers for the ring */
3257 		if (em_allocate_receive_buffers(rxr)) {
3258 			device_printf(dev,
3259 			    "Critical Failure setting up receive buffers\n");
3260 			error = ENOMEM;
3261 			goto err_rx_desc;
3262 		}
3263 	}
3264 
3265 	return (0);
3266 
3267 err_rx_desc:
3268 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3269 		em_dma_free(adapter, &rxr->rxdma);
3270 err_tx_desc:
3271 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3272 		em_dma_free(adapter, &txr->txdma);
3273 	free(adapter->rx_rings, M_DEVBUF);
3274 rx_fail:
3275 #if __FreeBSD_version >= 800000
3276 	buf_ring_free(txr->br, M_DEVBUF);
3277 #endif
3278 	free(adapter->tx_rings, M_DEVBUF);
3279 fail:
3280 	return (error);
3281 }
3282 
3283 
3284 /*********************************************************************
3285  *
3286  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3287  *  the information needed to transmit a packet on the wire. This is
3288  *  called only once at attach, setup is done every reset.
3289  *
3290  **********************************************************************/
3291 static int
3292 em_allocate_transmit_buffers(struct tx_ring *txr)
3293 {
3294 	struct adapter *adapter = txr->adapter;
3295 	device_t dev = adapter->dev;
3296 	struct em_buffer *txbuf;
3297 	int error, i;
3298 
3299 	/*
3300 	 * Setup DMA descriptor areas.
3301 	 */
3302 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3303 			       1, 0,			/* alignment, bounds */
3304 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3305 			       BUS_SPACE_MAXADDR,	/* highaddr */
3306 			       NULL, NULL,		/* filter, filterarg */
3307 			       EM_TSO_SIZE,		/* maxsize */
3308 			       EM_MAX_SCATTER,		/* nsegments */
3309 			       PAGE_SIZE,		/* maxsegsize */
3310 			       0,			/* flags */
3311 			       NULL,			/* lockfunc */
3312 			       NULL,			/* lockfuncarg */
3313 			       &txr->txtag))) {
3314 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3315 		goto fail;
3316 	}
3317 
3318 	if (!(txr->tx_buffers =
3319 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3320 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3321 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3322 		error = ENOMEM;
3323 		goto fail;
3324 	}
3325 
3326 	/* Create the descriptor buffer DMA maps */
3327 	txbuf = txr->tx_buffers;
3328 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3329 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3330 		if (error != 0) {
3331 			device_printf(dev, "Unable to create TX DMA map\n");
3332 			goto fail;
3333 		}
3334 	}
3335 
3336 	return 0;
3337 fail:
3338 	/* Free everything; this handles the case where we failed partway */
3339 	em_free_transmit_structures(adapter);
3340 	return (error);
3341 }
3342 
3343 /*********************************************************************
3344  *
3345  *  Initialize a transmit ring.
3346  *
3347  **********************************************************************/
3348 static void
3349 em_setup_transmit_ring(struct tx_ring *txr)
3350 {
3351 	struct adapter *adapter = txr->adapter;
3352 	struct em_buffer *txbuf;
3353 	int i;
3354 #ifdef DEV_NETMAP
3355 	struct netmap_adapter *na = NA(adapter->ifp);
3356 	struct netmap_slot *slot;
3357 #endif /* DEV_NETMAP */
3358 
3359 	/* Clear the old descriptor contents */
3360 	EM_TX_LOCK(txr);
3361 #ifdef DEV_NETMAP
3362 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3363 #endif /* DEV_NETMAP */
3364 
3365 	bzero((void *)txr->tx_base,
3366 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3367 	/* Reset indices */
3368 	txr->next_avail_desc = 0;
3369 	txr->next_to_clean = 0;
3370 
3371 	/* Free any existing tx buffers. */
3372 	txbuf = txr->tx_buffers;
3373 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3374 		if (txbuf->m_head != NULL) {
3375 			bus_dmamap_sync(txr->txtag, txbuf->map,
3376 			    BUS_DMASYNC_POSTWRITE);
3377 			bus_dmamap_unload(txr->txtag, txbuf->map);
3378 			m_freem(txbuf->m_head);
3379 			txbuf->m_head = NULL;
3380 		}
3381 #ifdef DEV_NETMAP
3382 		if (slot) {
3383 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3384 			uint64_t paddr;
3385 			void *addr;
3386 
3387 			addr = PNMB(slot + si, &paddr);
3388 			txr->tx_base[i].buffer_addr = htole64(paddr);
3389 			/* reload the map for netmap mode */
3390 			netmap_load_map(txr->txtag, txbuf->map, addr);
3391 		}
3392 #endif /* DEV_NETMAP */
3393 
3394 		/* clear the watch index */
3395 		txbuf->next_eop = -1;
3396         }
3397 
3398 	/* Set number of descriptors available */
3399 	txr->tx_avail = adapter->num_tx_desc;
3400 	txr->queue_status = EM_QUEUE_IDLE;
3401 
3402 	/* Clear checksum offload context. */
3403 	txr->last_hw_offload = 0;
3404 	txr->last_hw_ipcss = 0;
3405 	txr->last_hw_ipcso = 0;
3406 	txr->last_hw_tucss = 0;
3407 	txr->last_hw_tucso = 0;
3408 
3409 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3410 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3411 	EM_TX_UNLOCK(txr);
3412 }
3413 
3414 /*********************************************************************
3415  *
3416  *  Initialize all transmit rings.
3417  *
3418  **********************************************************************/
3419 static void
3420 em_setup_transmit_structures(struct adapter *adapter)
3421 {
3422 	struct tx_ring *txr = adapter->tx_rings;
3423 
3424 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3425 		em_setup_transmit_ring(txr);
3426 
3427 	return;
3428 }
3429 
3430 /*********************************************************************
3431  *
3432  *  Enable transmit unit.
3433  *
3434  **********************************************************************/
3435 static void
3436 em_initialize_transmit_unit(struct adapter *adapter)
3437 {
3438 	struct tx_ring	*txr = adapter->tx_rings;
3439 	struct e1000_hw	*hw = &adapter->hw;
3440 	u32	tctl, tarc, tipg = 0;
3441 
	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3443 
3444 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3445 		u64 bus_addr = txr->txdma.dma_paddr;
3446 		/* Base and Len of TX Ring */
3447 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3448 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3449 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3450 	    	    (u32)(bus_addr >> 32));
3451 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3452 	    	    (u32)bus_addr);
3453 		/* Init the HEAD/TAIL indices */
3454 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3455 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3456 
3457 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3458 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3459 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3460 
3461 		txr->queue_status = EM_QUEUE_IDLE;
3462 	}
3463 
3464 	/* Set the default values for the Tx Inter Packet Gap timer */
3465 	switch (adapter->hw.mac.type) {
3466 	case e1000_80003es2lan:
3467 		tipg = DEFAULT_82543_TIPG_IPGR1;
3468 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3469 		    E1000_TIPG_IPGR2_SHIFT;
3470 		break;
3471 	default:
3472 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3473 		    (adapter->hw.phy.media_type ==
3474 		    e1000_media_type_internal_serdes))
3475 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3476 		else
3477 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3478 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3479 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3480 	}
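	/*
	 * TIPG packs three fields into one register: IPGT in bits 9:0,
	 * IPGR1 in bits 19:10 and IPGR2 in bits 29:20, hence the shifts
	 * above when OR-ing the receive IPG values into tipg.
	 */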
3481 
3482 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3483 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3484 
	if (adapter->hw.mac.type >= e1000_82540)
3486 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3487 		    adapter->tx_abs_int_delay.value);
3488 
3489 	if ((adapter->hw.mac.type == e1000_82571) ||
3490 	    (adapter->hw.mac.type == e1000_82572)) {
3491 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3492 		tarc |= SPEED_MODE_BIT;
3493 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3494 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3495 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3496 		tarc |= 1;
3497 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3498 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3499 		tarc |= 1;
3500 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3501 	}
3502 
3503 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3504 	if (adapter->tx_int_delay.value > 0)
3505 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3506 
3507 	/* Program the Transmit Control Register */
3508 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3509 	tctl &= ~E1000_TCTL_CT;
3510 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3511 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3512 
3513 	if (adapter->hw.mac.type >= e1000_82571)
3514 		tctl |= E1000_TCTL_MULR;
3515 
3516 	/* This write will effectively turn on the transmit unit. */
3517 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3518 
3519 }
3520 
3521 
3522 /*********************************************************************
3523  *
3524  *  Free all transmit rings.
3525  *
3526  **********************************************************************/
3527 static void
3528 em_free_transmit_structures(struct adapter *adapter)
3529 {
3530 	struct tx_ring *txr = adapter->tx_rings;
3531 
3532 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3533 		EM_TX_LOCK(txr);
3534 		em_free_transmit_buffers(txr);
3535 		em_dma_free(adapter, &txr->txdma);
3536 		EM_TX_UNLOCK(txr);
3537 		EM_TX_LOCK_DESTROY(txr);
3538 	}
3539 
3540 	free(adapter->tx_rings, M_DEVBUF);
3541 }
3542 
3543 /*********************************************************************
3544  *
3545  *  Free transmit ring related data structures.
3546  *
3547  **********************************************************************/
3548 static void
3549 em_free_transmit_buffers(struct tx_ring *txr)
3550 {
3551 	struct adapter		*adapter = txr->adapter;
3552 	struct em_buffer	*txbuf;
3553 
3554 	INIT_DEBUGOUT("free_transmit_ring: begin");
3555 
3556 	if (txr->tx_buffers == NULL)
3557 		return;
3558 
3559 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3560 		txbuf = &txr->tx_buffers[i];
3561 		if (txbuf->m_head != NULL) {
3562 			bus_dmamap_sync(txr->txtag, txbuf->map,
3563 			    BUS_DMASYNC_POSTWRITE);
3564 			bus_dmamap_unload(txr->txtag,
3565 			    txbuf->map);
3566 			m_freem(txbuf->m_head);
3567 			txbuf->m_head = NULL;
3568 			if (txbuf->map != NULL) {
3569 				bus_dmamap_destroy(txr->txtag,
3570 				    txbuf->map);
3571 				txbuf->map = NULL;
3572 			}
3573 		} else if (txbuf->map != NULL) {
3574 			bus_dmamap_unload(txr->txtag,
3575 			    txbuf->map);
3576 			bus_dmamap_destroy(txr->txtag,
3577 			    txbuf->map);
3578 			txbuf->map = NULL;
3579 		}
3580 	}
3581 #if __FreeBSD_version >= 800000
3582 	if (txr->br != NULL)
3583 		buf_ring_free(txr->br, M_DEVBUF);
3584 #endif
3585 	if (txr->tx_buffers != NULL) {
3586 		free(txr->tx_buffers, M_DEVBUF);
3587 		txr->tx_buffers = NULL;
3588 	}
3589 	if (txr->txtag != NULL) {
3590 		bus_dma_tag_destroy(txr->txtag);
3591 		txr->txtag = NULL;
3592 	}
3593 	return;
3594 }
3595 
3596 
/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. Each context
 *  change can be a performance detriment, and offload might be
 *  better just disabled. The reason arises in the way in which
 *  the controller supports pipelined requests from the Tx data
 *  DMA. Up to four requests can be pipelined, and they may
 *  belong to the same packet or to multiple packets. However all
 *  requests for one packet are issued before a request is issued
 *  for a subsequent packet, and if a request for the next packet
 *  requires a context change that request will be stalled
 *  until the previous request completes. This means setting up
 *  a new context effectively disables pipelined Tx data DMA,
 *  which in turn greatly slows down performance when sending
 *  small frames.
 **********************************************************************/
3613 static void
3614 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3615     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3616 {
3617 	struct adapter			*adapter = txr->adapter;
3618 	struct e1000_context_desc	*TXD = NULL;
3619 	struct em_buffer		*tx_buffer;
3620 	int				cur, hdr_len;
3621 	u32				cmd = 0;
3622 	u16				offload = 0;
3623 	u8				ipcso, ipcss, tucso, tucss;
3624 
3625 	ipcss = ipcso = tucss = tucso = 0;
3626 	hdr_len = ip_off + (ip->ip_hl << 2);
3627 	cur = txr->next_avail_desc;
3628 
3629 	/* Setup of IP header checksum. */
3630 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3631 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3632 		offload |= CSUM_IP;
3633 		ipcss = ip_off;
3634 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3635 		/*
3636 		 * Start offset for header checksum calculation.
3637 		 * End offset for header checksum calculation.
3638 		 * Offset of place to put the checksum.
3639 		 */
3640 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3641 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3642 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3643 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3644 		cmd |= E1000_TXD_CMD_IP;
3645 	}
3646 
3647 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3648  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3649  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3650  		offload |= CSUM_TCP;
3651  		tucss = hdr_len;
3652  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
 		/*
 		 * Setting up a new checksum offload context for every frame
 		 * takes a lot of processing time for the hardware. This also
 		 * reduces performance a lot for small frames, so avoid it if
 		 * the driver can reuse a previously configured checksum
 		 * offload context.
 		 */
3660  		if (txr->last_hw_offload == offload) {
3661  			if (offload & CSUM_IP) {
3662  				if (txr->last_hw_ipcss == ipcss &&
3663  				    txr->last_hw_ipcso == ipcso &&
3664  				    txr->last_hw_tucss == tucss &&
3665  				    txr->last_hw_tucso == tucso)
3666  					return;
3667  			} else {
3668  				if (txr->last_hw_tucss == tucss &&
3669  				    txr->last_hw_tucso == tucso)
3670  					return;
3671  			}
3672   		}
3673  		txr->last_hw_offload = offload;
3674  		txr->last_hw_tucss = tucss;
3675  		txr->last_hw_tucso = tucso;
3676  		/*
3677  		 * Start offset for payload checksum calculation.
3678  		 * End offset for payload checksum calculation.
3679  		 * Offset of place to put the checksum.
3680  		 */
3681 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3682  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3683  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3684  		TXD->upper_setup.tcp_fields.tucso = tucso;
3685  		cmd |= E1000_TXD_CMD_TCP;
3686  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3687  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;
3689  		tucss = hdr_len;
3690  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
 		/*
 		 * Setting up a new checksum offload context for every frame
 		 * takes a lot of processing time for the hardware. This also
 		 * reduces performance a lot for small frames, so avoid it if
 		 * the driver can reuse a previously configured checksum
 		 * offload context.
 		 */
3698  		if (txr->last_hw_offload == offload) {
3699  			if (offload & CSUM_IP) {
3700  				if (txr->last_hw_ipcss == ipcss &&
3701  				    txr->last_hw_ipcso == ipcso &&
3702  				    txr->last_hw_tucss == tucss &&
3703  				    txr->last_hw_tucso == tucso)
3704  					return;
3705  			} else {
3706  				if (txr->last_hw_tucss == tucss &&
3707  				    txr->last_hw_tucso == tucso)
3708  					return;
3709  			}
3710  		}
3711  		txr->last_hw_offload = offload;
3712  		txr->last_hw_tucss = tucss;
3713  		txr->last_hw_tucso = tucso;
3714  		/*
 		 * Start offset for payload checksum calculation.
 		 * End offset for payload checksum calculation.
3717  		 * Offset of place to put the checksum.
3718  		 */
3719 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3720  		TXD->upper_setup.tcp_fields.tucss = tucss;
3721  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3722  		TXD->upper_setup.tcp_fields.tucso = tucso;
3723   	}
3724 
3725  	if (offload & CSUM_IP) {
3726  		txr->last_hw_ipcss = ipcss;
3727  		txr->last_hw_ipcso = ipcso;
3728   	}
3729 
3730 	TXD->tcp_seg_setup.data = htole32(0);
3731 	TXD->cmd_and_length =
3732 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3733 	tx_buffer = &txr->tx_buffers[cur];
3734 	tx_buffer->m_head = NULL;
3735 	tx_buffer->next_eop = -1;
3736 
3737 	if (++cur == adapter->num_tx_desc)
3738 		cur = 0;
3739 
3740 	txr->tx_avail--;
3741 	txr->next_avail_desc = cur;
3742 }
3743 
3744 
3745 /**********************************************************************
3746  *
3747  *  Setup work for hardware segmentation offload (TSO)
3748  *
3749  **********************************************************************/
3750 static void
3751 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3752     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3753 {
3754 	struct adapter			*adapter = txr->adapter;
3755 	struct e1000_context_desc	*TXD;
3756 	struct em_buffer		*tx_buffer;
3757 	int cur, hdr_len;
3758 
	/*
	 * In theory we could reuse the same TSO context if and only if
	 * the frame is the same type (IP/TCP) and has the same MSS. However,
	 * checking whether a frame has the same IP/TCP structure is a
	 * hard thing, so just ignore that and always establish a
	 * new TSO context.
	 */
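	/* e.g. Ethernet + IPv4 + TCP with no options: hdr_len = 14 + 20 + 20 = 54 bytes */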
3766 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3767 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3768 		      E1000_TXD_DTYP_D |	/* Data descr type */
3769 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3770 
3771 	/* IP and/or TCP header checksum calculation and insertion. */
3772 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3773 
3774 	cur = txr->next_avail_desc;
3775 	tx_buffer = &txr->tx_buffers[cur];
3776 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3777 
3778 	/*
3779 	 * Start offset for header checksum calculation.
3780 	 * End offset for header checksum calculation.
	 * Offset of place to put the checksum.
3782 	 */
3783 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3784 	TXD->lower_setup.ip_fields.ipcse =
3785 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3786 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3787 	/*
3788 	 * Start offset for payload checksum calculation.
3789 	 * End offset for payload checksum calculation.
3790 	 * Offset of place to put the checksum.
3791 	 */
3792 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3793 	TXD->upper_setup.tcp_fields.tucse = 0;
3794 	TXD->upper_setup.tcp_fields.tucso =
3795 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3796 	/*
3797 	 * Payload size per packet w/o any headers.
3798 	 * Length of all headers up to payload.
3799 	 */
3800 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3801 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3802 
3803 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3804 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3805 				E1000_TXD_CMD_TSE |	/* TSE context */
3806 				E1000_TXD_CMD_IP |	/* Do IP csum */
3807 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3808 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3809 
3810 	tx_buffer->m_head = NULL;
3811 	tx_buffer->next_eop = -1;
3812 
3813 	if (++cur == adapter->num_tx_desc)
3814 		cur = 0;
3815 
3816 	txr->tx_avail--;
3817 	txr->next_avail_desc = cur;
3818 	txr->tx_tso = TRUE;
3819 }
3820 
3821 
3822 /**********************************************************************
3823  *
3824  *  Examine each tx_buffer in the used queue. If the hardware is done
3825  *  processing the packet then free associated resources. The
3826  *  tx_buffer is put back on the free queue.
3827  *
3828  **********************************************************************/
3829 static void
3830 em_txeof(struct tx_ring *txr)
3831 {
3832 	struct adapter	*adapter = txr->adapter;
3833         int first, last, done, processed;
3834         struct em_buffer *tx_buffer;
3835         struct e1000_tx_desc   *tx_desc, *eop_desc;
3836 	struct ifnet   *ifp = adapter->ifp;
3837 
3838 	EM_TX_LOCK_ASSERT(txr);
3839 #ifdef DEV_NETMAP
3840 	if (netmap_tx_irq(ifp, txr->me |
3841 	    (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
3842 		return;
3843 #endif /* DEV_NETMAP */
3844 
3845 	/* No work, make sure watchdog is off */
3846         if (txr->tx_avail == adapter->num_tx_desc) {
3847 		txr->queue_status = EM_QUEUE_IDLE;
3848                 return;
3849 	}
3850 
3851 	processed = 0;
3852         first = txr->next_to_clean;
3853         tx_desc = &txr->tx_base[first];
3854         tx_buffer = &txr->tx_buffers[first];
3855 	last = tx_buffer->next_eop;
3856         eop_desc = &txr->tx_base[last];
3857 
	/*
	 * Get the index of the first descriptor AFTER the EOP of the
	 * first packet, so that we can do the simple comparison in the
	 * inner while loop below.
	 */
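	/* e.g. in a 1024-descriptor ring, an EOP at index 1023 wraps 'done' to 0 */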
3864 	if (++last == adapter->num_tx_desc)
3865  		last = 0;
3866 	done = last;
3867 
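        /* Make the status bits the hardware wrote back via DMA visible to the CPU */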
3868         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3869             BUS_DMASYNC_POSTREAD);
3870 
3871         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3872 		/* We clean the range of the packet */
3873 		while (first != done) {
3874                 	tx_desc->upper.data = 0;
3875                 	tx_desc->lower.data = 0;
3876                 	tx_desc->buffer_addr = 0;
3877                 	++txr->tx_avail;
3878 			++processed;
3879 
3880 			if (tx_buffer->m_head) {
3881 				bus_dmamap_sync(txr->txtag,
3882 				    tx_buffer->map,
3883 				    BUS_DMASYNC_POSTWRITE);
3884 				bus_dmamap_unload(txr->txtag,
3885 				    tx_buffer->map);
3886                         	m_freem(tx_buffer->m_head);
3887                         	tx_buffer->m_head = NULL;
3888                 	}
3889 			tx_buffer->next_eop = -1;
3890 			txr->watchdog_time = ticks;
3891 
3892 	                if (++first == adapter->num_tx_desc)
3893 				first = 0;
3894 
3895 	                tx_buffer = &txr->tx_buffers[first];
3896 			tx_desc = &txr->tx_base[first];
3897 		}
3898 		++ifp->if_opackets;
3899 		/* See if we can continue to the next packet */
3900 		last = tx_buffer->next_eop;
3901 		if (last != -1) {
3902         		eop_desc = &txr->tx_base[last];
3903 			/* Get new done point */
3904 			if (++last == adapter->num_tx_desc) last = 0;
3905 			done = last;
3906 		} else
3907 			break;
3908         }
3909         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3910             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3911 
3912         txr->next_to_clean = first;
3913 
	/*
	** Watchdog calculation: we know there's work
	** outstanding or the first return would have
	** been taken, so nothing processed for too long
	** indicates a hang. The local timer will examine
	** this and do a reset if needed.
	*/
3921 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3922 		txr->queue_status = EM_QUEUE_HUNG;
3923 
3924         /*
3925          * If we have a minimum free, clear IFF_DRV_OACTIVE
3926          * to tell the stack that it is OK to send packets.
3927 	 * Notice that all writes of OACTIVE happen under the
3928 	 * TX lock which, with a single queue, guarantees
3929 	 * sanity.
3930          */
3931         if (txr->tx_avail >= EM_MAX_SCATTER)
3932 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3933 
3934 	/* Disable watchdog if all clean */
3935 	if (txr->tx_avail == adapter->num_tx_desc) {
3936 		txr->queue_status = EM_QUEUE_IDLE;
3937 	}
3938 }
3939 
3940 
3941 /*********************************************************************
3942  *
3943  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3944  *
3945  **********************************************************************/
3946 static void
3947 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3948 {
3949 	struct adapter		*adapter = rxr->adapter;
3950 	struct mbuf		*m;
3951 	bus_dma_segment_t	segs[1];
3952 	struct em_buffer	*rxbuf;
3953 	int			i, j, error, nsegs;
3954 	bool			cleaned = FALSE;
3955 
3956 	i = j = rxr->next_to_refresh;
3957 	/*
3958 	** Get one descriptor beyond
3959 	** our work mark to control
3960 	** the loop.
3961 	*/
3962 	if (++j == adapter->num_rx_desc)
3963 		j = 0;
3964 
3965 	while (j != limit) {
3966 		rxbuf = &rxr->rx_buffers[i];
3967 		if (rxbuf->m_head == NULL) {
3968 			m = m_getjcl(M_NOWAIT, MT_DATA,
3969 			    M_PKTHDR, adapter->rx_mbuf_sz);
			/*
			** If we have a temporary resource shortage
			** that causes a failure, just abort the refresh
			** for now; we will return to this point when
			** reinvoked from em_rxeof.
			*/
3976 			if (m == NULL)
3977 				goto update;
3978 		} else
3979 			m = rxbuf->m_head;
3980 
3981 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3982 		m->m_flags |= M_PKTHDR;
3983 		m->m_data = m->m_ext.ext_buf;
3984 
3985 		/* Use bus_dma machinery to setup the memory mapping  */
3986 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3987 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3988 		if (error != 0) {
3989 			printf("Refresh mbufs: hdr dmamap load"
3990 			    " failure - %d\n", error);
3991 			m_free(m);
3992 			rxbuf->m_head = NULL;
3993 			goto update;
3994 		}
3995 		rxbuf->m_head = m;
3996 		bus_dmamap_sync(rxr->rxtag,
3997 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3998 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3999 		cleaned = TRUE;
4000 
		i = j; /* Next is precalculated for us */
4002 		rxr->next_to_refresh = i;
4003 		/* Calculate next controlling index */
4004 		if (++j == adapter->num_rx_desc)
4005 			j = 0;
4006 	}
4007 update:
	/*
	** Update the tail pointer only if, and
	** only as far as, we have refreshed.
	*/
4012 	if (cleaned)
4013 		E1000_WRITE_REG(&adapter->hw,
4014 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4015 
4016 	return;
4017 }
4018 
4019 
4020 /*********************************************************************
4021  *
4022  *  Allocate memory for rx_buffer structures. Since we use one
4023  *  rx_buffer per received packet, the maximum number of rx_buffer's
4024  *  that we'll need is equal to the number of receive descriptors
4025  *  that we've allocated.
4026  *
4027  **********************************************************************/
4028 static int
4029 em_allocate_receive_buffers(struct rx_ring *rxr)
4030 {
4031 	struct adapter		*adapter = rxr->adapter;
4032 	device_t		dev = adapter->dev;
4033 	struct em_buffer	*rxbuf;
4034 	int			error;
4035 
4036 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4037 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4038 	if (rxr->rx_buffers == NULL) {
4039 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4040 		return (ENOMEM);
4041 	}
4042 
4043 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4044 				1, 0,			/* alignment, bounds */
4045 				BUS_SPACE_MAXADDR,	/* lowaddr */
4046 				BUS_SPACE_MAXADDR,	/* highaddr */
4047 				NULL, NULL,		/* filter, filterarg */
4048 				MJUM9BYTES,		/* maxsize */
4049 				1,			/* nsegments */
4050 				MJUM9BYTES,		/* maxsegsize */
4051 				0,			/* flags */
4052 				NULL,			/* lockfunc */
4053 				NULL,			/* lockarg */
4054 				&rxr->rxtag);
4055 	if (error) {
4056 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4057 		    __func__, error);
4058 		goto fail;
4059 	}
4060 
	for (int i = 0; i < adapter->num_rx_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
4064 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4065 		    &rxbuf->map);
4066 		if (error) {
4067 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4068 			    __func__, error);
4069 			goto fail;
4070 		}
4071 	}
4072 
4073 	return (0);
4074 
4075 fail:
4076 	em_free_receive_structures(adapter);
4077 	return (error);
4078 }
4079 
4080 
4081 /*********************************************************************
4082  *
4083  *  Initialize a receive ring and its buffers.
4084  *
4085  **********************************************************************/
4086 static int
4087 em_setup_receive_ring(struct rx_ring *rxr)
4088 {
4089 	struct	adapter 	*adapter = rxr->adapter;
4090 	struct em_buffer	*rxbuf;
4091 	bus_dma_segment_t	seg[1];
4092 	int			rsize, nsegs, error = 0;
4093 #ifdef DEV_NETMAP
4094 	struct netmap_adapter *na = NA(adapter->ifp);
4095 	struct netmap_slot *slot;
4096 #endif
4097 
4098 
4099 	/* Clear the ring contents */
4100 	EM_RX_LOCK(rxr);
4101 	rsize = roundup2(adapter->num_rx_desc *
4102 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4103 	bzero((void *)rxr->rx_base, rsize);
4104 #ifdef DEV_NETMAP
	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4106 #endif
4107 
4108 	/*
4109 	** Free current RX buffer structs and their mbufs
4110 	*/
4111 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4112 		rxbuf = &rxr->rx_buffers[i];
4113 		if (rxbuf->m_head != NULL) {
4114 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4115 			    BUS_DMASYNC_POSTREAD);
4116 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4117 			m_freem(rxbuf->m_head);
4118 			rxbuf->m_head = NULL; /* mark as freed */
4119 		}
4120 	}
4121 
4122 	/* Now replenish the mbufs */
4123         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4124 		rxbuf = &rxr->rx_buffers[j];
4125 #ifdef DEV_NETMAP
4126 		if (slot) {
4127 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4128 			uint64_t paddr;
4129 			void *addr;
4130 
4131 			addr = PNMB(slot + si, &paddr);
4132 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4133 			/* Update descriptor */
4134 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4135 			continue;
4136 		}
4137 #endif /* DEV_NETMAP */
4138 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4139 		    M_PKTHDR, adapter->rx_mbuf_sz);
4140 		if (rxbuf->m_head == NULL) {
4141 			error = ENOBUFS;
4142 			goto fail;
4143 		}
4144 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4145 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4146 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4147 
4148 		/* Get the memory mapping */
4149 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4150 		    rxbuf->map, rxbuf->m_head, seg,
4151 		    &nsegs, BUS_DMA_NOWAIT);
4152 		if (error != 0) {
4153 			m_freem(rxbuf->m_head);
4154 			rxbuf->m_head = NULL;
4155 			goto fail;
4156 		}
4157 		bus_dmamap_sync(rxr->rxtag,
4158 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4159 
4160 		/* Update descriptor */
4161 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4162 	}
4163 	rxr->next_to_check = 0;
4164 	rxr->next_to_refresh = 0;
4165 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4166 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4167 
4168 fail:
4169 	EM_RX_UNLOCK(rxr);
4170 	return (error);
4171 }
4172 
4173 /*********************************************************************
4174  *
4175  *  Initialize all receive rings.
4176  *
4177  **********************************************************************/
4178 static int
4179 em_setup_receive_structures(struct adapter *adapter)
4180 {
4181 	struct rx_ring *rxr = adapter->rx_rings;
4182 	int q;
4183 
4184 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4185 		if (em_setup_receive_ring(rxr))
4186 			goto fail;
4187 
4188 	return (0);
4189 fail:
4190 	/*
	 * Free the RX buffers allocated so far. We only handle
	 * the rings that completed; the failing case will have
	 * cleaned up after itself. 'q' failed, so it's the terminus.
4194 	 */
4195 	for (int i = 0; i < q; ++i) {
4196 		rxr = &adapter->rx_rings[i];
4197 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4198 			struct em_buffer *rxbuf;
4199 			rxbuf = &rxr->rx_buffers[n];
4200 			if (rxbuf->m_head != NULL) {
4201 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4202 			  	  BUS_DMASYNC_POSTREAD);
4203 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4204 				m_freem(rxbuf->m_head);
4205 				rxbuf->m_head = NULL;
4206 			}
4207 		}
4208 		rxr->next_to_check = 0;
4209 		rxr->next_to_refresh = 0;
4210 	}
4211 
4212 	return (ENOBUFS);
4213 }
4214 
4215 /*********************************************************************
4216  *
4217  *  Free all receive rings.
4218  *
4219  **********************************************************************/
4220 static void
4221 em_free_receive_structures(struct adapter *adapter)
4222 {
4223 	struct rx_ring *rxr = adapter->rx_rings;
4224 
4225 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4226 		em_free_receive_buffers(rxr);
4227 		/* Free the ring memory as well */
4228 		em_dma_free(adapter, &rxr->rxdma);
4229 		EM_RX_LOCK_DESTROY(rxr);
4230 	}
4231 
4232 	free(adapter->rx_rings, M_DEVBUF);
4233 }
4234 
4235 
4236 /*********************************************************************
4237  *
4238  *  Free receive ring data structures
4239  *
4240  **********************************************************************/
4241 static void
4242 em_free_receive_buffers(struct rx_ring *rxr)
4243 {
4244 	struct adapter		*adapter = rxr->adapter;
4245 	struct em_buffer	*rxbuf = NULL;
4246 
4247 	INIT_DEBUGOUT("free_receive_buffers: begin");
4248 
4249 	if (rxr->rx_buffers != NULL) {
4250 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4251 			rxbuf = &rxr->rx_buffers[i];
4252 			if (rxbuf->map != NULL) {
4253 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4254 				    BUS_DMASYNC_POSTREAD);
4255 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4256 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4257 			}
4258 			if (rxbuf->m_head != NULL) {
4259 				m_freem(rxbuf->m_head);
4260 				rxbuf->m_head = NULL;
4261 			}
4262 		}
4263 		free(rxr->rx_buffers, M_DEVBUF);
4264 		rxr->rx_buffers = NULL;
4265 		rxr->next_to_check = 0;
4266 		rxr->next_to_refresh = 0;
4267 	}
4268 
4269 	if (rxr->rxtag != NULL) {
4270 		bus_dma_tag_destroy(rxr->rxtag);
4271 		rxr->rxtag = NULL;
4272 	}
4273 
4274 	return;
4275 }
4276 
4277 
4278 /*********************************************************************
4279  *
4280  *  Enable receive unit.
4281  *
4282  **********************************************************************/
4283 
4284 static void
4285 em_initialize_receive_unit(struct adapter *adapter)
4286 {
4287 	struct rx_ring	*rxr = adapter->rx_rings;
4288 	struct ifnet	*ifp = adapter->ifp;
4289 	struct e1000_hw	*hw = &adapter->hw;
4290 	u64	bus_addr;
4291 	u32	rctl, rxcsum;
4292 
4293 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4294 
4295 	/*
4296 	 * Make sure receives are disabled while setting
4297 	 * up the descriptor ring
4298 	 */
4299 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4300 	/* Do not disable if ever enabled on this hardware */
4301 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4302 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4303 
4304 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4305 	    adapter->rx_abs_int_delay.value);
4306 	/*
4307 	 * Set the interrupt throttling rate. Value is calculated
4308 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4309 	 */
4310 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
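	/*
	 * For example, with MAX_INTS_PER_SEC at 8000 this works out to
	 * roughly 488 in the register's 256ns units, capping the device
	 * at about 8000 interrupts per second.
	 */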
4311 
4312 	/*
4313 	** When using MSIX interrupts we need to throttle
4314 	** using the EITR register (82574 only)
4315 	*/
4316 	if (hw->mac.type == e1000_82574) {
4317 		for (int i = 0; i < 4; i++)
4318 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4319 			    DEFAULT_ITR);
4320 		/* Disable accelerated acknowledge */
4321 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4322 	}
4323 
4324 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4325 	if (ifp->if_capenable & IFCAP_RXCSUM)
4326 		rxcsum |= E1000_RXCSUM_TUOFL;
4327 	else
4328 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4329 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4330 
4331 	/*
4332 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4333 	** long latencies are observed, like Lenovo X60. This
4334 	** change eliminates the problem, but since having positive
4335 	** values in RDTR is a known source of problems on other
4336 	** platforms another solution is being sought.
4337 	*/
4338 	if (hw->mac.type == e1000_82573)
4339 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4340 
4341 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4342 		/* Setup the Base and Length of the Rx Descriptor Ring */
4343 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4344 
4345 		bus_addr = rxr->rxdma.dma_paddr;
4346 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4347 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4348 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4349 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4350 		/* Setup the Head and Tail Descriptor Pointers */
4351 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4352 #ifdef DEV_NETMAP
4353 		/*
4354 		 * an init() while a netmap client is active must
4355 		 * preserve the rx buffers passed to userspace.
4356 		 */
4357 		if (ifp->if_capenable & IFCAP_NETMAP)
4358 			rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
4359 #endif /* DEV_NETMAP */
4360 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4361 	}
4362 
4363 	/* Set PTHRESH for improved jumbo performance */
4364 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4365 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4366 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4367 	    (ifp->if_mtu > ETHERMTU)) {
4368 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4369 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4370 	}
4371 
4372 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4373 		if (ifp->if_mtu > ETHERMTU)
4374 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4375 		else
4376 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4377 	}
4378 
4379 	/* Setup the Receive Control Register */
4380 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4381 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4382 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4383 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4384 
4385         /* Strip the CRC */
4386         rctl |= E1000_RCTL_SECRC;
4387 
4388         /* Make sure VLAN Filters are off */
4389         rctl &= ~E1000_RCTL_VFE;
4390 	rctl &= ~E1000_RCTL_SBP;
4391 
4392 	if (adapter->rx_mbuf_sz == MCLBYTES)
4393 		rctl |= E1000_RCTL_SZ_2048;
4394 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4395 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4396 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4397 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
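	/*
	 * E1000_RCTL_BSEX extends the buffer-size encoding, which is how
	 * the 4096 and 8192 byte buffer sizes are expressed above.
	 */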
4398 
4399 	if (ifp->if_mtu > ETHERMTU)
4400 		rctl |= E1000_RCTL_LPE;
4401 	else
4402 		rctl &= ~E1000_RCTL_LPE;
4403 
4404 	/* Write out the settings */
4405 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4406 
4407 	return;
4408 }
4409 
4410 
4411 /*********************************************************************
4412  *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data which has been
 *  dma'ed into host memory to the upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  For polling we also now return the number of cleaned packets.
4421  *********************************************************************/
4422 static bool
4423 em_rxeof(struct rx_ring *rxr, int count, int *done)
4424 {
4425 	struct adapter		*adapter = rxr->adapter;
4426 	struct ifnet		*ifp = adapter->ifp;
4427 	struct mbuf		*mp, *sendmp;
4428 	u8			status = 0;
4429 	u16 			len;
4430 	int			i, processed, rxdone = 0;
4431 	bool			eop;
4432 	struct e1000_rx_desc	*cur;
4433 
4434 	EM_RX_LOCK(rxr);
4435 
4436 #ifdef DEV_NETMAP
4437 	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4438 		return (FALSE);
4439 #endif /* DEV_NETMAP */
4440 
4441 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4442 
4443 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4444 			break;
4445 
4446 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4447 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4448 
4449 		cur = &rxr->rx_base[i];
4450 		status = cur->status;
4451 		mp = sendmp = NULL;
4452 
4453 		if ((status & E1000_RXD_STAT_DD) == 0)
4454 			break;
4455 
4456 		len = le16toh(cur->length);
4457 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4458 
4459 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4460 		    (rxr->discard == TRUE)) {
4461 			adapter->dropped_pkts++;
4462 			++rxr->rx_discarded;
4463 			if (!eop) /* Catch subsequent segs */
4464 				rxr->discard = TRUE;
4465 			else
4466 				rxr->discard = FALSE;
4467 			em_rx_discard(rxr, i);
4468 			goto next_desc;
4469 		}
4470 
4471 		/* Assign correct length to the current fragment */
4472 		mp = rxr->rx_buffers[i].m_head;
4473 		mp->m_len = len;
4474 
4475 		/* Trigger for refresh */
4476 		rxr->rx_buffers[i].m_head = NULL;
4477 
4478 		/* First segment? */
4479 		if (rxr->fmp == NULL) {
4480 			mp->m_pkthdr.len = len;
4481 			rxr->fmp = rxr->lmp = mp;
4482 		} else {
4483 			/* Chain mbuf's together */
4484 			mp->m_flags &= ~M_PKTHDR;
4485 			rxr->lmp->m_next = mp;
4486 			rxr->lmp = mp;
4487 			rxr->fmp->m_pkthdr.len += len;
4488 		}
4489 
4490 		if (eop) {
4491 			--count;
4492 			sendmp = rxr->fmp;
4493 			sendmp->m_pkthdr.rcvif = ifp;
4494 			ifp->if_ipackets++;
4495 			em_receive_checksum(cur, sendmp);
4496 #ifndef __NO_STRICT_ALIGNMENT
4497 			if (adapter->hw.mac.max_frame_size >
4498 			    (MCLBYTES - ETHER_ALIGN) &&
4499 			    em_fixup_rx(rxr) != 0)
4500 				goto skip;
4501 #endif
4502 			if (status & E1000_RXD_STAT_VP) {
4503 				sendmp->m_pkthdr.ether_vtag =
4504 				    le16toh(cur->special);
4505 				sendmp->m_flags |= M_VLANTAG;
4506 			}
4507 #ifndef __NO_STRICT_ALIGNMENT
4508 skip:
4509 #endif
4510 			rxr->fmp = rxr->lmp = NULL;
4511 		}
4512 next_desc:
4513 		/* Zero out the receive descriptors status. */
4514 		cur->status = 0;
4515 		++rxdone;	/* cumulative for POLL */
4516 		++processed;
4517 
4518 		/* Advance our pointers to the next descriptor. */
4519 		if (++i == adapter->num_rx_desc)
4520 			i = 0;
4521 
4522 		/* Send to the stack */
4523 		if (sendmp != NULL) {
4524 			rxr->next_to_check = i;
4525 			EM_RX_UNLOCK(rxr);
4526 			(*ifp->if_input)(ifp, sendmp);
4527 			EM_RX_LOCK(rxr);
4528 			i = rxr->next_to_check;
4529 		}
4530 
4531 		/* Only refresh mbufs every 8 descriptors */
4532 		if (processed == 8) {
4533 			em_refresh_mbufs(rxr, i);
4534 			processed = 0;
4535 		}
4536 	}
4537 
4538 	/* Catch any remaining refresh work */
4539 	if (e1000_rx_unrefreshed(rxr))
4540 		em_refresh_mbufs(rxr, i);
4541 
4542 	rxr->next_to_check = i;
4543 	if (done != NULL)
4544 		*done = rxdone;
4545 	EM_RX_UNLOCK(rxr);
4546 
4547 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4548 }
4549 
4550 static __inline void
4551 em_rx_discard(struct rx_ring *rxr, int i)
4552 {
4553 	struct em_buffer	*rbuf;
4554 
4555 	rbuf = &rxr->rx_buffers[i];
4556 	/* Free any previous pieces */
4557 	if (rxr->fmp != NULL) {
4558 		rxr->fmp->m_flags |= M_PKTHDR;
4559 		m_freem(rxr->fmp);
4560 		rxr->fmp = NULL;
4561 		rxr->lmp = NULL;
4562 	}
	/*
	** Free the buffer and allow em_refresh_mbufs()
	** to clean up and recharge the buffer.
	*/
4567 	if (rbuf->m_head) {
4568 		m_free(rbuf->m_head);
4569 		rbuf->m_head = NULL;
4570 	}
4571 	return;
4572 }
4573 
4574 #ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake of
 * the 8254x, as it nullifies DMA operations: the 8254x only allows the RX
 * buffer size to be 2048/4096/8192/16384, while what we really want is
 * 2048 - ETHER_ALIGN so the payload comes out aligned. On architectures
 * without strict alignment restrictions the 8254x still performs unaligned
 * memory accesses, which reduce performance as well. To avoid copying an
 * entire frame just to align it, we allocate a new mbuf and copy the
 * ethernet header into it. The new mbuf is then prepended to the existing
 * mbuf chain.
 *
 * Be aware that the best performance of the 8254x is achieved only when
 * jumbo frames are not used at all on architectures with strict alignment.
 */
4589 static int
4590 em_fixup_rx(struct rx_ring *rxr)
4591 {
4592 	struct adapter *adapter = rxr->adapter;
4593 	struct mbuf *m, *n;
4594 	int error;
4595 
4596 	error = 0;
4597 	m = rxr->fmp;
4598 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4599 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4600 		m->m_data += ETHER_HDR_LEN;
4601 	} else {
4602 		MGETHDR(n, M_NOWAIT, MT_DATA);
4603 		if (n != NULL) {
4604 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4605 			m->m_data += ETHER_HDR_LEN;
4606 			m->m_len -= ETHER_HDR_LEN;
4607 			n->m_len = ETHER_HDR_LEN;
4608 			M_MOVE_PKTHDR(n, m);
4609 			n->m_next = m;
4610 			rxr->fmp = n;
4611 		} else {
4612 			adapter->dropped_pkts++;
4613 			m_freem(rxr->fmp);
4614 			rxr->fmp = NULL;
4615 			error = ENOMEM;
4616 		}
4617 	}
4618 
4619 	return (error);
4620 }
4621 #endif
4622 
4623 /*********************************************************************
4624  *
4625  *  Verify that the hardware indicated that the checksum is valid.
4626  *  Inform the stack about the status of checksum so that stack
4627  *  doesn't spend time verifying the checksum.
4628  *
4629  *********************************************************************/
4630 static void
4631 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4632 {
4633 	mp->m_pkthdr.csum_flags = 0;
4634 
	/* The Ignore Checksum bit is set; hardware validated nothing */
4636 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4637 		return;
4638 
4639 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4640 		return;
4641 
4642 	/* IP Checksum Good? */
4643 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4644 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4645 
4646 	/* TCP or UDP checksum */
4647 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4648 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
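		/* 0xffff tells the stack the data checksum needs no further verification */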
4649 		mp->m_pkthdr.csum_data = htons(0xffff);
4650 	}
4651 }
4652 
4653 /*
 * This routine is run via a vlan
4655  * config EVENT
4656  */
4657 static void
4658 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4659 {
4660 	struct adapter	*adapter = ifp->if_softc;
4661 	u32		index, bit;
4662 
4663 	if (ifp->if_softc !=  arg)   /* Not our event */
4664 		return;
4665 
4666 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4667                 return;
4668 
4669 	EM_CORE_LOCK(adapter);
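	/*
	 * The VFTA is 128 32-bit registers (4096 bits, one per vlan id):
	 * e.g. vtag 1234 maps to index 38 (1234 >> 5), bit 18 (1234 & 0x1F).
	 */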
4670 	index = (vtag >> 5) & 0x7F;
4671 	bit = vtag & 0x1F;
4672 	adapter->shadow_vfta[index] |= (1 << bit);
4673 	++adapter->num_vlans;
4674 	/* Re-init to load the changes */
4675 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4676 		em_init_locked(adapter);
4677 	EM_CORE_UNLOCK(adapter);
4678 }
4679 
4680 /*
 * This routine is run via a vlan
4682  * unconfig EVENT
4683  */
4684 static void
4685 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4686 {
4687 	struct adapter	*adapter = ifp->if_softc;
4688 	u32		index, bit;
4689 
4690 	if (ifp->if_softc !=  arg)
4691 		return;
4692 
4693 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4694                 return;
4695 
4696 	EM_CORE_LOCK(adapter);
4697 	index = (vtag >> 5) & 0x7F;
4698 	bit = vtag & 0x1F;
4699 	adapter->shadow_vfta[index] &= ~(1 << bit);
4700 	--adapter->num_vlans;
4701 	/* Re-init to load the changes */
4702 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4703 		em_init_locked(adapter);
4704 	EM_CORE_UNLOCK(adapter);
4705 }
4706 
4707 static void
4708 em_setup_vlan_hw_support(struct adapter *adapter)
4709 {
4710 	struct e1000_hw *hw = &adapter->hw;
4711 	u32             reg;
4712 
	/*
	** We get here via init_locked, meaning
	** a soft reset; this has already cleared
	** the VFTA and other state, so if no
	** vlans have been registered, do nothing.
	*/
4719 	if (adapter->num_vlans == 0)
4720                 return;
4721 
4722 	/*
	** A soft reset zeroes out the VFTA, so
4724 	** we need to repopulate it now.
4725 	*/
4726 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4727                 if (adapter->shadow_vfta[i] != 0)
4728 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4729                             i, adapter->shadow_vfta[i]);
4730 
4731 	reg = E1000_READ_REG(hw, E1000_CTRL);
4732 	reg |= E1000_CTRL_VME;
4733 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4734 
4735 	/* Enable the Filter Table */
4736 	reg = E1000_READ_REG(hw, E1000_RCTL);
4737 	reg &= ~E1000_RCTL_CFIEN;
4738 	reg |= E1000_RCTL_VFE;
4739 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4740 }
4741 
4742 static void
4743 em_enable_intr(struct adapter *adapter)
4744 {
4745 	struct e1000_hw *hw = &adapter->hw;
4746 	u32 ims_mask = IMS_ENABLE_MASK;
4747 
4748 	if (hw->mac.type == e1000_82574) {
4749 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4750 		ims_mask |= EM_MSIX_MASK;
4751 	}
4752 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4753 }
4754 
4755 static void
4756 em_disable_intr(struct adapter *adapter)
4757 {
4758 	struct e1000_hw *hw = &adapter->hw;
4759 
4760 	if (hw->mac.type == e1000_82574)
4761 		E1000_WRITE_REG(hw, EM_EIAC, 0);
	E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
4763 }
4764 
/*
 * Bit of a misnomer: what this really means is
 * to enable OS management of the system, i.e.
 * to disable the special hardware management features.
 */
4770 static void
4771 em_init_manageability(struct adapter *adapter)
4772 {
4773 	/* A shared code workaround */
4774 #define E1000_82542_MANC2H E1000_MANC2H
4775 	if (adapter->has_manage) {
4776 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4777 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4778 
4779 		/* disable hardware interception of ARP */
4780 		manc &= ~(E1000_MANC_ARP_EN);
4781 
4782                 /* enable receiving management packets to the host */
4783 		manc |= E1000_MANC_EN_MNG2HOST;
4784 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4785 #define E1000_MNG2HOST_PORT_664 (1 << 6)
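		/* UDP ports 623 and 664 carry RMCP (ASF/IPMI) management traffic */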
4786 		manc2h |= E1000_MNG2HOST_PORT_623;
4787 		manc2h |= E1000_MNG2HOST_PORT_664;
4788 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4789 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4790 	}
4791 }
4792 
4793 /*
4794  * Give control back to hardware management
4795  * controller if there is one.
4796  */
4797 static void
4798 em_release_manageability(struct adapter *adapter)
4799 {
4800 	if (adapter->has_manage) {
4801 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4802 
4803 		/* re-enable hardware interception of ARP */
4804 		manc |= E1000_MANC_ARP_EN;
4805 		manc &= ~E1000_MANC_EN_MNG2HOST;
4806 
4807 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4808 	}
4809 }
4810 
4811 /*
4812  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4813  * For ASF and Pass Through versions of f/w this means
4814  * that the driver is loaded. For AMT version type f/w
4815  * this means that the network i/f is open.
4816  */
4817 static void
4818 em_get_hw_control(struct adapter *adapter)
4819 {
4820 	u32 ctrl_ext, swsm;
4821 
4822 	if (adapter->hw.mac.type == e1000_82573) {
4823 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4824 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4825 		    swsm | E1000_SWSM_DRV_LOAD);
4826 		return;
4827 	}
4828 	/* else */
4829 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4830 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4831 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4832 	return;
4833 }
4834 
4835 /*
4836  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4837  * For ASF and Pass Through versions of f/w this means that
4838  * the driver is no longer loaded. For AMT versions of the
4839  * f/w this means that the network i/f is closed.
4840  */
4841 static void
4842 em_release_hw_control(struct adapter *adapter)
4843 {
4844 	u32 ctrl_ext, swsm;
4845 
4846 	if (!adapter->has_manage)
4847 		return;
4848 
4849 	if (adapter->hw.mac.type == e1000_82573) {
4850 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4851 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4852 		    swsm & ~E1000_SWSM_DRV_LOAD);
4853 		return;
4854 	}
4855 	/* else */
4856 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4857 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4858 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4859 	return;
4860 }
4861 
4862 static int
4863 em_is_valid_ether_addr(u8 *addr)
4864 {
4865 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4866 
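	/* Reject multicast addresses (bit 0 of the first octet set) and all-zero addresses */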
4867 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4868 		return (FALSE);
4869 	}
4870 
4871 	return (TRUE);
4872 }
4873 
4874 /*
4875 ** Parse the interface capabilities with regard
4876 ** to both system management and wake-on-lan for
4877 ** later use.
4878 */
4879 static void
4880 em_get_wakeup(device_t dev)
4881 {
4882 	struct adapter	*adapter = device_get_softc(dev);
4883 	u16		eeprom_data = 0, device_id, apme_mask;
4884 
4885 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4886 	apme_mask = EM_EEPROM_APME;
4887 
4888 	switch (adapter->hw.mac.type) {
4889 	case e1000_82573:
4890 	case e1000_82583:
4891 		adapter->has_amt = TRUE;
4892 		/* Falls thru */
4893 	case e1000_82571:
4894 	case e1000_82572:
4895 	case e1000_80003es2lan:
4896 		if (adapter->hw.bus.func == 1) {
4897 			e1000_read_nvm(&adapter->hw,
4898 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4899 			break;
4900 		} else
4901 			e1000_read_nvm(&adapter->hw,
4902 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4903 		break;
4904 	case e1000_ich8lan:
4905 	case e1000_ich9lan:
4906 	case e1000_ich10lan:
4907 	case e1000_pchlan:
4908 	case e1000_pch2lan:
4909 		apme_mask = E1000_WUC_APME;
4910 		adapter->has_amt = TRUE;
4911 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4912 		break;
4913 	default:
4914 		e1000_read_nvm(&adapter->hw,
4915 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4916 		break;
4917 	}
4918 	if (eeprom_data & apme_mask)
4919 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
	/*
	 * We have the eeprom settings; now apply the special cases
	 * where the eeprom may be wrong or the board won't support
	 * wake on lan on a particular port.
	 */
4925 	device_id = pci_get_device(dev);
4926         switch (device_id) {
4927 	case E1000_DEV_ID_82571EB_FIBER:
4928 		/* Wake events only supported on port A for dual fiber
4929 		 * regardless of eeprom setting */
4930 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4931 		    E1000_STATUS_FUNC_1)
4932 			adapter->wol = 0;
4933 		break;
4934 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4935 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4936 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4937                 /* if quad port adapter, disable WoL on all but port A */
4938 		if (global_quad_port_a != 0)
4939 			adapter->wol = 0;
4940 		/* Reset for multiple quad port adapters */
4941 		if (++global_quad_port_a == 4)
4942 			global_quad_port_a = 0;
4943                 break;
4944 	}
4945 	return;
4946 }
4947 
4948 
4949 /*
4950  * Enable PCI Wake On Lan capability
4951  */
4952 static void
4953 em_enable_wakeup(device_t dev)
4954 {
4955 	struct adapter	*adapter = device_get_softc(dev);
4956 	struct ifnet	*ifp = adapter->ifp;
4957 	u32		pmc, ctrl, ctrl_ext, rctl;
4958 	u16     	status;
4959 
	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
4961 		return;
4962 
4963 	/* Advertise the wakeup capability */
4964 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4965 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4966 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4967 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4968 
4969 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4970 	    (adapter->hw.mac.type == e1000_pchlan) ||
4971 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4972 	    (adapter->hw.mac.type == e1000_ich10lan))
4973 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4974 
4975 	/* Keep the laser running on Fiber adapters */
4976 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4977 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4978 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4979 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4980 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4981 	}
4982 
4983 	/*
4984 	** Determine type of Wakeup: note that wol
4985 	** is set with all bits on by default.
4986 	*/
4987 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4988 		adapter->wol &= ~E1000_WUFC_MAG;
4989 
4990 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4991 		adapter->wol &= ~E1000_WUFC_MC;
4992 	else {
4993 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4994 		rctl |= E1000_RCTL_MPE;
4995 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4996 	}
4997 
4998 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4999 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5000 		if (em_enable_phy_wakeup(adapter))
5001 			return;
5002 	} else {
5003 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5004 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5005 	}
5006 
5007 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5008 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5009 
5010         /* Request PME */
5011         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5012 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5013 	if (ifp->if_capenable & IFCAP_WOL)
5014 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5015         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5016 
5017 	return;
5018 }
5019 
5020 /*
** WOL in the newer chipset interfaces (pchlan)
** requires things to be copied into the PHY
5023 */
5024 static int
5025 em_enable_phy_wakeup(struct adapter *adapter)
5026 {
5027 	struct e1000_hw *hw = &adapter->hw;
5028 	u32 mreg, ret = 0;
5029 	u16 preg;
5030 
5031 	/* copy MAC RARs to PHY RARs */
5032 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5033 
5034 	/* copy MAC MTA to PHY MTA */
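	/* each 32-bit MTA register becomes two 16-bit PHY writes:
	   the low word at BM_MTA(i), the high word at BM_MTA(i) + 1 */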
5035 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5036 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5037 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5038 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5039 		    (u16)((mreg >> 16) & 0xFFFF));
5040 	}
5041 
5042 	/* configure PHY Rx Control register */
5043 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5044 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5045 	if (mreg & E1000_RCTL_UPE)
5046 		preg |= BM_RCTL_UPE;
5047 	if (mreg & E1000_RCTL_MPE)
5048 		preg |= BM_RCTL_MPE;
5049 	preg &= ~(BM_RCTL_MO_MASK);
5050 	if (mreg & E1000_RCTL_MO_3)
5051 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5052 				<< BM_RCTL_MO_SHIFT);
5053 	if (mreg & E1000_RCTL_BAM)
5054 		preg |= BM_RCTL_BAM;
5055 	if (mreg & E1000_RCTL_PMCF)
5056 		preg |= BM_RCTL_PMCF;
5057 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5058 	if (mreg & E1000_CTRL_RFCE)
5059 		preg |= BM_RCTL_RFCE;
5060 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5061 
5062 	/* enable PHY wakeup in MAC register */
5063 	E1000_WRITE_REG(hw, E1000_WUC,
5064 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5065 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5066 
5067 	/* configure and enable PHY wakeup in PHY registers */
5068 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5069 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5070 
5071 	/* activate PHY wakeup */
5072 	ret = hw->phy.ops.acquire(hw);
5073 	if (ret) {
5074 		printf("Could not acquire PHY\n");
5075 		return ret;
5076 	}
5077 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5078 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5079 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5080 	if (ret) {
5081 		printf("Could not read PHY page 769\n");
5082 		goto out;
5083 	}
5084 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5085 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5086 	if (ret)
5087 		printf("Could not set PHY Host Wakeup bit\n");
5088 out:
5089 	hw->phy.ops.release(hw);
5090 
5091 	return ret;
5092 }
5093 
5094 static void
5095 em_led_func(void *arg, int onoff)
5096 {
5097 	struct adapter	*adapter = arg;
5098 
5099 	EM_CORE_LOCK(adapter);
5100 	if (onoff) {
5101 		e1000_setup_led(&adapter->hw);
5102 		e1000_led_on(&adapter->hw);
5103 	} else {
5104 		e1000_led_off(&adapter->hw);
5105 		e1000_cleanup_led(&adapter->hw);
5106 	}
5107 	EM_CORE_UNLOCK(adapter);
5108 }
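
/*
 * Example (sketch): em_led_func() is the callback handed to
 * led_create(9) during attach, so the identify LED can be driven
 * through led(4), assuming the node is named after the device:
 *
 *	echo 1 > /dev/led/em0	# LED on
 *	echo 0 > /dev/led/em0	# LED off
 */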
5109 
/*
** Disable the L0s and L1 link states (ASPM)
*/
5113 static void
5114 em_disable_aspm(struct adapter *adapter)
5115 {
5116 	int		base, reg;
	u16		link_cap, link_ctrl;
5118 	device_t	dev = adapter->dev;
5119 
5120 	switch (adapter->hw.mac.type) {
5121 		case e1000_82573:
5122 		case e1000_82574:
5123 		case e1000_82583:
5124 			break;
5125 		default:
5126 			return;
5127 	}
5128 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5129 		return;
5130 	reg = base + PCIER_LINK_CAP;
5131 	link_cap = pci_read_config(dev, reg, 2);
5132 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5133 		return;
5134 	reg = base + PCIER_LINK_CTL;
5135 	link_ctrl = pci_read_config(dev, reg, 2);
5136 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5137 	pci_write_config(dev, reg, link_ctrl, 2);
5138 	return;
5139 }
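
/*
 * Example (sketch): the ASPM control field cleared above is the low two
 * bits of the PCIe Link Control register (bit 0 = L0s, bit 1 = L1).
 * The resulting link state can be inspected from userland with:
 *
 *	pciconf -lc
 *
 * by locating the em device's "express" capability line.
 */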
5140 
5141 /**********************************************************************
5142  *
5143  *  Update the board statistics counters.
5144  *
5145  **********************************************************************/
5146 static void
5147 em_update_stats_counters(struct adapter *adapter)
5148 {
5149 	struct ifnet   *ifp;
5150 
	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5152 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5153 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5154 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5155 	}
5156 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5157 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5158 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5159 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5160 
5161 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5162 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5163 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5164 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5165 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5166 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5167 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5168 	/*
5169 	** For watchdog management we need to know if we have been
5170 	** paused during the last interval, so capture that here.
5171 	*/
5172 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5173 	adapter->stats.xoffrxc += adapter->pause_frames;
5174 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5175 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5176 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5177 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5178 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5179 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5180 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5181 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5182 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5183 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5184 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5185 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5186 
	/*
	 * For the 64-bit byte counters the low dword must be read first;
	 * both registers clear on the read of the high dword.
	 */
5189 
5190 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5191 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5192 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5193 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5194 
5195 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5196 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5197 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5198 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5199 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5200 
5201 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5202 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5203 
5204 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5205 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5206 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5207 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5208 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5209 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5210 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5211 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5212 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5213 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5214 
5215 	/* Interrupt Counts */
5216 
5217 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5218 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5219 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5220 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5221 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5222 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5223 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5224 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5225 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5226 
5227 	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5240 	}
5241 	ifp = adapter->ifp;
5242 
5243 	ifp->if_collisions = adapter->stats.colc;
5244 
5245 	/* Rx Errors */
5246 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5247 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5248 	    adapter->stats.ruc + adapter->stats.roc +
5249 	    adapter->stats.mpc + adapter->stats.cexterr;
5250 
5251 	/* Tx Errors */
5252 	ifp->if_oerrors = adapter->stats.ecol +
5253 	    adapter->stats.latecol + adapter->watchdog_events;
5254 }
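
/*
 * Example (sketch): the if_ierrors/if_oerrors totals computed above are
 * what per-interface tools report from userland, e.g. the Ierrs/Oerrs
 * columns of:
 *
 *	netstat -I em0
 */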
5255 
5256 /* Export a single 32-bit register via a read-only sysctl. */
5257 static int
5258 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5259 {
5260 	struct adapter *adapter;
5261 	u_int val;
5262 
5263 	adapter = oidp->oid_arg1;
5264 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5265 	return (sysctl_handle_int(oidp, &val, 0, req));
5266 }
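
/*
 * Example (sketch): once registered through SYSCTL_ADD_PROC() with the
 * register offset in oid_arg2 (see em_add_hw_stats() below), a raw
 * register can be read from userland, e.g.:
 *
 *	sysctl dev.em.0.rx_control
 *
 * (unit 0 assumed; the node name is the one passed at creation).
 */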
5267 
5268 /*
5269  * Add sysctl variables, one per statistic, to the system.
5270  */
5271 static void
5272 em_add_hw_stats(struct adapter *adapter)
5273 {
5274 	device_t dev = adapter->dev;
5275 
5276 	struct tx_ring *txr = adapter->tx_rings;
5277 	struct rx_ring *rxr = adapter->rx_rings;
5278 
5279 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5280 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5281 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5282 	struct e1000_hw_stats *stats = &adapter->stats;
5283 
5284 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5285 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5286 
5287 #define QUEUE_NAME_LEN 32
5288 	char namebuf[QUEUE_NAME_LEN];
5289 
5290 	/* Driver Statistics */
5291 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5292 			CTLFLAG_RD, &adapter->link_irq,
5293 			"Link MSIX IRQ Handled");
5294 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5295 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5296 			 "Std mbuf failed");
5297 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5298 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5299 			 "Std mbuf cluster failed");
5300 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5301 			CTLFLAG_RD, &adapter->dropped_pkts,
5302 			"Driver dropped packets");
5303 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5304 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5305 			"Driver tx dma failure in xmit");
5306 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5307 			CTLFLAG_RD, &adapter->rx_overruns,
5308 			"RX overruns");
5309 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5310 			CTLFLAG_RD, &adapter->watchdog_events,
5311 			"Watchdog timeouts");
5312 
5313 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5314 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5315 			em_sysctl_reg_handler, "IU",
5316 			"Device Control Register");
5317 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5318 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5319 			em_sysctl_reg_handler, "IU",
5320 			"Receiver Control Register");
5321 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5322 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5323 			"Flow Control High Watermark");
5324 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5325 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5326 			"Flow Control Low Watermark");
5327 
5328 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5329 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5330 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5331 					    CTLFLAG_RD, NULL, "Queue Name");
5332 		queue_list = SYSCTL_CHILDREN(queue_node);
5333 
5334 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5335 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5336 				E1000_TDH(txr->me),
5337 				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Head");
5339 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5340 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5341 				E1000_TDT(txr->me),
5342 				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Tail");
5344 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5345 				CTLFLAG_RD, &txr->tx_irq,
5346 				"Queue MSI-X Transmit Interrupts");
5347 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5348 				CTLFLAG_RD, &txr->no_desc_avail,
5349 				"Queue No Descriptor Available");
5350 
5351 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5352 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5353 				E1000_RDH(rxr->me),
5354 				em_sysctl_reg_handler, "IU",
5355 				"Receive Descriptor Head");
5356 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5357 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5358 				E1000_RDT(rxr->me),
5359 				em_sysctl_reg_handler, "IU",
5360 				"Receive Descriptor Tail");
5361 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5362 				CTLFLAG_RD, &rxr->rx_irq,
5363 				"Queue MSI-X Receive Interrupts");
5364 	}
5365 
5366 	/* MAC stats get their own sub node */
5367 
5368 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5369 				    CTLFLAG_RD, NULL, "Statistics");
5370 	stat_list = SYSCTL_CHILDREN(stat_node);
5371 
5372 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5373 			CTLFLAG_RD, &stats->ecol,
5374 			"Excessive collisions");
5375 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5376 			CTLFLAG_RD, &stats->scc,
5377 			"Single collisions");
5378 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5379 			CTLFLAG_RD, &stats->mcc,
5380 			"Multiple collisions");
5381 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5382 			CTLFLAG_RD, &stats->latecol,
5383 			"Late collisions");
5384 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5385 			CTLFLAG_RD, &stats->colc,
5386 			"Collision Count");
5387 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5388 			CTLFLAG_RD, &adapter->stats.symerrs,
5389 			"Symbol Errors");
5390 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5391 			CTLFLAG_RD, &adapter->stats.sec,
5392 			"Sequence Errors");
5393 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5394 			CTLFLAG_RD, &adapter->stats.dc,
5395 			"Defer Count");
5396 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5397 			CTLFLAG_RD, &adapter->stats.mpc,
5398 			"Missed Packets");
5399 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5400 			CTLFLAG_RD, &adapter->stats.rnbc,
5401 			"Receive No Buffers");
5402 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5403 			CTLFLAG_RD, &adapter->stats.ruc,
5404 			"Receive Undersize");
5405 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5406 			CTLFLAG_RD, &adapter->stats.rfc,
			"Fragmented Packets Received");
5408 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5409 			CTLFLAG_RD, &adapter->stats.roc,
5410 			"Oversized Packets Received");
5411 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5412 			CTLFLAG_RD, &adapter->stats.rjc,
			"Received Jabber");
5414 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5415 			CTLFLAG_RD, &adapter->stats.rxerrc,
5416 			"Receive Errors");
5417 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5418 			CTLFLAG_RD, &adapter->stats.crcerrs,
5419 			"CRC errors");
5420 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5421 			CTLFLAG_RD, &adapter->stats.algnerrc,
5422 			"Alignment Errors");
5423 	/* On 82575 these are collision counts */
5424 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5425 			CTLFLAG_RD, &adapter->stats.cexterr,
5426 			"Collision/Carrier extension errors");
5427 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5428 			CTLFLAG_RD, &adapter->stats.xonrxc,
5429 			"XON Received");
5430 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5431 			CTLFLAG_RD, &adapter->stats.xontxc,
5432 			"XON Transmitted");
5433 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5434 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5435 			"XOFF Received");
5436 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5437 			CTLFLAG_RD, &adapter->stats.xofftxc,
5438 			"XOFF Transmitted");
5439 
5440 	/* Packet Reception Stats */
5441 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5442 			CTLFLAG_RD, &adapter->stats.tpr,
			"Total Packets Received");
5444 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5445 			CTLFLAG_RD, &adapter->stats.gprc,
5446 			"Good Packets Received");
5447 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5448 			CTLFLAG_RD, &adapter->stats.bprc,
5449 			"Broadcast Packets Received");
5450 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5451 			CTLFLAG_RD, &adapter->stats.mprc,
5452 			"Multicast Packets Received");
5453 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5454 			CTLFLAG_RD, &adapter->stats.prc64,
			"64 byte frames received");
5456 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5457 			CTLFLAG_RD, &adapter->stats.prc127,
5458 			"65-127 byte frames received");
5459 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5460 			CTLFLAG_RD, &adapter->stats.prc255,
5461 			"128-255 byte frames received");
5462 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5463 			CTLFLAG_RD, &adapter->stats.prc511,
5464 			"256-511 byte frames received");
5465 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5466 			CTLFLAG_RD, &adapter->stats.prc1023,
5467 			"512-1023 byte frames received");
5468 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5469 			CTLFLAG_RD, &adapter->stats.prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &adapter->stats.gorc,
			"Good Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &adapter->stats.gotc,
			"Good Octets Transmitted");
5479 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5480 			CTLFLAG_RD, &adapter->stats.tpt,
5481 			"Total Packets Transmitted");
5482 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5483 			CTLFLAG_RD, &adapter->stats.gptc,
5484 			"Good Packets Transmitted");
5485 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5486 			CTLFLAG_RD, &adapter->stats.bptc,
5487 			"Broadcast Packets Transmitted");
5488 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5489 			CTLFLAG_RD, &adapter->stats.mptc,
5490 			"Multicast Packets Transmitted");
5491 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5492 			CTLFLAG_RD, &adapter->stats.ptc64,
			"64 byte frames transmitted");
5494 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5495 			CTLFLAG_RD, &adapter->stats.ptc127,
5496 			"65-127 byte frames transmitted");
5497 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5498 			CTLFLAG_RD, &adapter->stats.ptc255,
5499 			"128-255 byte frames transmitted");
5500 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5501 			CTLFLAG_RD, &adapter->stats.ptc511,
5502 			"256-511 byte frames transmitted");
5503 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5504 			CTLFLAG_RD, &adapter->stats.ptc1023,
5505 			"512-1023 byte frames transmitted");
5506 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5507 			CTLFLAG_RD, &adapter->stats.ptc1522,
5508 			"1024-1522 byte frames transmitted");
5509 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5510 			CTLFLAG_RD, &adapter->stats.tsctc,
5511 			"TSO Contexts Transmitted");
5512 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5513 			CTLFLAG_RD, &adapter->stats.tsctfc,
5514 			"TSO Contexts Failed");
5515 
5516 
5517 	/* Interrupt Stats */
5518 
5519 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5520 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5521 	int_list = SYSCTL_CHILDREN(int_node);
5522 
5523 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5524 			CTLFLAG_RD, &adapter->stats.iac,
5525 			"Interrupt Assertion Count");
5526 
5527 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5528 			CTLFLAG_RD, &adapter->stats.icrxptc,
5529 			"Interrupt Cause Rx Pkt Timer Expire Count");
5530 
5531 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5532 			CTLFLAG_RD, &adapter->stats.icrxatc,
5533 			"Interrupt Cause Rx Abs Timer Expire Count");
5534 
5535 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5536 			CTLFLAG_RD, &adapter->stats.ictxptc,
5537 			"Interrupt Cause Tx Pkt Timer Expire Count");
5538 
5539 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5540 			CTLFLAG_RD, &adapter->stats.ictxatc,
5541 			"Interrupt Cause Tx Abs Timer Expire Count");
5542 
5543 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5544 			CTLFLAG_RD, &adapter->stats.ictxqec,
5545 			"Interrupt Cause Tx Queue Empty Count");
5546 
5547 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5548 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5549 			"Interrupt Cause Tx Queue Min Thresh Count");
5550 
5551 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5552 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5553 			"Interrupt Cause Rx Desc Min Thresh Count");
5554 
5555 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5556 			CTLFLAG_RD, &adapter->stats.icrxoc,
5557 			"Interrupt Cause Receiver Overrun Count");
5558 }
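
/*
 * Example (sketch): the nodes created above hang off the device's
 * sysctl tree, so the whole MAC statistics block can be dumped with:
 *
 *	sysctl dev.em.0.mac_stats
 *
 * and a single queue's ring state read via e.g.
 * dev.em.0.queue0.txd_head (unit and queue numbers assumed).
 */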
5559 
5560 /**********************************************************************
5561  *
5562  *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool.  It dumps only the first
 *  32 words; the data that matters lives in that range.
5565  *
5566  **********************************************************************/
5567 static int
5568 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5569 {
5570 	struct adapter *adapter = (struct adapter *)arg1;
5571 	int error;
5572 	int result;
5573 
5574 	result = -1;
5575 	error = sysctl_handle_int(oidp, &result, 0, req);
5576 
5577 	if (error || !req->newptr)
5578 		return (error);
5579 
5580 	/*
5581 	 * This value will cause a hex dump of the
5582 	 * first 32 16-bit words of the EEPROM to
5583 	 * the screen.
5584 	 */
5585 	if (result == 1)
5586 		em_print_nvm_info(adapter);
5587 
5588 	return (error);
5589 }
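
/*
 * Example (sketch): writing 1 to the node dumps the EEPROM words to the
 * console, assuming the node is registered as "nvm" in em_attach():
 *
 *	sysctl dev.em.0.nvm=1
 */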
5590 
5591 static void
5592 em_print_nvm_info(struct adapter *adapter)
5593 {
5594 	u16	eeprom_data;
5595 	int	i, j, row = 0;
5596 
	/* It's a bit crude, but it gets the job done */
5598 	printf("\nInterface EEPROM Dump:\n");
5599 	printf("Offset\n0x0000  ");
5600 	for (i = 0, j = 0; i < 32; i++, j++) {
5601 		if (j == 8) { /* Make the offset block */
5602 			j = 0; ++row;
			printf("\n0x00%x0  ", row);
5604 		}
5605 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5606 		printf("%04x ", eeprom_data);
5607 	}
5608 	printf("\n");
5609 }
5610 
5611 static int
5612 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5613 {
5614 	struct em_int_delay_info *info;
5615 	struct adapter *adapter;
5616 	u32 regval;
5617 	int error, usecs, ticks;
5618 
5619 	info = (struct em_int_delay_info *)arg1;
5620 	usecs = info->value;
5621 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5622 	if (error != 0 || req->newptr == NULL)
5623 		return (error);
5624 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5625 		return (EINVAL);
5626 	info->value = usecs;
5627 	ticks = EM_USECS_TO_TICKS(usecs);
5628 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5629 		ticks *= 4;
5630 
5631 	adapter = info->adapter;
5632 
5633 	EM_CORE_LOCK(adapter);
5634 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5635 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5636 	/* Handle a few special cases. */
5637 	switch (info->offset) {
5638 	case E1000_RDTR:
5639 		break;
5640 	case E1000_TIDV:
5641 		if (ticks == 0) {
5642 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5643 			/* Don't write 0 into the TIDV register. */
5644 			regval++;
5645 		} else
5646 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5647 		break;
5648 	}
5649 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5650 	EM_CORE_UNLOCK(adapter);
5651 	return (0);
5652 }
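
/*
 * Worked example (sketch): assuming the EM_USECS_TO_TICKS() definition
 * in if_em.h uses 1.024 usec register ticks, a request of 100 usec
 * becomes ~98 ticks; for E1000_ITR, whose units are 256 nsec, the
 * value is scaled by 4 to ~392, i.e. roughly 100 usec again.
 */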
5653 
5654 static void
5655 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5656 	const char *description, struct em_int_delay_info *info,
5657 	int offset, int value)
5658 {
5659 	info->adapter = adapter;
5660 	info->offset = offset;
5661 	info->value = value;
5662 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5663 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5664 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5665 	    info, 0, em_sysctl_int_delay, "I", description);
5666 }
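
/*
 * Example (sketch): the nodes created through this helper are the usual
 * interrupt moderation knobs.  Assuming the names used at registration
 * in em_attach() (e.g. "rx_int_delay", "itr"), they can be tuned at
 * runtime:
 *
 *	sysctl dev.em.0.rx_int_delay=32
 *	sysctl dev.em.0.itr=100
 */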
5667 
5668 static void
5669 em_set_sysctl_value(struct adapter *adapter, const char *name,
5670 	const char *description, int *limit, int value)
5671 {
5672 	*limit = value;
5673 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5674 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5675 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5676 }
5677 
5678 
5679 /*
5680 ** Set flow control using sysctl:
5681 ** Flow control values:
5682 **      0 - off
5683 **      1 - rx pause
5684 **      2 - tx pause
5685 **      3 - full
5686 */
static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	struct adapter	*adapter = (struct adapter *) arg1;
	int		error, input;

	/*
	 * Report the currently programmed mode; a static default here
	 * would be shared across all adapters and go stale.
	 */
	input = adapter->fc;
	error = sysctl_handle_int(oidp, &input, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	if (input == adapter->fc) /* no change? */
		return (error);

	switch (input) {
	case e1000_fc_rx_pause:
	case e1000_fc_tx_pause:
	case e1000_fc_full:
	case e1000_fc_none:
		adapter->hw.fc.requested_mode = input;
		adapter->fc = input;
		break;
	default:
		/* Do nothing */
		return (error);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
}
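
/*
 * Example (sketch, not part of the driver): assuming the node is
 * registered as "fc" on the device tree in em_attach(), full flow
 * control can be requested at runtime with:
 *
 *	sysctl dev.em.0.fc=3
 */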
5719 
5720 /*
5721 ** Manage Energy Efficient Ethernet:
5722 ** Control values:
5723 **     0/1 - enabled/disabled
5724 */
static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)
{
	struct adapter	*adapter = (struct adapter *) arg1;
	int		error, value;

	value = adapter->hw.dev_spec.ich8lan.eee_disable;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	EM_CORE_LOCK(adapter);
	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
	return (0);
}
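
/*
 * Example (sketch): assuming the node is registered as "eee_control"
 * in em_attach(), EEE can be disabled at runtime (the write
 * reinitializes the interface):
 *
 *	sysctl dev.em.0.eee_control=1
 */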
5741 
5742 static int
5743 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5744 {
5745 	struct adapter *adapter;
5746 	int error;
5747 	int result;
5748 
5749 	result = -1;
5750 	error = sysctl_handle_int(oidp, &result, 0, req);
5751 
5752 	if (error || !req->newptr)
5753 		return (error);
5754 
5755 	if (result == 1) {
5756 		adapter = (struct adapter *)arg1;
5757 		em_print_debug_info(adapter);
	}
5759 
5760 	return (error);
5761 }
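
/*
 * Example (sketch): the debug dump below is triggered the same way,
 * assuming the node is registered as "debug" in em_attach():
 *
 *	sysctl dev.em.0.debug=1
 */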
5762 
5763 /*
5764 ** This routine is meant to be fluid, add whatever is
5765 ** needed for debugging a problem.  -jfv
5766 */
5767 static void
5768 em_print_debug_info(struct adapter *adapter)
5769 {
5770 	device_t dev = adapter->dev;
5771 	struct tx_ring *txr = adapter->tx_rings;
5772 	struct rx_ring *rxr = adapter->rx_rings;
5773 
5774 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5775 		printf("Interface is RUNNING ");
5776 	else
		printf("Interface is NOT RUNNING ");
5778 
5779 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5780 		printf("and INACTIVE\n");
5781 	else
5782 		printf("and ACTIVE\n");
5783 
5784 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5785 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5786 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5787 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5788 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5789 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5790 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5791 	device_printf(dev, "TX descriptors avail = %d\n",
5792 	    txr->tx_avail);
5793 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5794 	    txr->no_desc_avail);
5795 	device_printf(dev, "RX discarded packets = %ld\n",
5796 	    rxr->rx_discarded);
5797 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5798 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5799 }
5800