xref: /freebsd/sys/dev/e1000/if_em.c (revision 3fc9e2c36555140de248a0b4def91bbfa44d7c2c)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2013, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37 
38 #ifdef HAVE_KERNEL_OPTION_HEADERS
39 #include "opt_device_polling.h"
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #if __FreeBSD_version >= 800000
45 #include <sys/buf_ring.h>
46 #endif
47 #include <sys/bus.h>
48 #include <sys/endian.h>
49 #include <sys/kernel.h>
50 #include <sys/kthread.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/rman.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/taskqueue.h>
59 #include <sys/eventhandler.h>
60 #include <machine/bus.h>
61 #include <machine/resource.h>
62 
63 #include <net/bpf.h>
64 #include <net/ethernet.h>
65 #include <net/if.h>
66 #include <net/if_arp.h>
67 #include <net/if_dl.h>
68 #include <net/if_media.h>
69 
70 #include <net/if_types.h>
71 #include <net/if_vlan_var.h>
72 
73 #include <netinet/in_systm.h>
74 #include <netinet/in.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/ip.h>
77 #include <netinet/ip6.h>
78 #include <netinet/tcp.h>
79 #include <netinet/udp.h>
80 
81 #include <machine/in_cksum.h>
82 #include <dev/led/led.h>
83 #include <dev/pci/pcivar.h>
84 #include <dev/pci/pcireg.h>
85 
86 #include "e1000_api.h"
87 #include "e1000_82571.h"
88 #include "if_em.h"
89 
90 /*********************************************************************
91  *  Set this to one to display debug statistics
92  *********************************************************************/
/* Runtime debug toggle consulted by the debug sysctl/stat paths; 0 = quiet. */
int	em_display_debug_stats = 0;
94 
95 /*********************************************************************
96  *  Driver version:
97  *********************************************************************/
/* Driver version string; appended to the device description in em_probe(). */
char em_driver_version[] = "7.3.8";
99 
100 /*********************************************************************
101  *  PCI Device ID Table
102  *
103  *  Used by probe to select devices to load on
104  *  Last field stores an index into e1000_strings
105  *  Last entry must be all 0s
106  *
107  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
108  *********************************************************************/
/*
 * PCI match table walked by em_probe().  Every entry uses PCI_ANY_ID for
 * the subsystem IDs (any subvendor/subdevice matches) and index 0 into
 * em_strings.  The all-zero entry terminates the walk.
 */
static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
185 
186 /*********************************************************************
187  *  Table of branding strings for all supported NICs.
188  *********************************************************************/
189 
/* Branding strings; em_vendor_info_t.index selects an entry (only 0 used). */
static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
193 
194 /*********************************************************************
195  *  Function prototypes
196  *********************************************************************/
/* newbus device methods */
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
/* Transmit entry points: multiqueue (buf_ring) vs. legacy if_snd queue */
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
/* Resource allocation / teardown helpers used by attach/detach */
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

/* Transmit ring setup/teardown */
static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

/* Receive ring setup/teardown */
static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

/* Interrupt control, statistics, and datapath processing */
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int 	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

/* Legacy (INTx) fast interrupt filter */
static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* POLLING */
306 
307 /*********************************************************************
308  *  FreeBSD Device Interface Entry Points
309  *********************************************************************/
310 
/* newbus method table wiring the static handlers above into the bus glue. */
static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

/* softc is the driver-private struct adapter, allocated per device. */
static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
/* Register on the PCI bus and declare module dependencies. */
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
330 
331 /*********************************************************************
332  *  Tunable default values.
333  *********************************************************************/
334 
/*
 * Hardware interrupt-delay registers count in 1.024us ticks; these convert
 * to/from microseconds with rounding to nearest.
 */
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

/* Default ITR value: cap at MAX_INTS_PER_SEC interrupts/sec (ITR is in 256ns units). */
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif
346 
/* Parent node for all hw.em.* loader tunables / sysctls below. */
static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

/* Per-packet interrupt delays (RDTR/TIDV), in usecs. */
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

/* Absolute delay ceilings (TADV/RADV), in usecs. */
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

/* Ring sizes; validated in em_attach() against EM_MIN/MAX and alignment. */
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
/*
 * NOTE(review): em_attach() stores this into hw->dev_spec.ich8lan.eee_disable,
 * so 1 here means EEE *disabled*; the "Enable ..." description string below
 * reads inverted relative to that — verify before relying on it.
 */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */
414 
415 /*********************************************************************
416  *  Device identification routine
417  *
418  *  em_probe determines if the driver should be loaded on
419  *  adapter based on PCI vendor/device id of the adapter.
420  *
421  *  return BUS_PROBE_DEFAULT on success, positive on failure
422  *********************************************************************/
423 
424 static int
425 em_probe(device_t dev)
426 {
427 	char		adapter_name[60];
428 	u16		pci_vendor_id = 0;
429 	u16		pci_device_id = 0;
430 	u16		pci_subvendor_id = 0;
431 	u16		pci_subdevice_id = 0;
432 	em_vendor_info_t *ent;
433 
434 	INIT_DEBUGOUT("em_probe: begin");
435 
436 	pci_vendor_id = pci_get_vendor(dev);
437 	if (pci_vendor_id != EM_VENDOR_ID)
438 		return (ENXIO);
439 
440 	pci_device_id = pci_get_device(dev);
441 	pci_subvendor_id = pci_get_subvendor(dev);
442 	pci_subdevice_id = pci_get_subdevice(dev);
443 
444 	ent = em_vendor_info_array;
445 	while (ent->vendor_id != 0) {
446 		if ((pci_vendor_id == ent->vendor_id) &&
447 		    (pci_device_id == ent->device_id) &&
448 
449 		    ((pci_subvendor_id == ent->subvendor_id) ||
450 		    (ent->subvendor_id == PCI_ANY_ID)) &&
451 
452 		    ((pci_subdevice_id == ent->subdevice_id) ||
453 		    (ent->subdevice_id == PCI_ANY_ID))) {
454 			sprintf(adapter_name, "%s %s",
455 				em_strings[ent->index],
456 				em_driver_version);
457 			device_set_desc_copy(dev, adapter_name);
458 			return (BUS_PROBE_DEFAULT);
459 		}
460 		ent++;
461 	}
462 
463 	return (ENXIO);
464 }
465 
466 /*********************************************************************
467  *  Device initialization routine
468  *
469  *  The attach entry point is called when the driver is being loaded.
470  *  This routine identifies the type of hardware, allocates all resources
471  *  and initializes the hardware.
472  *
473  *  return 0 on success, positive on failure
474  *********************************************************************/
475 
static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	/* Honor hint.em.N.disabled from the loader/device hints. */
	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	/* Watchdog timer runs under the core mutex. */
	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	/* Note: eee_setting feeds eee_disable, so 1 disables EEE. */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state, this is
	** important in reading the nvm and
	** mac from that.
	*/
	e1000_reset_hw(hw);


	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state, call it again,
		** if it fails a second time its a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

	/* Unwind in reverse order of acquisition; free(NULL) is a no-op. */
err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}
766 
767 /*********************************************************************
768  *  Device removal routine
769  *
770  *  The detach entry point is called when the driver is being removed.
771  *  This routine stops the adapter and deallocates all the resources
772  *  that were allocated for driver operation.
773  *
774  *  return 0 on success, positive on failure
775  *********************************************************************/
776 
static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	/* Quiesce the hardware under the core lock, then tear the lock down. */
	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	/*
	 * NOTE(review): the core mutex is destroyed here, yet the watchdog
	 * callout (initialized with core_mtx) is only drained further below
	 * — verify this ordering cannot race a late callout firing.
	 */
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}
835 
836 /*********************************************************************
837  *
838  *  Shutdown entry point
839  *
840  **********************************************************************/
841 
842 static int
843 em_shutdown(device_t dev)
844 {
845 	return em_suspend(dev);
846 }
847 
848 /*
849  * Suspend/resume device methods.
850  */
851 static int
852 em_suspend(device_t dev)
853 {
854 	struct adapter *adapter = device_get_softc(dev);
855 
856 	EM_CORE_LOCK(adapter);
857 
858         em_release_manageability(adapter);
859 	em_release_hw_control(adapter);
860 	em_enable_wakeup(dev);
861 
862 	EM_CORE_UNLOCK(adapter);
863 
864 	return bus_generic_suspend(dev);
865 }
866 
static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	/* PCH2 parts require extra silicon workarounds after resume */
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	/* Kick transmission back off on every ring with queued work */
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}
898 
899 
900 #ifdef EM_MULTIQUEUE
901 /*********************************************************************
902  *  Multiqueue Transmit routines
903  *
904  *  em_mq_start is called by the stack to initiate a transmit.
905  *  however, if busy the driver can queue the request rather
906  *  than do an immediate send. It is this that is an advantage
907  *  in this driver, rather than also having multiple tx queues.
908  **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

	/* Not running or no link: just park the mbuf on the buf_ring */
	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m != NULL) {
		err = drbr_enqueue(ifp, txr->br, m);
		if (err)
			return (err);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			/*
			 * em_xmit() may consume/NULL the mbuf on failure;
			 * only put it back if it still exists.
			 */
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		/* Update stats and tap BPF for each sent frame */
		ifp->if_obytes += next->m_pkthdr.len;
		if (next->m_flags & M_MCAST)
			ifp->if_omcasts++;
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
	}

	if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	/* Low on descriptors: try to reclaim, else throttle the stack */
	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	return (err);
}
961 
962 /*
963 ** Multiqueue capable stack interface
964 */
965 static int
966 em_mq_start(struct ifnet *ifp, struct mbuf *m)
967 {
968 	struct adapter	*adapter = ifp->if_softc;
969 	struct tx_ring	*txr = adapter->tx_rings;
970 	int 		error;
971 
972 	if (EM_TX_TRYLOCK(txr)) {
973 		error = em_mq_start_locked(ifp, txr, m);
974 		EM_TX_UNLOCK(txr);
975 	} else
976 		error = drbr_enqueue(ifp, txr->br, m);
977 
978 	return (error);
979 }
980 
981 /*
982 ** Flush all ring buffers
983 */
984 static void
985 em_qflush(struct ifnet *ifp)
986 {
987 	struct adapter  *adapter = ifp->if_softc;
988 	struct tx_ring  *txr = adapter->tx_rings;
989 	struct mbuf     *m;
990 
991 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
992 		EM_TX_LOCK(txr);
993 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
994 			m_freem(m);
995 		EM_TX_UNLOCK(txr);
996 	}
997 	if_qflush(ifp);
998 }
999 #else  /* !EM_MULTIQUEUE */
1000 
static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	/* Nothing to do if not running, already active, or no link */
	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
        	/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		/* Still too few free descriptors: throttle the stack */
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}
1048 
1049 static void
1050 em_start(struct ifnet *ifp)
1051 {
1052 	struct adapter	*adapter = ifp->if_softc;
1053 	struct tx_ring	*txr = adapter->tx_rings;
1054 
1055 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1056 		EM_TX_LOCK(txr);
1057 		em_start_locked(ifp, txr);
1058 		EM_TX_UNLOCK(txr);
1059 	}
1060 	return;
1061 }
1062 #endif /* EM_MULTIQUEUE */
1063 
1064 /*********************************************************************
1065  *  Ioctl entry point
1066  *
1067  *  em_ioctl is called when the user wants to configure the
1068  *  interface.
1069  *
1070  *  return 0 on success, positive on failure
1071  **********************************************************************/
1072 
static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	/* Device is being torn down: refuse all configuration */
	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		/* Maximum frame size depends on the MAC generation */
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		/* New MTU requires a full reinitialization */
		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		/*
		 * NOTE(review): the backslash continuation below embeds
		 * the next line's leading whitespace into the debug
		 * string -- cosmetic only, message is debug output.
		 */
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				/* Only promisc/allmulti changed: no reset */
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			/* Reprogram the multicast filter with intrs masked */
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		/* mask holds only the capability bits that changed */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		/* Most offload toggles require a reinit to take effect */
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		/* Everything else is handled generically by the stack */
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
1271 
1272 
1273 /*********************************************************************
1274  *  Init entry point
1275  *
1276  *  This routine is used in two ways. It is used by the stack as
1277  *  init entry point in network interface structure. It is also used
1278  *  by the driver as a hw/sw initialization routine to get to a
1279  *  consistent state.
1280  *
1281  *  return 0 on success, positive on failure
1282  **********************************************************************/
1283 
static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	/* Quiesce interrupts and the watchdog timer during reinit */
	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, we make a duplicate
	 * in RAR[14] for that eventuality, this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			/* Tag stripping only: just set VME */
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	/* Restart the 1Hz housekeeping timer */
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}
1407 
/* Locked wrapper around em_init_locked() for the stack's if_init hook */
static void
em_init(void *arg)
{
	struct adapter *sc = arg;

	EM_CORE_LOCK(sc);
	em_init_locked(sc);
	EM_CORE_UNLOCK(sc);
}
1417 
1418 
1419 #ifdef DEVICE_POLLING
1420 /*********************************************************************
1421  *
1422  *  Legacy polling routine: note this only works with single queue
1423  *
1424  *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	/* On a status poll, also check for link state changes */
	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	/* RX cleanup runs without the core lock */
	em_rxeof(rxr, count, &rx_done);

	/* Reclaim TX descriptors and restart any queued transmission */
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
1467 #endif /* DEVICE_POLLING */
1468 
1469 
1470 /*********************************************************************
1471  *
1472  *  Fast Legacy/MSI Combined Interrupt Service routine
1473  *
1474  *********************************************************************/
1475 static int
1476 em_irq_fast(void *arg)
1477 {
1478 	struct adapter	*adapter = arg;
1479 	struct ifnet	*ifp;
1480 	u32		reg_icr;
1481 
1482 	ifp = adapter->ifp;
1483 
1484 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1485 
1486 	/* Hot eject?  */
1487 	if (reg_icr == 0xffffffff)
1488 		return FILTER_STRAY;
1489 
1490 	/* Definitely not our interrupt.  */
1491 	if (reg_icr == 0x0)
1492 		return FILTER_STRAY;
1493 
1494 	/*
1495 	 * Starting with the 82571 chip, bit 31 should be used to
1496 	 * determine whether the interrupt belongs to us.
1497 	 */
1498 	if (adapter->hw.mac.type >= e1000_82571 &&
1499 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1500 		return FILTER_STRAY;
1501 
1502 	em_disable_intr(adapter);
1503 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1504 
1505 	/* Link status change */
1506 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1507 		adapter->hw.mac.get_link_status = 1;
1508 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1509 	}
1510 
1511 	if (reg_icr & E1000_ICR_RXO)
1512 		adapter->rx_overruns++;
1513 	return FILTER_HANDLED;
1514 }
1515 
1516 /* Combined RX/TX handler, used by Legacy and MSI */
1517 static void
1518 em_handle_que(void *context, int pending)
1519 {
1520 	struct adapter	*adapter = context;
1521 	struct ifnet	*ifp = adapter->ifp;
1522 	struct tx_ring	*txr = adapter->tx_rings;
1523 	struct rx_ring	*rxr = adapter->rx_rings;
1524 
1525 
1526 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1527 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1528 		EM_TX_LOCK(txr);
1529 		em_txeof(txr);
1530 #ifdef EM_MULTIQUEUE
1531 		if (!drbr_empty(ifp, txr->br))
1532 			em_mq_start_locked(ifp, txr, NULL);
1533 #else
1534 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1535 			em_start_locked(ifp, txr);
1536 #endif
1537 		EM_TX_UNLOCK(txr);
1538 		if (more) {
1539 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1540 			return;
1541 		}
1542 	}
1543 
1544 	em_enable_intr(adapter);
1545 	return;
1546 }
1547 
1548 
1549 /*********************************************************************
1550  *
1551  *  MSIX Interrupt Service Routines
1552  *
1553  **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	/* Reclaim completed descriptors, then restart queued TX */
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}
1576 
1577 /*********************************************************************
1578  *
1579  *  MSIX RX Interrupt Service routine
1580  *
1581  **********************************************************************/
1582 
1583 static void
1584 em_msix_rx(void *arg)
1585 {
1586 	struct rx_ring	*rxr = arg;
1587 	struct adapter	*adapter = rxr->adapter;
1588 	bool		more;
1589 
1590 	++rxr->rx_irq;
1591 	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1592 		return;
1593 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1594 	if (more)
1595 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1596 	else
1597 		/* Reenable this interrupt */
1598 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1599 	return;
1600 }
1601 
1602 /*********************************************************************
1603  *
1604  *  MSIX Link Fast Interrupt Service routine
1605  *
1606  **********************************************************************/
1607 static void
1608 em_msix_link(void *arg)
1609 {
1610 	struct adapter	*adapter = arg;
1611 	u32		reg_icr;
1612 
1613 	++adapter->link_irq;
1614 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1615 
1616 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1617 		adapter->hw.mac.get_link_status = 1;
1618 		em_handle_link(adapter, 0);
1619 	} else
1620 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1621 		    EM_MSIX_LINK | E1000_IMS_LSC);
1622 	return;
1623 }
1624 
1625 static void
1626 em_handle_rx(void *context, int pending)
1627 {
1628 	struct rx_ring	*rxr = context;
1629 	struct adapter	*adapter = rxr->adapter;
1630         bool            more;
1631 
1632 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1633 	if (more)
1634 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1635 	else
1636 		/* Reenable this interrupt */
1637 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1638 }
1639 
static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	/* Reclaim completed descriptors, then restart queued TX */
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	/* Re-arm this ring's interrupt vector */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}
1659 
static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	/* Refresh link state with the timer stopped, then restart it */
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	/* Re-arm the link MSIX vector */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	/* Link came (back) up: restart any queued transmission */
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);
}
1691 
1692 
1693 /*********************************************************************
1694  *
1695  *  Media Ioctl callback
1696  *
1697  *  This routine is called whenever the user queries the status of
1698  *  the interface using ifconfig.
1699  *
1700  **********************************************************************/
1701 static void
1702 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1703 {
1704 	struct adapter *adapter = ifp->if_softc;
1705 	u_char fiber_type = IFM_1000_SX;
1706 
1707 	INIT_DEBUGOUT("em_media_status: begin");
1708 
1709 	EM_CORE_LOCK(adapter);
1710 	em_update_link_status(adapter);
1711 
1712 	ifmr->ifm_status = IFM_AVALID;
1713 	ifmr->ifm_active = IFM_ETHER;
1714 
1715 	if (!adapter->link_active) {
1716 		EM_CORE_UNLOCK(adapter);
1717 		return;
1718 	}
1719 
1720 	ifmr->ifm_status |= IFM_ACTIVE;
1721 
1722 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1723 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1724 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1725 	} else {
1726 		switch (adapter->link_speed) {
1727 		case 10:
1728 			ifmr->ifm_active |= IFM_10_T;
1729 			break;
1730 		case 100:
1731 			ifmr->ifm_active |= IFM_100_TX;
1732 			break;
1733 		case 1000:
1734 			ifmr->ifm_active |= IFM_1000_T;
1735 			break;
1736 		}
1737 		if (adapter->link_duplex == FULL_DUPLEX)
1738 			ifmr->ifm_active |= IFM_FDX;
1739 		else
1740 			ifmr->ifm_active |= IFM_HDX;
1741 	}
1742 	EM_CORE_UNLOCK(adapter);
1743 }
1744 
1745 /*********************************************************************
1746  *
1747  *  Media Ioctl callback
1748  *
1749  *  This routine is called when the user changes speed/duplex using
1750  *  media/mediopt option with ifconfig.
1751  *
1752  **********************************************************************/
1753 static int
1754 em_media_change(struct ifnet *ifp)
1755 {
1756 	struct adapter *adapter = ifp->if_softc;
1757 	struct ifmedia  *ifm = &adapter->media;
1758 
1759 	INIT_DEBUGOUT("em_media_change: begin");
1760 
1761 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1762 		return (EINVAL);
1763 
1764 	EM_CORE_LOCK(adapter);
1765 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1766 	case IFM_AUTO:
1767 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1768 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1769 		break;
1770 	case IFM_1000_LX:
1771 	case IFM_1000_SX:
1772 	case IFM_1000_T:
1773 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1774 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1775 		break;
1776 	case IFM_100_TX:
1777 		adapter->hw.mac.autoneg = FALSE;
1778 		adapter->hw.phy.autoneg_advertised = 0;
1779 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1780 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1781 		else
1782 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1783 		break;
1784 	case IFM_10_T:
1785 		adapter->hw.mac.autoneg = FALSE;
1786 		adapter->hw.phy.autoneg_advertised = 0;
1787 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1788 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1789 		else
1790 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1791 		break;
1792 	default:
1793 		device_printf(adapter->dev, "Unsupported media type\n");
1794 	}
1795 
1796 	em_init_locked(adapter);
1797 	EM_CORE_UNLOCK(adapter);
1798 
1799 	return (0);
1800 }
1801 
1802 /*********************************************************************
1803  *
1804  *  This routine maps the mbufs to tx descriptors.
1805  *
1806  *  return 0 on success, positive on failure
1807  **********************************************************************/
1808 
1809 static int
1810 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1811 {
1812 	struct adapter		*adapter = txr->adapter;
1813 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1814 	bus_dmamap_t		map;
1815 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1816 	struct e1000_tx_desc	*ctxd = NULL;
1817 	struct mbuf		*m_head;
1818 	struct ether_header	*eh;
1819 	struct ip		*ip = NULL;
1820 	struct tcphdr		*tp = NULL;
1821 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1822 	int			ip_off, poff;
1823 	int			nsegs, i, j, first, last = 0;
1824 	int			error, do_tso, tso_desc = 0, remap = 1;
1825 
1826 retry:
1827 	m_head = *m_headp;
1828 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1829 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1830 	ip_off = poff = 0;
1831 
1832 	/*
1833 	 * Intel recommends entire IP/TCP header length reside in a single
1834 	 * buffer. If multiple descriptors are used to describe the IP and
1835 	 * TCP header, each descriptor should describe one or more
1836 	 * complete headers; descriptors referencing only parts of headers
1837 	 * are not supported. If all layer headers are not coalesced into
1838 	 * a single buffer, each buffer should not cross a 4KB boundary,
1839 	 * or be larger than the maximum read request size.
1840 	 * Controller also requires modifing IP/TCP header to make TSO work
1841 	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1842 	 * IP/TCP header into a single buffer to meet the requirement of
1843 	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1844 	 * which also has similiar restrictions.
1845 	 */
1846 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1847 		if (do_tso || (m_head->m_next != NULL &&
1848 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1849 			if (M_WRITABLE(*m_headp) == 0) {
1850 				m_head = m_dup(*m_headp, M_NOWAIT);
1851 				m_freem(*m_headp);
1852 				if (m_head == NULL) {
1853 					*m_headp = NULL;
1854 					return (ENOBUFS);
1855 				}
1856 				*m_headp = m_head;
1857 			}
1858 		}
1859 		/*
1860 		 * XXX
1861 		 * Assume IPv4, we don't have TSO/checksum offload support
1862 		 * for IPv6 yet.
1863 		 */
1864 		ip_off = sizeof(struct ether_header);
1865 		m_head = m_pullup(m_head, ip_off);
1866 		if (m_head == NULL) {
1867 			*m_headp = NULL;
1868 			return (ENOBUFS);
1869 		}
1870 		eh = mtod(m_head, struct ether_header *);
1871 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1872 			ip_off = sizeof(struct ether_vlan_header);
1873 			m_head = m_pullup(m_head, ip_off);
1874 			if (m_head == NULL) {
1875 				*m_headp = NULL;
1876 				return (ENOBUFS);
1877 			}
1878 		}
1879 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1880 		if (m_head == NULL) {
1881 			*m_headp = NULL;
1882 			return (ENOBUFS);
1883 		}
1884 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1885 		poff = ip_off + (ip->ip_hl << 2);
1886 		if (do_tso) {
1887 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1888 			if (m_head == NULL) {
1889 				*m_headp = NULL;
1890 				return (ENOBUFS);
1891 			}
1892 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1893 			/*
1894 			 * TSO workaround:
1895 			 *   pull 4 more bytes of data into it.
1896 			 */
1897 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1898 			if (m_head == NULL) {
1899 				*m_headp = NULL;
1900 				return (ENOBUFS);
1901 			}
1902 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1903 			ip->ip_len = 0;
1904 			ip->ip_sum = 0;
1905 			/*
1906 			 * The pseudo TCP checksum does not include TCP payload
1907 			 * length so driver should recompute the checksum here
1908 			 * what hardware expect to see. This is adherence of
1909 			 * Microsoft's Large Send specification.
1910 			 */
1911 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1913 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1914 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1915 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1916 			if (m_head == NULL) {
1917 				*m_headp = NULL;
1918 				return (ENOBUFS);
1919 			}
1920 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1921 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1922 			if (m_head == NULL) {
1923 				*m_headp = NULL;
1924 				return (ENOBUFS);
1925 			}
1926 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1927 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1928 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1929 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1930 			if (m_head == NULL) {
1931 				*m_headp = NULL;
1932 				return (ENOBUFS);
1933 			}
1934 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1935 		}
1936 		*m_headp = m_head;
1937 	}
1938 
1939 	/*
1940 	 * Map the packet for DMA
1941 	 *
1942 	 * Capture the first descriptor index,
1943 	 * this descriptor will have the index
1944 	 * of the EOP which is the only one that
1945 	 * now gets a DONE bit writeback.
1946 	 */
1947 	first = txr->next_avail_desc;
1948 	tx_buffer = &txr->tx_buffers[first];
1949 	tx_buffer_mapped = tx_buffer;
1950 	map = tx_buffer->map;
1951 
1952 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1953 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1954 
1955 	/*
1956 	 * There are two types of errors we can (try) to handle:
1957 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1958 	 *   out of segments.  Defragment the mbuf chain and try again.
1959 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1960 	 *   at this point in time.  Defer sending and try again later.
1961 	 * All other errors, in particular EINVAL, are fatal and prevent the
1962 	 * mbuf chain from ever going through.  Drop it and report error.
1963 	 */
1964 	if (error == EFBIG && remap) {
1965 		struct mbuf *m;
1966 
1967 		m = m_defrag(*m_headp, M_NOWAIT);
1968 		if (m == NULL) {
1969 			adapter->mbuf_alloc_failed++;
1970 			m_freem(*m_headp);
1971 			*m_headp = NULL;
1972 			return (ENOBUFS);
1973 		}
1974 		*m_headp = m;
1975 
1976 		/* Try it again, but only once */
1977 		remap = 0;
1978 		goto retry;
1979 	} else if (error == ENOMEM) {
1980 		adapter->no_tx_dma_setup++;
1981 		return (error);
1982 	} else if (error != 0) {
1983 		adapter->no_tx_dma_setup++;
1984 		m_freem(*m_headp);
1985 		*m_headp = NULL;
1986 		return (error);
1987 	}
1988 
1989 	/*
1990 	 * TSO Hardware workaround, if this packet is not
1991 	 * TSO, and is only a single descriptor long, and
1992 	 * it follows a TSO burst, then we need to add a
1993 	 * sentinel descriptor to prevent premature writeback.
1994 	 */
1995 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1996 		if (nsegs == 1)
1997 			tso_desc = TRUE;
1998 		txr->tx_tso = FALSE;
1999 	}
2000 
2001         if (nsegs > (txr->tx_avail - 2)) {
2002                 txr->no_desc_avail++;
2003 		bus_dmamap_unload(txr->txtag, map);
2004 		return (ENOBUFS);
2005         }
2006 	m_head = *m_headp;
2007 
2008 	/* Do hardware assists */
2009 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2010 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2011 		    &txd_upper, &txd_lower);
2012 		/* we need to make a final sentinel transmit desc */
2013 		tso_desc = TRUE;
2014 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2015 		em_transmit_checksum_setup(txr, m_head,
2016 		    ip_off, ip, &txd_upper, &txd_lower);
2017 
2018 	if (m_head->m_flags & M_VLANTAG) {
2019 		/* Set the vlan id. */
2020 		txd_upper |=
2021 		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2022                 /* Tell hardware to add tag */
2023                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2024         }
2025 
2026 	i = txr->next_avail_desc;
2027 
2028 	/* Set up our transmit descriptors */
2029 	for (j = 0; j < nsegs; j++) {
2030 		bus_size_t seg_len;
2031 		bus_addr_t seg_addr;
2032 
2033 		tx_buffer = &txr->tx_buffers[i];
2034 		ctxd = &txr->tx_base[i];
2035 		seg_addr = segs[j].ds_addr;
2036 		seg_len  = segs[j].ds_len;
2037 		/*
2038 		** TSO Workaround:
2039 		** If this is the last descriptor, we want to
2040 		** split it so we have a small final sentinel
2041 		*/
2042 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2043 			seg_len -= 4;
2044 			ctxd->buffer_addr = htole64(seg_addr);
2045 			ctxd->lower.data = htole32(
2046 			adapter->txd_cmd | txd_lower | seg_len);
2047 			ctxd->upper.data =
2048 			    htole32(txd_upper);
2049 			if (++i == adapter->num_tx_desc)
2050 				i = 0;
2051 			/* Now make the sentinel */
2052 			++txd_used; /* using an extra txd */
2053 			ctxd = &txr->tx_base[i];
2054 			tx_buffer = &txr->tx_buffers[i];
2055 			ctxd->buffer_addr =
2056 			    htole64(seg_addr + seg_len);
2057 			ctxd->lower.data = htole32(
2058 			adapter->txd_cmd | txd_lower | 4);
2059 			ctxd->upper.data =
2060 			    htole32(txd_upper);
2061 			last = i;
2062 			if (++i == adapter->num_tx_desc)
2063 				i = 0;
2064 		} else {
2065 			ctxd->buffer_addr = htole64(seg_addr);
2066 			ctxd->lower.data = htole32(
2067 			adapter->txd_cmd | txd_lower | seg_len);
2068 			ctxd->upper.data =
2069 			    htole32(txd_upper);
2070 			last = i;
2071 			if (++i == adapter->num_tx_desc)
2072 				i = 0;
2073 		}
2074 		tx_buffer->m_head = NULL;
2075 		tx_buffer->next_eop = -1;
2076 	}
2077 
2078 	txr->next_avail_desc = i;
2079 	txr->tx_avail -= nsegs;
2080 	if (tso_desc) /* TSO used an extra for sentinel */
2081 		txr->tx_avail -= txd_used;
2082 
2083         tx_buffer->m_head = m_head;
2084 	/*
2085 	** Here we swap the map so the last descriptor,
2086 	** which gets the completion interrupt has the
2087 	** real map, and the first descriptor gets the
2088 	** unused map from this descriptor.
2089 	*/
2090 	tx_buffer_mapped->map = tx_buffer->map;
2091 	tx_buffer->map = map;
2092         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2093 
2094         /*
2095          * Last Descriptor of Packet
2096 	 * needs End Of Packet (EOP)
2097 	 * and Report Status (RS)
2098          */
2099         ctxd->lower.data |=
2100 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2101 	/*
2102 	 * Keep track in the first buffer which
2103 	 * descriptor will be written back
2104 	 */
2105 	tx_buffer = &txr->tx_buffers[first];
2106 	tx_buffer->next_eop = last;
2107 	/* Update the watchdog time early and often */
2108 	txr->watchdog_time = ticks;
2109 
2110 	/*
2111 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2112 	 * that this frame is available to transmit.
2113 	 */
2114 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2115 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2116 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2117 
2118 	return (0);
2119 }
2120 
2121 static void
2122 em_set_promisc(struct adapter *adapter)
2123 {
2124 	struct ifnet	*ifp = adapter->ifp;
2125 	u32		reg_rctl;
2126 
2127 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2128 
2129 	if (ifp->if_flags & IFF_PROMISC) {
2130 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2131 		/* Turn this on if you want to see bad packets */
2132 		if (em_debug_sbp)
2133 			reg_rctl |= E1000_RCTL_SBP;
2134 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2135 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2136 		reg_rctl |= E1000_RCTL_MPE;
2137 		reg_rctl &= ~E1000_RCTL_UPE;
2138 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2139 	}
2140 }
2141 
/*
 * Leave promiscuous mode: always clear unicast promiscuous (UPE) and
 * bad-packet reception (SBP).  Multicast promiscuous (MPE) is cleared
 * only when the interface is not at the maximum multicast group count,
 * since at that point MPE is needed to keep receiving all groups.
 */
static void
em_disable_promisc(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	u32		reg_rctl;
	int		mcnt = 0;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
	reg_rctl &=  (~E1000_RCTL_UPE);
	if (ifp->if_flags & IFF_ALLMULTI)
		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
	else {
		struct  ifmultiaddr *ifma;
		/* Count link-level multicast memberships, capped at MAX. */
#if __FreeBSD_version < 800000
		IF_ADDR_LOCK(ifp);
#else
		if_maddr_rlock(ifp);
#endif
		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_LINK)
				continue;
			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
				break;
			mcnt++;
		}
#if __FreeBSD_version < 800000
		IF_ADDR_UNLOCK(ifp);
#else
		if_maddr_runlock(ifp);
#endif
	}
	/* Don't disable if in MAX groups */
	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
		reg_rctl &=  (~E1000_RCTL_MPE);
	reg_rctl &=  (~E1000_RCTL_SBP);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
}
2179 
2180 
2181 /*********************************************************************
2182  *  Multicast Update
2183  *
2184  *  This routine is called whenever multicast address list is updated.
2185  *
2186  **********************************************************************/
2187 
static void
em_set_multi(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct ifmultiaddr *ifma;
	u32 reg_rctl = 0;
	u8  *mta; /* Multicast array memory */
	int mcnt = 0;

	IOCTL_DEBUGOUT("em_set_multi: begin");

	mta = adapter->mta;
	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);

	/*
	 * 82542 rev 2 workaround: put the receiver in reset (and disable
	 * MWI if it was on) while the multicast table is updated; undone
	 * at the bottom of this function.
	 */
	if (adapter->hw.mac.type == e1000_82542 &&
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_clear_mwi(&adapter->hw);
		reg_rctl |= E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
	}

	/* Collect up to MAX link-level multicast addresses into mta[]. */
#if __FreeBSD_version < 800000
	IF_ADDR_LOCK(ifp);
#else
	if_maddr_rlock(ifp);
#endif
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
		mcnt++;
	}
#if __FreeBSD_version < 800000
	IF_ADDR_UNLOCK(ifp);
#else
	if_maddr_runlock(ifp);
#endif
	/* Filter table full: fall back to multicast-promiscuous mode. */
	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else
		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);

	/* Undo the 82542 rev 2 workaround started above. */
	if (adapter->hw.mac.type == e1000_82542 &&
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl &= ~E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_set_mwi(&adapter->hw);
	}
}
2250 
2251 
2252 /*********************************************************************
2253  *  Timer routine
2254  *
2255  *  This routine checks for link status and updates statistics.
2256  *
2257  **********************************************************************/
2258 
/*
 * Once-a-second housekeeping callout: refreshes link state and
 * statistics, watches each TX queue for a hung condition, and
 * re-arms itself.  Runs with the core lock held.
 */
static void
em_local_timer(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		trigger;

	EM_CORE_LOCK_ASSERT(adapter);

	em_update_link_status(adapter);
	em_update_stats_counters(adapter);

	/* Reset LAA into RAR[0] on 82571 */
	if ((adapter->hw.mac.type == e1000_82571) &&
	    e1000_get_laa_state_82571(&adapter->hw))
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Mask to use in the irq trigger */
	if (adapter->msix_mem)
		trigger = rxr->ims;	/* MSIX: first RX queue's IMS bit */
	else
		trigger = E1000_ICS_RXDMT0;

	/*
	** Check on the state of the TX queue(s), this
	** can be done without the lock because its RO
	** and the HUNG state will be static if set.
	*/
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		/* A queue is only declared hung if no pause frames arrived */
		if ((txr->queue_status == EM_QUEUE_HUNG) &&
		    (adapter->pause_frames == 0))
			goto hung;
		/* Schedule a TX tasklet if needed */
		if (txr->tx_avail <= EM_MAX_SCATTER)
			taskqueue_enqueue(txr->tq, &txr->tx_task);
	}

	adapter->pause_frames = 0;
	/* Re-arm the timer for one second from now */
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
#ifndef DEVICE_POLLING
	/* Trigger an RX interrupt to guarantee mbuf refresh */
	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
#endif
	return;
hung:
	/* Looks like we're hung */
	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
	device_printf(adapter->dev,
	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
	    "Next TX to Clean = %d\n",
	    txr->me, txr->tx_avail, txr->next_to_clean);
	/* Mark interface down and reinitialize to recover */
	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;
	adapter->pause_frames = 0;
	em_init_locked(adapter);
}
2320 
2321 
/*
 * Determine current link state for the attached media and, on a
 * transition, update driver state (speed/duplex, baudrate, watchdog
 * arming) and notify the network stack.
 */
static void
em_update_link_status(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct ifnet *ifp = adapter->ifp;
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	u32 link_check = 0;

	/* Get the cached link value or read phy for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
			if (link_check) /* ESB2 fix */
				e1000_cfg_on_link_up(hw);
		} else
			link_check = TRUE;
		break;
	case e1000_media_type_fiber:
		/* Fiber reports link via the STATUS register */
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
                                 E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = adapter->hw.mac.serdes_has_link;
		break;
	default:
	case e1000_media_type_unknown:
		break;
	}

	/* Now check for a transition */
	if (link_check && (adapter->link_active == 0)) {
		/* Link came up */
		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
		    &adapter->link_duplex);
		/* Check if we must disable SPEED_MODE bit on PCI-E */
		if ((adapter->link_speed != SPEED_1000) &&
		    ((hw->mac.type == e1000_82571) ||
		    (hw->mac.type == e1000_82572))) {
			int tarc0;
			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
			tarc0 &= ~SPEED_MODE_BIT;
			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
		}
		if (bootverbose)
			device_printf(dev, "Link is up %d Mbps %s\n",
			    adapter->link_speed,
			    ((adapter->link_duplex == FULL_DUPLEX) ?
			    "Full Duplex" : "Half Duplex"));
		adapter->link_active = 1;
		adapter->smartspeed = 0;
		ifp->if_baudrate = adapter->link_speed * 1000000;
		if_link_state_change(ifp, LINK_STATE_UP);
	} else if (!link_check && (adapter->link_active == 1)) {
		/* Link went down */
		ifp->if_baudrate = adapter->link_speed = 0;
		adapter->link_duplex = 0;
		if (bootverbose)
			device_printf(dev, "Link is Down\n");
		adapter->link_active = 0;
		/* Link down, disable watchdog */
		for (int i = 0; i < adapter->num_queues; i++, txr++)
			txr->queue_status = EM_QUEUE_IDLE;
		if_link_state_change(ifp, LINK_STATE_DOWN);
	}
}
2391 
2392 /*********************************************************************
2393  *
2394  *  This routine disables all traffic on the adapter by issuing a
2395  *  global reset on the MAC and deallocates TX/RX buffers.
2396  *
2397  *  This routine should always be called with BOTH the CORE
2398  *  and TX locks.
2399  **********************************************************************/
2400 
static void
em_stop(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;

	EM_CORE_LOCK_ASSERT(adapter);

	INIT_DEBUGOUT("em_stop: begin");

	/* Quiesce interrupts and the housekeeping timer first */
	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        /* Unarm watchdog timer. */
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		txr->queue_status = EM_QUEUE_IDLE;
		EM_TX_UNLOCK(txr);
	}

	/* Global reset; also clear any wake-up control state */
	e1000_reset_hw(&adapter->hw);
	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);

	e1000_led_off(&adapter->hw);
	e1000_cleanup_led(&adapter->hw);
}
2432 
2433 
2434 /*********************************************************************
2435  *
2436  *  Determine hardware revision.
2437  *
2438  **********************************************************************/
2439 static void
2440 em_identify_hardware(struct adapter *adapter)
2441 {
2442 	device_t dev = adapter->dev;
2443 
2444 	/* Make sure our PCI config space has the necessary stuff set */
2445 	pci_enable_busmaster(dev);
2446 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2447 
2448 	/* Save off the information about this board */
2449 	adapter->hw.vendor_id = pci_get_vendor(dev);
2450 	adapter->hw.device_id = pci_get_device(dev);
2451 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2452 	adapter->hw.subsystem_vendor_id =
2453 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2454 	adapter->hw.subsystem_device_id =
2455 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2456 
2457 	/* Do Shared Code Init and Setup */
2458 	if (e1000_set_mac_type(&adapter->hw)) {
2459 		device_printf(dev, "Setup init failure\n");
2460 		return;
2461 	}
2462 }
2463 
2464 static int
2465 em_allocate_pci_resources(struct adapter *adapter)
2466 {
2467 	device_t	dev = adapter->dev;
2468 	int		rid;
2469 
2470 	rid = PCIR_BAR(0);
2471 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2472 	    &rid, RF_ACTIVE);
2473 	if (adapter->memory == NULL) {
2474 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2475 		return (ENXIO);
2476 	}
2477 	adapter->osdep.mem_bus_space_tag =
2478 	    rman_get_bustag(adapter->memory);
2479 	adapter->osdep.mem_bus_space_handle =
2480 	    rman_get_bushandle(adapter->memory);
2481 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2482 
2483 	/* Default to a single queue */
2484 	adapter->num_queues = 1;
2485 
2486 	/*
2487 	 * Setup MSI/X or MSI if PCI Express
2488 	 */
2489 	adapter->msix = em_setup_msix(adapter);
2490 
2491 	adapter->hw.back = &adapter->osdep;
2492 
2493 	return (0);
2494 }
2495 
2496 /*********************************************************************
2497  *
2498  *  Setup the Legacy or MSI Interrupt handler
2499  *
2500  **********************************************************************/
2501 int
2502 em_allocate_legacy(struct adapter *adapter)
2503 {
2504 	device_t dev = adapter->dev;
2505 	struct tx_ring	*txr = adapter->tx_rings;
2506 	int error, rid = 0;
2507 
2508 	/* Manually turn off all interrupts */
2509 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2510 
2511 	if (adapter->msix == 1) /* using MSI */
2512 		rid = 1;
2513 	/* We allocate a single interrupt resource */
2514 	adapter->res = bus_alloc_resource_any(dev,
2515 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2516 	if (adapter->res == NULL) {
2517 		device_printf(dev, "Unable to allocate bus resource: "
2518 		    "interrupt\n");
2519 		return (ENXIO);
2520 	}
2521 
2522 	/*
2523 	 * Allocate a fast interrupt and the associated
2524 	 * deferred processing contexts.
2525 	 */
2526 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2527 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2528 	    taskqueue_thread_enqueue, &adapter->tq);
2529 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2530 	    device_get_nameunit(adapter->dev));
2531 	/* Use a TX only tasklet for local timer */
2532 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2533 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2534 	    taskqueue_thread_enqueue, &txr->tq);
2535 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2536 	    device_get_nameunit(adapter->dev));
2537 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2538 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2539 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2540 		device_printf(dev, "Failed to register fast interrupt "
2541 			    "handler: %d\n", error);
2542 		taskqueue_free(adapter->tq);
2543 		adapter->tq = NULL;
2544 		return (error);
2545 	}
2546 
2547 	return (0);
2548 }
2549 
2550 /*********************************************************************
2551  *
2552  *  Setup the MSIX Interrupt handlers
2553  *   This is not really Multiqueue, rather
 *   it's just separate interrupt vectors
2555  *   for TX, RX, and Link.
2556  *
2557  **********************************************************************/
int
em_allocate_msix(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct		tx_ring *txr = adapter->tx_rings;
	struct		rx_ring *rxr = adapter->rx_rings;
	int		error, rid, vector = 0;


	/* Make sure all interrupts are disabled */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

	/*
	 * First set up ring resources: one MSIX vector per RX ring and
	 * one per TX ring, each with its own fast taskqueue; IRQ rids
	 * are vector number + 1.
	 */
	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {

		/* RX ring */
		rid = vector + 1;

		rxr->res = bus_alloc_resource_any(dev,
		    SYS_RES_IRQ, &rid, RF_ACTIVE);
		if (rxr->res == NULL) {
			device_printf(dev,
			    "Unable to allocate bus resource: "
			    "RX MSIX Interrupt %d\n", i);
			return (ENXIO);
		}
		if ((error = bus_setup_intr(dev, rxr->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
		    rxr, &rxr->tag)) != 0) {
			device_printf(dev, "Failed to register RX handler");
			return (error);
		}
#if __FreeBSD_version >= 800504
		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
#endif
		rxr->msix = vector++; /* NOTE increment vector for TX */
		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
		    taskqueue_thread_enqueue, &rxr->tq);
		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
		    device_get_nameunit(adapter->dev));
		/*
		** Set the bit to enable interrupt
		** in E1000_IMS -- bits 20 and 21
		** are for RX0 and RX1, note this has
		** NOTHING to do with the MSIX vector
		*/
		rxr->ims = 1 << (20 + i);
		/* Route this RX queue's cause to its vector via IVAR */
		adapter->ivars |= (8 | rxr->msix) << (i * 4);

		/* TX ring */
		rid = vector + 1;
		txr->res = bus_alloc_resource_any(dev,
		    SYS_RES_IRQ, &rid, RF_ACTIVE);
		if (txr->res == NULL) {
			device_printf(dev,
			    "Unable to allocate bus resource: "
			    "TX MSIX Interrupt %d\n", i);
			return (ENXIO);
		}
		if ((error = bus_setup_intr(dev, txr->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
		    txr, &txr->tag)) != 0) {
			device_printf(dev, "Failed to register TX handler");
			return (error);
		}
#if __FreeBSD_version >= 800504
		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
#endif
		txr->msix = vector++; /* Increment vector for next pass */
		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
		    taskqueue_thread_enqueue, &txr->tq);
		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
		    device_get_nameunit(adapter->dev));
		/*
		** Set the bit to enable interrupt
		** in E1000_IMS -- bits 22 and 23
		** are for TX0 and TX1, note this has
		** NOTHING to do with the MSIX vector
		*/
		txr->ims = 1 << (22 + i);
		/* Route this TX queue's cause to its vector via IVAR */
		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
	}

	/* Link interrupt gets the last vector */
	++rid;
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_ACTIVE);
	if (!adapter->res) {
		device_printf(dev,"Unable to allocate "
		    "bus resource: Link interrupt [%d]\n", rid);
		return (ENXIO);
        }
	/* Set the link handler function */
	error = bus_setup_intr(dev, adapter->res,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
	    em_msix_link, adapter, &adapter->tag);
	if (error) {
		adapter->res = NULL;
		device_printf(dev, "Failed to register LINK handler");
		return (error);
	}
#if __FreeBSD_version >= 800504
		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
#endif
	adapter->linkvec = vector;
	/* Route "other" causes to the link vector and enable IVAR use */
	adapter->ivars |=  (8 | vector) << 16;
	adapter->ivars |= 0x80000000;

	return (0);
}
2670 
2671 
/*
 * Release everything em_allocate_pci_resources() and the interrupt
 * setup routines acquired: per-queue IRQs and handlers, the legacy or
 * link IRQ, MSI/MSI-X state, the MSIX BAR, the register BAR, and the
 * ICH flash window.  Safe to call on a partially-initialized adapter.
 */
static void
em_free_pci_resources(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int		rid;


	/*
	** Release all the queue interrupt resources:
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		txr = &adapter->tx_rings[i];
		rxr = &adapter->rx_rings[i];
		/* an early abort? */
		if ((txr == NULL) || (rxr == NULL))
			break;
		/* IRQ rids were assigned as MSIX vector + 1 */
		rid = txr->msix +1;
		if (txr->tag != NULL) {
			bus_teardown_intr(dev, txr->res, txr->tag);
			txr->tag = NULL;
		}
		if (txr->res != NULL)
			bus_release_resource(dev, SYS_RES_IRQ,
			    rid, txr->res);
		rid = rxr->msix +1;
		if (rxr->tag != NULL) {
			bus_teardown_intr(dev, rxr->res, rxr->tag);
			rxr->tag = NULL;
		}
		if (rxr->res != NULL)
			bus_release_resource(dev, SYS_RES_IRQ,
			    rid, rxr->res);
	}

	/* Pick the rid of the legacy/MSI/link interrupt resource */
        if (adapter->linkvec) /* we are doing MSIX */
                rid = adapter->linkvec + 1;
        else
                (adapter->msix != 0) ? (rid = 1):(rid = 0);

	if (adapter->tag != NULL) {
		bus_teardown_intr(dev, adapter->res, adapter->tag);
		adapter->tag = NULL;
	}

	if (adapter->res != NULL)
		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);


	if (adapter->msix)
		pci_release_msi(dev);

	if (adapter->msix_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);

	if (adapter->memory != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(0), adapter->memory);

	if (adapter->flash != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    EM_FLASH, adapter->flash);
}
2737 
2738 /*
2739  * Setup MSI or MSI/X
2740  */
/*
 * Choose and allocate the interrupt scheme.  Returns the number of
 * vectors obtained: 3 for MSI-X (82574 only), 1 for MSI, or 0 for a
 * legacy IRQ.  Falls back progressively when a scheme is unavailable.
 */
static int
em_setup_msix(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int val;

	/*
	** Setup MSI/X for Hartwell: tests have shown
	** use of two queues to be unstable, and to
	** provide no great gain anyway, so we simply
	** separate the interrupts and use a single queue.
	*/
	if ((adapter->hw.mac.type == e1000_82574) &&
	    (em_enable_msix == TRUE)) {
		/* Map the MSIX BAR */
		int rid = PCIR_BAR(EM_MSIX_BAR);
		adapter->msix_mem = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
       		if (adapter->msix_mem == NULL) {
			/* May not be enabled */
               		device_printf(adapter->dev,
			    "Unable to map MSIX table \n");
			goto msi;
       		}
		val = pci_msix_count(dev);
		/* We only need/want 3 vectors: RX, TX, and link */
		if (val >= 3)
			val = 3;
		else {
               		device_printf(adapter->dev,
			    "MSIX: insufficient vectors, using MSI\n");
			goto msi;
		}

		if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
			device_printf(adapter->dev,
			    "Using MSIX interrupts "
			    "with %d vectors\n", val);
			return (val);
		}

		/*
		** If MSIX alloc failed or provided us with
		** less than needed, free and fall through to MSI
		*/
		pci_release_msi(dev);
	}
msi:
	/* Release the MSIX BAR mapping if MSI-X was abandoned */
	if (adapter->msix_mem != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
		adapter->msix_mem = NULL;
	}
       	val = 1;
       	if (pci_alloc_msi(dev, &val) == 0) {
               	device_printf(adapter->dev,"Using an MSI interrupt\n");
		return (val);
	}
	/* Should only happen due to manual configuration */
	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
	return (0);
}
2803 
2804 
2805 /*********************************************************************
2806  *
2807  *  Initialize the hardware to a configuration
2808  *  as specified by the adapter structure.
2809  *
2810  **********************************************************************/
static void
em_reset(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct ifnet	*ifp = adapter->ifp;
	struct e1000_hw	*hw = &adapter->hw;
	u16		rx_buffer_size;
	u32		pba;

	INIT_DEBUGOUT("em_reset: begin");

	/* Set up smart power down as default off on newer adapters. */
	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
	    hw->mac.type == e1000_82572)) {
		u16 phy_tmp = 0;

		/* Speed up time to link by disabling smart power down. */
		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
		phy_tmp &= ~IGP02E1000_PM_SPD;
		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
	}

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 */
	switch (hw->mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
		/* Boost Receive side for jumbo frames */
		if (adapter->hw.mac.max_frame_size > 4096)
			pba = E1000_PBA_14K;
		else
			pba = E1000_PBA_10K;
		break;
	case e1000_pchlan:
	case e1000_pch2lan:
	case e1000_pch_lpt:
		pba = E1000_PBA_26K;
		break;
	default:
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit. Here we use an arbitary value of 1500 which will
	 *   restart after one full frame is pulled from the buffer. There
	 *   could be several smaller frames in the buffer and if so they will
	 *   not trigger the XON until their total number reduces the buffer
	 *   by 1500.
	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
	 */
	/*
	 * Low 16 bits of PBA are the Rx allocation; the << 10 presumably
	 * converts the register's KB units into bytes — confirm against
	 * the e1000 datasheet.
	 */
	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
	hw->fc.high_water = rx_buffer_size -
	    roundup2(adapter->hw.mac.max_frame_size, 1024);
	hw->fc.low_water = hw->fc.high_water - 1500;

	if (adapter->fc) /* locally set flow control value? */
		hw->fc.requested_mode = adapter->fc;
	else
		hw->fc.requested_mode = e1000_fc_full;

	if (hw->mac.type == e1000_80003es2lan)
		hw->fc.pause_time = 0xFFFF;
	else
		hw->fc.pause_time = EM_FC_PAUSE_TIME;

	hw->fc.send_xon = TRUE;

	/* Device specific overrides/settings */
	/*
	 * NOTE(review): the fixed watermark/timer values below look like
	 * Intel-recommended per-MAC settings; verify against the shared
	 * code / datasheet before changing.
	 */
	switch (hw->mac.type) {
	case e1000_pchlan:
		/* Workaround: no TX flow ctrl for PCH */
                hw->fc.requested_mode = e1000_fc_rx_pause;
		hw->fc.pause_time = 0xFFFF; /* override */
		if (ifp->if_mtu > ETHERMTU) {
			hw->fc.high_water = 0x3500;
			hw->fc.low_water = 0x1500;
		} else {
			hw->fc.high_water = 0x5000;
			hw->fc.low_water = 0x3000;
		}
		hw->fc.refresh_time = 0x1000;
		break;
	case e1000_pch2lan:
	case e1000_pch_lpt:
		hw->fc.high_water = 0x5C20;
		hw->fc.low_water = 0x5048;
		hw->fc.pause_time = 0x0650;
		hw->fc.refresh_time = 0x0400;
		/* Jumbos need adjusted PBA */
		if (ifp->if_mtu > ETHERMTU)
			E1000_WRITE_REG(hw, E1000_PBA, 12);
		else
			E1000_WRITE_REG(hw, E1000_PBA, 26);
		break;
        case e1000_ich9lan:
        case e1000_ich10lan:
		if (ifp->if_mtu > ETHERMTU) {
			hw->fc.high_water = 0x2800;
			hw->fc.low_water = hw->fc.high_water - 8;
			break;
		}
		/* else fall thru */
	default:
		if (hw->mac.type == e1000_80003es2lan)
			hw->fc.pause_time = 0xFFFF;
		break;
	}

	/* Issue a global reset */
	e1000_reset_hw(hw);
	/* Clear any pending wake-up control bits after the reset. */
	E1000_WRITE_REG(hw, E1000_WUC, 0);
	em_disable_aspm(adapter);
	/* and a re-init */
	if (e1000_init_hw(hw) < 0) {
		device_printf(dev, "Hardware Initialization Failed\n");
		return;
	}

	/* Program the VLAN Ether Type register used for HW tagging. */
	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
	e1000_get_phy_info(hw);
	e1000_check_for_link(hw);
	return;
}
2963 
2964 /*********************************************************************
2965  *
2966  *  Setup networking device structure and register an interface.
2967  *
2968  **********************************************************************/
static int
em_setup_interface(device_t dev, struct adapter *adapter)
{
	struct ifnet   *ifp;

	INIT_DEBUGOUT("em_setup_interface: begin");

	/* Allocate and bind the ifnet; freed by detach elsewhere. */
	ifp = adapter->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not allocate ifnet structure\n");
		return (-1);
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_init =  em_init;
	ifp->if_softc = adapter;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = em_ioctl;
#ifdef EM_MULTIQUEUE
	/* Multiqueue stack interface */
	ifp->if_transmit = em_mq_start;
	ifp->if_qflush = em_qflush;
#else
	/* Legacy single send queue, sized to the TX descriptor ring. */
	ifp->if_start = em_start;
	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
	IFQ_SET_READY(&ifp->if_snd);
#endif

	ether_ifattach(ifp, adapter->hw.mac.addr);

	/* Start from a clean slate, then enable what the HW supports. */
	ifp->if_capabilities = ifp->if_capenable = 0;


	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
	ifp->if_capabilities |= IFCAP_TSO4;
	/*
	 * Tell the upper layer(s) we
	 * support full VLAN capability
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
			     |  IFCAP_VLAN_HWTSO
			     |  IFCAP_VLAN_MTU;
	ifp->if_capenable = ifp->if_capabilities;

	/*
	** Don't turn this on by default, if vlans are
	** created on another pseudo device (eg. lagg)
	** then vlan events are not passed thru, breaking
	** operation, but with HW FILTER off it works. If
	** using vlans directly on the em driver you can
	** enable this and get full hardware tag filtering.
	*/
	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;

#ifdef DEVICE_POLLING
	ifp->if_capabilities |= IFCAP_POLLING;
#endif

	/* Enable only WOL MAGIC by default */
	if (adapter->wol) {
		ifp->if_capabilities |= IFCAP_WOL;
		ifp->if_capenable |= IFCAP_WOL_MAGIC;
	}

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&adapter->media, IFM_IMASK,
	    em_media_change, em_media_status);
	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		u_char fiber_type = IFM_1000_SX;	/* default type */

		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
	} else {
		/* Copper: advertise the usual 10/100/1000 matrix. */
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
			    0, NULL);
		/* The IFE PHY does not do gigabit, so skip 1000_T there. */
		if (adapter->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&adapter->media,
				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&adapter->media,
				IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
	return (0);
}
3066 
3067 
3068 /*
3069  * Manage DMA'able memory.
3070  */
3071 static void
3072 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3073 {
3074 	if (error)
3075 		return;
3076 	*(bus_addr_t *) arg = segs[0].ds_addr;
3077 }
3078 
3079 static int
3080 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3081         struct em_dma_alloc *dma, int mapflags)
3082 {
3083 	int error;
3084 
3085 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3086 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3087 				BUS_SPACE_MAXADDR,	/* lowaddr */
3088 				BUS_SPACE_MAXADDR,	/* highaddr */
3089 				NULL, NULL,		/* filter, filterarg */
3090 				size,			/* maxsize */
3091 				1,			/* nsegments */
3092 				size,			/* maxsegsize */
3093 				0,			/* flags */
3094 				NULL,			/* lockfunc */
3095 				NULL,			/* lockarg */
3096 				&dma->dma_tag);
3097 	if (error) {
3098 		device_printf(adapter->dev,
3099 		    "%s: bus_dma_tag_create failed: %d\n",
3100 		    __func__, error);
3101 		goto fail_0;
3102 	}
3103 
3104 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3105 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3106 	if (error) {
3107 		device_printf(adapter->dev,
3108 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3109 		    __func__, (uintmax_t)size, error);
3110 		goto fail_2;
3111 	}
3112 
3113 	dma->dma_paddr = 0;
3114 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3115 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3116 	if (error || dma->dma_paddr == 0) {
3117 		device_printf(adapter->dev,
3118 		    "%s: bus_dmamap_load failed: %d\n",
3119 		    __func__, error);
3120 		goto fail_3;
3121 	}
3122 
3123 	return (0);
3124 
3125 fail_3:
3126 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3127 fail_2:
3128 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3129 	bus_dma_tag_destroy(dma->dma_tag);
3130 fail_0:
3131 	dma->dma_map = NULL;
3132 	dma->dma_tag = NULL;
3133 
3134 	return (error);
3135 }
3136 
3137 static void
3138 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3139 {
3140 	if (dma->dma_tag == NULL)
3141 		return;
3142 	if (dma->dma_map != NULL) {
3143 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3144 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3145 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3146 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3147 		dma->dma_map = NULL;
3148 	}
3149 	bus_dma_tag_destroy(dma->dma_tag);
3150 	dma->dma_tag = NULL;
3151 }
3152 
3153 
3154 /*********************************************************************
3155  *
3156  *  Allocate memory for the transmit and receive rings, and then
3157  *  the descriptors associated with each, called only once at attach.
3158  *
3159  **********************************************************************/
3160 static int
3161 em_allocate_queues(struct adapter *adapter)
3162 {
3163 	device_t		dev = adapter->dev;
3164 	struct tx_ring		*txr = NULL;
3165 	struct rx_ring		*rxr = NULL;
3166 	int rsize, tsize, error = E1000_SUCCESS;
3167 	int txconf = 0, rxconf = 0;
3168 
3169 
3170 	/* Allocate the TX ring struct memory */
3171 	if (!(adapter->tx_rings =
3172 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3173 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3174 		device_printf(dev, "Unable to allocate TX ring memory\n");
3175 		error = ENOMEM;
3176 		goto fail;
3177 	}
3178 
3179 	/* Now allocate the RX */
3180 	if (!(adapter->rx_rings =
3181 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3182 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3183 		device_printf(dev, "Unable to allocate RX ring memory\n");
3184 		error = ENOMEM;
3185 		goto rx_fail;
3186 	}
3187 
3188 	tsize = roundup2(adapter->num_tx_desc *
3189 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3190 	/*
3191 	 * Now set up the TX queues, txconf is needed to handle the
3192 	 * possibility that things fail midcourse and we need to
3193 	 * undo memory gracefully
3194 	 */
3195 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3196 		/* Set up some basics */
3197 		txr = &adapter->tx_rings[i];
3198 		txr->adapter = adapter;
3199 		txr->me = i;
3200 
3201 		/* Initialize the TX lock */
3202 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3203 		    device_get_nameunit(dev), txr->me);
3204 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3205 
3206 		if (em_dma_malloc(adapter, tsize,
3207 			&txr->txdma, BUS_DMA_NOWAIT)) {
3208 			device_printf(dev,
3209 			    "Unable to allocate TX Descriptor memory\n");
3210 			error = ENOMEM;
3211 			goto err_tx_desc;
3212 		}
3213 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3214 		bzero((void *)txr->tx_base, tsize);
3215 
3216         	if (em_allocate_transmit_buffers(txr)) {
3217 			device_printf(dev,
3218 			    "Critical Failure setting up transmit buffers\n");
3219 			error = ENOMEM;
3220 			goto err_tx_desc;
3221         	}
3222 #if __FreeBSD_version >= 800000
3223 		/* Allocate a buf ring */
3224 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3225 		    M_WAITOK, &txr->tx_mtx);
3226 #endif
3227 	}
3228 
3229 	/*
3230 	 * Next the RX queues...
3231 	 */
3232 	rsize = roundup2(adapter->num_rx_desc *
3233 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3234 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3235 		rxr = &adapter->rx_rings[i];
3236 		rxr->adapter = adapter;
3237 		rxr->me = i;
3238 
3239 		/* Initialize the RX lock */
3240 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3241 		    device_get_nameunit(dev), txr->me);
3242 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3243 
3244 		if (em_dma_malloc(adapter, rsize,
3245 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3246 			device_printf(dev,
3247 			    "Unable to allocate RxDescriptor memory\n");
3248 			error = ENOMEM;
3249 			goto err_rx_desc;
3250 		}
3251 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3252 		bzero((void *)rxr->rx_base, rsize);
3253 
3254         	/* Allocate receive buffers for the ring*/
3255 		if (em_allocate_receive_buffers(rxr)) {
3256 			device_printf(dev,
3257 			    "Critical Failure setting up receive buffers\n");
3258 			error = ENOMEM;
3259 			goto err_rx_desc;
3260 		}
3261 	}
3262 
3263 	return (0);
3264 
3265 err_rx_desc:
3266 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3267 		em_dma_free(adapter, &rxr->rxdma);
3268 err_tx_desc:
3269 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3270 		em_dma_free(adapter, &txr->txdma);
3271 	free(adapter->rx_rings, M_DEVBUF);
3272 rx_fail:
3273 #if __FreeBSD_version >= 800000
3274 	buf_ring_free(txr->br, M_DEVBUF);
3275 #endif
3276 	free(adapter->tx_rings, M_DEVBUF);
3277 fail:
3278 	return (error);
3279 }
3280 
3281 
3282 /*********************************************************************
3283  *
3284  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3285  *  the information needed to transmit a packet on the wire. This is
3286  *  called only once at attach, setup is done every reset.
3287  *
3288  **********************************************************************/
3289 static int
3290 em_allocate_transmit_buffers(struct tx_ring *txr)
3291 {
3292 	struct adapter *adapter = txr->adapter;
3293 	device_t dev = adapter->dev;
3294 	struct em_buffer *txbuf;
3295 	int error, i;
3296 
3297 	/*
3298 	 * Setup DMA descriptor areas.
3299 	 */
3300 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3301 			       1, 0,			/* alignment, bounds */
3302 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3303 			       BUS_SPACE_MAXADDR,	/* highaddr */
3304 			       NULL, NULL,		/* filter, filterarg */
3305 			       EM_TSO_SIZE,		/* maxsize */
3306 			       EM_MAX_SCATTER,		/* nsegments */
3307 			       PAGE_SIZE,		/* maxsegsize */
3308 			       0,			/* flags */
3309 			       NULL,			/* lockfunc */
3310 			       NULL,			/* lockfuncarg */
3311 			       &txr->txtag))) {
3312 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3313 		goto fail;
3314 	}
3315 
3316 	if (!(txr->tx_buffers =
3317 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3318 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3319 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3320 		error = ENOMEM;
3321 		goto fail;
3322 	}
3323 
3324         /* Create the descriptor buffer dma maps */
3325 	txbuf = txr->tx_buffers;
3326 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3327 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3328 		if (error != 0) {
3329 			device_printf(dev, "Unable to create TX DMA map\n");
3330 			goto fail;
3331 		}
3332 	}
3333 
3334 	return 0;
3335 fail:
3336 	/* We free all, it handles case where we are in the middle */
3337 	em_free_transmit_structures(adapter);
3338 	return (error);
3339 }
3340 
3341 /*********************************************************************
3342  *
3343  *  Initialize a transmit ring.
3344  *
3345  **********************************************************************/
static void
em_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct em_buffer *txbuf;
	int i;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	/* Clear the old descriptor contents */
	EM_TX_LOCK(txr);
#ifdef DEV_NETMAP
	/* Non-NULL slot means this ring is in netmap mode. */
	slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */

	bzero((void *)txr->tx_base,
	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
        txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			/* Complete the DMA before the mbuf is released. */
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		if (slot) {
			/* Point the descriptor at the netmap buffer. */
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			uint64_t paddr;
			void *addr;

			addr = PNMB(slot + si, &paddr);
			txr->tx_base[i].buffer_addr = htole64(paddr);
			/* reload the map for netmap mode */
			netmap_load_map(txr->txtag, txbuf->map, addr);
		}
#endif /* DEV_NETMAP */

		/* clear the watch index */
		txbuf->next_eop = -1;
        }

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;
	txr->queue_status = EM_QUEUE_IDLE;

	/* Clear checksum offload context. */
	txr->last_hw_offload = 0;
	txr->last_hw_ipcss = 0;
	txr->last_hw_ipcso = 0;
	txr->last_hw_tucss = 0;
	txr->last_hw_tucso = 0;

	/* Push the cleared descriptor ring out to the device. */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	EM_TX_UNLOCK(txr);
}
3411 
3412 /*********************************************************************
3413  *
3414  *  Initialize all transmit rings.
3415  *
3416  **********************************************************************/
3417 static void
3418 em_setup_transmit_structures(struct adapter *adapter)
3419 {
3420 	struct tx_ring *txr = adapter->tx_rings;
3421 
3422 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3423 		em_setup_transmit_ring(txr);
3424 
3425 	return;
3426 }
3427 
3428 /*********************************************************************
3429  *
3430  *  Enable transmit unit.
3431  *
3432  **********************************************************************/
static void
em_initialize_transmit_unit(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	struct e1000_hw	*hw = &adapter->hw;
	u32	tctl, tarc, tipg = 0;

	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		u64 bus_addr = txr->txdma.dma_paddr;
		/* Base and Len of TX Ring */
		E1000_WRITE_REG(hw, E1000_TDLEN(i),
	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
		E1000_WRITE_REG(hw, E1000_TDBAH(i),
	    	    (u32)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_TDBAL(i),
	    	    (u32)bus_addr);
		/* Init the HEAD/TAIL indices */
		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
		E1000_WRITE_REG(hw, E1000_TDH(i), 0);

		HW_DEBUGOUT2("Base = %x, Length = %x\n",
		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));

		txr->queue_status = EM_QUEUE_IDLE;
	}

	/* Set the default values for the Tx Inter Packet Gap timer */
	switch (adapter->hw.mac.type) {
	case e1000_80003es2lan:
		tipg = DEFAULT_82543_TIPG_IPGR1;
		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
		    E1000_TIPG_IPGR2_SHIFT;
		break;
	default:
		/* Fiber/serdes uses a different IPGT than copper. */
		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
		    (adapter->hw.phy.media_type ==
		    e1000_media_type_internal_serdes))
			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}

	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
	/* TX interrupt delay; absolute delay (TADV) only on 82540+. */
	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);

	if(adapter->hw.mac.type >= e1000_82540)
		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
		    adapter->tx_abs_int_delay.value);

	/*
	 * TARC tweaks: SPEED_MODE_BIT on 82571/2, bit 0 on es2lan —
	 * NOTE(review): these look like Intel errata workarounds;
	 * confirm against the specification updates before changing.
	 */
	if ((adapter->hw.mac.type == e1000_82571) ||
	    (adapter->hw.mac.type == e1000_82572)) {
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
		tarc |= SPEED_MODE_BIT;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
		tarc |= 1;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
		tarc |= 1;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
	}

	/* Base descriptor command: insert FCS; add IDE if delay in use. */
	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
	if (adapter->tx_int_delay.value > 0)
		adapter->txd_cmd |= E1000_TXD_CMD_IDE;

	/* Program the Transmit Control Register */
	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

	if (adapter->hw.mac.type >= e1000_82571)
		tctl |= E1000_TCTL_MULR;

	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);

}
3518 
3519 
3520 /*********************************************************************
3521  *
3522  *  Free all transmit rings.
3523  *
3524  **********************************************************************/
3525 static void
3526 em_free_transmit_structures(struct adapter *adapter)
3527 {
3528 	struct tx_ring *txr = adapter->tx_rings;
3529 
3530 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3531 		EM_TX_LOCK(txr);
3532 		em_free_transmit_buffers(txr);
3533 		em_dma_free(adapter, &txr->txdma);
3534 		EM_TX_UNLOCK(txr);
3535 		EM_TX_LOCK_DESTROY(txr);
3536 	}
3537 
3538 	free(adapter->tx_rings, M_DEVBUF);
3539 }
3540 
3541 /*********************************************************************
3542  *
3543  *  Free transmit ring related data structures.
3544  *
3545  **********************************************************************/
3546 static void
3547 em_free_transmit_buffers(struct tx_ring *txr)
3548 {
3549 	struct adapter		*adapter = txr->adapter;
3550 	struct em_buffer	*txbuf;
3551 
3552 	INIT_DEBUGOUT("free_transmit_ring: begin");
3553 
3554 	if (txr->tx_buffers == NULL)
3555 		return;
3556 
3557 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3558 		txbuf = &txr->tx_buffers[i];
3559 		if (txbuf->m_head != NULL) {
3560 			bus_dmamap_sync(txr->txtag, txbuf->map,
3561 			    BUS_DMASYNC_POSTWRITE);
3562 			bus_dmamap_unload(txr->txtag,
3563 			    txbuf->map);
3564 			m_freem(txbuf->m_head);
3565 			txbuf->m_head = NULL;
3566 			if (txbuf->map != NULL) {
3567 				bus_dmamap_destroy(txr->txtag,
3568 				    txbuf->map);
3569 				txbuf->map = NULL;
3570 			}
3571 		} else if (txbuf->map != NULL) {
3572 			bus_dmamap_unload(txr->txtag,
3573 			    txbuf->map);
3574 			bus_dmamap_destroy(txr->txtag,
3575 			    txbuf->map);
3576 			txbuf->map = NULL;
3577 		}
3578 	}
3579 #if __FreeBSD_version >= 800000
3580 	if (txr->br != NULL)
3581 		buf_ring_free(txr->br, M_DEVBUF);
3582 #endif
3583 	if (txr->tx_buffers != NULL) {
3584 		free(txr->tx_buffers, M_DEVBUF);
3585 		txr->tx_buffers = NULL;
3586 	}
3587 	if (txr->txtag != NULL) {
3588 		bus_dma_tag_destroy(txr->txtag);
3589 		txr->txtag = NULL;
3590 	}
3591 	return;
3592 }
3593 
3594 
3595 /*********************************************************************
3596  *  The offload context is protocol specific (TCP/UDP) and thus
3597  *  only needs to be set when the protocol changes. The occasion
3598  *  of a context change can be a performance detriment, and
3599  *  might be better just disabled. The reason arises in the way
3600  *  in which the controller supports pipelined requests from the
3601  *  Tx data DMA. Up to four requests can be pipelined, and they may
3602  *  belong to the same packet or to multiple packets. However all
3603  *  requests for one packet are issued before a request is issued
3604  *  for a subsequent packet and if a request for the next packet
3605  *  requires a context change, that request will be stalled
3606  *  until the previous request completes. This means setting up
3607  *  a new context effectively disables pipelined Tx data DMA which
3608  *  in turn greatly slow down performance to send small sized
3609  *  frames.
3610  **********************************************************************/
3611 static void
3612 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3613     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3614 {
3615 	struct adapter			*adapter = txr->adapter;
3616 	struct e1000_context_desc	*TXD = NULL;
3617 	struct em_buffer		*tx_buffer;
3618 	int				cur, hdr_len;
3619 	u32				cmd = 0;
3620 	u16				offload = 0;
3621 	u8				ipcso, ipcss, tucso, tucss;
3622 
3623 	ipcss = ipcso = tucss = tucso = 0;
3624 	hdr_len = ip_off + (ip->ip_hl << 2);
3625 	cur = txr->next_avail_desc;
3626 
3627 	/* Setup of IP header checksum. */
3628 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3629 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3630 		offload |= CSUM_IP;
3631 		ipcss = ip_off;
3632 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3633 		/*
3634 		 * Start offset for header checksum calculation.
3635 		 * End offset for header checksum calculation.
3636 		 * Offset of place to put the checksum.
3637 		 */
3638 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3639 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3640 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3641 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3642 		cmd |= E1000_TXD_CMD_IP;
3643 	}
3644 
3645 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3646  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3647  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3648  		offload |= CSUM_TCP;
3649  		tucss = hdr_len;
3650  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3651  		/*
3652  		 * Setting up new checksum offload context for every frames
3653  		 * takes a lot of processing time for hardware. This also
3654  		 * reduces performance a lot for small sized frames so avoid
3655  		 * it if driver can use previously configured checksum
3656  		 * offload context.
3657  		 */
3658  		if (txr->last_hw_offload == offload) {
3659  			if (offload & CSUM_IP) {
3660  				if (txr->last_hw_ipcss == ipcss &&
3661  				    txr->last_hw_ipcso == ipcso &&
3662  				    txr->last_hw_tucss == tucss &&
3663  				    txr->last_hw_tucso == tucso)
3664  					return;
3665  			} else {
3666  				if (txr->last_hw_tucss == tucss &&
3667  				    txr->last_hw_tucso == tucso)
3668  					return;
3669  			}
3670   		}
3671  		txr->last_hw_offload = offload;
3672  		txr->last_hw_tucss = tucss;
3673  		txr->last_hw_tucso = tucso;
3674  		/*
3675  		 * Start offset for payload checksum calculation.
3676  		 * End offset for payload checksum calculation.
3677  		 * Offset of place to put the checksum.
3678  		 */
3679 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3680  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3681  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3682  		TXD->upper_setup.tcp_fields.tucso = tucso;
3683  		cmd |= E1000_TXD_CMD_TCP;
3684  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3685  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3686  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3687  		tucss = hdr_len;
3688  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3689  		/*
3690  		 * Setting up new checksum offload context for every frames
3691  		 * takes a lot of processing time for hardware. This also
3692  		 * reduces performance a lot for small sized frames so avoid
3693  		 * it if driver can use previously configured checksum
3694  		 * offload context.
3695  		 */
3696  		if (txr->last_hw_offload == offload) {
3697  			if (offload & CSUM_IP) {
3698  				if (txr->last_hw_ipcss == ipcss &&
3699  				    txr->last_hw_ipcso == ipcso &&
3700  				    txr->last_hw_tucss == tucss &&
3701  				    txr->last_hw_tucso == tucso)
3702  					return;
3703  			} else {
3704  				if (txr->last_hw_tucss == tucss &&
3705  				    txr->last_hw_tucso == tucso)
3706  					return;
3707  			}
3708  		}
3709  		txr->last_hw_offload = offload;
3710  		txr->last_hw_tucss = tucss;
3711  		txr->last_hw_tucso = tucso;
3712  		/*
3713  		 * Start offset for header checksum calculation.
3714  		 * End offset for header checksum calculation.
3715  		 * Offset of place to put the checksum.
3716  		 */
3717 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3718  		TXD->upper_setup.tcp_fields.tucss = tucss;
3719  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3720  		TXD->upper_setup.tcp_fields.tucso = tucso;
3721   	}
3722 
3723  	if (offload & CSUM_IP) {
3724  		txr->last_hw_ipcss = ipcss;
3725  		txr->last_hw_ipcso = ipcso;
3726   	}
3727 
3728 	TXD->tcp_seg_setup.data = htole32(0);
3729 	TXD->cmd_and_length =
3730 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3731 	tx_buffer = &txr->tx_buffers[cur];
3732 	tx_buffer->m_head = NULL;
3733 	tx_buffer->next_eop = -1;
3734 
3735 	if (++cur == adapter->num_tx_desc)
3736 		cur = 0;
3737 
3738 	txr->tx_avail--;
3739 	txr->next_avail_desc = cur;
3740 }
3741 
3742 
3743 /**********************************************************************
3744  *
3745  *  Setup work for hardware segmentation offload (TSO)
3746  *
3747  **********************************************************************/
static void
em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
{
	struct adapter			*adapter = txr->adapter;
	struct e1000_context_desc	*TXD;
	struct em_buffer		*tx_buffer;
	int cur, hdr_len;

	/*
	 * In theory we can use the same TSO context if and only if
	 * frame is the same type(IP/TCP) and the same MSS. However
	 * checking whether a frame has the same IP/TCP structure is
	 * hard thing so just ignore that and always restablish a
	 * new TSO context.
	 */
	/* Total header length: L2 offset + IP header + TCP header. */
	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
		      E1000_TXD_DTYP_D |	/* Data descr type */
		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */

	/* IP and/or TCP header checksum calculation and insertion. */
	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;

	cur = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[cur];
	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];

	/*
	 * Start offset for header checksum calculation.
	 * End offset for header checksum calculation.
	 * Offset of place put the checksum.
	 */
	TXD->lower_setup.ip_fields.ipcss = ip_off;
	TXD->lower_setup.ip_fields.ipcse =
	    htole16(ip_off + (ip->ip_hl << 2) - 1);
	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
	/*
	 * Start offset for payload checksum calculation.
	 * End offset for payload checksum calculation.
	 * Offset of place to put the checksum.
	 */
	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
	TXD->upper_setup.tcp_fields.tucse = 0;
	TXD->upper_setup.tcp_fields.tucso =
	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
	/*
	 * Payload size per packet w/o any headers.
	 * Length of all headers up to payload.
	 */
	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

	TXD->cmd_and_length = htole32(adapter->txd_cmd |
				E1000_TXD_CMD_DEXT |	/* Extended descr */
				E1000_TXD_CMD_TSE |	/* TSE context */
				E1000_TXD_CMD_IP |	/* Do IP csum */
				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
				(mp->m_pkthdr.len - (hdr_len))); /* Total len */

	/* The context descriptor consumes one ring slot. */
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	if (++cur == adapter->num_tx_desc)
		cur = 0;

	txr->tx_avail--;
	txr->next_avail_desc = cur;
	/* Record that a TSO context descriptor has been set up. */
	txr->tx_tso = TRUE;
}
3818 
3819 
3820 /**********************************************************************
3821  *
3822  *  Examine each tx_buffer in the used queue. If the hardware is done
3823  *  processing the packet then free associated resources. The
3824  *  tx_buffer is put back on the free queue.
3825  *
3826  **********************************************************************/
static void
em_txeof(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
        int first, last, done, processed;
        struct em_buffer *tx_buffer;
        struct e1000_tx_desc   *tx_desc, *eop_desc;
	struct ifnet   *ifp = adapter->ifp;

	EM_TX_LOCK_ASSERT(txr);
#ifdef DEV_NETMAP
	/*
	 * NOTE(review): when netmap consumes the TX interrupt we return
	 * immediately; lock handling appears to be delegated to netmap
	 * via the NETMAP_LOCKED_ENTER/EXIT flags -- confirm against the
	 * netmap API of this vintage.
	 */
	if (netmap_tx_irq(ifp, txr->me |
	    (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
		return;
#endif /* DEV_NETMAP */

	/* No work, make sure watchdog is off */
        if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = EM_QUEUE_IDLE;
                return;
	}

	processed = 0;
        first = txr->next_to_clean;
        tx_desc = &txr->tx_base[first];
        tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
        eop_desc = &txr->tx_base[last];

	/*
	 * What this does is get the index of the
	 * first descriptor AFTER the EOP of the
	 * first packet, that way we can do the
	 * simple comparison on the inner while loop.
	 */
	if (++last == adapter->num_tx_desc)
 		last = 0;
	done = last;

        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_POSTREAD);

	/* Outer loop: one iteration per packet whose EOP has DD set */
        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
                	tx_desc->upper.data = 0;
                	tx_desc->lower.data = 0;
                	tx_desc->buffer_addr = 0;
                	++txr->tx_avail;
			++processed;

			if (tx_buffer->m_head) {
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
                        	m_freem(tx_buffer->m_head);
                        	tx_buffer->m_head = NULL;
                	}
			tx_buffer->next_eop = -1;
			/* Progress was made; restart the watchdog clock */
			txr->watchdog_time = ticks;

	                if (++first == adapter->num_tx_desc)
				first = 0;

	                tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		++ifp->if_opackets;
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
        		eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			break;
        }
        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

        txr->next_to_clean = first;

	/*
	** Watchdog calculation, we know there's
	** work outstanding or the first return
	** would have been taken, so none processed
	** for too long indicates a hang. local timer
	** will examine this and do a reset if needed.
	*/
	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
		txr->queue_status = EM_QUEUE_HUNG;

        /*
         * If we have a minimum free, clear IFF_DRV_OACTIVE
         * to tell the stack that it is OK to send packets.
	 * Notice that all writes of OACTIVE happen under the
	 * TX lock which, with a single queue, guarantees
	 * sanity.
         */
        if (txr->tx_avail >= EM_MAX_SCATTER)
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	/* Disable watchdog if all clean */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = EM_QUEUE_IDLE;
	}
}
3937 
3938 
3939 /*********************************************************************
3940  *
3941  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3942  *
3943  **********************************************************************/
static void
em_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter		*adapter = rxr->adapter;
	struct mbuf		*m;
	bus_dma_segment_t	segs[1];
	struct em_buffer	*rxbuf;
	int			i, j, error, nsegs;
	bool			cleaned = FALSE;

	/* i = slot being refreshed, j = look-ahead cursor bounding the loop */
	i = j = rxr->next_to_refresh;
	/*
	** Get one descriptor beyond
	** our work mark to control
	** the loop.
	*/
	if (++j == adapter->num_rx_desc)
		j = 0;

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		/* A NULL m_head means em_rxeof handed this buffer upstream */
		if (rxbuf->m_head == NULL) {
			m = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, adapter->rx_mbuf_sz);
			/*
			** If we have a temporary resource shortage
			** that causes a failure, just abort refresh
			** for now, we will return to this point when
			** reinvoked from em_rxeof.
			*/
			if (m == NULL)
				goto update;
		} else
			m = rxbuf->m_head;

		/* Reset the (possibly recycled) mbuf to a full-size buffer */
		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
		m->m_flags |= M_PKTHDR;
		m->m_data = m->m_ext.ext_buf;

		/* Use bus_dma machinery to setup the memory mapping  */
		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
		    m, segs, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(m);
			rxbuf->m_head = NULL;
			goto update;
		}
		rxbuf->m_head = m;
		bus_dmamap_sync(rxr->rxtag,
		    rxbuf->map, BUS_DMASYNC_PREREAD);
		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
		cleaned = TRUE;	/* at least one slot was refreshed */

		i = j; /* Next is precalulated for us */
		rxr->next_to_refresh = i;
		/* Calculate next controlling index */
		if (++j == adapter->num_rx_desc)
			j = 0;
	}
update:
	/*
	** Update the tail pointer only if,
	** and as far as we have refreshed.
	*/
	if (cleaned)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_RDT(rxr->me), rxr->next_to_refresh);

	return;
}
4016 
4017 
4018 /*********************************************************************
4019  *
4020  *  Allocate memory for rx_buffer structures. Since we use one
4021  *  rx_buffer per received packet, the maximum number of rx_buffer's
4022  *  that we'll need is equal to the number of receive descriptors
4023  *  that we've allocated.
4024  *
4025  **********************************************************************/
4026 static int
4027 em_allocate_receive_buffers(struct rx_ring *rxr)
4028 {
4029 	struct adapter		*adapter = rxr->adapter;
4030 	device_t		dev = adapter->dev;
4031 	struct em_buffer	*rxbuf;
4032 	int			error;
4033 
4034 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4035 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4036 	if (rxr->rx_buffers == NULL) {
4037 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4038 		return (ENOMEM);
4039 	}
4040 
4041 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4042 				1, 0,			/* alignment, bounds */
4043 				BUS_SPACE_MAXADDR,	/* lowaddr */
4044 				BUS_SPACE_MAXADDR,	/* highaddr */
4045 				NULL, NULL,		/* filter, filterarg */
4046 				MJUM9BYTES,		/* maxsize */
4047 				1,			/* nsegments */
4048 				MJUM9BYTES,		/* maxsegsize */
4049 				0,			/* flags */
4050 				NULL,			/* lockfunc */
4051 				NULL,			/* lockarg */
4052 				&rxr->rxtag);
4053 	if (error) {
4054 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4055 		    __func__, error);
4056 		goto fail;
4057 	}
4058 
4059 	rxbuf = rxr->rx_buffers;
4060 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4061 		rxbuf = &rxr->rx_buffers[i];
4062 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4063 		    &rxbuf->map);
4064 		if (error) {
4065 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4066 			    __func__, error);
4067 			goto fail;
4068 		}
4069 	}
4070 
4071 	return (0);
4072 
4073 fail:
4074 	em_free_receive_structures(adapter);
4075 	return (error);
4076 }
4077 
4078 
4079 /*********************************************************************
4080  *
4081  *  Initialize a receive ring and its buffers.
4082  *
4083  **********************************************************************/
static int
em_setup_receive_ring(struct rx_ring *rxr)
{
	struct	adapter 	*adapter = rxr->adapter;
	struct em_buffer	*rxbuf;
	bus_dma_segment_t	seg[1];
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_slot *slot;
#endif


	/* Clear the ring contents */
	EM_RX_LOCK(rxr);
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);
#ifdef DEV_NETMAP
	/* Non-NULL slot means a netmap client owns the ring's buffers */
	slot = netmap_reset(na, NR_RX, 0, 0);
#endif

	/*
	** Free current RX buffer structs and their mbufs
	*/
	for (int i = 0; i < adapter->num_rx_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->m_head != NULL) {
			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL; /* mark as freed */
		}
	}

	/* Now replenish the mbufs */
        for (int j = 0; j != adapter->num_rx_desc; ++j) {
		rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
		/* Netmap mode: map descriptors onto netmap-owned buffers */
		if (slot) {
			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(slot + si, &paddr);
			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
			/* Update descriptor */
			rxr->rx_base[j].buffer_addr = htole64(paddr);
			continue;
		}
#endif /* DEV_NETMAP */
		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (rxbuf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;

		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
		    rxbuf->map, rxbuf->m_head, seg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL;
			goto fail;
		}
		bus_dmamap_sync(rxr->rxtag,
		    rxbuf->map, BUS_DMASYNC_PREREAD);

		/* Update descriptor */
		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
	}
	rxr->next_to_check = 0;
	rxr->next_to_refresh = 0;
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:
	/* NOTE: the success path also exits through here, with error == 0 */
	EM_RX_UNLOCK(rxr);
	return (error);
}
4170 
4171 /*********************************************************************
4172  *
4173  *  Initialize all receive rings.
4174  *
4175  **********************************************************************/
static int
em_setup_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	int q;

	/* Initialize every ring; bail out at the first failure */
	for (q = 0; q < adapter->num_queues; q++, rxr++)
		if (em_setup_receive_ring(rxr))
			goto fail;

	return (0);
fail:
	/*
	 * Free RX buffers allocated so far, we will only handle
	 * the rings that completed, the failing case will have
	 * cleaned up for itself. 'q' failed, so its the terminus.
	 */
	for (int i = 0; i < q; ++i) {
		rxr = &adapter->rx_rings[i];
		for (int n = 0; n < adapter->num_rx_desc; n++) {
			struct em_buffer *rxbuf;
			rxbuf = &rxr->rx_buffers[n];
			if (rxbuf->m_head != NULL) {
				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
			  	  BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
				m_freem(rxbuf->m_head);
				rxbuf->m_head = NULL;
			}
		}
		rxr->next_to_check = 0;
		rxr->next_to_refresh = 0;
	}

	return (ENOBUFS);
}
4212 
4213 /*********************************************************************
4214  *
4215  *  Free all receive rings.
4216  *
4217  **********************************************************************/
4218 static void
4219 em_free_receive_structures(struct adapter *adapter)
4220 {
4221 	struct rx_ring *rxr = adapter->rx_rings;
4222 
4223 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4224 		em_free_receive_buffers(rxr);
4225 		/* Free the ring memory as well */
4226 		em_dma_free(adapter, &rxr->rxdma);
4227 		EM_RX_LOCK_DESTROY(rxr);
4228 	}
4229 
4230 	free(adapter->rx_rings, M_DEVBUF);
4231 }
4232 
4233 
4234 /*********************************************************************
4235  *
4236  *  Free receive ring data structures
4237  *
4238  **********************************************************************/
4239 static void
4240 em_free_receive_buffers(struct rx_ring *rxr)
4241 {
4242 	struct adapter		*adapter = rxr->adapter;
4243 	struct em_buffer	*rxbuf = NULL;
4244 
4245 	INIT_DEBUGOUT("free_receive_buffers: begin");
4246 
4247 	if (rxr->rx_buffers != NULL) {
4248 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4249 			rxbuf = &rxr->rx_buffers[i];
4250 			if (rxbuf->map != NULL) {
4251 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4252 				    BUS_DMASYNC_POSTREAD);
4253 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4254 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4255 			}
4256 			if (rxbuf->m_head != NULL) {
4257 				m_freem(rxbuf->m_head);
4258 				rxbuf->m_head = NULL;
4259 			}
4260 		}
4261 		free(rxr->rx_buffers, M_DEVBUF);
4262 		rxr->rx_buffers = NULL;
4263 		rxr->next_to_check = 0;
4264 		rxr->next_to_refresh = 0;
4265 	}
4266 
4267 	if (rxr->rxtag != NULL) {
4268 		bus_dma_tag_destroy(rxr->rxtag);
4269 		rxr->rxtag = NULL;
4270 	}
4271 
4272 	return;
4273 }
4274 
4275 
4276 /*********************************************************************
4277  *
4278  *  Enable receive unit.
4279  *
4280  **********************************************************************/
4281 
static void
em_initialize_receive_unit(struct adapter *adapter)
{
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp = adapter->ifp;
	struct e1000_hw	*hw = &adapter->hw;
	u64	bus_addr;
	u32	rctl, rxcsum;

	INIT_DEBUGOUT("em_initialize_receive_units: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	rctl = E1000_READ_REG(hw, E1000_RCTL);
	/* Do not disable if ever enabled on this hardware */
	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	/* Program the absolute RX interrupt delay from the tunable */
	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
	    adapter->rx_abs_int_delay.value);
	/*
	 * Set the interrupt throttling rate. Value is calculated
	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
	 */
	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);

	/*
	** When using MSIX interrupts we need to throttle
	** using the EITR register (82574 only)
	*/
	if (hw->mac.type == e1000_82574) {
		for (int i = 0; i < 4; i++)
			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
			    DEFAULT_ITR);
		/* Disable accelerated acknowledge */
		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
	}

	/* Enable/disable hardware RX checksum per interface capability */
	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	if (ifp->if_capenable & IFCAP_RXCSUM)
		rxcsum |= E1000_RXCSUM_TUOFL;
	else
		rxcsum &= ~E1000_RXCSUM_TUOFL;
	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);

	/*
	** XXX TEMPORARY WORKAROUND: on some systems with 82573
	** long latencies are observed, like Lenovo X60. This
	** change eliminates the problem, but since having positive
	** values in RDTR is a known source of problems on other
	** platforms another solution is being sought.
	*/
	if (hw->mac.type == e1000_82573)
		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		/* Setup the Base and Length of the Rx Descriptor Ring */
		u32 rdt = adapter->num_rx_desc - 1; /* default */

		bus_addr = rxr->rxdma.dma_paddr;
		E1000_WRITE_REG(hw, E1000_RDLEN(i),
		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
		/* Setup the Head and Tail Descriptor Pointers */
		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
#ifdef DEV_NETMAP
		/*
		 * an init() while a netmap client is active must
		 * preserve the rx buffers passed to userspace.
		 */
		if (ifp->if_capenable & IFCAP_NETMAP)
			rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
#endif /* DEV_NETMAP */
		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
	}

	/* Set PTHRESH for improved jumbo performance */
	if (((adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_pch2lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) &&
	    (ifp->if_mtu > ETHERMTU)) {
		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
	}

	/* pch2lan and later toggle the jumbo workaround based on MTU */
	if (adapter->hw.mac.type >= e1000_pch2lan) {
		if (ifp->if_mtu > ETHERMTU)
			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
		else
			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
	}

	/* Setup the Receive Control Register */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

        /* Strip the CRC */
        rctl |= E1000_RCTL_SECRC;

        /* Make sure VLAN Filters are off */
        rctl &= ~E1000_RCTL_VFE;
	rctl &= ~E1000_RCTL_SBP;

	/* Hardware buffer size must track the mbuf size we allocate */
	if (adapter->rx_mbuf_sz == MCLBYTES)
		rctl |= E1000_RCTL_SZ_2048;
	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;

	/* Long Packet Enable only for jumbo MTU */
	if (ifp->if_mtu > ETHERMTU)
		rctl |= E1000_RCTL_LPE;
	else
		rctl &= ~E1000_RCTL_LPE;

	/* Write out the settings */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	return;
}
4407 
4408 
4409 /*********************************************************************
4410  *
4411  *  This routine executes in interrupt context. It replenishes
4412  *  the mbufs in the descriptor and sends data which has been
4413  *  dma'ed into host memory to upper layer.
4414  *
4415  *  We loop at most count times if count is > 0, or until done if
4416  *  count < 0.
4417  *
4418  *  For polling we also now return the number of cleaned packets
4419  *********************************************************************/
static bool
em_rxeof(struct rx_ring *rxr, int count, int *done)
{
	struct adapter		*adapter = rxr->adapter;
	struct ifnet		*ifp = adapter->ifp;
	struct mbuf		*mp, *sendmp;
	u8			status = 0;
	u16 			len;
	int			i, processed, rxdone = 0;
	bool			eop;
	struct e1000_rx_desc	*cur;

	EM_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	/*
	 * NOTE(review): this early return does not call EM_RX_UNLOCK;
	 * lock release appears delegated to netmap through the
	 * NETMAP_LOCKED_ENTER flag -- confirm against the netmap API.
	 */
	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
		return (FALSE);
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check, processed = 0; count != 0;) {

		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		status = cur->status;
		mp = sendmp = NULL;

		/* Descriptor not yet written back by the hardware */
		if ((status & E1000_RXD_STAT_DD) == 0)
			break;

		len = le16toh(cur->length);
		eop = (status & E1000_RXD_STAT_EOP) != 0;

		/* Drop errored frames, and any later segments of one */
		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
		    (rxr->discard == TRUE)) {
			adapter->dropped_pkts++;
			++rxr->rx_discarded;
			if (!eop) /* Catch subsequent segs */
				rxr->discard = TRUE;
			else
				rxr->discard = FALSE;
			em_rx_discard(rxr, i);
			goto next_desc;
		}

		/* Assign correct length to the current fragment */
		mp = rxr->rx_buffers[i].m_head;
		mp->m_len = len;

		/* Trigger for refresh */
		rxr->rx_buffers[i].m_head = NULL;

		/* First segment? */
		if (rxr->fmp == NULL) {
			mp->m_pkthdr.len = len;
			rxr->fmp = rxr->lmp = mp;
		} else {
			/* Chain mbuf's together */
			mp->m_flags &= ~M_PKTHDR;
			rxr->lmp->m_next = mp;
			rxr->lmp = mp;
			rxr->fmp->m_pkthdr.len += len;
		}

		/* End of packet: finish the chain and stage it for delivery */
		if (eop) {
			--count;
			sendmp = rxr->fmp;
			sendmp->m_pkthdr.rcvif = ifp;
			ifp->if_ipackets++;
			em_receive_checksum(cur, sendmp);
#ifndef __NO_STRICT_ALIGNMENT
			if (adapter->hw.mac.max_frame_size >
			    (MCLBYTES - ETHER_ALIGN) &&
			    em_fixup_rx(rxr) != 0)
				goto skip;
#endif
			if (status & E1000_RXD_STAT_VP) {
				sendmp->m_pkthdr.ether_vtag =
				    le16toh(cur->special);
				sendmp->m_flags |= M_VLANTAG;
			}
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
			rxr->fmp = rxr->lmp = NULL;
		}
next_desc:
		/* Zero out the receive descriptors status. */
		cur->status = 0;
		++rxdone;	/* cumulative for POLL */
		++processed;

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;

		/* Send to the stack */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			/* Hand the frame up without holding the RX lock */
			EM_RX_UNLOCK(rxr);
			(*ifp->if_input)(ifp, sendmp);
			EM_RX_LOCK(rxr);
			/* Re-read: next_to_check may have moved while unlocked */
			i = rxr->next_to_check;
		}

		/* Only refresh mbufs every 8 descriptors */
		if (processed == 8) {
			em_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Catch any remaining refresh work */
	if (e1000_rx_unrefreshed(rxr))
		em_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;
	if (done != NULL)
		*done = rxdone;
	EM_RX_UNLOCK(rxr);

	/* TRUE if the last descriptor looked at was done (more may be ready) */
	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
}
4547 
4548 static __inline void
4549 em_rx_discard(struct rx_ring *rxr, int i)
4550 {
4551 	struct em_buffer	*rbuf;
4552 
4553 	rbuf = &rxr->rx_buffers[i];
4554 	/* Free any previous pieces */
4555 	if (rxr->fmp != NULL) {
4556 		rxr->fmp->m_flags |= M_PKTHDR;
4557 		m_freem(rxr->fmp);
4558 		rxr->fmp = NULL;
4559 		rxr->lmp = NULL;
4560 	}
4561 	/*
4562 	** Free buffer and allow em_refresh_mbufs()
4563 	** to clean up and recharge buffer.
4564 	*/
4565 	if (rbuf->m_head) {
4566 		m_free(rbuf->m_head);
4567 		rbuf->m_head = NULL;
4568 	}
4569 	return;
4570 }
4571 
4572 #ifndef __NO_STRICT_ALIGNMENT
4573 /*
4574  * When jumbo frames are enabled we should realign entire payload on
4575  * architecures with strict alignment. This is serious design mistake of 8254x
4576  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4577  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4578  * payload. On architecures without strict alignment restrictions 8254x still
4579  * performs unaligned memory access which would reduce the performance too.
4580  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4581  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4582  * existing mbuf chain.
4583  *
4584  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4585  * not used at all on architectures with strict alignment.
4586  */
static int
em_fixup_rx(struct rx_ring *rxr)
{
	struct adapter *adapter = rxr->adapter;
	struct mbuf *m, *n;
	int error;

	error = 0;
	m = rxr->fmp;
	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
		/* Room in the cluster: slide the whole frame up so the
		 * payload after the Ethernet header ends up aligned */
		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
		m->m_data += ETHER_HDR_LEN;
	} else {
		/* No room: prepend a new mbuf carrying just the header */
		MGETHDR(n, M_NOWAIT, MT_DATA);
		if (n != NULL) {
			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
			m->m_data += ETHER_HDR_LEN;
			m->m_len -= ETHER_HDR_LEN;
			n->m_len = ETHER_HDR_LEN;
			/* Move the pkthdr to the new head mbuf */
			M_MOVE_PKTHDR(n, m);
			n->m_next = m;
			rxr->fmp = n;
		} else {
			/* Allocation failed: drop the entire frame */
			adapter->dropped_pkts++;
			m_freem(rxr->fmp);
			rxr->fmp = NULL;
			error = ENOMEM;
		}
	}

	return (error);
}
4619 #endif
4620 
4621 /*********************************************************************
4622  *
4623  *  Verify that the hardware indicated that the checksum is valid.
4624  *  Inform the stack about the status of checksum so that stack
4625  *  doesn't spend time verifying the checksum.
4626  *
4627  *********************************************************************/
4628 static void
4629 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4630 {
4631 	mp->m_pkthdr.csum_flags = 0;
4632 
4633 	/* Ignore Checksum bit is set */
4634 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4635 		return;
4636 
4637 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4638 		return;
4639 
4640 	/* IP Checksum Good? */
4641 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4642 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4643 
4644 	/* TCP or UDP checksum */
4645 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4646 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4647 		mp->m_pkthdr.csum_data = htons(0xffff);
4648 	}
4649 }
4650 
4651 /*
4652  * This routine is run via an vlan
4653  * config EVENT
4654  */
4655 static void
4656 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4657 {
4658 	struct adapter	*adapter = ifp->if_softc;
4659 	u32		index, bit;
4660 
4661 	if (ifp->if_softc !=  arg)   /* Not our event */
4662 		return;
4663 
4664 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4665                 return;
4666 
4667 	EM_CORE_LOCK(adapter);
4668 	index = (vtag >> 5) & 0x7F;
4669 	bit = vtag & 0x1F;
4670 	adapter->shadow_vfta[index] |= (1 << bit);
4671 	++adapter->num_vlans;
4672 	/* Re-init to load the changes */
4673 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4674 		em_init_locked(adapter);
4675 	EM_CORE_UNLOCK(adapter);
4676 }
4677 
4678 /*
4679  * This routine is run via an vlan
4680  * unconfig EVENT
4681  */
4682 static void
4683 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4684 {
4685 	struct adapter	*adapter = ifp->if_softc;
4686 	u32		index, bit;
4687 
4688 	if (ifp->if_softc !=  arg)
4689 		return;
4690 
4691 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4692                 return;
4693 
4694 	EM_CORE_LOCK(adapter);
4695 	index = (vtag >> 5) & 0x7F;
4696 	bit = vtag & 0x1F;
4697 	adapter->shadow_vfta[index] &= ~(1 << bit);
4698 	--adapter->num_vlans;
4699 	/* Re-init to load the changes */
4700 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4701 		em_init_locked(adapter);
4702 	EM_CORE_UNLOCK(adapter);
4703 }
4704 
static void
em_setup_vlan_hw_support(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32             reg;

	/*
	** We get here thru init_locked, meaning
	** a soft reset, this has already cleared
	** the VFTA and other state, so if there
	** have been no vlan's registered do nothing.
	*/
	if (adapter->num_vlans == 0)
                return;

	/*
	** A soft reset zero's out the VFTA, so
	** we need to repopulate it now.
	*/
	for (int i = 0; i < EM_VFTA_SIZE; i++)
                if (adapter->shadow_vfta[i] != 0)
			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
                            i, adapter->shadow_vfta[i]);

	/* Enable VLAN mode (CTRL.VME) */
	reg = E1000_READ_REG(hw, E1000_CTRL);
	reg |= E1000_CTRL_VME;
	E1000_WRITE_REG(hw, E1000_CTRL, reg);

	/* Enable the Filter Table */
	reg = E1000_READ_REG(hw, E1000_RCTL);
	reg &= ~E1000_RCTL_CFIEN;
	reg |= E1000_RCTL_VFE;
	E1000_WRITE_REG(hw, E1000_RCTL, reg);
}
4739 
4740 static void
4741 em_enable_intr(struct adapter *adapter)
4742 {
4743 	struct e1000_hw *hw = &adapter->hw;
4744 	u32 ims_mask = IMS_ENABLE_MASK;
4745 
4746 	if (hw->mac.type == e1000_82574) {
4747 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4748 		ims_mask |= EM_MSIX_MASK;
4749 	}
4750 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4751 }
4752 
4753 static void
4754 em_disable_intr(struct adapter *adapter)
4755 {
4756 	struct e1000_hw *hw = &adapter->hw;
4757 
4758 	if (hw->mac.type == e1000_82574)
4759 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4760 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4761 }
4762 
4763 /*
4764  * Bit of a misnomer, what this really means is
4765  * to enable OS management of the system... aka
4766  * to disable special hardware management features
4767  */
static void
em_init_manageability(struct adapter *adapter)
{
	/* A shared code workaround */
#define E1000_82542_MANC2H E1000_MANC2H
	if (adapter->has_manage) {
		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);

		/* disable hardware interception of ARP */
		manc &= ~(E1000_MANC_ARP_EN);

                /* enable receiving management packets to the host */
		manc |= E1000_MANC_EN_MNG2HOST;
#define E1000_MNG2HOST_PORT_623 (1 << 5)
#define E1000_MNG2HOST_PORT_664 (1 << 6)
		/* Forward management ports 623 and 664 to the host */
		manc2h |= E1000_MNG2HOST_PORT_623;
		manc2h |= E1000_MNG2HOST_PORT_664;
		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
	}
}
4790 
4791 /*
4792  * Give control back to hardware management
4793  * controller if there is one.
4794  */
4795 static void
4796 em_release_manageability(struct adapter *adapter)
4797 {
4798 	if (adapter->has_manage) {
4799 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4800 
4801 		/* re-enable hardware interception of ARP */
4802 		manc |= E1000_MANC_ARP_EN;
4803 		manc &= ~E1000_MANC_EN_MNG2HOST;
4804 
4805 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4806 	}
4807 }
4808 
4809 /*
4810  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4811  * For ASF and Pass Through versions of f/w this means
4812  * that the driver is loaded. For AMT version type f/w
4813  * this means that the network i/f is open.
4814  */
4815 static void
4816 em_get_hw_control(struct adapter *adapter)
4817 {
4818 	u32 ctrl_ext, swsm;
4819 
4820 	if (adapter->hw.mac.type == e1000_82573) {
4821 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4822 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4823 		    swsm | E1000_SWSM_DRV_LOAD);
4824 		return;
4825 	}
4826 	/* else */
4827 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4828 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4829 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4830 	return;
4831 }
4832 
4833 /*
4834  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4835  * For ASF and Pass Through versions of f/w this means that
4836  * the driver is no longer loaded. For AMT versions of the
4837  * f/w this means that the network i/f is closed.
4838  */
4839 static void
4840 em_release_hw_control(struct adapter *adapter)
4841 {
4842 	u32 ctrl_ext, swsm;
4843 
4844 	if (!adapter->has_manage)
4845 		return;
4846 
4847 	if (adapter->hw.mac.type == e1000_82573) {
4848 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4849 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4850 		    swsm & ~E1000_SWSM_DRV_LOAD);
4851 		return;
4852 	}
4853 	/* else */
4854 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4855 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4856 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4857 	return;
4858 }
4859 
4860 static int
4861 em_is_valid_ether_addr(u8 *addr)
4862 {
4863 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4864 
4865 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4866 		return (FALSE);
4867 	}
4868 
4869 	return (TRUE);
4870 }
4871 
4872 /*
4873 ** Parse the interface capabilities with regard
4874 ** to both system management and wake-on-lan for
4875 ** later use.
4876 */
static void
em_get_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	u16		eeprom_data = 0, device_id, apme_mask;

	/* Manageability pass-through capability comes from shared code */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
	apme_mask = EM_EEPROM_APME;

	/* Locate the APME (wake) setting per MAC family */
	switch (adapter->hw.mac.type) {
	case e1000_82573:
	case e1000_82583:
		adapter->has_amt = TRUE;
		/* Falls thru */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		/* Dual-port parts keep per-port settings in the NVM */
		if (adapter->hw.bus.func == 1) {
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
			break;
		} else
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	case e1000_ich8lan:
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
	case e1000_pch2lan:
		/* ICH/PCH families expose APME via the WUC register instead */
		apme_mask = E1000_WUC_APME;
		adapter->has_amt = TRUE;
		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
		break;
	default:
		e1000_read_nvm(&adapter->hw,
		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	}
	/* Default wake events when APME is set: magic packet + multicast */
	if (eeprom_data & apme_mask)
		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
	/*
         * We have the eeprom settings, now apply the special cases
         * where the eeprom may be wrong or the board won't support
         * wake on lan on a particular port
	 */
	device_id = pci_get_device(dev);
        switch (device_id) {
	case E1000_DEV_ID_82571EB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82571EB_QUAD_COPPER:
	case E1000_DEV_ID_82571EB_QUAD_FIBER:
	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
                /* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
                break;
	}
	return;
}
4945 
4946 
4947 /*
4948  * Enable PCI Wake On Lan capability
4949  */
static void
em_enable_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;
	u32		pmc, ctrl, ctrl_ext, rctl;
	u16     	status;

	/* No PCI power-management capability -> nothing to enable */
	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
		return;

	/* Advertise the wakeup capability */
	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
	/* NOTE(review): WUC is written again below for the non-PCH path;
	 * this early write looks redundant — confirm it is intentional. */
	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);

	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan))
		e1000_suspend_workarounds_ich8lan(&adapter->hw);

	/* Keep the laser running on Fiber adapters */
	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
	}

	/*
	** Determine type of Wakeup: note that wol
	** is set with all bits on by default.
	*/
	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
		adapter->wol &= ~E1000_WUFC_MAG;

	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
		adapter->wol &= ~E1000_WUFC_MC;
	else {
		/* Multicast wake needs promiscuous multicast reception */
		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
	}

	/* PCH parts route wake configuration through the PHY */
	if ((adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_pch2lan)) {
		if (em_enable_phy_wakeup(adapter))
			return;
	} else {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
	}

	if (adapter->hw.phy.type == e1000_phy_igp_3)
		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);

        /* Request PME */
        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (ifp->if_capenable & IFCAP_WOL)
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);

	return;
}
5017 
5018 /*
5019 ** WOL in the newer chipset interfaces (pchlan)
5020 ** require thing to be copied into the phy
5021 */
static int
em_enable_phy_wakeup(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mreg, ret = 0;
	u16 preg;

	/* copy MAC RARs to PHY RARs */
	e1000_copy_rx_addrs_to_phy_ich8lan(hw);

	/* copy MAC MTA to PHY MTA: each 32-bit MTA entry is split into
	 * two 16-bit PHY register writes (low word, then high word) */
	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
		    (u16)((mreg >> 16) & 0xFFFF));
	}

	/* configure PHY Rx Control register: mirror the relevant
	 * MAC RCTL/CTRL bits into the BM_RCTL PHY register */
	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
	mreg = E1000_READ_REG(hw, E1000_RCTL);
	if (mreg & E1000_RCTL_UPE)
		preg |= BM_RCTL_UPE;
	if (mreg & E1000_RCTL_MPE)
		preg |= BM_RCTL_MPE;
	preg &= ~(BM_RCTL_MO_MASK);
	if (mreg & E1000_RCTL_MO_3)
		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
				<< BM_RCTL_MO_SHIFT);
	if (mreg & E1000_RCTL_BAM)
		preg |= BM_RCTL_BAM;
	if (mreg & E1000_RCTL_PMCF)
		preg |= BM_RCTL_PMCF;
	mreg = E1000_READ_REG(hw, E1000_CTRL);
	if (mreg & E1000_CTRL_RFCE)
		preg |= BM_RCTL_RFCE;
	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);

	/* enable PHY wakeup in MAC register */
	E1000_WRITE_REG(hw, E1000_WUC,
	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);

	/* configure and enable PHY wakeup in PHY registers */
	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);

	/* activate PHY wakeup: requires exclusive PHY access and a
	 * read-modify-write of the wakeup-enable register on page 769 */
	ret = hw->phy.ops.acquire(hw);
	if (ret) {
		printf("Could not acquire PHY\n");
		return ret;
	}
	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
	if (ret) {
		printf("Could not read PHY page 769\n");
		goto out;
	}
	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
	if (ret)
		printf("Could not set PHY Host Wakeup bit\n");
out:
	/* always release the PHY, success or failure */
	hw->phy.ops.release(hw);

	return ret;
}
5091 
5092 static void
5093 em_led_func(void *arg, int onoff)
5094 {
5095 	struct adapter	*adapter = arg;
5096 
5097 	EM_CORE_LOCK(adapter);
5098 	if (onoff) {
5099 		e1000_setup_led(&adapter->hw);
5100 		e1000_led_on(&adapter->hw);
5101 	} else {
5102 		e1000_led_off(&adapter->hw);
5103 		e1000_cleanup_led(&adapter->hw);
5104 	}
5105 	EM_CORE_UNLOCK(adapter);
5106 }
5107 
5108 /*
5109 ** Disable the L0S and L1 LINK states
5110 */
5111 static void
5112 em_disable_aspm(struct adapter *adapter)
5113 {
5114 	int		base, reg;
5115 	u16		link_cap,link_ctrl;
5116 	device_t	dev = adapter->dev;
5117 
5118 	switch (adapter->hw.mac.type) {
5119 		case e1000_82573:
5120 		case e1000_82574:
5121 		case e1000_82583:
5122 			break;
5123 		default:
5124 			return;
5125 	}
5126 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5127 		return;
5128 	reg = base + PCIER_LINK_CAP;
5129 	link_cap = pci_read_config(dev, reg, 2);
5130 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5131 		return;
5132 	reg = base + PCIER_LINK_CTL;
5133 	link_ctrl = pci_read_config(dev, reg, 2);
5134 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5135 	pci_write_config(dev, reg, link_ctrl, 2);
5136 	return;
5137 }
5138 
5139 /**********************************************************************
5140  *
5141  *  Update the board statistics counters.
5142  *
5143  **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{
	struct ifnet   *ifp;

	/* Symbol/sequence error counters are only valid on copper media
	 * or when the link is up */
	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
	/*
	** For watchdog management we need to know if we have been
	** paused during the last interval, so capture that here.
	*/
	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	adapter->stats.xoffrxc += adapter->pause_frames;
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);

	/* For the 64-bit byte counters the low dword must be read first. */
	/* Both registers clear on the read of the high dword */

	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);

	/* NOTE(review): only the high dwords of TOR/TOT are read here,
	 * unlike GORC/GOTC above — confirm this is intentional. */
	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

	/* Interrupt Counts */

	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);

	/* These counters only exist on 82543 and newer MACs */
	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	}
	ifp = adapter->ifp;

	/* Publish aggregate counters to the ifnet */
	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
	    adapter->stats.crcerrs + adapter->stats.algnerrc +
	    adapter->stats.ruc + adapter->stats.roc +
	    adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol +
	    adapter->stats.latecol + adapter->watchdog_events;
}
5253 
5254 /* Export a single 32-bit register via a read-only sysctl. */
5255 static int
5256 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5257 {
5258 	struct adapter *adapter;
5259 	u_int val;
5260 
5261 	adapter = oidp->oid_arg1;
5262 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5263 	return (sysctl_handle_int(oidp, &val, 0, req));
5264 }
5265 
5266 /*
5267  * Add sysctl variables, one per statistic, to the system.
5268  */
5269 static void
5270 em_add_hw_stats(struct adapter *adapter)
5271 {
5272 	device_t dev = adapter->dev;
5273 
5274 	struct tx_ring *txr = adapter->tx_rings;
5275 	struct rx_ring *rxr = adapter->rx_rings;
5276 
5277 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5278 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5279 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5280 	struct e1000_hw_stats *stats = &adapter->stats;
5281 
5282 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5283 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5284 
5285 #define QUEUE_NAME_LEN 32
5286 	char namebuf[QUEUE_NAME_LEN];
5287 
5288 	/* Driver Statistics */
5289 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5290 			CTLFLAG_RD, &adapter->link_irq,
5291 			"Link MSIX IRQ Handled");
5292 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5293 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5294 			 "Std mbuf failed");
5295 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5296 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5297 			 "Std mbuf cluster failed");
5298 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5299 			CTLFLAG_RD, &adapter->dropped_pkts,
5300 			"Driver dropped packets");
5301 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5302 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5303 			"Driver tx dma failure in xmit");
5304 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5305 			CTLFLAG_RD, &adapter->rx_overruns,
5306 			"RX overruns");
5307 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5308 			CTLFLAG_RD, &adapter->watchdog_events,
5309 			"Watchdog timeouts");
5310 
5311 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5312 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5313 			em_sysctl_reg_handler, "IU",
5314 			"Device Control Register");
5315 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5316 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5317 			em_sysctl_reg_handler, "IU",
5318 			"Receiver Control Register");
5319 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5320 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5321 			"Flow Control High Watermark");
5322 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5323 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5324 			"Flow Control Low Watermark");
5325 
5326 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5327 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5328 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5329 					    CTLFLAG_RD, NULL, "Queue Name");
5330 		queue_list = SYSCTL_CHILDREN(queue_node);
5331 
5332 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5333 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5334 				E1000_TDH(txr->me),
5335 				em_sysctl_reg_handler, "IU",
5336  				"Transmit Descriptor Head");
5337 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5338 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5339 				E1000_TDT(txr->me),
5340 				em_sysctl_reg_handler, "IU",
5341  				"Transmit Descriptor Tail");
5342 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5343 				CTLFLAG_RD, &txr->tx_irq,
5344 				"Queue MSI-X Transmit Interrupts");
5345 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5346 				CTLFLAG_RD, &txr->no_desc_avail,
5347 				"Queue No Descriptor Available");
5348 
5349 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5350 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5351 				E1000_RDH(rxr->me),
5352 				em_sysctl_reg_handler, "IU",
5353 				"Receive Descriptor Head");
5354 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5355 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5356 				E1000_RDT(rxr->me),
5357 				em_sysctl_reg_handler, "IU",
5358 				"Receive Descriptor Tail");
5359 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5360 				CTLFLAG_RD, &rxr->rx_irq,
5361 				"Queue MSI-X Receive Interrupts");
5362 	}
5363 
5364 	/* MAC stats get their own sub node */
5365 
5366 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5367 				    CTLFLAG_RD, NULL, "Statistics");
5368 	stat_list = SYSCTL_CHILDREN(stat_node);
5369 
5370 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5371 			CTLFLAG_RD, &stats->ecol,
5372 			"Excessive collisions");
5373 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5374 			CTLFLAG_RD, &stats->scc,
5375 			"Single collisions");
5376 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5377 			CTLFLAG_RD, &stats->mcc,
5378 			"Multiple collisions");
5379 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5380 			CTLFLAG_RD, &stats->latecol,
5381 			"Late collisions");
5382 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5383 			CTLFLAG_RD, &stats->colc,
5384 			"Collision Count");
5385 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5386 			CTLFLAG_RD, &adapter->stats.symerrs,
5387 			"Symbol Errors");
5388 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5389 			CTLFLAG_RD, &adapter->stats.sec,
5390 			"Sequence Errors");
5391 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5392 			CTLFLAG_RD, &adapter->stats.dc,
5393 			"Defer Count");
5394 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5395 			CTLFLAG_RD, &adapter->stats.mpc,
5396 			"Missed Packets");
5397 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5398 			CTLFLAG_RD, &adapter->stats.rnbc,
5399 			"Receive No Buffers");
5400 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5401 			CTLFLAG_RD, &adapter->stats.ruc,
5402 			"Receive Undersize");
5403 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5404 			CTLFLAG_RD, &adapter->stats.rfc,
5405 			"Fragmented Packets Received ");
5406 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5407 			CTLFLAG_RD, &adapter->stats.roc,
5408 			"Oversized Packets Received");
5409 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5410 			CTLFLAG_RD, &adapter->stats.rjc,
5411 			"Recevied Jabber");
5412 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5413 			CTLFLAG_RD, &adapter->stats.rxerrc,
5414 			"Receive Errors");
5415 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5416 			CTLFLAG_RD, &adapter->stats.crcerrs,
5417 			"CRC errors");
5418 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5419 			CTLFLAG_RD, &adapter->stats.algnerrc,
5420 			"Alignment Errors");
5421 	/* On 82575 these are collision counts */
5422 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5423 			CTLFLAG_RD, &adapter->stats.cexterr,
5424 			"Collision/Carrier extension errors");
5425 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5426 			CTLFLAG_RD, &adapter->stats.xonrxc,
5427 			"XON Received");
5428 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5429 			CTLFLAG_RD, &adapter->stats.xontxc,
5430 			"XON Transmitted");
5431 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5432 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5433 			"XOFF Received");
5434 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5435 			CTLFLAG_RD, &adapter->stats.xofftxc,
5436 			"XOFF Transmitted");
5437 
5438 	/* Packet Reception Stats */
5439 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5440 			CTLFLAG_RD, &adapter->stats.tpr,
5441 			"Total Packets Received ");
5442 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5443 			CTLFLAG_RD, &adapter->stats.gprc,
5444 			"Good Packets Received");
5445 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5446 			CTLFLAG_RD, &adapter->stats.bprc,
5447 			"Broadcast Packets Received");
5448 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5449 			CTLFLAG_RD, &adapter->stats.mprc,
5450 			"Multicast Packets Received");
5451 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5452 			CTLFLAG_RD, &adapter->stats.prc64,
5453 			"64 byte frames received ");
5454 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5455 			CTLFLAG_RD, &adapter->stats.prc127,
5456 			"65-127 byte frames received");
5457 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5458 			CTLFLAG_RD, &adapter->stats.prc255,
5459 			"128-255 byte frames received");
5460 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5461 			CTLFLAG_RD, &adapter->stats.prc511,
5462 			"256-511 byte frames received");
5463 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5464 			CTLFLAG_RD, &adapter->stats.prc1023,
5465 			"512-1023 byte frames received");
5466 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5467 			CTLFLAG_RD, &adapter->stats.prc1522,
5468 			"1023-1522 byte frames received");
5469  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5470  			CTLFLAG_RD, &adapter->stats.gorc,
5471  			"Good Octets Received");
5472 
5473 	/* Packet Transmission Stats */
5474  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5475  			CTLFLAG_RD, &adapter->stats.gotc,
5476  			"Good Octets Transmitted");
5477 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5478 			CTLFLAG_RD, &adapter->stats.tpt,
5479 			"Total Packets Transmitted");
5480 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5481 			CTLFLAG_RD, &adapter->stats.gptc,
5482 			"Good Packets Transmitted");
5483 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5484 			CTLFLAG_RD, &adapter->stats.bptc,
5485 			"Broadcast Packets Transmitted");
5486 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5487 			CTLFLAG_RD, &adapter->stats.mptc,
5488 			"Multicast Packets Transmitted");
5489 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5490 			CTLFLAG_RD, &adapter->stats.ptc64,
5491 			"64 byte frames transmitted ");
5492 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5493 			CTLFLAG_RD, &adapter->stats.ptc127,
5494 			"65-127 byte frames transmitted");
5495 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5496 			CTLFLAG_RD, &adapter->stats.ptc255,
5497 			"128-255 byte frames transmitted");
5498 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5499 			CTLFLAG_RD, &adapter->stats.ptc511,
5500 			"256-511 byte frames transmitted");
5501 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5502 			CTLFLAG_RD, &adapter->stats.ptc1023,
5503 			"512-1023 byte frames transmitted");
5504 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5505 			CTLFLAG_RD, &adapter->stats.ptc1522,
5506 			"1024-1522 byte frames transmitted");
5507 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5508 			CTLFLAG_RD, &adapter->stats.tsctc,
5509 			"TSO Contexts Transmitted");
5510 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5511 			CTLFLAG_RD, &adapter->stats.tsctfc,
5512 			"TSO Contexts Failed");
5513 
5514 
5515 	/* Interrupt Stats */
5516 
5517 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5518 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5519 	int_list = SYSCTL_CHILDREN(int_node);
5520 
5521 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5522 			CTLFLAG_RD, &adapter->stats.iac,
5523 			"Interrupt Assertion Count");
5524 
5525 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5526 			CTLFLAG_RD, &adapter->stats.icrxptc,
5527 			"Interrupt Cause Rx Pkt Timer Expire Count");
5528 
5529 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5530 			CTLFLAG_RD, &adapter->stats.icrxatc,
5531 			"Interrupt Cause Rx Abs Timer Expire Count");
5532 
5533 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5534 			CTLFLAG_RD, &adapter->stats.ictxptc,
5535 			"Interrupt Cause Tx Pkt Timer Expire Count");
5536 
5537 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5538 			CTLFLAG_RD, &adapter->stats.ictxatc,
5539 			"Interrupt Cause Tx Abs Timer Expire Count");
5540 
5541 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5542 			CTLFLAG_RD, &adapter->stats.ictxqec,
5543 			"Interrupt Cause Tx Queue Empty Count");
5544 
5545 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5546 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5547 			"Interrupt Cause Tx Queue Min Thresh Count");
5548 
5549 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5550 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5551 			"Interrupt Cause Rx Desc Min Thresh Count");
5552 
5553 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5554 			CTLFLAG_RD, &adapter->stats.icrxoc,
5555 			"Interrupt Cause Receiver Overrun Count");
5556 }
5557 
5558 /**********************************************************************
5559  *
5560  *  This routine provides a way to dump out the adapter eeprom,
5561  *  often a useful debug/service tool. This only dumps the first
5562  *  32 words, stuff that matters is in that extent.
5563  *
5564  **********************************************************************/
5565 static int
5566 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5567 {
5568 	struct adapter *adapter = (struct adapter *)arg1;
5569 	int error;
5570 	int result;
5571 
5572 	result = -1;
5573 	error = sysctl_handle_int(oidp, &result, 0, req);
5574 
5575 	if (error || !req->newptr)
5576 		return (error);
5577 
5578 	/*
5579 	 * This value will cause a hex dump of the
5580 	 * first 32 16-bit words of the EEPROM to
5581 	 * the screen.
5582 	 */
5583 	if (result == 1)
5584 		em_print_nvm_info(adapter);
5585 
5586 	return (error);
5587 }
5588 
5589 static void
5590 em_print_nvm_info(struct adapter *adapter)
5591 {
5592 	u16	eeprom_data;
5593 	int	i, j, row = 0;
5594 
5595 	/* Its a bit crude, but it gets the job done */
5596 	printf("\nInterface EEPROM Dump:\n");
5597 	printf("Offset\n0x0000  ");
5598 	for (i = 0, j = 0; i < 32; i++, j++) {
5599 		if (j == 8) { /* Make the offset block */
5600 			j = 0; ++row;
5601 			printf("\n0x00%x0  ",row);
5602 		}
5603 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5604 		printf("%04x ", eeprom_data);
5605 	}
5606 	printf("\n");
5607 }
5608 
5609 static int
5610 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5611 {
5612 	struct em_int_delay_info *info;
5613 	struct adapter *adapter;
5614 	u32 regval;
5615 	int error, usecs, ticks;
5616 
5617 	info = (struct em_int_delay_info *)arg1;
5618 	usecs = info->value;
5619 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5620 	if (error != 0 || req->newptr == NULL)
5621 		return (error);
5622 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5623 		return (EINVAL);
5624 	info->value = usecs;
5625 	ticks = EM_USECS_TO_TICKS(usecs);
5626 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5627 		ticks *= 4;
5628 
5629 	adapter = info->adapter;
5630 
5631 	EM_CORE_LOCK(adapter);
5632 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5633 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5634 	/* Handle a few special cases. */
5635 	switch (info->offset) {
5636 	case E1000_RDTR:
5637 		break;
5638 	case E1000_TIDV:
5639 		if (ticks == 0) {
5640 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5641 			/* Don't write 0 into the TIDV register. */
5642 			regval++;
5643 		} else
5644 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5645 		break;
5646 	}
5647 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5648 	EM_CORE_UNLOCK(adapter);
5649 	return (0);
5650 }
5651 
5652 static void
5653 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5654 	const char *description, struct em_int_delay_info *info,
5655 	int offset, int value)
5656 {
5657 	info->adapter = adapter;
5658 	info->offset = offset;
5659 	info->value = value;
5660 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5661 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5662 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5663 	    info, 0, em_sysctl_int_delay, "I", description);
5664 }
5665 
5666 static void
5667 em_set_sysctl_value(struct adapter *adapter, const char *name,
5668 	const char *description, int *limit, int value)
5669 {
5670 	*limit = value;
5671 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5672 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5673 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5674 }
5675 
5676 
5677 /*
5678 ** Set flow control using sysctl:
5679 ** Flow control values:
5680 **      0 - off
5681 **      1 - rx pause
5682 **      2 - tx pause
5683 **      3 - full
5684 */
5685 static int
5686 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5687 {
5688         int		error;
5689 	static int	input = 3; /* default is full */
5690         struct adapter	*adapter = (struct adapter *) arg1;
5691 
5692         error = sysctl_handle_int(oidp, &input, 0, req);
5693 
5694         if ((error) || (req->newptr == NULL))
5695                 return (error);
5696 
5697 	if (input == adapter->fc) /* no change? */
5698 		return (error);
5699 
5700         switch (input) {
5701                 case e1000_fc_rx_pause:
5702                 case e1000_fc_tx_pause:
5703                 case e1000_fc_full:
5704                 case e1000_fc_none:
5705                         adapter->hw.fc.requested_mode = input;
5706 			adapter->fc = input;
5707                         break;
5708                 default:
5709 			/* Do nothing */
5710 			return (error);
5711         }
5712 
5713         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5714         e1000_force_mac_fc(&adapter->hw);
5715         return (error);
5716 }
5717 
5718 /*
5719 ** Manage Energy Efficient Ethernet:
5720 ** Control values:
5721 **     0/1 - enabled/disabled
5722 */
5723 static int
5724 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5725 {
5726        struct adapter *adapter = (struct adapter *) arg1;
5727        int             error, value;
5728 
5729        value = adapter->hw.dev_spec.ich8lan.eee_disable;
5730        error = sysctl_handle_int(oidp, &value, 0, req);
5731        if (error || req->newptr == NULL)
5732                return (error);
5733        EM_CORE_LOCK(adapter);
5734        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5735        em_init_locked(adapter);
5736        EM_CORE_UNLOCK(adapter);
5737        return (0);
5738 }
5739 
5740 static int
5741 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5742 {
5743 	struct adapter *adapter;
5744 	int error;
5745 	int result;
5746 
5747 	result = -1;
5748 	error = sysctl_handle_int(oidp, &result, 0, req);
5749 
5750 	if (error || !req->newptr)
5751 		return (error);
5752 
5753 	if (result == 1) {
5754 		adapter = (struct adapter *)arg1;
5755 		em_print_debug_info(adapter);
5756         }
5757 
5758 	return (error);
5759 }
5760 
5761 /*
5762 ** This routine is meant to be fluid, add whatever is
5763 ** needed for debugging a problem.  -jfv
5764 */
5765 static void
5766 em_print_debug_info(struct adapter *adapter)
5767 {
5768 	device_t dev = adapter->dev;
5769 	struct tx_ring *txr = adapter->tx_rings;
5770 	struct rx_ring *rxr = adapter->rx_rings;
5771 
5772 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5773 		printf("Interface is RUNNING ");
5774 	else
5775 		printf("Interface is NOT RUNNING\n");
5776 
5777 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5778 		printf("and INACTIVE\n");
5779 	else
5780 		printf("and ACTIVE\n");
5781 
5782 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5783 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5784 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5785 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5786 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5787 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5788 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5789 	device_printf(dev, "TX descriptors avail = %d\n",
5790 	    txr->tx_avail);
5791 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5792 	    txr->no_desc_avail);
5793 	device_printf(dev, "RX discarded packets = %ld\n",
5794 	    rxr->rx_discarded);
5795 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5796 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5797 }
5798