/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.7";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by em_probe to select which devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
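
/*
 * Usage note (an assumption based on the stock FreeBSD build, not
 * something this file declares): when built as a loadable module the
 * driver ships as if_em.ko and can be loaded with "kldload if_em";
 * the DRIVER_MODULE()/MODULE_DEPEND() glue above registers it with
 * the PCI bus and records its dependencies.
 */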

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
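
/*
 * The interrupt-delay registers count in units of 1.024 usecs, hence
 * the scaled conversions above.  A quick worked example (values are
 * illustrative): EM_USECS_TO_TICKS(1000) = (1000*1000 + 512)/1024 =
 * 977 ticks, and EM_TICKS_TO_USECS(100) = (1024*100 + 500)/1000 =
 * 102 usecs; the +512/+500 terms round to nearest instead of
 * truncating.
 */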

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");
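
/*
 * Example (illustrative values, not recommendations): these delay
 * tunables are read at boot, e.g. from /boot/loader.conf:
 *
 *   hw.em.tx_int_delay="66"
 *   hw.em.rx_int_delay="0"
 *   hw.em.tx_abs_int_delay="66"
 *   hw.em.rx_abs_int_delay="66"
 *
 * Per-device runtime sysctls for the same registers are created in
 * em_attach() via em_add_int_delay_sysctl().
 */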

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");
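
/*
 * Example (illustrative): raising the ring sizes from loader.conf with
 * hw.em.rxd="2048" and hw.em.txd="2048".  Whatever is set here is still
 * validated against the EM_MIN/EM_MAX descriptor limits and the
 * EM_DBA_ALIGN multiple requirement in em_attach() before being used.
 */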

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");
/*
 * Energy Efficient Ethernet - default to OFF.  The value set here is
 * copied into hw->dev_spec.ich8lan.eee_disable at attach time, so a
 * nonzero setting means EEE is disabled.
 */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet (nonzero disables EEE)");
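
/*
 * Example (illustrative): EEE can be enabled at boot by putting
 * hw.em.eee_setting="0" in /boot/loader.conf, or toggled at runtime on
 * supported parts through the per-device "eee_control" sysctl that
 * em_attach() creates under the device's sysctl tree.
 */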

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on the
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  The
	 * counts must not exceed the hardware maximum, and the ring size
	 * in bytes must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;
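	/*
	 * Sketch of the alignment rule above, assuming the usual 16-byte
	 * legacy descriptor and a 128-byte EM_DBA_ALIGN: the ring byte
	 * size must land on a 128-byte boundary, so the descriptor count
	 * has to be a multiple of 128/16 = 8 (e.g. 1024 passes, 1020
	 * does not and falls back to the EM_DEFAULT value).
	 */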

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is
	** important when reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it is a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the request
 *  rather than doing an immediate send.  That ability to queue, rather
 *  than having multiple hardware TX queues, is where the advantage of
 *  this path lies.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m != NULL) {
		err = drbr_enqueue(ifp, txr->br, m);
		if (err)
			return (err);
	}

	/* Process the queue */
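	/*
	 * drbr_peek() leaves the mbuf on the ring: after a successful
	 * em_xmit() we drbr_advance() past it; on failure we either
	 * advance anyway (em_xmit() consumed the mbuf and set the
	 * pointer to NULL) or drbr_putback() the possibly-modified
	 * chain so it can be retried later.
	 */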
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		ifp->if_obytes += next->m_pkthdr.len;
		if (next->m_flags & M_MCAST)
			ifp->if_omcasts++;
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
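	/*
	 * Worked example of the check above: on an 82574 max_frame_size
	 * is 9234, so the largest accepted MTU is 9234 - ETHER_HDR_LEN
	 * (14) - ETHER_CRC_LEN (4) = 9216; "ifconfig em0 mtu 9216" would
	 * pass, while 9217 would return EINVAL.
	 */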
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: by the stack, as the init entry
 *  point in the network interface structure, and by the driver, as a
 *  hw/sw initialization routine to get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten when the
	 * other port is reset.  We make a duplicate in the last RAR
	 * entry for that eventuality, which assures the interface
	 * continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
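	/*
	 * For example: with the default 1500-byte MTU, max_frame_size is
	 * 1518 and standard 2K (MCLBYTES) clusters are used; a 9000-byte
	 * jumbo MTU pushes it past 4096 and selects 9K (MJUM9BYTES)
	 * clusters instead.
	 */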

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */
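
/*
 * Usage note (an assumption based on stock FreeBSD polling support,
 * not something this file declares): with "options DEVICE_POLLING"
 * in the kernel, polling is switched per interface at runtime, e.g.
 * "ifconfig em0 polling", which takes the ether_poll_register() path
 * in em_ioctl() above and then calls em_poll() from the polling loop.
 */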

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}
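
/*
 * Note on the filter pattern above: em_irq_fast() runs in interrupt
 * (filter) context, so it only classifies the interrupt cause, masks
 * further interrupts and defers the actual RX/TX work to the que_task
 * taskqueue; FILTER_HANDLED/FILTER_STRAY tell the kernel whether the
 * interrupt was ours.
 */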

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}

/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);
}
1682 
1683 
1684 /*********************************************************************
1685  *
1686  *  Media Ioctl callback
1687  *
1688  *  This routine is called whenever the user queries the status of
1689  *  the interface using ifconfig.
1690  *
1691  **********************************************************************/
1692 static void
1693 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1694 {
1695 	struct adapter *adapter = ifp->if_softc;
1696 	u_char fiber_type = IFM_1000_SX;
1697 
1698 	INIT_DEBUGOUT("em_media_status: begin");
1699 
1700 	EM_CORE_LOCK(adapter);
1701 	em_update_link_status(adapter);
1702 
1703 	ifmr->ifm_status = IFM_AVALID;
1704 	ifmr->ifm_active = IFM_ETHER;
1705 
1706 	if (!adapter->link_active) {
1707 		EM_CORE_UNLOCK(adapter);
1708 		return;
1709 	}
1710 
1711 	ifmr->ifm_status |= IFM_ACTIVE;
1712 
1713 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1714 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1715 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1716 	} else {
1717 		switch (adapter->link_speed) {
1718 		case 10:
1719 			ifmr->ifm_active |= IFM_10_T;
1720 			break;
1721 		case 100:
1722 			ifmr->ifm_active |= IFM_100_TX;
1723 			break;
1724 		case 1000:
1725 			ifmr->ifm_active |= IFM_1000_T;
1726 			break;
1727 		}
1728 		if (adapter->link_duplex == FULL_DUPLEX)
1729 			ifmr->ifm_active |= IFM_FDX;
1730 		else
1731 			ifmr->ifm_active |= IFM_HDX;
1732 	}
1733 	EM_CORE_UNLOCK(adapter);
1734 }
1735 
1736 /*********************************************************************
1737  *
1738  *  Media Ioctl callback
1739  *
1740  *  This routine is called when the user changes speed/duplex using
1741  *  media/mediaopt option with ifconfig.
1742  *
1743  **********************************************************************/
1744 static int
1745 em_media_change(struct ifnet *ifp)
1746 {
1747 	struct adapter *adapter = ifp->if_softc;
1748 	struct ifmedia  *ifm = &adapter->media;
1749 
1750 	INIT_DEBUGOUT("em_media_change: begin");
1751 
1752 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1753 		return (EINVAL);
1754 
1755 	EM_CORE_LOCK(adapter);
1756 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1757 	case IFM_AUTO:
1758 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1759 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1760 		break;
1761 	case IFM_1000_LX:
1762 	case IFM_1000_SX:
1763 	case IFM_1000_T:
1764 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1765 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1766 		break;
1767 	case IFM_100_TX:
1768 		adapter->hw.mac.autoneg = FALSE;
1769 		adapter->hw.phy.autoneg_advertised = 0;
1770 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1771 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1772 		else
1773 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1774 		break;
1775 	case IFM_10_T:
1776 		adapter->hw.mac.autoneg = FALSE;
1777 		adapter->hw.phy.autoneg_advertised = 0;
1778 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1779 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1780 		else
1781 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1782 		break;
1783 	default:
1784 		device_printf(adapter->dev, "Unsupported media type\n");
1785 	}
1786 
1787 	em_init_locked(adapter);
1788 	EM_CORE_UNLOCK(adapter);
1789 
1790 	return (0);
1791 }
1792 
1793 /*********************************************************************
1794  *
1795  *  This routine maps the mbufs to tx descriptors.
1796  *
1797  *  Returns 0 on success, positive on failure.
1798  **********************************************************************/
1799 
1800 static int
1801 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1802 {
1803 	struct adapter		*adapter = txr->adapter;
1804 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1805 	bus_dmamap_t		map;
1806 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1807 	struct e1000_tx_desc	*ctxd = NULL;
1808 	struct mbuf		*m_head;
1809 	struct ether_header	*eh;
1810 	struct ip		*ip = NULL;
1811 	struct tcphdr		*tp = NULL;
1812 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1813 	int			ip_off, poff;
1814 	int			nsegs, i, j, first, last = 0;
1815 	int			error, do_tso, tso_desc = 0, remap = 1;
1816 
1817 retry:
1818 	m_head = *m_headp;
1819 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1820 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1821 	ip_off = poff = 0;
1822 
1823 	/*
1824 	 * Intel recommends the entire IP/TCP header length reside in a single
1825 	 * buffer. If multiple descriptors are used to describe the IP and
1826 	 * TCP header, each descriptor should describe one or more
1827 	 * complete headers; descriptors referencing only parts of headers
1828 	 * are not supported. If all layer headers are not coalesced into
1829 	 * a single buffer, each buffer should not cross a 4KB boundary,
1830 	 * or be larger than the maximum read request size.
1831 	 * Controller also requires modifing IP/TCP header to make TSO work
1832 	 * The controller also requires the IP/TCP header to be modified
1833 	 * to make TSO work, so we first get a writable mbuf chain, then
1834 	 * coalesce the ethernet/IP/TCP headers into a single buffer to
1835 	 * meet the controller's requirement. This also simplifies
1836 	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1837 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
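	/*
	 * Illustrative arithmetic (editor's example, not in the original
	 * source): for an untagged IPv4/TCP frame with no IP options,
	 * ip_off = sizeof(struct ether_header) = 14, and with ip_hl == 5
	 * the TCP header starts at poff = 14 + (5 << 2) = 34. A VLAN tag
	 * pushes ip_off to 18 and poff to 38.
	 */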
1838 		if (do_tso || (m_head->m_next != NULL &&
1839 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1840 			if (M_WRITABLE(*m_headp) == 0) {
1841 				m_head = m_dup(*m_headp, M_NOWAIT);
1842 				m_freem(*m_headp);
1843 				if (m_head == NULL) {
1844 					*m_headp = NULL;
1845 					return (ENOBUFS);
1846 				}
1847 				*m_headp = m_head;
1848 			}
1849 		}
1850 		/*
1851 		 * XXX
1852 		 * Assume IPv4, we don't have TSO/checksum offload support
1853 		 * for IPv6 yet.
1854 		 */
1855 		ip_off = sizeof(struct ether_header);
1856 		m_head = m_pullup(m_head, ip_off);
1857 		if (m_head == NULL) {
1858 			*m_headp = NULL;
1859 			return (ENOBUFS);
1860 		}
1861 		eh = mtod(m_head, struct ether_header *);
1862 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1863 			ip_off = sizeof(struct ether_vlan_header);
1864 			m_head = m_pullup(m_head, ip_off);
1865 			if (m_head == NULL) {
1866 				*m_headp = NULL;
1867 				return (ENOBUFS);
1868 			}
1869 		}
1870 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1871 		if (m_head == NULL) {
1872 			*m_headp = NULL;
1873 			return (ENOBUFS);
1874 		}
1875 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1876 		poff = ip_off + (ip->ip_hl << 2);
1877 		if (do_tso) {
1878 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1879 			if (m_head == NULL) {
1880 				*m_headp = NULL;
1881 				return (ENOBUFS);
1882 			}
1883 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1884 			/*
1885 			 * TSO workaround:
1886 			 *   pull 4 more bytes of payload into the header mbuf.
1887 			 */
1888 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1889 			if (m_head == NULL) {
1890 				*m_headp = NULL;
1891 				return (ENOBUFS);
1892 			}
1893 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1894 			ip->ip_len = 0;
1895 			ip->ip_sum = 0;
1896 			/*
1897 			 * The TCP pseudo-header checksum must not include the
1898 			 * TCP payload length, so the driver recomputes here the
1899 			 * checksum that the hardware expects to see, per
1900 			 * Microsoft's Large Send specification.
1901 			 */
1902 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1903 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1904 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
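			/*
			 * Note the three arguments above: in_pseudo() folds
			 * only the source address, destination address and
			 * htons(IPPROTO_TCP); the usual payload-length term
			 * is deliberately omitted so the hardware can add
			 * each segment's length during TSO.
			 */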
1905 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1906 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1907 			if (m_head == NULL) {
1908 				*m_headp = NULL;
1909 				return (ENOBUFS);
1910 			}
1911 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1913 			if (m_head == NULL) {
1914 				*m_headp = NULL;
1915 				return (ENOBUFS);
1916 			}
1917 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1918 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1919 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1920 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1921 			if (m_head == NULL) {
1922 				*m_headp = NULL;
1923 				return (ENOBUFS);
1924 			}
1925 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1926 		}
1927 		*m_headp = m_head;
1928 	}
1929 
1930 	/*
1931 	 * Map the packet for DMA
1932 	 *
1933 	 * Capture the first descriptor index;
1934 	 * this descriptor will have the index
1935 	 * of the EOP, which is the only one
1936 	 * that now gets a DONE bit writeback.
1937 	 */
1938 	first = txr->next_avail_desc;
1939 	tx_buffer = &txr->tx_buffers[first];
1940 	tx_buffer_mapped = tx_buffer;
1941 	map = tx_buffer->map;
1942 
1943 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1944 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1945 
1946 	/*
1947 	 * There are two types of errors we can (try) to handle:
1948 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1949 	 *   out of segments.  Defragment the mbuf chain and try again.
1950 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1951 	 *   at this point in time.  Defer sending and try again later.
1952 	 * All other errors, in particular EINVAL, are fatal and prevent the
1953 	 * mbuf chain from ever going through.  Drop it and report error.
1954 	 */
1955 	if (error == EFBIG && remap) {
1956 		struct mbuf *m;
1957 
1958 		m = m_defrag(*m_headp, M_NOWAIT);
1959 		if (m == NULL) {
1960 			adapter->mbuf_alloc_failed++;
1961 			m_freem(*m_headp);
1962 			*m_headp = NULL;
1963 			return (ENOBUFS);
1964 		}
1965 		*m_headp = m;
1966 
1967 		/* Try it again, but only once */
1968 		remap = 0;
1969 		goto retry;
1970 	} else if (error == ENOMEM) {
1971 		adapter->no_tx_dma_setup++;
1972 		return (error);
1973 	} else if (error != 0) {
1974 		adapter->no_tx_dma_setup++;
1975 		m_freem(*m_headp);
1976 		*m_headp = NULL;
1977 		return (error);
1978 	}
1979 
1980 	/*
1981 	 * TSO Hardware workaround, if this packet is not
1982 	 * TSO, and is only a single descriptor long, and
1983 	 * it follows a TSO burst, then we need to add a
1984 	 * sentinel descriptor to prevent premature writeback.
1985 	 */
1986 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1987 		if (nsegs == 1)
1988 			tso_desc = TRUE;
1989 		txr->tx_tso = FALSE;
1990 	}
1991 
1992 	if (nsegs > (txr->tx_avail - 2)) {
1993 		txr->no_desc_avail++;
1994 		bus_dmamap_unload(txr->txtag, map);
1995 		return (ENOBUFS);
1996 	}
1997 	m_head = *m_headp;
1998 
1999 	/* Do hardware assists */
2000 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2001 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2002 		    &txd_upper, &txd_lower);
2003 		/* we need to make a final sentinel transmit desc */
2004 		tso_desc = TRUE;
2005 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2006 		em_transmit_checksum_setup(txr, m_head,
2007 		    ip_off, ip, &txd_upper, &txd_lower);
2008 
2009 	if (m_head->m_flags & M_VLANTAG) {
2010 		/* Set the vlan id. */
2011 		txd_upper |=
2012 		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2013 		/* Tell hardware to add tag */
2014 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2015 	}
2016 
2017 	i = txr->next_avail_desc;
2018 
2019 	/* Set up our transmit descriptors */
2020 	for (j = 0; j < nsegs; j++) {
2021 		bus_size_t seg_len;
2022 		bus_addr_t seg_addr;
2023 
2024 		tx_buffer = &txr->tx_buffers[i];
2025 		ctxd = &txr->tx_base[i];
2026 		seg_addr = segs[j].ds_addr;
2027 		seg_len  = segs[j].ds_len;
2028 		/*
2029 		** TSO Workaround:
2030 		** If this is the last descriptor, we want to
2031 		** split it so we have a small final sentinel
2032 		*/
2033 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2034 			seg_len -= 4;
2035 			ctxd->buffer_addr = htole64(seg_addr);
2036 			ctxd->lower.data = htole32(adapter->txd_cmd |
2037 			    txd_lower | seg_len);
2038 			ctxd->upper.data =
2039 			    htole32(txd_upper);
2040 			if (++i == adapter->num_tx_desc)
2041 				i = 0;
2042 			/* Now make the sentinel */
2043 			++txd_used; /* using an extra txd */
2044 			ctxd = &txr->tx_base[i];
2045 			tx_buffer = &txr->tx_buffers[i];
2046 			ctxd->buffer_addr =
2047 			    htole64(seg_addr + seg_len);
2048 			ctxd->lower.data = htole32(adapter->txd_cmd |
2049 			    txd_lower | 4);
2050 			ctxd->upper.data =
2051 			    htole32(txd_upper);
2052 			last = i;
2053 			if (++i == adapter->num_tx_desc)
2054 				i = 0;
2055 		} else {
2056 			ctxd->buffer_addr = htole64(seg_addr);
2057 			ctxd->lower.data = htole32(adapter->txd_cmd |
2058 			    txd_lower | seg_len);
2059 			ctxd->upper.data =
2060 			    htole32(txd_upper);
2061 			last = i;
2062 			if (++i == adapter->num_tx_desc)
2063 				i = 0;
2064 		}
2065 		tx_buffer->m_head = NULL;
2066 		tx_buffer->next_eop = -1;
2067 	}
2068 
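	/*
	 * Illustrative split (editor's example): with tso_desc set and a
	 * final 1514-byte segment, the loop above emits one 1510-byte
	 * descriptor plus a 4-byte sentinel, and txd_used accounts for
	 * the extra slot consumed.
	 */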
2069 	txr->next_avail_desc = i;
2070 	txr->tx_avail -= nsegs;
2071 	if (tso_desc) /* TSO used an extra for sentinel */
2072 		txr->tx_avail -= txd_used;
2073 
2074 	tx_buffer->m_head = m_head;
2075 	/*
2076 	** Here we swap the map so the last descriptor,
2077 	** which gets the completion interrupt, has the
2078 	** real map, and the first descriptor gets the
2079 	** unused map from this descriptor.
2080 	*/
2081 	tx_buffer_mapped->map = tx_buffer->map;
2082 	tx_buffer->map = map;
2083 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2084 
2085 	/*
2086 	 * The last descriptor of the packet needs
2087 	 * End Of Packet (EOP) and
2088 	 * Report Status (RS).
2089 	 */
2090 	ctxd->lower.data |=
2091 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2092 	/*
2093 	 * Keep track in the first buffer which
2094 	 * descriptor will be written back
2095 	 */
2096 	tx_buffer = &txr->tx_buffers[first];
2097 	tx_buffer->next_eop = last;
2098 	/* Update the watchdog time early and often */
2099 	txr->watchdog_time = ticks;
2100 
2101 	/*
2102 	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2103 	 * that this frame is available to transmit.
2104 	 */
2105 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2106 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2107 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2108 
2109 	return (0);
2110 }
2111 
2112 static void
2113 em_set_promisc(struct adapter *adapter)
2114 {
2115 	struct ifnet	*ifp = adapter->ifp;
2116 	u32		reg_rctl;
2117 
2118 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2119 
2120 	if (ifp->if_flags & IFF_PROMISC) {
2121 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2122 		/* Turn this on if you want to see bad packets */
2123 		if (em_debug_sbp)
2124 			reg_rctl |= E1000_RCTL_SBP;
2125 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2126 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2127 		reg_rctl |= E1000_RCTL_MPE;
2128 		reg_rctl &= ~E1000_RCTL_UPE;
2129 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2130 	}
2131 }
2132 
2133 static void
2134 em_disable_promisc(struct adapter *adapter)
2135 {
2136 	u32	reg_rctl;
2137 
2138 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2139 
2140 	reg_rctl &= ~E1000_RCTL_UPE;
2141 	reg_rctl &= ~E1000_RCTL_MPE;
2142 	reg_rctl &= ~E1000_RCTL_SBP;
2143 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2144 }
2145 
2146 
2147 /*********************************************************************
2148  *  Multicast Update
2149  *
2150  *  This routine is called whenever the multicast address list is updated.
2151  *
2152  **********************************************************************/
2153 
2154 static void
2155 em_set_multi(struct adapter *adapter)
2156 {
2157 	struct ifnet	*ifp = adapter->ifp;
2158 	struct ifmultiaddr *ifma;
2159 	u32 reg_rctl = 0;
2160 	u8  *mta; /* Multicast array memory */
2161 	int mcnt = 0;
2162 
2163 	IOCTL_DEBUGOUT("em_set_multi: begin");
2164 
2165 	mta = adapter->mta;
2166 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2167 
2168 	if (adapter->hw.mac.type == e1000_82542 &&
2169 	    adapter->hw.revision_id == E1000_REVISION_2) {
2170 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2171 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2172 			e1000_pci_clear_mwi(&adapter->hw);
2173 		reg_rctl |= E1000_RCTL_RST;
2174 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2175 		msec_delay(5);
2176 	}
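	/*
	 * Note (inferred from the code above and below): the 82542 rev 2
	 * must have MWI disabled and its receiver held in reset
	 * (E1000_RCTL_RST) while the multicast table is rewritten; both
	 * settings are restored in the matching block at the end of this
	 * function.
	 */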
2177 
2178 #if __FreeBSD_version < 800000
2179 	IF_ADDR_LOCK(ifp);
2180 #else
2181 	if_maddr_rlock(ifp);
2182 #endif
2183 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2184 		if (ifma->ifma_addr->sa_family != AF_LINK)
2185 			continue;
2186 
2187 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2188 			break;
2189 
2190 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2191 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2192 		mcnt++;
2193 	}
2194 #if __FreeBSD_version < 800000
2195 	IF_ADDR_UNLOCK(ifp);
2196 #else
2197 	if_maddr_runlock(ifp);
2198 #endif
2199 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2200 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2201 		reg_rctl |= E1000_RCTL_MPE;
2202 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2203 	} else
2204 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2205 
2206 	if (adapter->hw.mac.type == e1000_82542 &&
2207 	    adapter->hw.revision_id == E1000_REVISION_2) {
2208 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2209 		reg_rctl &= ~E1000_RCTL_RST;
2210 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2211 		msec_delay(5);
2212 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2213 			e1000_pci_set_mwi(&adapter->hw);
2214 	}
2215 }
2216 
2217 
2218 /*********************************************************************
2219  *  Timer routine
2220  *
2221  *  This routine checks for link status and updates statistics.
2222  *
2223  **********************************************************************/
2224 
2225 static void
2226 em_local_timer(void *arg)
2227 {
2228 	struct adapter	*adapter = arg;
2229 	struct ifnet	*ifp = adapter->ifp;
2230 	struct tx_ring	*txr = adapter->tx_rings;
2231 	struct rx_ring	*rxr = adapter->rx_rings;
2232 	u32		trigger;
2233 
2234 	EM_CORE_LOCK_ASSERT(adapter);
2235 
2236 	em_update_link_status(adapter);
2237 	em_update_stats_counters(adapter);
2238 
2239 	/* Reset LAA into RAR[0] on 82571 */
2240 	if ((adapter->hw.mac.type == e1000_82571) &&
2241 	    e1000_get_laa_state_82571(&adapter->hw))
2242 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2243 
2244 	/* Mask to use in the irq trigger */
2245 	if (adapter->msix_mem)
2246 		trigger = rxr->ims; /* RX for 82574 */
2247 	else
2248 		trigger = E1000_ICS_RXDMT0;
2249 
2250 	/*
2251 	** Check on the state of the TX queue(s); this
2252 	** can be done without the lock because it is read-only
2253 	** and the HUNG state will be static if set.
2254 	*/
2255 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2256 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2257 		    (adapter->pause_frames == 0))
2258 			goto hung;
2259 		/* Schedule a TX tasklet if needed */
2260 		if (txr->tx_avail <= EM_MAX_SCATTER)
2261 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2262 	}
2263 
2264 	adapter->pause_frames = 0;
2265 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2266 #ifndef DEVICE_POLLING
2267 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2268 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2269 #endif
2270 	return;
2271 hung:
2272 	/* Looks like we're hung */
2273 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2274 	device_printf(adapter->dev,
2275 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2276 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2277 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2278 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2279 	    "Next TX to Clean = %d\n",
2280 	    txr->me, txr->tx_avail, txr->next_to_clean);
2281 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2282 	adapter->watchdog_events++;
2283 	adapter->pause_frames = 0;
2284 	em_init_locked(adapter);
2285 }
2286 
2287 
2288 static void
2289 em_update_link_status(struct adapter *adapter)
2290 {
2291 	struct e1000_hw *hw = &adapter->hw;
2292 	struct ifnet *ifp = adapter->ifp;
2293 	device_t dev = adapter->dev;
2294 	struct tx_ring *txr = adapter->tx_rings;
2295 	u32 link_check = 0;
2296 
2297 	/* Get the cached link value or read phy for real */
2298 	switch (hw->phy.media_type) {
2299 	case e1000_media_type_copper:
2300 		if (hw->mac.get_link_status) {
2301 			/* Do the work to read phy */
2302 			e1000_check_for_link(hw);
2303 			link_check = !hw->mac.get_link_status;
2304 			if (link_check) /* ESB2 fix */
2305 				e1000_cfg_on_link_up(hw);
2306 		} else
2307 			link_check = TRUE;
2308 		break;
2309 	case e1000_media_type_fiber:
2310 		e1000_check_for_link(hw);
2311 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2312 		    E1000_STATUS_LU);
2313 		break;
2314 	case e1000_media_type_internal_serdes:
2315 		e1000_check_for_link(hw);
2316 		link_check = adapter->hw.mac.serdes_has_link;
2317 		break;
2318 	default:
2319 	case e1000_media_type_unknown:
2320 		break;
2321 	}
2322 
2323 	/* Now check for a transition */
2324 	if (link_check && (adapter->link_active == 0)) {
2325 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2326 		    &adapter->link_duplex);
2327 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2328 		if ((adapter->link_speed != SPEED_1000) &&
2329 		    ((hw->mac.type == e1000_82571) ||
2330 		    (hw->mac.type == e1000_82572))) {
2331 			int tarc0;
2332 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2333 			tarc0 &= ~SPEED_MODE_BIT;
2334 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2335 		}
2336 		if (bootverbose)
2337 			device_printf(dev, "Link is up %d Mbps %s\n",
2338 			    adapter->link_speed,
2339 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2340 			    "Full Duplex" : "Half Duplex"));
2341 		adapter->link_active = 1;
2342 		adapter->smartspeed = 0;
2343 		ifp->if_baudrate = adapter->link_speed * 1000000;
2344 		if_link_state_change(ifp, LINK_STATE_UP);
2345 	} else if (!link_check && (adapter->link_active == 1)) {
2346 		ifp->if_baudrate = adapter->link_speed = 0;
2347 		adapter->link_duplex = 0;
2348 		if (bootverbose)
2349 			device_printf(dev, "Link is Down\n");
2350 		adapter->link_active = 0;
2351 		/* Link down, disable watchdog */
2352 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2353 			txr->queue_status = EM_QUEUE_IDLE;
2354 		if_link_state_change(ifp, LINK_STATE_DOWN);
2355 	}
2356 }
2357 
2358 /*********************************************************************
2359  *
2360  *  This routine disables all traffic on the adapter by issuing a
2361  *  global reset on the MAC and deallocates TX/RX buffers.
2362  *
2363  *  This routine should always be called with BOTH the CORE
2364  *  and TX locks.
2365  **********************************************************************/
2366 
2367 static void
2368 em_stop(void *arg)
2369 {
2370 	struct adapter	*adapter = arg;
2371 	struct ifnet	*ifp = adapter->ifp;
2372 	struct tx_ring	*txr = adapter->tx_rings;
2373 
2374 	EM_CORE_LOCK_ASSERT(adapter);
2375 
2376 	INIT_DEBUGOUT("em_stop: begin");
2377 
2378 	em_disable_intr(adapter);
2379 	callout_stop(&adapter->timer);
2380 
2381 	/* Tell the stack that the interface is no longer active */
2382 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2383 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2384 
2385 	/* Disarm the watchdog timer. */
2386 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2387 		EM_TX_LOCK(txr);
2388 		txr->queue_status = EM_QUEUE_IDLE;
2389 		EM_TX_UNLOCK(txr);
2390 	}
2391 
2392 	e1000_reset_hw(&adapter->hw);
2393 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2394 
2395 	e1000_led_off(&adapter->hw);
2396 	e1000_cleanup_led(&adapter->hw);
2397 }
2398 
2399 
2400 /*********************************************************************
2401  *
2402  *  Determine hardware revision.
2403  *
2404  **********************************************************************/
2405 static void
2406 em_identify_hardware(struct adapter *adapter)
2407 {
2408 	device_t dev = adapter->dev;
2409 
2410 	/* Make sure our PCI config space has the necessary stuff set */
2411 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2412 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2413 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2414 		device_printf(dev, "Memory Access and/or Bus Master bits "
2415 		    "were not set!\n");
2416 		adapter->hw.bus.pci_cmd_word |=
2417 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2418 		pci_write_config(dev, PCIR_COMMAND,
2419 		    adapter->hw.bus.pci_cmd_word, 2);
2420 	}
2421 
2422 	/* Save off the information about this board */
2423 	adapter->hw.vendor_id = pci_get_vendor(dev);
2424 	adapter->hw.device_id = pci_get_device(dev);
2425 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2426 	adapter->hw.subsystem_vendor_id =
2427 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2428 	adapter->hw.subsystem_device_id =
2429 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2430 
2431 	/* Do Shared Code Init and Setup */
2432 	if (e1000_set_mac_type(&adapter->hw)) {
2433 		device_printf(dev, "Setup init failure\n");
2434 		return;
2435 	}
2436 }
2437 
2438 static int
2439 em_allocate_pci_resources(struct adapter *adapter)
2440 {
2441 	device_t	dev = adapter->dev;
2442 	int		rid;
2443 
2444 	rid = PCIR_BAR(0);
2445 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2446 	    &rid, RF_ACTIVE);
2447 	if (adapter->memory == NULL) {
2448 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2449 		return (ENXIO);
2450 	}
2451 	adapter->osdep.mem_bus_space_tag =
2452 	    rman_get_bustag(adapter->memory);
2453 	adapter->osdep.mem_bus_space_handle =
2454 	    rman_get_bushandle(adapter->memory);
2455 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2456 
2457 	/* Default to a single queue */
2458 	adapter->num_queues = 1;
2459 
2460 	/*
2461 	 * Setup MSI/X or MSI if PCI Express
2462 	 */
2463 	adapter->msix = em_setup_msix(adapter);
2464 
2465 	adapter->hw.back = &adapter->osdep;
2466 
2467 	return (0);
2468 }
2469 
2470 /*********************************************************************
2471  *
2472  *  Setup the Legacy or MSI Interrupt handler
2473  *
2474  **********************************************************************/
2475 int
2476 em_allocate_legacy(struct adapter *adapter)
2477 {
2478 	device_t dev = adapter->dev;
2479 	struct tx_ring	*txr = adapter->tx_rings;
2480 	int error, rid = 0;
2481 
2482 	/* Manually turn off all interrupts */
2483 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2484 
2485 	if (adapter->msix == 1) /* using MSI */
2486 		rid = 1;
2487 	/* We allocate a single interrupt resource */
2488 	adapter->res = bus_alloc_resource_any(dev,
2489 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2490 	if (adapter->res == NULL) {
2491 		device_printf(dev, "Unable to allocate bus resource: "
2492 		    "interrupt\n");
2493 		return (ENXIO);
2494 	}
2495 
2496 	/*
2497 	 * Allocate a fast interrupt and the associated
2498 	 * deferred processing contexts.
2499 	 */
2500 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2501 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2502 	    taskqueue_thread_enqueue, &adapter->tq);
2503 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2504 	    device_get_nameunit(adapter->dev));
2505 	/* Use a TX-only tasklet for the local timer */
2506 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2507 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2508 	    taskqueue_thread_enqueue, &txr->tq);
2509 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2510 	    device_get_nameunit(adapter->dev));
2511 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2512 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2513 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2514 		device_printf(dev, "Failed to register fast interrupt "
2515 			    "handler: %d\n", error);
2516 		taskqueue_free(adapter->tq);
2517 		adapter->tq = NULL;
2518 		return (error);
2519 	}
2520 
2521 	return (0);
2522 }
2523 
2524 /*********************************************************************
2525  *
2526  *  Setup the MSIX Interrupt handlers
2527  *   This is not really Multiqueue, rather
2528  *   it's just separate interrupt vectors
2529  *   for TX, RX, and Link.
2530  *
2531  **********************************************************************/
2532 int
2533 em_allocate_msix(struct adapter *adapter)
2534 {
2535 	device_t	dev = adapter->dev;
2536 	struct		tx_ring *txr = adapter->tx_rings;
2537 	struct		rx_ring *rxr = adapter->rx_rings;
2538 	int		error, rid, vector = 0;
2539 
2540 
2541 	/* Make sure all interrupts are disabled */
2542 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2543 
2544 	/* First set up ring resources */
2545 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2546 
2547 		/* RX ring */
2548 		rid = vector + 1;
2549 
2550 		rxr->res = bus_alloc_resource_any(dev,
2551 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2552 		if (rxr->res == NULL) {
2553 			device_printf(dev,
2554 			    "Unable to allocate bus resource: "
2555 			    "RX MSIX Interrupt %d\n", i);
2556 			return (ENXIO);
2557 		}
2558 		if ((error = bus_setup_intr(dev, rxr->res,
2559 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2560 		    rxr, &rxr->tag)) != 0) {
2561 			device_printf(dev, "Failed to register RX handler");
2562 			return (error);
2563 		}
2564 #if __FreeBSD_version >= 800504
2565 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2566 #endif
2567 		rxr->msix = vector++; /* NOTE increment vector for TX */
2568 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2569 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2570 		    taskqueue_thread_enqueue, &rxr->tq);
2571 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2572 		    device_get_nameunit(adapter->dev));
2573 		/*
2574 		** Set the bit to enable interrupt
2575 		** in E1000_IMS -- bits 20 and 21
2576 		** are for RX0 and RX1, note this has
2577 		** NOTHING to do with the MSIX vector
2578 		*/
2579 		rxr->ims = 1 << (20 + i);
2580 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
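		/*
		 * Worked example (editor's illustration): for queue 0, with
		 * the RX vector allocated first (rxr->msix == 0), rxr->ims
		 * becomes 1 << 20 == 0x00100000 and the IVAR field written
		 * is (8 | 0) << 0, where the 8 is presumably the field's
		 * valid/enable bit on the 82574.
		 */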
2581 
2582 		/* TX ring */
2583 		rid = vector + 1;
2584 		txr->res = bus_alloc_resource_any(dev,
2585 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2586 		if (txr->res == NULL) {
2587 			device_printf(dev,
2588 			    "Unable to allocate bus resource: "
2589 			    "TX MSIX Interrupt %d\n", i);
2590 			return (ENXIO);
2591 		}
2592 		if ((error = bus_setup_intr(dev, txr->res,
2593 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2594 		    txr, &txr->tag)) != 0) {
2595 			device_printf(dev, "Failed to register TX handler");
2596 			return (error);
2597 		}
2598 #if __FreeBSD_version >= 800504
2599 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2600 #endif
2601 		txr->msix = vector++; /* Increment vector for next pass */
2602 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2603 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2604 		    taskqueue_thread_enqueue, &txr->tq);
2605 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2606 		    device_get_nameunit(adapter->dev));
2607 		/*
2608 		** Set the bit to enable interrupt
2609 		** in E1000_IMS -- bits 22 and 23
2610 		** are for TX0 and TX1, note this has
2611 		** NOTHING to do with the MSIX vector
2612 		*/
2613 		txr->ims = 1 << (22 + i);
2614 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
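		/*
		 * Likewise for TX (editor's illustration): for queue 0, with
		 * the TX vector allocated second (txr->msix == 1), txr->ims
		 * becomes 1 << 22 == 0x00400000 and (8 | 1) << 8 places
		 * vector 1, with the assumed enable bit, in the TX0 field.
		 */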
2615 	}
2616 
2617 	/* Link interrupt */
2618 	++rid;
2619 	adapter->res = bus_alloc_resource_any(dev,
2620 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2621 	if (!adapter->res) {
2622 		device_printf(dev,"Unable to allocate "
2623 		    "bus resource: Link interrupt [%d]\n", rid);
2624 		return (ENXIO);
2625 	}
2626 	/* Set the link handler function */
2627 	error = bus_setup_intr(dev, adapter->res,
2628 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2629 	    em_msix_link, adapter, &adapter->tag);
2630 	if (error) {
2631 		adapter->res = NULL;
2632 		device_printf(dev, "Failed to register LINK handler");
2633 		return (error);
2634 	}
2635 #if __FreeBSD_version >= 800504
2636 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2637 #endif
2638 	adapter->linkvec = vector;
2639 	adapter->ivars |=  (8 | vector) << 16;
2640 	adapter->ivars |= 0x80000000;
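	/*
	 * Editor's note: with the three vectors used here the link
	 * interrupt lands on vector 2, so (8 | 2) << 16 == 0x000A0000
	 * fills the link/other IVAR field; 0x80000000 is assumed to be
	 * an additional 82574 IVAR control bit set by design here.
	 */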
2641 
2642 	return (0);
2643 }
2644 
2645 
2646 static void
2647 em_free_pci_resources(struct adapter *adapter)
2648 {
2649 	device_t	dev = adapter->dev;
2650 	struct tx_ring	*txr;
2651 	struct rx_ring	*rxr;
2652 	int		rid;
2653 
2654 
2655 	/*
2656 	** Release all the queue interrupt resources:
2657 	*/
2658 	for (int i = 0; i < adapter->num_queues; i++) {
2659 		txr = &adapter->tx_rings[i];
2660 		rxr = &adapter->rx_rings[i];
2661 		/* an early abort? */
2662 		if ((txr == NULL) || (rxr == NULL))
2663 			break;
2664 		rid = txr->msix + 1;
2665 		if (txr->tag != NULL) {
2666 			bus_teardown_intr(dev, txr->res, txr->tag);
2667 			txr->tag = NULL;
2668 		}
2669 		if (txr->res != NULL)
2670 			bus_release_resource(dev, SYS_RES_IRQ,
2671 			    rid, txr->res);
2672 		rid = rxr->msix + 1;
2673 		if (rxr->tag != NULL) {
2674 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2675 			rxr->tag = NULL;
2676 		}
2677 		if (rxr->res != NULL)
2678 			bus_release_resource(dev, SYS_RES_IRQ,
2679 			    rid, rxr->res);
2680 	}
2681 
2682 	if (adapter->linkvec) /* we are doing MSIX */
2683 		rid = adapter->linkvec + 1;
2684 	else
2685 		rid = (adapter->msix != 0) ? 1 : 0;
2686 
2687 	if (adapter->tag != NULL) {
2688 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2689 		adapter->tag = NULL;
2690 	}
2691 
2692 	if (adapter->res != NULL)
2693 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2694 
2695 
2696 	if (adapter->msix)
2697 		pci_release_msi(dev);
2698 
2699 	if (adapter->msix_mem != NULL)
2700 		bus_release_resource(dev, SYS_RES_MEMORY,
2701 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2702 
2703 	if (adapter->memory != NULL)
2704 		bus_release_resource(dev, SYS_RES_MEMORY,
2705 		    PCIR_BAR(0), adapter->memory);
2706 
2707 	if (adapter->flash != NULL)
2708 		bus_release_resource(dev, SYS_RES_MEMORY,
2709 		    EM_FLASH, adapter->flash);
2710 }
2711 
2712 /*
2713  * Setup MSI or MSI/X
2714  */
2715 static int
2716 em_setup_msix(struct adapter *adapter)
2717 {
2718 	device_t dev = adapter->dev;
2719 	int val = 0;
2720 
2721 	/*
2722 	** Setup MSI/X for Hartwell: tests have shown
2723 	** use of two queues to be unstable, and to
2724 	** provide no great gain anyway, so we simply
2725 	** separate the interrupts and use a single queue.
2726 	*/
2727 	if ((adapter->hw.mac.type == e1000_82574) &&
2728 	    (em_enable_msix == TRUE)) {
2729 		/* Map the MSIX BAR */
2730 		int rid = PCIR_BAR(EM_MSIX_BAR);
2731 		adapter->msix_mem = bus_alloc_resource_any(dev,
2732 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2733 		if (!adapter->msix_mem) {
2734 			/* May not be enabled */
2735 			device_printf(adapter->dev,
2736 			    "Unable to map MSIX table\n");
2737 			goto msi;
2738 		}
2739 		val = pci_msix_count(dev);
2740 		/* We only need 3 vectors */
2741 		if (val > 3)
2742 			val = 3;
2743 		if (val != 3) {
2744 			bus_release_resource(dev, SYS_RES_MEMORY,
2745 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2746 			adapter->msix_mem = NULL;
2747 			device_printf(adapter->dev,
2748 			    "MSIX: incorrect vectors, using MSI\n");
2749 			goto msi;
2750 		}
2751 
2752 		if (pci_alloc_msix(dev, &val) == 0) {
2753 			device_printf(adapter->dev,
2754 			    "Using MSIX interrupts "
2755 			    "with %d vectors\n", val);
2756 		}
2757 
2758 		return (val);
2759 	}
2760 msi:
2761 	val = pci_msi_count(dev);
2762 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2763 		adapter->msix = 1;
2764 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2765 		return (val);
2766 	}
2767 	/* Should only happen due to manual configuration */
2768 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2769 	return (0);
2770 }
2771 
2772 
2773 /*********************************************************************
2774  *
2775  *  Initialize the hardware to a configuration
2776  *  as specified by the adapter structure.
2777  *
2778  **********************************************************************/
2779 static void
2780 em_reset(struct adapter *adapter)
2781 {
2782 	device_t	dev = adapter->dev;
2783 	struct ifnet	*ifp = adapter->ifp;
2784 	struct e1000_hw	*hw = &adapter->hw;
2785 	u16		rx_buffer_size;
2786 	u32		pba;
2787 
2788 	INIT_DEBUGOUT("em_reset: begin");
2789 
2790 	/* Set up smart power down as default off on newer adapters. */
2791 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2792 	    hw->mac.type == e1000_82572)) {
2793 		u16 phy_tmp = 0;
2794 
2795 		/* Speed up time to link by disabling smart power down. */
2796 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2797 		phy_tmp &= ~IGP02E1000_PM_SPD;
2798 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2799 	}
2800 
2801 	/*
2802 	 * Packet Buffer Allocation (PBA)
2803 	 * Writing PBA sets the receive portion of the buffer;
2804 	 * the remainder is used for the transmit buffer.
2805 	 */
2806 	switch (hw->mac.type) {
2807 	/* Total Packet Buffer on these is 48K */
2808 	case e1000_82571:
2809 	case e1000_82572:
2810 	case e1000_80003es2lan:
2811 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2812 		break;
2813 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2814 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2815 		break;
2816 	case e1000_82574:
2817 	case e1000_82583:
2818 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2819 		break;
2820 	case e1000_ich8lan:
2821 		pba = E1000_PBA_8K;
2822 		break;
2823 	case e1000_ich9lan:
2824 	case e1000_ich10lan:
2825 		/* Boost Receive side for jumbo frames */
2826 		if (adapter->hw.mac.max_frame_size > 4096)
2827 			pba = E1000_PBA_14K;
2828 		else
2829 			pba = E1000_PBA_10K;
2830 		break;
2831 	case e1000_pchlan:
2832 	case e1000_pch2lan:
2833 	case e1000_pch_lpt:
2834 		pba = E1000_PBA_26K;
2835 		break;
2836 	default:
2837 		if (adapter->hw.mac.max_frame_size > 8192)
2838 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2839 		else
2840 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2841 	}
2842 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2843 
2844 	/*
2845 	 * These parameters control the automatic generation (Tx) and
2846 	 * response (Rx) to Ethernet PAUSE frames.
2847 	 * - High water mark should allow for at least two frames to be
2848 	 *   received after sending an XOFF.
2849 	 * - Low water mark works best when it is very near the high water mark.
2850 	 *   This allows the receiver to restart by sending XON when it has
2851 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2852 	 *   restart after one full frame is pulled from the buffer. There
2853 	 *   could be several smaller frames in the buffer and if so they will
2854 	 *   not trigger the XON until their total number reduces the buffer
2855 	 *   by 1500.
2856 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2857 	 */
2858 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2859 	hw->fc.high_water = rx_buffer_size -
2860 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2861 	hw->fc.low_water = hw->fc.high_water - 1500;
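	/*
	 * Worked example (editor's illustration): if PBA reads back as
	 * 32 (KB), rx_buffer_size = 32 << 10 = 32768 bytes. With a
	 * standard 1522-byte max frame, roundup2(1522, 1024) = 2048, so
	 * high_water = 32768 - 2048 = 30720 and low_water = 29220.
	 */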
2862 
2863 	if (adapter->fc) /* locally set flow control value? */
2864 		hw->fc.requested_mode = adapter->fc;
2865 	else
2866 		hw->fc.requested_mode = e1000_fc_full;
2867 
2868 	if (hw->mac.type == e1000_80003es2lan)
2869 		hw->fc.pause_time = 0xFFFF;
2870 	else
2871 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2872 
2873 	hw->fc.send_xon = TRUE;
2874 
2875 	/* Device specific overrides/settings */
2876 	switch (hw->mac.type) {
2877 	case e1000_pchlan:
2878 		/* Workaround: no TX flow ctrl for PCH */
2879 		hw->fc.requested_mode = e1000_fc_rx_pause;
2880 		hw->fc.pause_time = 0xFFFF; /* override */
2881 		if (ifp->if_mtu > ETHERMTU) {
2882 			hw->fc.high_water = 0x3500;
2883 			hw->fc.low_water = 0x1500;
2884 		} else {
2885 			hw->fc.high_water = 0x5000;
2886 			hw->fc.low_water = 0x3000;
2887 		}
2888 		hw->fc.refresh_time = 0x1000;
2889 		break;
2890 	case e1000_pch2lan:
2891 	case e1000_pch_lpt:
2892 		hw->fc.high_water = 0x5C20;
2893 		hw->fc.low_water = 0x5048;
2894 		hw->fc.pause_time = 0x0650;
2895 		hw->fc.refresh_time = 0x0400;
2896 		/* Jumbos need adjusted PBA */
2897 		if (ifp->if_mtu > ETHERMTU)
2898 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2899 		else
2900 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2901 		break;
2902 	case e1000_ich9lan:
2903 	case e1000_ich10lan:
2904 		if (ifp->if_mtu > ETHERMTU) {
2905 			hw->fc.high_water = 0x2800;
2906 			hw->fc.low_water = hw->fc.high_water - 8;
2907 			break;
2908 		}
2909 		/* else fall thru */
2910 	default:
2911 		if (hw->mac.type == e1000_80003es2lan)
2912 			hw->fc.pause_time = 0xFFFF;
2913 		break;
2914 	}
2915 
2916 	/* Issue a global reset */
2917 	e1000_reset_hw(hw);
2918 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2919 	em_disable_aspm(adapter);
2920 	/* and a re-init */
2921 	if (e1000_init_hw(hw) < 0) {
2922 		device_printf(dev, "Hardware Initialization Failed\n");
2923 		return;
2924 	}
2925 
2926 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2927 	e1000_get_phy_info(hw);
2928 	e1000_check_for_link(hw);
2929 	return;
2930 }
2931 
2932 /*********************************************************************
2933  *
2934  *  Setup networking device structure and register an interface.
2935  *
2936  **********************************************************************/
2937 static int
2938 em_setup_interface(device_t dev, struct adapter *adapter)
2939 {
2940 	struct ifnet   *ifp;
2941 
2942 	INIT_DEBUGOUT("em_setup_interface: begin");
2943 
2944 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2945 	if (ifp == NULL) {
2946 		device_printf(dev, "cannot allocate ifnet structure\n");
2947 		return (-1);
2948 	}
2949 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2950 	ifp->if_init =  em_init;
2951 	ifp->if_softc = adapter;
2952 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2953 	ifp->if_ioctl = em_ioctl;
2954 #ifdef EM_MULTIQUEUE
2955 	/* Multiqueue stack interface */
2956 	ifp->if_transmit = em_mq_start;
2957 	ifp->if_qflush = em_qflush;
2958 #else
2959 	ifp->if_start = em_start;
2960 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2961 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2962 	IFQ_SET_READY(&ifp->if_snd);
2963 #endif
2964 
2965 	ether_ifattach(ifp, adapter->hw.mac.addr);
2966 
2967 	ifp->if_capabilities = ifp->if_capenable = 0;
2968 
2969 
2970 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2971 	ifp->if_capabilities |= IFCAP_TSO4;
2972 	/*
2973 	 * Tell the upper layer(s) we
2974 	 * support full VLAN capability
2975 	 */
2976 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2977 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2978 			     |  IFCAP_VLAN_HWTSO
2979 			     |  IFCAP_VLAN_MTU;
2980 	ifp->if_capenable = ifp->if_capabilities;
2981 
2982 	/*
2983 	** Don't turn this on by default: if vlans are
2984 	** created on another pseudo device (e.g. lagg),
2985 	** vlan events are not passed through, breaking
2986 	** operation, though with HW FILTER off it works.
2987 	** If you use vlans directly on the em driver you
2988 	** can enable this and get full hardware tag filtering.
2989 	*/
2990 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2991 
2992 #ifdef DEVICE_POLLING
2993 	ifp->if_capabilities |= IFCAP_POLLING;
2994 #endif
2995 
2996 	/* Enable only WOL MAGIC by default */
2997 	if (adapter->wol) {
2998 		ifp->if_capabilities |= IFCAP_WOL;
2999 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3000 	}
3001 
3002 	/*
3003 	 * Specify the media types supported by this adapter and register
3004 	 * callbacks to update media and link information
3005 	 */
3006 	ifmedia_init(&adapter->media, IFM_IMASK,
3007 	    em_media_change, em_media_status);
3008 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3009 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3010 		u_char fiber_type = IFM_1000_SX;	/* default type */
3011 
3012 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3013 			    0, NULL);
3014 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3015 	} else {
3016 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3017 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3018 			    0, NULL);
3019 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3020 			    0, NULL);
3021 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3022 			    0, NULL);
3023 		if (adapter->hw.phy.type != e1000_phy_ife) {
3024 			ifmedia_add(&adapter->media,
3025 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3026 			ifmedia_add(&adapter->media,
3027 				IFM_ETHER | IFM_1000_T, 0, NULL);
3028 		}
3029 	}
3030 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3031 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3032 	return (0);
3033 }
3034 
3035 
3036 /*
3037  * Manage DMA'able memory.
3038  */
3039 static void
3040 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3041 {
3042 	if (error)
3043 		return;
3044 	*(bus_addr_t *) arg = segs[0].ds_addr;
3045 }
3046 
3047 static int
3048 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3049         struct em_dma_alloc *dma, int mapflags)
3050 {
3051 	int error;
3052 
3053 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3054 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3055 				BUS_SPACE_MAXADDR,	/* lowaddr */
3056 				BUS_SPACE_MAXADDR,	/* highaddr */
3057 				NULL, NULL,		/* filter, filterarg */
3058 				size,			/* maxsize */
3059 				1,			/* nsegments */
3060 				size,			/* maxsegsize */
3061 				0,			/* flags */
3062 				NULL,			/* lockfunc */
3063 				NULL,			/* lockarg */
3064 				&dma->dma_tag);
3065 	if (error) {
3066 		device_printf(adapter->dev,
3067 		    "%s: bus_dma_tag_create failed: %d\n",
3068 		    __func__, error);
3069 		goto fail_0;
3070 	}
3071 
3072 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3073 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3074 	if (error) {
3075 		device_printf(adapter->dev,
3076 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3077 		    __func__, (uintmax_t)size, error);
3078 		goto fail_2;
3079 	}
3080 
3081 	dma->dma_paddr = 0;
3082 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3083 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3084 	if (error || dma->dma_paddr == 0) {
3085 		device_printf(adapter->dev,
3086 		    "%s: bus_dmamap_load failed: %d\n",
3087 		    __func__, error);
3088 		goto fail_3;
3089 	}
3090 
3091 	return (0);
3092 
3093 fail_3:
3094 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3095 fail_2:
3096 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3097 	bus_dma_tag_destroy(dma->dma_tag);
3098 fail_0:
3099 	dma->dma_map = NULL;
3100 	dma->dma_tag = NULL;
3101 
3102 	return (error);
3103 }
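/*
 * Illustrative usage of the pair above (mirroring em_allocate_queues
 * below): a descriptor ring is obtained with
 *	em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT);
 * and released again with em_dma_free(adapter, &txr->txdma).
 */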
3104 
3105 static void
3106 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3107 {
3108 	if (dma->dma_tag == NULL)
3109 		return;
3110 	if (dma->dma_map != NULL) {
3111 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3112 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3113 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3114 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3115 		dma->dma_map = NULL;
3116 	}
3117 	bus_dma_tag_destroy(dma->dma_tag);
3118 	dma->dma_tag = NULL;
3119 }
3120 
3121 
3122 /*********************************************************************
3123  *
3124  *  Allocate memory for the transmit and receive rings, and then
3125  *  the descriptors associated with each, called only once at attach.
3126  *
3127  **********************************************************************/
3128 static int
3129 em_allocate_queues(struct adapter *adapter)
3130 {
3131 	device_t		dev = adapter->dev;
3132 	struct tx_ring		*txr = NULL;
3133 	struct rx_ring		*rxr = NULL;
3134 	int rsize, tsize, error = E1000_SUCCESS;
3135 	int txconf = 0, rxconf = 0;
3136 
3137 
3138 	/* Allocate the TX ring struct memory */
3139 	if (!(adapter->tx_rings =
3140 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3141 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3142 		device_printf(dev, "Unable to allocate TX ring memory\n");
3143 		error = ENOMEM;
3144 		goto fail;
3145 	}
3146 
3147 	/* Now allocate the RX */
3148 	if (!(adapter->rx_rings =
3149 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3150 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3151 		device_printf(dev, "Unable to allocate RX ring memory\n");
3152 		error = ENOMEM;
3153 		goto rx_fail;
3154 	}
3155 
3156 	tsize = roundup2(adapter->num_tx_desc *
3157 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
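	/*
	 * Illustrative arithmetic (editor's example): each legacy TX
	 * descriptor is 16 bytes, so the assumed default of 1024
	 * descriptors needs 16384 bytes, which roundup2() leaves
	 * unchanged since it is already a multiple of EM_DBA_ALIGN.
	 */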
3158 	/*
3159 	 * Now set up the TX queues, txconf is needed to handle the
3160 	 * possibility that things fail midcourse and we need to
3161 	 * undo memory gracefully
3162 	 */
3163 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3164 		/* Set up some basics */
3165 		txr = &adapter->tx_rings[i];
3166 		txr->adapter = adapter;
3167 		txr->me = i;
3168 
3169 		/* Initialize the TX lock */
3170 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3171 		    device_get_nameunit(dev), txr->me);
3172 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3173 
3174 		if (em_dma_malloc(adapter, tsize,
3175 			&txr->txdma, BUS_DMA_NOWAIT)) {
3176 			device_printf(dev,
3177 			    "Unable to allocate TX Descriptor memory\n");
3178 			error = ENOMEM;
3179 			goto err_tx_desc;
3180 		}
3181 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3182 		bzero((void *)txr->tx_base, tsize);
3183 
3184 		if (em_allocate_transmit_buffers(txr)) {
3185 			device_printf(dev,
3186 			    "Critical Failure setting up transmit buffers\n");
3187 			error = ENOMEM;
3188 			goto err_tx_desc;
3189 		}
3190 #if __FreeBSD_version >= 800000
3191 		/* Allocate a buf ring */
3192 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3193 		    M_WAITOK, &txr->tx_mtx);
3194 #endif
3195 	}
3196 
3197 	/*
3198 	 * Next the RX queues...
3199 	 */
3200 	rsize = roundup2(adapter->num_rx_desc *
3201 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3202 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3203 		rxr = &adapter->rx_rings[i];
3204 		rxr->adapter = adapter;
3205 		rxr->me = i;
3206 
3207 		/* Initialize the RX lock */
3208 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3209 		    device_get_nameunit(dev), rxr->me);
3210 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3211 
3212 		if (em_dma_malloc(adapter, rsize,
3213 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3214 			device_printf(dev,
3215 			    "Unable to allocate RX descriptor memory\n");
3216 			error = ENOMEM;
3217 			goto err_rx_desc;
3218 		}
3219 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3220 		bzero((void *)rxr->rx_base, rsize);
3221 
3222 		/* Allocate receive buffers for the ring */
3223 		if (em_allocate_receive_buffers(rxr)) {
3224 			device_printf(dev,
3225 			    "Critical Failure setting up receive buffers\n");
3226 			error = ENOMEM;
3227 			goto err_rx_desc;
3228 		}
3229 	}
3230 
3231 	return (0);
3232 
3233 err_rx_desc:
3234 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3235 		em_dma_free(adapter, &rxr->rxdma);
3236 err_tx_desc:
3237 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3238 		em_dma_free(adapter, &txr->txdma);
3239 	free(adapter->rx_rings, M_DEVBUF);
3240 rx_fail:
3241 #if __FreeBSD_version >= 800000
3242 	if (txr != NULL) buf_ring_free(txr->br, M_DEVBUF);
3243 #endif
3244 	free(adapter->tx_rings, M_DEVBUF);
3245 fail:
3246 	return (error);
3247 }
3248 
3249 
3250 /*********************************************************************
3251  *
3252  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3253  *  the information needed to transmit a packet on the wire. This is
3254  *  called only once at attach, setup is done every reset.
3255  *
3256  **********************************************************************/
3257 static int
3258 em_allocate_transmit_buffers(struct tx_ring *txr)
3259 {
3260 	struct adapter *adapter = txr->adapter;
3261 	device_t dev = adapter->dev;
3262 	struct em_buffer *txbuf;
3263 	int error, i;
3264 
3265 	/*
3266 	 * Setup DMA descriptor areas.
3267 	 */
3268 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3269 			       1, 0,			/* alignment, bounds */
3270 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3271 			       BUS_SPACE_MAXADDR,	/* highaddr */
3272 			       NULL, NULL,		/* filter, filterarg */
3273 			       EM_TSO_SIZE,		/* maxsize */
3274 			       EM_MAX_SCATTER,		/* nsegments */
3275 			       PAGE_SIZE,		/* maxsegsize */
3276 			       0,			/* flags */
3277 			       NULL,			/* lockfunc */
3278 			       NULL,			/* lockfuncarg */
3279 			       &txr->txtag))) {
3280 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3281 		goto fail;
3282 	}
3283 
3284 	if (!(txr->tx_buffers =
3285 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3286 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3287 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3288 		error = ENOMEM;
3289 		goto fail;
3290 	}
3291 
3292 	/* Create the descriptor buffer dma maps */
3293 	txbuf = txr->tx_buffers;
3294 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3295 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3296 		if (error != 0) {
3297 			device_printf(dev, "Unable to create TX DMA map\n");
3298 			goto fail;
3299 		}
3300 	}
3301 
3302 	return (0);
3303 fail:
3304 	/* We free all, it handles case where we are in the middle */
3305 	em_free_transmit_structures(adapter);
3306 	return (error);
3307 }
3308 
3309 /*********************************************************************
3310  *
3311  *  Initialize a transmit ring.
3312  *
3313  **********************************************************************/
3314 static void
3315 em_setup_transmit_ring(struct tx_ring *txr)
3316 {
3317 	struct adapter *adapter = txr->adapter;
3318 	struct em_buffer *txbuf;
3319 	int i;
3320 #ifdef DEV_NETMAP
3321 	struct netmap_adapter *na = NA(adapter->ifp);
3322 	struct netmap_slot *slot;
3323 #endif /* DEV_NETMAP */
3324 
3325 	/* Clear the old descriptor contents */
3326 	EM_TX_LOCK(txr);
3327 #ifdef DEV_NETMAP
3328 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3329 #endif /* DEV_NETMAP */
3330 
3331 	bzero((void *)txr->tx_base,
3332 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3333 	/* Reset indices */
3334 	txr->next_avail_desc = 0;
3335 	txr->next_to_clean = 0;
3336 
3337 	/* Free any existing tx buffers. */
3338 	txbuf = txr->tx_buffers;
3339 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3340 		if (txbuf->m_head != NULL) {
3341 			bus_dmamap_sync(txr->txtag, txbuf->map,
3342 			    BUS_DMASYNC_POSTWRITE);
3343 			bus_dmamap_unload(txr->txtag, txbuf->map);
3344 			m_freem(txbuf->m_head);
3345 			txbuf->m_head = NULL;
3346 		}
3347 #ifdef DEV_NETMAP
3348 		if (slot) {
3349 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3350 			uint64_t paddr;
3351 			void *addr;
3352 
3353 			addr = PNMB(slot + si, &paddr);
3354 			txr->tx_base[i].buffer_addr = htole64(paddr);
3355 			/* reload the map for netmap mode */
3356 			netmap_load_map(txr->txtag, txbuf->map, addr);
3357 		}
3358 #endif /* DEV_NETMAP */
3359 
3360 		/* clear the watch index */
3361 		txbuf->next_eop = -1;
3362 	}
3363 
3364 	/* Set number of descriptors available */
3365 	txr->tx_avail = adapter->num_tx_desc;
3366 	txr->queue_status = EM_QUEUE_IDLE;
3367 
3368 	/* Clear checksum offload context. */
3369 	txr->last_hw_offload = 0;
3370 	txr->last_hw_ipcss = 0;
3371 	txr->last_hw_ipcso = 0;
3372 	txr->last_hw_tucss = 0;
3373 	txr->last_hw_tucso = 0;
3374 
3375 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3376 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3377 	EM_TX_UNLOCK(txr);
3378 }
3379 
3380 /*********************************************************************
3381  *
3382  *  Initialize all transmit rings.
3383  *
3384  **********************************************************************/
3385 static void
3386 em_setup_transmit_structures(struct adapter *adapter)
3387 {
3388 	struct tx_ring *txr = adapter->tx_rings;
3389 
3390 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3391 		em_setup_transmit_ring(txr);
3392 
3393 	return;
3394 }
3395 
3396 /*********************************************************************
3397  *
3398  *  Enable transmit unit.
3399  *
3400  **********************************************************************/
3401 static void
3402 em_initialize_transmit_unit(struct adapter *adapter)
3403 {
3404 	struct tx_ring	*txr = adapter->tx_rings;
3405 	struct e1000_hw	*hw = &adapter->hw;
3406 	u32	tctl, tarc, tipg = 0;
3407 
3408 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3409 
3410 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3411 		u64 bus_addr = txr->txdma.dma_paddr;
3412 		/* Base and Len of TX Ring */
3413 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3414 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3415 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3416 	    	    (u32)(bus_addr >> 32));
3417 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3418 	    	    (u32)bus_addr);
3419 		/* Init the HEAD/TAIL indices */
3420 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3421 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3422 
3423 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3424 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3425 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3426 
3427 		txr->queue_status = EM_QUEUE_IDLE;
3428 	}
3429 
3430 	/* Set the default values for the Tx Inter Packet Gap timer */
3431 	switch (adapter->hw.mac.type) {
3432 	case e1000_80003es2lan:
3433 		tipg = DEFAULT_82543_TIPG_IPGR1;
3434 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3435 		    E1000_TIPG_IPGR2_SHIFT;
3436 		break;
3437 	default:
3438 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3439 		    (adapter->hw.phy.media_type ==
3440 		    e1000_media_type_internal_serdes))
3441 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3442 		else
3443 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3444 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3445 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3446 	}
3447 
3448 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3449 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3450 
3451 	if (adapter->hw.mac.type >= e1000_82540)
3452 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3453 		    adapter->tx_abs_int_delay.value);
3454 
3455 	if ((adapter->hw.mac.type == e1000_82571) ||
3456 	    (adapter->hw.mac.type == e1000_82572)) {
3457 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3458 		tarc |= SPEED_MODE_BIT;
3459 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3460 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3461 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3462 		tarc |= 1;
3463 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3464 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3465 		tarc |= 1;
3466 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3467 	}
3468 
3469 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3470 	if (adapter->tx_int_delay.value > 0)
3471 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3472 
3473 	/* Program the Transmit Control Register */
3474 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3475 	tctl &= ~E1000_TCTL_CT;
3476 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3477 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3478 
3479 	if (adapter->hw.mac.type >= e1000_82571)
3480 		tctl |= E1000_TCTL_MULR;
3481 
3482 	/* This write will effectively turn on the transmit unit. */
3483 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3484 
3485 }
3486 
3487 
3488 /*********************************************************************
3489  *
3490  *  Free all transmit rings.
3491  *
3492  **********************************************************************/
3493 static void
3494 em_free_transmit_structures(struct adapter *adapter)
3495 {
3496 	struct tx_ring *txr = adapter->tx_rings;
3497 
3498 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3499 		EM_TX_LOCK(txr);
3500 		em_free_transmit_buffers(txr);
3501 		em_dma_free(adapter, &txr->txdma);
3502 		EM_TX_UNLOCK(txr);
3503 		EM_TX_LOCK_DESTROY(txr);
3504 	}
3505 
3506 	free(adapter->tx_rings, M_DEVBUF);
3507 }
3508 
3509 /*********************************************************************
3510  *
3511  *  Free transmit ring related data structures.
3512  *
3513  **********************************************************************/
3514 static void
3515 em_free_transmit_buffers(struct tx_ring *txr)
3516 {
3517 	struct adapter		*adapter = txr->adapter;
3518 	struct em_buffer	*txbuf;
3519 
3520 	INIT_DEBUGOUT("free_transmit_ring: begin");
3521 
3522 	if (txr->tx_buffers == NULL)
3523 		return;
3524 
3525 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3526 		txbuf = &txr->tx_buffers[i];
3527 		if (txbuf->m_head != NULL) {
3528 			bus_dmamap_sync(txr->txtag, txbuf->map,
3529 			    BUS_DMASYNC_POSTWRITE);
3530 			bus_dmamap_unload(txr->txtag,
3531 			    txbuf->map);
3532 			m_freem(txbuf->m_head);
3533 			txbuf->m_head = NULL;
3534 			if (txbuf->map != NULL) {
3535 				bus_dmamap_destroy(txr->txtag,
3536 				    txbuf->map);
3537 				txbuf->map = NULL;
3538 			}
3539 		} else if (txbuf->map != NULL) {
3540 			bus_dmamap_unload(txr->txtag,
3541 			    txbuf->map);
3542 			bus_dmamap_destroy(txr->txtag,
3543 			    txbuf->map);
3544 			txbuf->map = NULL;
3545 		}
3546 	}
3547 #if __FreeBSD_version >= 800000
3548 	if (txr->br != NULL)
3549 		buf_ring_free(txr->br, M_DEVBUF);
3550 #endif
3551 	if (txr->tx_buffers != NULL) {
3552 		free(txr->tx_buffers, M_DEVBUF);
3553 		txr->tx_buffers = NULL;
3554 	}
3555 	if (txr->txtag != NULL) {
3556 		bus_dma_tag_destroy(txr->txtag);
3557 		txr->txtag = NULL;
3558 	}
3559 	return;
3560 }
3561 
3562 
3563 /*********************************************************************
3564  *  The offload context is protocol specific (TCP/UDP) and thus
3565  *  only needs to be set when the protocol changes. Even so, a
3566  *  context change can be a performance detriment, and offload
3567  *  might be better left disabled. The reason lies in the way
3568  *  the controller supports pipelined requests from the Tx data
3569  *  DMA. Up to four requests can be pipelined, and they may
3570  *  belong to the same packet or to multiple packets. However,
3571  *  all requests for one packet are issued before any request is
3572  *  issued for a subsequent packet, and if a request for the
3573  *  next packet requires a context change, that request stalls
3574  *  until the previous request completes. Setting up a new
3575  *  context thus effectively disables pipelined Tx data DMA,
3576  *  which in turn greatly slows down the sending of small frames.
3577  *
3578  **********************************************************************/
3579 static void
3580 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3581     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3582 {
3583 	struct adapter			*adapter = txr->adapter;
3584 	struct e1000_context_desc	*TXD = NULL;
3585 	struct em_buffer		*tx_buffer;
3586 	int				cur, hdr_len;
3587 	u32				cmd = 0;
3588 	u16				offload = 0;
3589 	u8				ipcso, ipcss, tucso, tucss;
3590 
3591 	ipcss = ipcso = tucss = tucso = 0;
3592 	hdr_len = ip_off + (ip->ip_hl << 2);
3593 	cur = txr->next_avail_desc;
3594 
3595 	/* Setup of IP header checksum. */
3596 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3597 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3598 		offload |= CSUM_IP;
3599 		ipcss = ip_off;
3600 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3601 		/*
3602 		 * Start offset for header checksum calculation.
3603 		 * End offset for header checksum calculation.
3604 		 * Offset of place to put the checksum.
3605 		 */
3606 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3607 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3608 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3609 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3610 		cmd |= E1000_TXD_CMD_IP;
3611 	}
3612 
3613 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3614  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3615  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3616  		offload |= CSUM_TCP;
3617  		tucss = hdr_len;
3618  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3619  		/*
3620  		 * Setting up a new checksum offload context for every
3621  		 * frame costs the hardware a lot of processing time,
3622  		 * and it hurts performance for small frames, so reuse
3623  		 * the previously configured context whenever the
3624  		 * driver can.
3625  		 */
3626  		if (txr->last_hw_offload == offload) {
3627  			if (offload & CSUM_IP) {
3628  				if (txr->last_hw_ipcss == ipcss &&
3629  				    txr->last_hw_ipcso == ipcso &&
3630  				    txr->last_hw_tucss == tucss &&
3631  				    txr->last_hw_tucso == tucso)
3632  					return;
3633  			} else {
3634  				if (txr->last_hw_tucss == tucss &&
3635  				    txr->last_hw_tucso == tucso)
3636  					return;
3637  			}
3638   		}
3639  		txr->last_hw_offload = offload;
3640  		txr->last_hw_tucss = tucss;
3641  		txr->last_hw_tucso = tucso;
3642  		/*
3643  		 * Start offset for payload checksum calculation.
3644  		 * End offset for payload checksum calculation.
3645  		 * Offset of place to put the checksum.
3646  		 */
3647 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3648  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3649  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3650  		TXD->upper_setup.tcp_fields.tucso = tucso;
3651  		cmd |= E1000_TXD_CMD_TCP;
3652  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3653  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3654  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3655  		tucss = hdr_len;
3656  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3657  		/*
3658  		 * Setting up a new checksum offload context for every
3659  		 * frame costs the hardware a lot of processing time,
3660  		 * and it hurts performance for small frames, so reuse
3661  		 * the previously configured context whenever the
3662  		 * driver can.
3663  		 */
3664  		if (txr->last_hw_offload == offload) {
3665  			if (offload & CSUM_IP) {
3666  				if (txr->last_hw_ipcss == ipcss &&
3667  				    txr->last_hw_ipcso == ipcso &&
3668  				    txr->last_hw_tucss == tucss &&
3669  				    txr->last_hw_tucso == tucso)
3670  					return;
3671  			} else {
3672  				if (txr->last_hw_tucss == tucss &&
3673  				    txr->last_hw_tucso == tucso)
3674  					return;
3675  			}
3676  		}
3677  		txr->last_hw_offload = offload;
3678  		txr->last_hw_tucss = tucss;
3679  		txr->last_hw_tucso = tucso;
3680  		/*
3681  		 * Start offset for payload checksum calculation.
3682  		 * End offset for payload checksum calculation.
3683  		 * Offset of place to put the checksum.
3684  		 */
3685 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3686  		TXD->upper_setup.tcp_fields.tucss = tucss;
3687  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3688  		TXD->upper_setup.tcp_fields.tucso = tucso;
3689   	}
3690 
3691  	if (offload & CSUM_IP) {
3692  		txr->last_hw_ipcss = ipcss;
3693  		txr->last_hw_ipcso = ipcso;
3694   	}
3695 
3696 	TXD->tcp_seg_setup.data = htole32(0);
3697 	TXD->cmd_and_length =
3698 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3699 	tx_buffer = &txr->tx_buffers[cur];
3700 	tx_buffer->m_head = NULL;
3701 	tx_buffer->next_eop = -1;
3702 
3703 	if (++cur == adapter->num_tx_desc)
3704 		cur = 0;
3705 
3706 	txr->tx_avail--;
3707 	txr->next_avail_desc = cur;
3708 }
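
/*
 * Worked example (illustrative, not part of the original driver): for
 * an untagged IPv4/TCP frame, ip_off = ETHER_HDR_LEN = 14, and a
 * minimal IP header gives ip_hl = 5, so hdr_len = 14 + 20 = 34 and the
 * context descriptor above is filled in as:
 *
 *	ipcss = 14	IP checksum coverage starts at the IP header
 *	ipcso = 24	14 + offsetof(struct ip, ip_sum), i.e. 14 + 10
 *	tucss = 34	TCP checksum coverage starts at the TCP header
 *	tucso = 50	34 + offsetof(struct tcphdr, th_sum), i.e. 34 + 16
 *
 * Note that a context descriptor is consumed from the ring just like a
 * data descriptor, which is why tx_avail is decremented above.
 */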
3709 
3710 
3711 /**********************************************************************
3712  *
3713  *  Setup work for hardware segmentation offload (TSO)
3714  *
3715  **********************************************************************/
3716 static void
3717 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3718     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3719 {
3720 	struct adapter			*adapter = txr->adapter;
3721 	struct e1000_context_desc	*TXD;
3722 	struct em_buffer		*tx_buffer;
3723 	int cur, hdr_len;
3724 
3725 	/*
3726 	 * In theory we could reuse the same TSO context if and only
3727 	 * if the frame is the same type (IP/TCP) and has the same MSS.
3728 	 * However, checking whether a frame has the same IP/TCP header
3729 	 * structure is hard, so just ignore that and always establish
3730 	 * a new TSO context.
3731 	 */
3732 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3733 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3734 		      E1000_TXD_DTYP_D |	/* Data descr type */
3735 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3736 
3737 	/* IP and/or TCP header checksum calculation and insertion. */
3738 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3739 
3740 	cur = txr->next_avail_desc;
3741 	tx_buffer = &txr->tx_buffers[cur];
3742 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3743 
3744 	/*
3745 	 * Start offset for header checksum calculation.
3746 	 * End offset for header checksum calculation.
3747 	 * Offset of place to put the checksum.
3748 	 */
3749 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3750 	TXD->lower_setup.ip_fields.ipcse =
3751 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3752 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3753 	/*
3754 	 * Start offset for payload checksum calculation.
3755 	 * End offset for payload checksum calculation.
3756 	 * Offset of place to put the checksum.
3757 	 */
3758 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3759 	TXD->upper_setup.tcp_fields.tucse = 0;
3760 	TXD->upper_setup.tcp_fields.tucso =
3761 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3762 	/*
3763 	 * Payload size per packet w/o any headers.
3764 	 * Length of all headers up to payload.
3765 	 */
3766 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3767 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3768 
3769 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3770 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3771 				E1000_TXD_CMD_TSE |	/* TSE context */
3772 				E1000_TXD_CMD_IP |	/* Do IP csum */
3773 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3774 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3775 
3776 	tx_buffer->m_head = NULL;
3777 	tx_buffer->next_eop = -1;
3778 
3779 	if (++cur == adapter->num_tx_desc)
3780 		cur = 0;
3781 
3782 	txr->tx_avail--;
3783 	txr->next_avail_desc = cur;
3784 	txr->tx_tso = TRUE;
3785 }
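
/*
 * Worked example (illustrative): for a TSO send of an untagged IPv4/TCP
 * frame with minimal headers, ip_off = 14, ip_hl = 5 and th_off = 5, so
 * hdr_len = 14 + 20 + 20 = 54. With, e.g., m_pkthdr.tso_segsz = 1448
 * the hardware carves the (m_pkthdr.len - 54) payload bytes into
 * 1448-byte segments, replicating and fixing up the 54 header bytes
 * for each one.
 */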
3786 
3787 
3788 /**********************************************************************
3789  *
3790  *  Examine each tx_buffer in the used queue. If the hardware is done
3791  *  processing the packet then free associated resources. The
3792  *  tx_buffer is put back on the free queue.
3793  *
3794  **********************************************************************/
3795 static void
3796 em_txeof(struct tx_ring *txr)
3797 {
3798 	struct adapter	*adapter = txr->adapter;
3799         int first, last, done, processed;
3800         struct em_buffer *tx_buffer;
3801         struct e1000_tx_desc   *tx_desc, *eop_desc;
3802 	struct ifnet   *ifp = adapter->ifp;
3803 
3804 	EM_TX_LOCK_ASSERT(txr);
3805 #ifdef DEV_NETMAP
3806 	if (ifp->if_capenable & IFCAP_NETMAP) {
3807 		struct netmap_adapter *na = NA(ifp);
3808 
3809 		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3810 		EM_TX_UNLOCK(txr);
3811 		EM_CORE_LOCK(adapter);
3812 		selwakeuppri(&na->tx_si, PI_NET);
3813 		EM_CORE_UNLOCK(adapter);
3814 		EM_TX_LOCK(txr);
3815 		return;
3816 	}
3817 #endif /* DEV_NETMAP */
3818 
3819 	/* No work, make sure watchdog is off */
3820         if (txr->tx_avail == adapter->num_tx_desc) {
3821 		txr->queue_status = EM_QUEUE_IDLE;
3822                 return;
3823 	}
3824 
3825 	processed = 0;
3826         first = txr->next_to_clean;
3827         tx_desc = &txr->tx_base[first];
3828         tx_buffer = &txr->tx_buffers[first];
3829 	last = tx_buffer->next_eop;
3830         eop_desc = &txr->tx_base[last];
3831 
3832 	/*
3833 	 * Get the index of the first descriptor
3834 	 * AFTER the EOP of the first packet, so
3835 	 * that the inner while loop can use a
3836 	 * simple comparison.
3837 	 */
3838 	if (++last == adapter->num_tx_desc)
3839  		last = 0;
3840 	done = last;
3841 
3842         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3843             BUS_DMASYNC_POSTREAD);
3844 
3845         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3846 		/* We clean the range of the packet */
3847 		while (first != done) {
3848                 	tx_desc->upper.data = 0;
3849                 	tx_desc->lower.data = 0;
3850                 	tx_desc->buffer_addr = 0;
3851                 	++txr->tx_avail;
3852 			++processed;
3853 
3854 			if (tx_buffer->m_head) {
3855 				bus_dmamap_sync(txr->txtag,
3856 				    tx_buffer->map,
3857 				    BUS_DMASYNC_POSTWRITE);
3858 				bus_dmamap_unload(txr->txtag,
3859 				    tx_buffer->map);
3860                         	m_freem(tx_buffer->m_head);
3861                         	tx_buffer->m_head = NULL;
3862                 	}
3863 			tx_buffer->next_eop = -1;
3864 			txr->watchdog_time = ticks;
3865 
3866 	                if (++first == adapter->num_tx_desc)
3867 				first = 0;
3868 
3869 	                tx_buffer = &txr->tx_buffers[first];
3870 			tx_desc = &txr->tx_base[first];
3871 		}
3872 		++ifp->if_opackets;
3873 		/* See if we can continue to the next packet */
3874 		last = tx_buffer->next_eop;
3875 		if (last != -1) {
3876         		eop_desc = &txr->tx_base[last];
3877 			/* Get new done point */
3878 			if (++last == adapter->num_tx_desc) last = 0;
3879 			done = last;
3880 		} else
3881 			break;
3882         }
3883         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3884             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3885 
3886         txr->next_to_clean = first;
3887 
3888 	/*
3889 	** Watchdog calculation: we know there is work
3890 	** outstanding, or the early return above would
3891 	** have been taken, so nothing processed for too
3892 	** long indicates a hang. The local timer will
3893 	** examine this and do a reset if needed.
3894 	*/
3895 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3896 		txr->queue_status = EM_QUEUE_HUNG;
3897 
3898         /*
3899          * If we have a minimum free, clear IFF_DRV_OACTIVE
3900          * to tell the stack that it is OK to send packets.
3901 	 * Notice that all writes of OACTIVE happen under the
3902 	 * TX lock which, with a single queue, guarantees
3903 	 * sanity.
3904          */
3905         if (txr->tx_avail >= EM_MAX_SCATTER)
3906 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3907 
3908 	/* Disable watchdog if all clean */
3909 	if (txr->tx_avail == adapter->num_tx_desc) {
3910 		txr->queue_status = EM_QUEUE_IDLE;
3911 	}
3912 }
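
/*
 * Ring-index arithmetic, for illustration: with num_tx_desc = 1024 and
 * a packet whose EOP descriptor sits in the last slot (last = 1023),
 * the pre-increment above wraps 'done' to 0, so the inner loop's
 * "first != done" test still terminates one slot past the EOP.
 */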
3913 
3914 
3915 /*********************************************************************
3916  *
3917  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3918  *
3919  **********************************************************************/
3920 static void
3921 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3922 {
3923 	struct adapter		*adapter = rxr->adapter;
3924 	struct mbuf		*m;
3925 	bus_dma_segment_t	segs[1];
3926 	struct em_buffer	*rxbuf;
3927 	int			i, j, error, nsegs;
3928 	bool			cleaned = FALSE;
3929 
3930 	i = j = rxr->next_to_refresh;
3931 	/*
3932 	** Get one descriptor beyond
3933 	** our work mark to control
3934 	** the loop.
3935 	*/
3936 	if (++j == adapter->num_rx_desc)
3937 		j = 0;
3938 
3939 	while (j != limit) {
3940 		rxbuf = &rxr->rx_buffers[i];
3941 		if (rxbuf->m_head == NULL) {
3942 			m = m_getjcl(M_NOWAIT, MT_DATA,
3943 			    M_PKTHDR, adapter->rx_mbuf_sz);
3944 			/*
3945 			** If we have a temporary resource shortage
3946 			** that causes a failure, just abort the refresh
3947 			** for now; we will return to this point when
3948 			** reinvoked from em_rxeof.
3949 			*/
3950 			if (m == NULL)
3951 				goto update;
3952 		} else
3953 			m = rxbuf->m_head;
3954 
3955 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3956 		m->m_flags |= M_PKTHDR;
3957 		m->m_data = m->m_ext.ext_buf;
3958 
3959 		/* Use bus_dma machinery to setup the memory mapping  */
3960 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3961 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3962 		if (error != 0) {
3963 			printf("Refresh mbufs: hdr dmamap load"
3964 			    " failure - %d\n", error);
3965 			m_free(m);
3966 			rxbuf->m_head = NULL;
3967 			goto update;
3968 		}
3969 		rxbuf->m_head = m;
3970 		bus_dmamap_sync(rxr->rxtag,
3971 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3972 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3973 		cleaned = TRUE;
3974 
3975 		i = j; /* Next is precalculated for us */
3976 		rxr->next_to_refresh = i;
3977 		/* Calculate next controlling index */
3978 		if (++j == adapter->num_rx_desc)
3979 			j = 0;
3980 	}
3981 update:
3982 	/*
3983 	** Update the tail pointer only if we have
3984 	** refreshed, and only as far as we refreshed.
3985 	*/
3986 	if (cleaned)
3987 		E1000_WRITE_REG(&adapter->hw,
3988 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3989 
3990 	return;
3991 }
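
/*
 * For illustration: with num_rx_desc = 256 and next_to_refresh = 255,
 * 'i' starts at 255 while the control index 'j' wraps to 0. Each pass
 * refreshes slot i (which trails j by one) and then advances i to the
 * precalculated j, so the loop stops with next_to_refresh one slot
 * short of 'limit' and the descriptor at 'limit' is never touched.
 */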
3992 
3993 
3994 /*********************************************************************
3995  *
3996  *  Allocate memory for rx_buffer structures. Since we use one
3997  *  rx_buffer per received packet, the maximum number of rx_buffer's
3998  *  that we'll need is equal to the number of receive descriptors
3999  *  that we've allocated.
4000  *
4001  **********************************************************************/
4002 static int
4003 em_allocate_receive_buffers(struct rx_ring *rxr)
4004 {
4005 	struct adapter		*adapter = rxr->adapter;
4006 	device_t		dev = adapter->dev;
4007 	struct em_buffer	*rxbuf;
4008 	int			error;
4009 
4010 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4011 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4012 	if (rxr->rx_buffers == NULL) {
4013 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4014 		return (ENOMEM);
4015 	}
4016 
4017 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4018 				1, 0,			/* alignment, bounds */
4019 				BUS_SPACE_MAXADDR,	/* lowaddr */
4020 				BUS_SPACE_MAXADDR,	/* highaddr */
4021 				NULL, NULL,		/* filter, filterarg */
4022 				MJUM9BYTES,		/* maxsize */
4023 				1,			/* nsegments */
4024 				MJUM9BYTES,		/* maxsegsize */
4025 				0,			/* flags */
4026 				NULL,			/* lockfunc */
4027 				NULL,			/* lockarg */
4028 				&rxr->rxtag);
4029 	if (error) {
4030 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4031 		    __func__, error);
4032 		goto fail;
4033 	}
4034 
4035 	rxbuf = rxr->rx_buffers;
4036 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4037 		/* rxbuf already tracks &rx_buffers[i] via the loop cursor */
4038 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4039 		    &rxbuf->map);
4040 		if (error) {
4041 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4042 			    __func__, error);
4043 			goto fail;
4044 		}
4045 	}
4046 
4047 	return (0);
4048 
4049 fail:
4050 	em_free_receive_structures(adapter);
4051 	return (error);
4052 }
4053 
4054 
4055 /*********************************************************************
4056  *
4057  *  Initialize a receive ring and its buffers.
4058  *
4059  **********************************************************************/
4060 static int
4061 em_setup_receive_ring(struct rx_ring *rxr)
4062 {
4063 	struct	adapter 	*adapter = rxr->adapter;
4064 	struct em_buffer	*rxbuf;
4065 	bus_dma_segment_t	seg[1];
4066 	int			rsize, nsegs, error = 0;
4067 #ifdef DEV_NETMAP
4068 	struct netmap_adapter *na = NA(adapter->ifp);
4069 	struct netmap_slot *slot;
4070 #endif
4071 
4072 
4073 	/* Clear the ring contents */
4074 	EM_RX_LOCK(rxr);
4075 	rsize = roundup2(adapter->num_rx_desc *
4076 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4077 	bzero((void *)rxr->rx_base, rsize);
4078 #ifdef DEV_NETMAP
4079 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4080 #endif
4081 
4082 	/*
4083 	** Free current RX buffer structs and their mbufs
4084 	*/
4085 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4086 		rxbuf = &rxr->rx_buffers[i];
4087 		if (rxbuf->m_head != NULL) {
4088 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4089 			    BUS_DMASYNC_POSTREAD);
4090 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4091 			m_freem(rxbuf->m_head);
4092 			rxbuf->m_head = NULL; /* mark as freed */
4093 		}
4094 	}
4095 
4096 	/* Now replenish the mbufs */
4097         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4098 		rxbuf = &rxr->rx_buffers[j];
4099 #ifdef DEV_NETMAP
4100 		if (slot) {
4101 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4102 			uint64_t paddr;
4103 			void *addr;
4104 
4105 			addr = PNMB(slot + si, &paddr);
4106 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4107 			/* Update descriptor */
4108 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4109 			continue;
4110 		}
4111 #endif /* DEV_NETMAP */
4112 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4113 		    M_PKTHDR, adapter->rx_mbuf_sz);
4114 		if (rxbuf->m_head == NULL) {
4115 			error = ENOBUFS;
4116 			goto fail;
4117 		}
4118 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4119 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4120 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4121 
4122 		/* Get the memory mapping */
4123 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4124 		    rxbuf->map, rxbuf->m_head, seg,
4125 		    &nsegs, BUS_DMA_NOWAIT);
4126 		if (error != 0) {
4127 			m_freem(rxbuf->m_head);
4128 			rxbuf->m_head = NULL;
4129 			goto fail;
4130 		}
4131 		bus_dmamap_sync(rxr->rxtag,
4132 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4133 
4134 		/* Update descriptor */
4135 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4136 	}
4137 	rxr->next_to_check = 0;
4138 	rxr->next_to_refresh = 0;
4139 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4140 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4141 
4142 fail:
4143 	EM_RX_UNLOCK(rxr);
4144 	return (error);
4145 }
4146 
4147 /*********************************************************************
4148  *
4149  *  Initialize all receive rings.
4150  *
4151  **********************************************************************/
4152 static int
4153 em_setup_receive_structures(struct adapter *adapter)
4154 {
4155 	struct rx_ring *rxr = adapter->rx_rings;
4156 	int q;
4157 
4158 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4159 		if (em_setup_receive_ring(rxr))
4160 			goto fail;
4161 
4162 	return (0);
4163 fail:
4164 	/*
4165 	 * Free RX buffers allocated so far, we will only handle
4166 	 * the rings that completed, the failing case will have
4167 	 * cleaned up for itself. 'q' failed, so it's the terminus.
4168 	 */
4169 	for (int i = 0; i < q; ++i) {
4170 		rxr = &adapter->rx_rings[i];
4171 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4172 			struct em_buffer *rxbuf;
4173 			rxbuf = &rxr->rx_buffers[n];
4174 			if (rxbuf->m_head != NULL) {
4175 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4176 			  	  BUS_DMASYNC_POSTREAD);
4177 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4178 				m_freem(rxbuf->m_head);
4179 				rxbuf->m_head = NULL;
4180 			}
4181 		}
4182 		rxr->next_to_check = 0;
4183 		rxr->next_to_refresh = 0;
4184 	}
4185 
4186 	return (ENOBUFS);
4187 }
4188 
4189 /*********************************************************************
4190  *
4191  *  Free all receive rings.
4192  *
4193  **********************************************************************/
4194 static void
4195 em_free_receive_structures(struct adapter *adapter)
4196 {
4197 	struct rx_ring *rxr = adapter->rx_rings;
4198 
4199 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4200 		em_free_receive_buffers(rxr);
4201 		/* Free the ring memory as well */
4202 		em_dma_free(adapter, &rxr->rxdma);
4203 		EM_RX_LOCK_DESTROY(rxr);
4204 	}
4205 
4206 	free(adapter->rx_rings, M_DEVBUF);
4207 }
4208 
4209 
4210 /*********************************************************************
4211  *
4212  *  Free receive ring data structures
4213  *
4214  **********************************************************************/
4215 static void
4216 em_free_receive_buffers(struct rx_ring *rxr)
4217 {
4218 	struct adapter		*adapter = rxr->adapter;
4219 	struct em_buffer	*rxbuf = NULL;
4220 
4221 	INIT_DEBUGOUT("free_receive_buffers: begin");
4222 
4223 	if (rxr->rx_buffers != NULL) {
4224 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4225 			rxbuf = &rxr->rx_buffers[i];
4226 			if (rxbuf->map != NULL) {
4227 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4228 				    BUS_DMASYNC_POSTREAD);
4229 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4230 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4231 			}
4232 			if (rxbuf->m_head != NULL) {
4233 				m_freem(rxbuf->m_head);
4234 				rxbuf->m_head = NULL;
4235 			}
4236 		}
4237 		free(rxr->rx_buffers, M_DEVBUF);
4238 		rxr->rx_buffers = NULL;
4239 		rxr->next_to_check = 0;
4240 		rxr->next_to_refresh = 0;
4241 	}
4242 
4243 	if (rxr->rxtag != NULL) {
4244 		bus_dma_tag_destroy(rxr->rxtag);
4245 		rxr->rxtag = NULL;
4246 	}
4247 
4248 	return;
4249 }
4250 
4251 
4252 /*********************************************************************
4253  *
4254  *  Enable receive unit.
4255  *
4256  **********************************************************************/
4257 #define MAX_INTS_PER_SEC	8000
4258 #define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
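/*
 * For illustration: the ITR register counts in units of 256 ns, so
 * DEFAULT_ITR works out to 1000000000 / (8000 * 256) = 488, i.e. a
 * minimum interrupt spacing of 488 * 256 ns ~= 125 us, capping the
 * rate at roughly MAX_INTS_PER_SEC interrupts per second.
 */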
4259 
4260 static void
4261 em_initialize_receive_unit(struct adapter *adapter)
4262 {
4263 	struct rx_ring	*rxr = adapter->rx_rings;
4264 	struct ifnet	*ifp = adapter->ifp;
4265 	struct e1000_hw	*hw = &adapter->hw;
4266 	u64	bus_addr;
4267 	u32	rctl, rxcsum;
4268 
4269 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4270 
4271 	/*
4272 	 * Make sure receives are disabled while setting
4273 	 * up the descriptor ring
4274 	 */
4275 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4276 	/* Do not disable if ever enabled on this hardware */
4277 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4278 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4279 
4280 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4281 	    adapter->rx_abs_int_delay.value);
4282 	/*
4283 	 * Set the interrupt throttling rate. Value is calculated
4284 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4285 	 */
4286 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4287 
4288 	/*
4289 	** When using MSIX interrupts we need to throttle
4290 	** using the EITR register (82574 only)
4291 	*/
4292 	if (hw->mac.type == e1000_82574) {
4293 		for (int i = 0; i < 4; i++)
4294 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4295 			    DEFAULT_ITR);
4296 		/* Disable accelerated acknowledge */
4297 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4298 	}
4299 
4300 	if (ifp->if_capenable & IFCAP_RXCSUM) {
4301 		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4302 		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4303 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4304 	}
4305 
4306 	/*
4307 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4308 	** long latencies are observed, like Lenovo X60. This
4309 	** change eliminates the problem, but since having positive
4310 	** values in RDTR is a known source of problems on other
4311 	** platforms another solution is being sought.
4312 	*/
4313 	if (hw->mac.type == e1000_82573)
4314 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4315 
4316 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4317 		/* Setup the Base and Length of the Rx Descriptor Ring */
4318 		bus_addr = rxr->rxdma.dma_paddr;
4319 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4320 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4321 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4322 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4323 		/* Setup the Head and Tail Descriptor Pointers */
4324 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4325 #ifdef DEV_NETMAP
4326 		/*
4327 		 * An init() while a netmap client is active must
4328 		 * preserve the rx buffers passed to userspace.
4329 		 * In this driver it means we adjust RDT to
4330 		 * something different from na->num_rx_desc - 1.
4331 		 */
4332 		if (ifp->if_capenable & IFCAP_NETMAP) {
4333 			struct netmap_adapter *na = NA(adapter->ifp);
4334 			struct netmap_kring *kring = &na->rx_rings[i];
4335 			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4336 
4337 			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4338 		} else
4339 #endif /* DEV_NETMAP */
4340 		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4341 	}
4342 
4343 	/* Set PTHRESH for improved jumbo performance */
4344 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4345 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4346 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4347 	    (ifp->if_mtu > ETHERMTU)) {
4348 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4349 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4350 	}
4351 
4352 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4353 		if (ifp->if_mtu > ETHERMTU)
4354 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4355 		else
4356 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4357 	}
4358 
4359 	/* Setup the Receive Control Register */
4360 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4361 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4362 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4363 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4364 
4365 	/* Strip the CRC */
4366 	rctl |= E1000_RCTL_SECRC;
4367 
4368 	/* Make sure VLAN Filters are off */
4369 	rctl &= ~E1000_RCTL_VFE;
4370 	rctl &= ~E1000_RCTL_SBP;
4371 
4372 	if (adapter->rx_mbuf_sz == MCLBYTES)
4373 		rctl |= E1000_RCTL_SZ_2048;
4374 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4375 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4376 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4377 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4378 
4379 	if (ifp->if_mtu > ETHERMTU)
4380 		rctl |= E1000_RCTL_LPE;
4381 	else
4382 		rctl &= ~E1000_RCTL_LPE;
4383 
4384 	/* Write out the settings */
4385 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4386 
4387 	return;
4388 }
4389 
4390 
4391 /*********************************************************************
4392  *
4393  *  This routine executes in interrupt context. It replenishes
4394  *  the mbufs in the descriptor ring and passes data which has
4395  *  been DMA'd into host memory up to the upper layer.
4396  *
4397  *  We loop at most count times if count is > 0, or until done if
4398  *  count < 0.
4399  *
4400  *  For polling we also now return the number of cleaned packets
4401  *********************************************************************/
4402 static bool
4403 em_rxeof(struct rx_ring *rxr, int count, int *done)
4404 {
4405 	struct adapter		*adapter = rxr->adapter;
4406 	struct ifnet		*ifp = adapter->ifp;
4407 	struct mbuf		*mp, *sendmp;
4408 	u8			status = 0;
4409 	u16 			len;
4410 	int			i, processed, rxdone = 0;
4411 	bool			eop;
4412 	struct e1000_rx_desc	*cur;
4413 
4414 	EM_RX_LOCK(rxr);
4415 
4416 #ifdef DEV_NETMAP
4417 	if (ifp->if_capenable & IFCAP_NETMAP) {
4418 		struct netmap_adapter *na = NA(ifp);
4419 
4420 		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4421 		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4422 		EM_RX_UNLOCK(rxr);
4423 		EM_CORE_LOCK(adapter);
4424 		selwakeuppri(&na->rx_si, PI_NET);
4425 		EM_CORE_UNLOCK(adapter);
4426 		return (0);
4427 	}
4428 #endif /* DEV_NETMAP */
4429 
4430 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4431 
4432 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4433 			break;
4434 
4435 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4436 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4437 
4438 		cur = &rxr->rx_base[i];
4439 		status = cur->status;
4440 		mp = sendmp = NULL;
4441 
4442 		if ((status & E1000_RXD_STAT_DD) == 0)
4443 			break;
4444 
4445 		len = le16toh(cur->length);
4446 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4447 
4448 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4449 		    (rxr->discard == TRUE)) {
4450 			adapter->dropped_pkts++;
4451 			++rxr->rx_discarded;
4452 			if (!eop) /* Catch subsequent segs */
4453 				rxr->discard = TRUE;
4454 			else
4455 				rxr->discard = FALSE;
4456 			em_rx_discard(rxr, i);
4457 			goto next_desc;
4458 		}
4459 
4460 		/* Assign correct length to the current fragment */
4461 		mp = rxr->rx_buffers[i].m_head;
4462 		mp->m_len = len;
4463 
4464 		/* Trigger for refresh */
4465 		rxr->rx_buffers[i].m_head = NULL;
4466 
4467 		/* First segment? */
4468 		if (rxr->fmp == NULL) {
4469 			mp->m_pkthdr.len = len;
4470 			rxr->fmp = rxr->lmp = mp;
4471 		} else {
4472 			/* Chain mbuf's together */
4473 			mp->m_flags &= ~M_PKTHDR;
4474 			rxr->lmp->m_next = mp;
4475 			rxr->lmp = mp;
4476 			rxr->fmp->m_pkthdr.len += len;
4477 		}
4478 
4479 		if (eop) {
4480 			--count;
4481 			sendmp = rxr->fmp;
4482 			sendmp->m_pkthdr.rcvif = ifp;
4483 			ifp->if_ipackets++;
4484 			em_receive_checksum(cur, sendmp);
4485 #ifndef __NO_STRICT_ALIGNMENT
4486 			if (adapter->hw.mac.max_frame_size >
4487 			    (MCLBYTES - ETHER_ALIGN) &&
4488 			    em_fixup_rx(rxr) != 0)
4489 				goto skip;
4490 #endif
4491 			if (status & E1000_RXD_STAT_VP) {
4492 				sendmp->m_pkthdr.ether_vtag =
4493 				    le16toh(cur->special);
4494 				sendmp->m_flags |= M_VLANTAG;
4495 			}
4496 #ifndef __NO_STRICT_ALIGNMENT
4497 skip:
4498 #endif
4499 			rxr->fmp = rxr->lmp = NULL;
4500 		}
4501 next_desc:
4502 		/* Zero out the receive descriptors status. */
4503 		cur->status = 0;
4504 		++rxdone;	/* cumulative for POLL */
4505 		++processed;
4506 
4507 		/* Advance our pointers to the next descriptor. */
4508 		if (++i == adapter->num_rx_desc)
4509 			i = 0;
4510 
4511 		/* Send to the stack */
4512 		if (sendmp != NULL) {
4513 			rxr->next_to_check = i;
4514 			EM_RX_UNLOCK(rxr);
4515 			(*ifp->if_input)(ifp, sendmp);
4516 			EM_RX_LOCK(rxr);
4517 			i = rxr->next_to_check;
4518 		}
4519 
4520 		/* Only refresh mbufs every 8 descriptors */
4521 		if (processed == 8) {
4522 			em_refresh_mbufs(rxr, i);
4523 			processed = 0;
4524 		}
4525 	}
4526 
4527 	/* Catch any remaining refresh work */
4528 	if (e1000_rx_unrefreshed(rxr))
4529 		em_refresh_mbufs(rxr, i);
4530 
4531 	rxr->next_to_check = i;
4532 	if (done != NULL)
4533 		*done = rxdone;
4534 	EM_RX_UNLOCK(rxr);
4535 
4536 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4537 }
4538 
4539 static __inline void
4540 em_rx_discard(struct rx_ring *rxr, int i)
4541 {
4542 	struct em_buffer	*rbuf;
4543 
4544 	rbuf = &rxr->rx_buffers[i];
4545 	/* Free any previous pieces */
4546 	if (rxr->fmp != NULL) {
4547 		rxr->fmp->m_flags |= M_PKTHDR;
4548 		m_freem(rxr->fmp);
4549 		rxr->fmp = NULL;
4550 		rxr->lmp = NULL;
4551 	}
4552 	/*
4553 	** Free buffer and allow em_refresh_mbufs()
4554 	** to clean up and recharge buffer.
4555 	*/
4556 	if (rbuf->m_head) {
4557 		m_free(rbuf->m_head);
4558 		rbuf->m_head = NULL;
4559 	}
4560 	return;
4561 }
4562 
4563 #ifndef __NO_STRICT_ALIGNMENT
4564 /*
4565  * When jumbo frames are enabled we should realign the entire payload on
4566  * architectures with strict alignment. This is a serious design mistake
4567  * of the 8254x, as it defeats DMA. The 8254x only allows the RX buffer
4568  * size to be 2048/4096/8192/16384; what we really want is 2048 -
4569  * ETHER_ALIGN, to align the payload. On architectures without strict
4570  * alignment restrictions the 8254x still performs unaligned memory
4571  * accesses, which reduce performance there too. To avoid copying an
4572  * entire frame to realign it, we allocate a new mbuf, copy the ethernet
4573  * header into it, and prepend the new mbuf to the existing mbuf chain.
4574  *
4575  * Be aware that the best performance of the 8254x is achieved only when
4576  * jumbo frames are not used at all on architectures with strict alignment.
4577  */
4578 static int
4579 em_fixup_rx(struct rx_ring *rxr)
4580 {
4581 	struct adapter *adapter = rxr->adapter;
4582 	struct mbuf *m, *n;
4583 	int error;
4584 
4585 	error = 0;
4586 	m = rxr->fmp;
4587 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4588 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4589 		m->m_data += ETHER_HDR_LEN;
4590 	} else {
4591 		MGETHDR(n, M_NOWAIT, MT_DATA);
4592 		if (n != NULL) {
4593 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4594 			m->m_data += ETHER_HDR_LEN;
4595 			m->m_len -= ETHER_HDR_LEN;
4596 			n->m_len = ETHER_HDR_LEN;
4597 			M_MOVE_PKTHDR(n, m);
4598 			n->m_next = m;
4599 			rxr->fmp = n;
4600 		} else {
4601 			adapter->dropped_pkts++;
4602 			m_freem(rxr->fmp);
4603 			rxr->fmp = NULL;
4604 			error = ENOMEM;
4605 		}
4606 	}
4607 
4608 	return (error);
4609 }
4610 #endif
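
/*
 * Alignment arithmetic, for illustration: cluster buffers are well
 * aligned, so the IP header behind a 14-byte Ethernet header lands on
 * a 2-byte boundary (offset 14). Sliding the frame forward by
 * ETHER_HDR_LEN in em_fixup_rx() moves the IP header to offset 28,
 * which is 4-byte aligned and safe on strict-alignment architectures.
 */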
4611 
4612 /*********************************************************************
4613  *
4614  *  Verify that the hardware indicated that the checksum is valid.
4615  *  Inform the stack about the status of checksum so that stack
4616  *  doesn't spend time verifying the checksum.
4617  *
4618  *********************************************************************/
4619 static void
4620 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4621 {
4622 	/* Ignore Checksum bit is set */
4623 	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4624 		mp->m_pkthdr.csum_flags = 0;
4625 		return;
4626 	}
4627 
4628 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4629 		/* Did it pass? */
4630 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4631 			/* IP Checksum Good */
4632 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4633 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4634 
4635 		} else {
4636 			mp->m_pkthdr.csum_flags = 0;
4637 		}
4638 	}
4639 
4640 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4641 		/* Did it pass? */
4642 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4643 			mp->m_pkthdr.csum_flags |=
4644 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4645 			mp->m_pkthdr.csum_data = htons(0xffff);
4646 		}
4647 	}
4648 }
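
/*
 * For illustration: a frame that passes both hardware checks leaves
 * here with csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR and csum_data = 0xffff, which
 * tells the stack to skip both the IP header and the TCP/UDP payload
 * checksum verification entirely.
 */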
4649 
4650 /*
4651  * This routine is run via a vlan
4652  * config EVENT
4653  */
4654 static void
4655 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4656 {
4657 	struct adapter	*adapter = ifp->if_softc;
4658 	u32		index, bit;
4659 
4660 	if (ifp->if_softc !=  arg)   /* Not our event */
4661 		return;
4662 
4663 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4664                 return;
4665 
4666 	EM_CORE_LOCK(adapter);
4667 	index = (vtag >> 5) & 0x7F;
4668 	bit = vtag & 0x1F;
4669 	adapter->shadow_vfta[index] |= (1 << bit);
4670 	++adapter->num_vlans;
4671 	/* Re-init to load the changes */
4672 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4673 		em_init_locked(adapter);
4674 	EM_CORE_UNLOCK(adapter);
4675 }
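
/*
 * VFTA indexing, for illustration: the 4096 possible VLAN IDs map onto
 * 128 32-bit words. E.g. vtag = 1000 gives index = (1000 >> 5) & 0x7F
 * = 31 and bit = 1000 & 0x1F = 8, so shadow_vfta[31] |= (1 << 8).
 */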
4676 
4677 /*
4678  * This routine is run via an vlan
4679  * unconfig EVENT
4680  */
4681 static void
4682 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4683 {
4684 	struct adapter	*adapter = ifp->if_softc;
4685 	u32		index, bit;
4686 
4687 	if (ifp->if_softc !=  arg)
4688 		return;
4689 
4690 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4691                 return;
4692 
4693 	EM_CORE_LOCK(adapter);
4694 	index = (vtag >> 5) & 0x7F;
4695 	bit = vtag & 0x1F;
4696 	adapter->shadow_vfta[index] &= ~(1 << bit);
4697 	--adapter->num_vlans;
4698 	/* Re-init to load the changes */
4699 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4700 		em_init_locked(adapter);
4701 	EM_CORE_UNLOCK(adapter);
4702 }
4703 
4704 static void
4705 em_setup_vlan_hw_support(struct adapter *adapter)
4706 {
4707 	struct e1000_hw *hw = &adapter->hw;
4708 	u32             reg;
4709 
4710 	/*
4711 	** We get here through init_locked, meaning
4712 	** a soft reset; this has already cleared
4713 	** the VFTA and other state, so if no vlans
4714 	** have been registered, do nothing.
4715 	*/
4716 	if (adapter->num_vlans == 0)
4717                 return;
4718 
4719 	/*
4720 	** A soft reset zeroes out the VFTA, so
4721 	** we need to repopulate it now.
4722 	*/
4723 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4724                 if (adapter->shadow_vfta[i] != 0)
4725 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4726                             i, adapter->shadow_vfta[i]);
4727 
4728 	reg = E1000_READ_REG(hw, E1000_CTRL);
4729 	reg |= E1000_CTRL_VME;
4730 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4731 
4732 	/* Enable the Filter Table */
4733 	reg = E1000_READ_REG(hw, E1000_RCTL);
4734 	reg &= ~E1000_RCTL_CFIEN;
4735 	reg |= E1000_RCTL_VFE;
4736 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4737 }
4738 
4739 static void
4740 em_enable_intr(struct adapter *adapter)
4741 {
4742 	struct e1000_hw *hw = &adapter->hw;
4743 	u32 ims_mask = IMS_ENABLE_MASK;
4744 
4745 	if (hw->mac.type == e1000_82574) {
4746 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4747 		ims_mask |= EM_MSIX_MASK;
4748 	}
4749 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4750 }
4751 
4752 static void
4753 em_disable_intr(struct adapter *adapter)
4754 {
4755 	struct e1000_hw *hw = &adapter->hw;
4756 
4757 	if (hw->mac.type == e1000_82574)
4758 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4759 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4760 }
4761 
4762 /*
4763  * Bit of a misnomer: what this really means is
4764  * to enable OS management of the system... i.e.,
4765  * to disable certain hardware management features.
4766  */
4767 static void
4768 em_init_manageability(struct adapter *adapter)
4769 {
4770 	/* A shared code workaround */
4771 #define E1000_82542_MANC2H E1000_MANC2H
4772 	if (adapter->has_manage) {
4773 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4774 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4775 
4776 		/* disable hardware interception of ARP */
4777 		manc &= ~(E1000_MANC_ARP_EN);
4778 
4779                 /* enable receiving management packets to the host */
4780 		manc |= E1000_MANC_EN_MNG2HOST;
4781 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4782 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4783 		manc2h |= E1000_MNG2HOST_PORT_623;
4784 		manc2h |= E1000_MNG2HOST_PORT_664;
4785 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4786 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4787 	}
4788 }
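
/*
 * Ports 623 and 664 are the RMCP and secure-RMCP ports conventionally
 * used by ASF-style management traffic; routing them to the host here
 * keeps management packets visible while ARP interception is disabled.
 */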
4789 
4790 /*
4791  * Give control back to hardware management
4792  * controller if there is one.
4793  */
4794 static void
4795 em_release_manageability(struct adapter *adapter)
4796 {
4797 	if (adapter->has_manage) {
4798 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4799 
4800 		/* re-enable hardware interception of ARP */
4801 		manc |= E1000_MANC_ARP_EN;
4802 		manc &= ~E1000_MANC_EN_MNG2HOST;
4803 
4804 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4805 	}
4806 }
4807 
4808 /*
4809  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4810  * For ASF and Pass Through versions of f/w this means
4811  * that the driver is loaded. For AMT versions of the f/w
4812  * this means that the network i/f is open.
4813  */
4814 static void
4815 em_get_hw_control(struct adapter *adapter)
4816 {
4817 	u32 ctrl_ext, swsm;
4818 
4819 	if (adapter->hw.mac.type == e1000_82573) {
4820 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4821 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4822 		    swsm | E1000_SWSM_DRV_LOAD);
4823 		return;
4824 	}
4825 	/* else */
4826 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4827 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4828 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4829 	return;
4830 }
4831 
4832 /*
4833  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4834  * For ASF and Pass Through versions of f/w this means that
4835  * the driver is no longer loaded. For AMT versions of the
4836  * f/w this means that the network i/f is closed.
4837  */
4838 static void
4839 em_release_hw_control(struct adapter *adapter)
4840 {
4841 	u32 ctrl_ext, swsm;
4842 
4843 	if (!adapter->has_manage)
4844 		return;
4845 
4846 	if (adapter->hw.mac.type == e1000_82573) {
4847 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4848 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4849 		    swsm & ~E1000_SWSM_DRV_LOAD);
4850 		return;
4851 	}
4852 	/* else */
4853 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4854 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4855 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4856 	return;
4857 }
4858 
4859 static int
4860 em_is_valid_ether_addr(u8 *addr)
4861 {
4862 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4863 
4864 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4865 		return (FALSE);
4866 	}
4867 
4868 	return (TRUE);
4869 }
4870 
4871 /*
4872 ** Parse the interface capabilities with regard
4873 ** to both system management and wake-on-lan for
4874 ** later use.
4875 */
4876 static void
4877 em_get_wakeup(device_t dev)
4878 {
4879 	struct adapter	*adapter = device_get_softc(dev);
4880 	u16		eeprom_data = 0, device_id, apme_mask;
4881 
4882 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4883 	apme_mask = EM_EEPROM_APME;
4884 
4885 	switch (adapter->hw.mac.type) {
4886 	case e1000_82573:
4887 	case e1000_82583:
4888 		adapter->has_amt = TRUE;
4889 		/* Falls thru */
4890 	case e1000_82571:
4891 	case e1000_82572:
4892 	case e1000_80003es2lan:
4893 		if (adapter->hw.bus.func == 1) {
4894 			e1000_read_nvm(&adapter->hw,
4895 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4896 			break;
4897 		} else
4898 			e1000_read_nvm(&adapter->hw,
4899 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4900 		break;
4901 	case e1000_ich8lan:
4902 	case e1000_ich9lan:
4903 	case e1000_ich10lan:
4904 	case e1000_pchlan:
4905 	case e1000_pch2lan:
4906 		apme_mask = E1000_WUC_APME;
4907 		adapter->has_amt = TRUE;
4908 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4909 		break;
4910 	default:
4911 		e1000_read_nvm(&adapter->hw,
4912 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4913 		break;
4914 	}
4915 	if (eeprom_data & apme_mask)
4916 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4917 	/*
4918 	 * We have the eeprom settings; now apply the special cases
4919 	 * where the eeprom may be wrong or the board won't support
4920 	 * wake on lan on a particular port.
4921 	 */
4922 	device_id = pci_get_device(dev);
4923         switch (device_id) {
4924 	case E1000_DEV_ID_82571EB_FIBER:
4925 		/* Wake events only supported on port A for dual fiber
4926 		 * regardless of eeprom setting */
4927 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4928 		    E1000_STATUS_FUNC_1)
4929 			adapter->wol = 0;
4930 		break;
4931 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4932 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4933 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4934                 /* if quad port adapter, disable WoL on all but port A */
4935 		if (global_quad_port_a != 0)
4936 			adapter->wol = 0;
4937 		/* Reset for multiple quad port adapters */
4938 		if (++global_quad_port_a == 4)
4939 			global_quad_port_a = 0;
4940                 break;
4941 	}
4942 	return;
4943 }
4944 
4945 
4946 /*
4947  * Enable PCI Wake On Lan capability
4948  */
4949 static void
4950 em_enable_wakeup(device_t dev)
4951 {
4952 	struct adapter	*adapter = device_get_softc(dev);
4953 	struct ifnet	*ifp = adapter->ifp;
4954 	u32		pmc, ctrl, ctrl_ext, rctl;
4955 	u16     	status;
4956 
4957 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4958 		return;
4959 
4960 	/* Advertise the wakeup capability */
4961 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4962 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4963 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4964 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4965 
4966 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4967 	    (adapter->hw.mac.type == e1000_pchlan) ||
4968 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4969 	    (adapter->hw.mac.type == e1000_ich10lan))
4970 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4971 
4972 	/* Keep the laser running on Fiber adapters */
4973 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4974 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4975 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4976 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4977 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4978 	}
4979 
4980 	/*
4981 	** Determine type of Wakeup: note that wol
4982 	** is set with all bits on by default.
4983 	*/
4984 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4985 		adapter->wol &= ~E1000_WUFC_MAG;
4986 
4987 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4988 		adapter->wol &= ~E1000_WUFC_MC;
4989 	else {
4990 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4991 		rctl |= E1000_RCTL_MPE;
4992 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4993 	}
4994 
4995 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4996 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4997 		if (em_enable_phy_wakeup(adapter))
4998 			return;
4999 	} else {
5000 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5001 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5002 	}
5003 
5004 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5005 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5006 
5007         /* Request PME */
5008         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5009 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5010 	if (ifp->if_capenable & IFCAP_WOL)
5011 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5012         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5013 
5014 	return;
5015 }
5016 
5017 /*
5018 ** WOL in the newer chipset interfaces (pchlan)
5019 ** requires settings to be copied into the phy
5020 */
5021 static int
5022 em_enable_phy_wakeup(struct adapter *adapter)
5023 {
5024 	struct e1000_hw *hw = &adapter->hw;
5025 	u32 mreg, ret = 0;
5026 	u16 preg;
5027 
5028 	/* copy MAC RARs to PHY RARs */
5029 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5030 
5031 	/* copy MAC MTA to PHY MTA */
5032 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5033 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5034 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5035 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5036 		    (u16)((mreg >> 16) & 0xFFFF));
5037 	}
5038 
5039 	/* configure PHY Rx Control register */
5040 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5041 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5042 	if (mreg & E1000_RCTL_UPE)
5043 		preg |= BM_RCTL_UPE;
5044 	if (mreg & E1000_RCTL_MPE)
5045 		preg |= BM_RCTL_MPE;
5046 	preg &= ~(BM_RCTL_MO_MASK);
5047 	if (mreg & E1000_RCTL_MO_3)
5048 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5049 				<< BM_RCTL_MO_SHIFT);
5050 	if (mreg & E1000_RCTL_BAM)
5051 		preg |= BM_RCTL_BAM;
5052 	if (mreg & E1000_RCTL_PMCF)
5053 		preg |= BM_RCTL_PMCF;
5054 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5055 	if (mreg & E1000_CTRL_RFCE)
5056 		preg |= BM_RCTL_RFCE;
5057 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5058 
5059 	/* enable PHY wakeup in MAC register */
5060 	E1000_WRITE_REG(hw, E1000_WUC,
5061 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5062 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5063 
5064 	/* configure and enable PHY wakeup in PHY registers */
5065 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5066 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5067 
5068 	/* activate PHY wakeup */
5069 	ret = hw->phy.ops.acquire(hw);
5070 	if (ret) {
5071 		printf("Could not acquire PHY\n");
5072 		return ret;
5073 	}
5074 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5075 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5076 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5077 	if (ret) {
5078 		printf("Could not read PHY page 769\n");
5079 		goto out;
5080 	}
5081 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5082 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5083 	if (ret)
5084 		printf("Could not set PHY Host Wakeup bit\n");
5085 out:
5086 	hw->phy.ops.release(hw);
5087 
5088 	return ret;
5089 }
5090 
5091 static void
5092 em_led_func(void *arg, int onoff)
5093 {
5094 	struct adapter	*adapter = arg;
5095 
5096 	EM_CORE_LOCK(adapter);
5097 	if (onoff) {
5098 		e1000_setup_led(&adapter->hw);
5099 		e1000_led_on(&adapter->hw);
5100 	} else {
5101 		e1000_led_off(&adapter->hw);
5102 		e1000_cleanup_led(&adapter->hw);
5103 	}
5104 	EM_CORE_UNLOCK(adapter);
5105 }
5106 
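/*
 * em_led_func() is the led(9) toggle callback.  A hedged sketch of
 * how the driver is expected to hook it up at attach time (placement
 * and variable names assumed, not shown in this excerpt):
 *
 *	adapter->led_dev = led_create(em_led_func, adapter,
 *	    device_get_nameunit(dev));
 */
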
5107 /*
5108 ** Disable the L0s and L1 link states
5109 */
5110 static void
5111 em_disable_aspm(struct adapter *adapter)
5112 {
5113 	int		base, reg;
5114 	u16		link_cap, link_ctrl;
5115 	device_t	dev = adapter->dev;
5116 
5117 	switch (adapter->hw.mac.type) {
5118 		case e1000_82573:
5119 		case e1000_82574:
5120 		case e1000_82583:
5121 			break;
5122 		default:
5123 			return;
5124 	}
5125 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5126 		return;
5127 	reg = base + PCIER_LINK_CAP;
5128 	link_cap = pci_read_config(dev, reg, 2);
5129 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5130 		return;
5131 	reg = base + PCIER_LINK_CTL;
5132 	link_ctrl = pci_read_config(dev, reg, 2);
5133 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5134 	pci_write_config(dev, reg, link_ctrl, 2);
5135 	return;
5136 }
5137 
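/*
 * Hedged reference note (PCIe spec): the ASPM Control field cleared
 * above is the low two bits of the Link Control register -- bit 0
 * enables L0s, bit 1 enables L1 -- so clearing PCIEM_LINK_CTL_ASPMC
 * disables both link states in one write.
 */
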
5138 /**********************************************************************
5139  *
5140  *  Update the board statistics counters.
5141  *
5142  **********************************************************************/
5143 static void
5144 em_update_stats_counters(struct adapter *adapter)
5145 {
5146 	struct ifnet   *ifp;
5147 
5148 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5149 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5150 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5151 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5152 	}
5153 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5154 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5155 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5156 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5157 
5158 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5159 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5160 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5161 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5162 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5163 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5164 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5165 	/*
5166 	** For watchdog management we need to know if we have been
5167 	** paused during the last interval, so capture that here.
5168 	*/
5169 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5170 	adapter->stats.xoffrxc += adapter->pause_frames;
5171 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5172 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5173 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5174 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5175 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5176 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5177 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5178 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5179 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5180 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5181 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5182 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5183 
5184 	/* For the 64-bit byte counters the low dword must be read first. */
5185 	/* Both registers clear on the read of the high dword */
5186 
5187 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5188 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5189 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5190 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5191 
5192 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5193 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5194 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5195 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5196 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5197 
5198 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5199 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5200 
5201 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5202 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5203 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5204 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5205 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5206 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5207 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5208 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5209 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5210 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5211 
5212 	/* Interrupt Counts */
5213 
5214 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5215 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5216 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5217 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5218 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5219 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5220 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5221 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5222 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5223 
5224 	if (adapter->hw.mac.type >= e1000_82543) {
5225 		adapter->stats.algnerrc +=
5226 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5227 		adapter->stats.rxerrc +=
5228 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5229 		adapter->stats.tncrs +=
5230 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5231 		adapter->stats.cexterr +=
5232 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5233 		adapter->stats.tsctc +=
5234 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5235 		adapter->stats.tsctfc +=
5236 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5237 	}
5238 	ifp = adapter->ifp;
5239 
5240 	ifp->if_collisions = adapter->stats.colc;
5241 
5242 	/* Rx Errors */
5243 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5244 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5245 	    adapter->stats.ruc + adapter->stats.roc +
5246 	    adapter->stats.mpc + adapter->stats.cexterr;
5247 
5248 	/* Tx Errors */
5249 	ifp->if_oerrors = adapter->stats.ecol +
5250 	    adapter->stats.latecol + adapter->watchdog_events;
5251 }
5252 
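/*
 * A minimal sketch (hypothetical helper, not wired into the code
 * above) of the 64-bit counter access rule noted earlier: read the
 * low dword first, then the high dword, which clears both halves.
 */
static inline u64
em_read_stat64(struct e1000_hw *hw, u32 lo_reg, u32 hi_reg)
{
	u64 val;

	val = E1000_READ_REG(hw, lo_reg);		/* low dword first */
	val |= (u64)E1000_READ_REG(hw, hi_reg) << 32;	/* clears both */
	return (val);
}
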
5253 /* Export a single 32-bit register via a read-only sysctl. */
5254 static int
5255 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5256 {
5257 	struct adapter *adapter;
5258 	u_int val;
5259 
5260 	adapter = oidp->oid_arg1;
5261 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5262 	return (sysctl_handle_int(oidp, &val, 0, req));
5263 }
5264 
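/*
 * The registers exported through this handler surface under the
 * device's sysctl tree; for example (unit 0 assumed), the two
 * registered below read as:
 *
 *	sysctl dev.em.0.device_control
 *	sysctl dev.em.0.rx_control
 */
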
5265 /*
5266  * Add sysctl variables, one per statistic, to the system.
5267  */
5268 static void
5269 em_add_hw_stats(struct adapter *adapter)
5270 {
5271 	device_t dev = adapter->dev;
5272 
5273 	struct tx_ring *txr = adapter->tx_rings;
5274 	struct rx_ring *rxr = adapter->rx_rings;
5275 
5276 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5277 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5278 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5279 	struct e1000_hw_stats *stats = &adapter->stats;
5280 
5281 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5282 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5283 
5284 #define QUEUE_NAME_LEN 32
5285 	char namebuf[QUEUE_NAME_LEN];
5286 
5287 	/* Driver Statistics */
5288 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5289 			CTLFLAG_RD, &adapter->link_irq,
5290 			"Link MSI-X IRQ Handled");
5291 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5292 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5293 			 "Std mbuf failed");
5294 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5295 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5296 			 "Std mbuf cluster failed");
5297 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5298 			CTLFLAG_RD, &adapter->dropped_pkts,
5299 			"Driver dropped packets");
5300 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5301 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5302 			"Driver tx dma failure in xmit");
5303 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5304 			CTLFLAG_RD, &adapter->rx_overruns,
5305 			"RX overruns");
5306 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5307 			CTLFLAG_RD, &adapter->watchdog_events,
5308 			"Watchdog timeouts");
5309 
5310 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5311 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5312 			em_sysctl_reg_handler, "IU",
5313 			"Device Control Register");
5314 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5315 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5316 			em_sysctl_reg_handler, "IU",
5317 			"Receiver Control Register");
5318 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5319 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5320 			"Flow Control High Watermark");
5321 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5322 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5323 			"Flow Control Low Watermark");
5324 
5325 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5326 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5327 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5328 					    CTLFLAG_RD, NULL, "Queue Name");
5329 		queue_list = SYSCTL_CHILDREN(queue_node);
5330 
5331 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5332 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5333 				E1000_TDH(txr->me),
5334 				em_sysctl_reg_handler, "IU",
5335 				"Transmit Descriptor Head");
5336 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5337 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5338 				E1000_TDT(txr->me),
5339 				em_sysctl_reg_handler, "IU",
5340 				"Transmit Descriptor Tail");
5341 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5342 				CTLFLAG_RD, &txr->tx_irq,
5343 				"Queue MSI-X Transmit Interrupts");
5344 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5345 				CTLFLAG_RD, &txr->no_desc_avail,
5346 				"Queue No Descriptor Available");
5347 
5348 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5349 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5350 				E1000_RDH(rxr->me),
5351 				em_sysctl_reg_handler, "IU",
5352 				"Receive Descriptor Head");
5353 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5354 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5355 				E1000_RDT(rxr->me),
5356 				em_sysctl_reg_handler, "IU",
5357 				"Receive Descriptor Tail");
5358 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5359 				CTLFLAG_RD, &rxr->rx_irq,
5360 				"Queue MSI-X Receive Interrupts");
5361 	}
5362 
5363 	/* MAC stats get their own sub node */
5364 
5365 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5366 				    CTLFLAG_RD, NULL, "Statistics");
5367 	stat_list = SYSCTL_CHILDREN(stat_node);
5368 
5369 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5370 			CTLFLAG_RD, &stats->ecol,
5371 			"Excessive collisions");
5372 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5373 			CTLFLAG_RD, &stats->scc,
5374 			"Single collisions");
5375 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5376 			CTLFLAG_RD, &stats->mcc,
5377 			"Multiple collisions");
5378 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5379 			CTLFLAG_RD, &stats->latecol,
5380 			"Late collisions");
5381 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5382 			CTLFLAG_RD, &stats->colc,
5383 			"Collision Count");
5384 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5385 			CTLFLAG_RD, &adapter->stats.symerrs,
5386 			"Symbol Errors");
5387 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5388 			CTLFLAG_RD, &adapter->stats.sec,
5389 			"Sequence Errors");
5390 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5391 			CTLFLAG_RD, &adapter->stats.dc,
5392 			"Defer Count");
5393 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5394 			CTLFLAG_RD, &adapter->stats.mpc,
5395 			"Missed Packets");
5396 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5397 			CTLFLAG_RD, &adapter->stats.rnbc,
5398 			"Receive No Buffers");
5399 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5400 			CTLFLAG_RD, &adapter->stats.ruc,
5401 			"Receive Undersize");
5402 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5403 			CTLFLAG_RD, &adapter->stats.rfc,
5404 			"Fragmented Packets Received");
5405 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5406 			CTLFLAG_RD, &adapter->stats.roc,
5407 			"Oversized Packets Received");
5408 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5409 			CTLFLAG_RD, &adapter->stats.rjc,
5410 			"Received Jabber");
5411 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5412 			CTLFLAG_RD, &adapter->stats.rxerrc,
5413 			"Receive Errors");
5414 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5415 			CTLFLAG_RD, &adapter->stats.crcerrs,
5416 			"CRC errors");
5417 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5418 			CTLFLAG_RD, &adapter->stats.algnerrc,
5419 			"Alignment Errors");
5420 	/* On 82575 these are collision counts */
5421 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5422 			CTLFLAG_RD, &adapter->stats.cexterr,
5423 			"Collision/Carrier extension errors");
5424 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5425 			CTLFLAG_RD, &adapter->stats.xonrxc,
5426 			"XON Received");
5427 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5428 			CTLFLAG_RD, &adapter->stats.xontxc,
5429 			"XON Transmitted");
5430 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5431 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5432 			"XOFF Received");
5433 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5434 			CTLFLAG_RD, &adapter->stats.xofftxc,
5435 			"XOFF Transmitted");
5436 
5437 	/* Packet Reception Stats */
5438 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5439 			CTLFLAG_RD, &adapter->stats.tpr,
5440 			"Total Packets Received");
5441 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5442 			CTLFLAG_RD, &adapter->stats.gprc,
5443 			"Good Packets Received");
5444 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5445 			CTLFLAG_RD, &adapter->stats.bprc,
5446 			"Broadcast Packets Received");
5447 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5448 			CTLFLAG_RD, &adapter->stats.mprc,
5449 			"Multicast Packets Received");
5450 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5451 			CTLFLAG_RD, &adapter->stats.prc64,
5452 			"64 byte frames received");
5453 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5454 			CTLFLAG_RD, &adapter->stats.prc127,
5455 			"65-127 byte frames received");
5456 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5457 			CTLFLAG_RD, &adapter->stats.prc255,
5458 			"128-255 byte frames received");
5459 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5460 			CTLFLAG_RD, &adapter->stats.prc511,
5461 			"256-511 byte frames received");
5462 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5463 			CTLFLAG_RD, &adapter->stats.prc1023,
5464 			"512-1023 byte frames received");
5465 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5466 			CTLFLAG_RD, &adapter->stats.prc1522,
5467 			"1024-1522 byte frames received");
5468 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5469 			CTLFLAG_RD, &adapter->stats.gorc,
5470 			"Good Octets Received");
5471 
5472 	/* Packet Transmission Stats */
5473 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5474 			CTLFLAG_RD, &adapter->stats.gotc,
5475 			"Good Octets Transmitted");
5476 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5477 			CTLFLAG_RD, &adapter->stats.tpt,
5478 			"Total Packets Transmitted");
5479 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5480 			CTLFLAG_RD, &adapter->stats.gptc,
5481 			"Good Packets Transmitted");
5482 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5483 			CTLFLAG_RD, &adapter->stats.bptc,
5484 			"Broadcast Packets Transmitted");
5485 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5486 			CTLFLAG_RD, &adapter->stats.mptc,
5487 			"Multicast Packets Transmitted");
5488 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5489 			CTLFLAG_RD, &adapter->stats.ptc64,
5490 			"64 byte frames transmitted");
5491 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5492 			CTLFLAG_RD, &adapter->stats.ptc127,
5493 			"65-127 byte frames transmitted");
5494 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5495 			CTLFLAG_RD, &adapter->stats.ptc255,
5496 			"128-255 byte frames transmitted");
5497 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5498 			CTLFLAG_RD, &adapter->stats.ptc511,
5499 			"256-511 byte frames transmitted");
5500 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5501 			CTLFLAG_RD, &adapter->stats.ptc1023,
5502 			"512-1023 byte frames transmitted");
5503 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5504 			CTLFLAG_RD, &adapter->stats.ptc1522,
5505 			"1024-1522 byte frames transmitted");
5506 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5507 			CTLFLAG_RD, &adapter->stats.tsctc,
5508 			"TSO Contexts Transmitted");
5509 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5510 			CTLFLAG_RD, &adapter->stats.tsctfc,
5511 			"TSO Contexts Failed");
5512 
5514 	/* Interrupt Stats */
5515 
5516 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5517 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5518 	int_list = SYSCTL_CHILDREN(int_node);
5519 
5520 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5521 			CTLFLAG_RD, &adapter->stats.iac,
5522 			"Interrupt Assertion Count");
5523 
5524 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5525 			CTLFLAG_RD, &adapter->stats.icrxptc,
5526 			"Interrupt Cause Rx Pkt Timer Expire Count");
5527 
5528 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5529 			CTLFLAG_RD, &adapter->stats.icrxatc,
5530 			"Interrupt Cause Rx Abs Timer Expire Count");
5531 
5532 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5533 			CTLFLAG_RD, &adapter->stats.ictxptc,
5534 			"Interrupt Cause Tx Pkt Timer Expire Count");
5535 
5536 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5537 			CTLFLAG_RD, &adapter->stats.ictxatc,
5538 			"Interrupt Cause Tx Abs Timer Expire Count");
5539 
5540 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5541 			CTLFLAG_RD, &adapter->stats.ictxqec,
5542 			"Interrupt Cause Tx Queue Empty Count");
5543 
5544 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5545 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5546 			"Interrupt Cause Tx Queue Min Thresh Count");
5547 
5548 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5549 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5550 			"Interrupt Cause Rx Desc Min Thresh Count");
5551 
5552 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5553 			CTLFLAG_RD, &adapter->stats.icrxoc,
5554 			"Interrupt Cause Receiver Overrun Count");
5555 }
5556 
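/*
 * Once registered, the nodes built above can be browsed wholesale
 * from userland, e.g. (unit 0 assumed):
 *
 *	sysctl dev.em.0.mac_stats
 *	sysctl dev.em.0.interrupts
 *	sysctl dev.em.0.queue0
 */
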
5557 /**********************************************************************
5558  *
5559  *  This routine provides a way to dump out the adapter EEPROM,
5560  *  often a useful debug/service tool. It only dumps the first
5561  *  32 words; the fields that matter lie within that extent.
5562  *
5563  **********************************************************************/
5564 static int
5565 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5566 {
5567 	struct adapter *adapter = (struct adapter *)arg1;
5568 	int error;
5569 	int result;
5570 
5571 	result = -1;
5572 	error = sysctl_handle_int(oidp, &result, 0, req);
5573 
5574 	if (error || !req->newptr)
5575 		return (error);
5576 
5577 	/*
5578 	 * This value will cause a hex dump of the
5579 	 * first 32 16-bit words of the EEPROM to
5580 	 * the screen.
5581 	 */
5582 	if (result == 1)
5583 		em_print_nvm_info(adapter);
5584 
5585 	return (error);
5586 }
5587 
5588 static void
5589 em_print_nvm_info(struct adapter *adapter)
5590 {
5591 	u16	eeprom_data;
5592 	int	i, j, row = 0;
5593 
5594 	/* It's a bit crude, but it gets the job done */
5595 	printf("\nInterface EEPROM Dump:\n");
5596 	printf("Offset\n0x0000  ");
5597 	for (i = 0, j = 0; i < 32; i++, j++) {
5598 		if (j == 8) { /* Make the offset block */
5599 			j = 0; ++row;
5600 			printf("\n0x00%x0  ", row);
5601 		}
5602 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5603 		printf("%04x ", eeprom_data);
5604 	}
5605 	printf("\n");
5606 }
5607 
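/*
 * The dump is a 4-row, 8-column table of 16-bit words; schematically
 * (the xxxx values are placeholders, not real EEPROM contents):
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	...
 */
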
5608 static int
5609 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5610 {
5611 	struct em_int_delay_info *info;
5612 	struct adapter *adapter;
5613 	u32 regval;
5614 	int error, usecs, ticks;
5615 
5616 	info = (struct em_int_delay_info *)arg1;
5617 	usecs = info->value;
5618 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5619 	if (error != 0 || req->newptr == NULL)
5620 		return (error);
5621 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5622 		return (EINVAL);
5623 	info->value = usecs;
5624 	ticks = EM_USECS_TO_TICKS(usecs);
5625 
5626 	adapter = info->adapter;
5627 
5628 	EM_CORE_LOCK(adapter);
5629 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5630 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5631 	/* Handle a few special cases. */
5632 	switch (info->offset) {
5633 	case E1000_RDTR:
5634 		break;
5635 	case E1000_TIDV:
5636 		if (ticks == 0) {
5637 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5638 			/* Don't write 0 into the TIDV register. */
5639 			regval++;
5640 		} else
5641 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5642 		break;
5643 	}
5644 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5645 	EM_CORE_UNLOCK(adapter);
5646 	return (0);
5647 }
5648 
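/*
 * Hedged usage sketch: the delay sysctls registered through
 * em_add_int_delay_sysctl() below take microseconds and are
 * converted to hardware ticks here, e.g. (leaf names assigned at
 * attach time, unit 0 assumed):
 *
 *	sysctl dev.em.0.rx_int_delay=32
 */
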
5649 static void
5650 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5651 	const char *description, struct em_int_delay_info *info,
5652 	int offset, int value)
5653 {
5654 	info->adapter = adapter;
5655 	info->offset = offset;
5656 	info->value = value;
5657 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5658 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5659 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5660 	    info, 0, em_sysctl_int_delay, "I", description);
5661 }
5662 
5663 static void
5664 em_set_sysctl_value(struct adapter *adapter, const char *name,
5665 	const char *description, int *limit, int value)
5666 {
5667 	*limit = value;
5668 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5669 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5670 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5671 }
5672 
5673 
5674 /*
5675 ** Set flow control using sysctl:
5676 ** Flow control values:
5677 **      0 - off
5678 **      1 - rx pause
5679 **      2 - tx pause
5680 **      3 - full
5681 */
5682 static int
5683 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5684 {
5685 	int		error;
5686 	static int	input = 3; /* default is full */
5687 	struct adapter	*adapter = (struct adapter *) arg1;
5688 
5689 	error = sysctl_handle_int(oidp, &input, 0, req);
5690 
5691 	if ((error) || (req->newptr == NULL))
5692 		return (error);
5693 
5694 	if (input == adapter->fc) /* no change? */
5695 		return (error);
5696 
5697 	switch (input) {
5698 	case e1000_fc_rx_pause:
5699 	case e1000_fc_tx_pause:
5700 	case e1000_fc_full:
5701 	case e1000_fc_none:
5702 		adapter->hw.fc.requested_mode = input;
5703 		adapter->fc = input;
5704 		break;
5705 	default:
5706 		/* Do nothing */
5707 		return (error);
5708 	}
5709 
5710 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5711 	e1000_force_mac_fc(&adapter->hw);
5712 	return (error);
5713 }
5714 
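/*
 * Hedged usage sketch (the leaf name is assigned at attach time,
 * unit 0 assumed) -- select full flow control, i.e. e1000_fc_full:
 *
 *	sysctl dev.em.0.fc=3
 */
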
5715 /*
5716 ** Manage Energy Efficient Ethernet:
5717 ** Control values:
5718 **     0/1 - enabled/disabled
5719 */
5720 static int
5721 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5722 {
5723 	struct adapter *adapter = (struct adapter *) arg1;
5724 	int		error, value;
5725 
5726 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5727 	error = sysctl_handle_int(oidp, &value, 0, req);
5728 	if (error || req->newptr == NULL)
5729 		return (error);
5730 	EM_CORE_LOCK(adapter);
5731 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5732 	em_init_locked(adapter);
5733 	EM_CORE_UNLOCK(adapter);
5734 	return (0);
5735 }
5736 
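/*
 * Note the sense of the knob: a non-zero write sets eee_disable.
 * Hedged usage sketch (leaf name assigned at attach time, unit 0
 * assumed):
 *
 *	sysctl dev.em.0.eee_control=1	(disable EEE)
 *	sysctl dev.em.0.eee_control=0	(enable EEE)
 */
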
5737 static int
5738 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5739 {
5740 	struct adapter *adapter;
5741 	int error;
5742 	int result;
5743 
5744 	result = -1;
5745 	error = sysctl_handle_int(oidp, &result, 0, req);
5746 
5747 	if (error || !req->newptr)
5748 		return (error);
5749 
5750 	if (result == 1) {
5751 		adapter = (struct adapter *)arg1;
5752 		em_print_debug_info(adapter);
5753 	}
5754 
5755 	return (error);
5756 }
5757 
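/*
 * Hedged usage sketch (leaf name assigned at attach time, unit 0
 * assumed): writing 1 dumps em_print_debug_info() output to the
 * console:
 *
 *	sysctl dev.em.0.debug=1
 */
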
5758 /*
5759 ** This routine is meant to be fluid; add whatever is
5760 ** needed for debugging a problem.  -jfv
5761 */
5762 static void
5763 em_print_debug_info(struct adapter *adapter)
5764 {
5765 	device_t dev = adapter->dev;
5766 	struct tx_ring *txr = adapter->tx_rings;
5767 	struct rx_ring *rxr = adapter->rx_rings;
5768 
5769 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5770 		printf("Interface is RUNNING ");
5771 	else
5772 		printf("Interface is NOT RUNNING ");
5773 
5774 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5775 		printf("and INACTIVE\n");
5776 	else
5777 		printf("and ACTIVE\n");
5778 
5779 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5780 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5781 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5782 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5783 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5784 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5785 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5786 	device_printf(dev, "TX descriptors avail = %d\n",
5787 	    txr->tx_avail);
5788 	device_printf(dev, "Tx Descriptors avail failure = %lu\n",
5789 	    txr->no_desc_avail);
5790 	device_printf(dev, "RX discarded packets = %lu\n",
5791 	    rxr->rx_discarded);
5792 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5793 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5794 }
5795