xref: /freebsd/sys/dev/e1000/if_em.c (revision c243e4902be8df1e643c76b5f18b68bb77cc5268)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2011, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #ifdef HAVE_KERNEL_OPTION_HEADERS
36 #include "opt_device_polling.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #endif
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #if __FreeBSD_version >= 800000
44 #include <sys/buf_ring.h>
45 #endif
46 #include <sys/bus.h>
47 #include <sys/endian.h>
48 #include <sys/kernel.h>
49 #include <sys/kthread.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/rman.h>
54 #include <sys/socket.h>
55 #include <sys/sockio.h>
56 #include <sys/sysctl.h>
57 #include <sys/taskqueue.h>
58 #include <sys/eventhandler.h>
59 #include <machine/bus.h>
60 #include <machine/resource.h>
61 
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_arp.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68 
69 #include <net/if_types.h>
70 #include <net/if_vlan_var.h>
71 
72 #include <netinet/in_systm.h>
73 #include <netinet/in.h>
74 #include <netinet/if_ether.h>
75 #include <netinet/ip.h>
76 #include <netinet/ip6.h>
77 #include <netinet/tcp.h>
78 #include <netinet/udp.h>
79 
80 #include <machine/in_cksum.h>
81 #include <dev/led/led.h>
82 #include <dev/pci/pcivar.h>
83 #include <dev/pci/pcireg.h>
84 
85 #include "e1000_api.h"
86 #include "e1000_82571.h"
87 #include "if_em.h"
88 
89 /*********************************************************************
90  *  Set this to one to display debug statistics
91  *********************************************************************/
92 int	em_display_debug_stats = 0;
93 
94 /*********************************************************************
95  *  Driver version:
96  *********************************************************************/
97 char em_driver_version[] = "7.3.2";
98 
99 /*********************************************************************
100  *  PCI Device ID Table
101  *
102  *  Used by probe to select the devices to load on.
103  *  The last field stores an index into em_strings.
104  *  The last entry must be all 0s.
105  *
106  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
107  *********************************************************************/
108 
109 static em_vendor_info_t em_vendor_info_array[] =
110 {
111 	/* Intel(R) PRO/1000 Network Connection */
112 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
113 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
114 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
115 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
116 						PCI_ANY_ID, PCI_ANY_ID, 0},
117 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
118 						PCI_ANY_ID, PCI_ANY_ID, 0},
119 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
120 						PCI_ANY_ID, PCI_ANY_ID, 0},
121 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
122 						PCI_ANY_ID, PCI_ANY_ID, 0},
123 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
124 						PCI_ANY_ID, PCI_ANY_ID, 0},
125 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
126 						PCI_ANY_ID, PCI_ANY_ID, 0},
127 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
129 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
130 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
131 
132 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
133 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
135 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
136 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
137 						PCI_ANY_ID, PCI_ANY_ID, 0},
138 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
139 						PCI_ANY_ID, PCI_ANY_ID, 0},
140 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
141 						PCI_ANY_ID, PCI_ANY_ID, 0},
142 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
143 						PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
145 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
146 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
147 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
148 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
150 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
152 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	/* required last entry */
176 	{ 0, 0, 0, 0, 0}
177 };
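
/*
 * Note (added; not in the original source): PCI_ANY_ID in the subvendor
 * and subdevice columns acts as a wildcard in em_probe, so a row matches
 * any subsystem IDs for its vendor/device pair.
 */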
178 
179 /*********************************************************************
180  *  Table of branding strings for all supported NICs.
181  *********************************************************************/
182 
183 static char *em_strings[] = {
184 	"Intel(R) PRO/1000 Network Connection"
185 };
186 
187 /*********************************************************************
188  *  Function prototypes
189  *********************************************************************/
190 static int	em_probe(device_t);
191 static int	em_attach(device_t);
192 static int	em_detach(device_t);
193 static int	em_shutdown(device_t);
194 static int	em_suspend(device_t);
195 static int	em_resume(device_t);
196 #ifdef EM_MULTIQUEUE
197 static int	em_mq_start(struct ifnet *, struct mbuf *);
198 static int	em_mq_start_locked(struct ifnet *,
199 		    struct tx_ring *, struct mbuf *);
200 static void	em_qflush(struct ifnet *);
201 #else
202 static void	em_start(struct ifnet *);
203 static void	em_start_locked(struct ifnet *, struct tx_ring *);
204 #endif
205 static int	em_ioctl(struct ifnet *, u_long, caddr_t);
206 static void	em_init(void *);
207 static void	em_init_locked(struct adapter *);
208 static void	em_stop(void *);
209 static void	em_media_status(struct ifnet *, struct ifmediareq *);
210 static int	em_media_change(struct ifnet *);
211 static void	em_identify_hardware(struct adapter *);
212 static int	em_allocate_pci_resources(struct adapter *);
213 static int	em_allocate_legacy(struct adapter *);
214 static int	em_allocate_msix(struct adapter *);
215 static int	em_allocate_queues(struct adapter *);
216 static int	em_setup_msix(struct adapter *);
217 static void	em_free_pci_resources(struct adapter *);
218 static void	em_local_timer(void *);
219 static void	em_reset(struct adapter *);
220 static int	em_setup_interface(device_t, struct adapter *);
221 
222 static void	em_setup_transmit_structures(struct adapter *);
223 static void	em_initialize_transmit_unit(struct adapter *);
224 static int	em_allocate_transmit_buffers(struct tx_ring *);
225 static void	em_free_transmit_structures(struct adapter *);
226 static void	em_free_transmit_buffers(struct tx_ring *);
227 
228 static int	em_setup_receive_structures(struct adapter *);
229 static int	em_allocate_receive_buffers(struct rx_ring *);
230 static void	em_initialize_receive_unit(struct adapter *);
231 static void	em_free_receive_structures(struct adapter *);
232 static void	em_free_receive_buffers(struct rx_ring *);
233 
234 static void	em_enable_intr(struct adapter *);
235 static void	em_disable_intr(struct adapter *);
236 static void	em_update_stats_counters(struct adapter *);
237 static void	em_add_hw_stats(struct adapter *adapter);
238 static void	em_txeof(struct tx_ring *);
239 static bool	em_rxeof(struct rx_ring *, int, int *);
240 #ifndef __NO_STRICT_ALIGNMENT
241 static int	em_fixup_rx(struct rx_ring *);
242 #endif
243 static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
244 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
245 		    struct ip *, u32 *, u32 *);
246 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
247 		    struct tcphdr *, u32 *, u32 *);
248 static void	em_set_promisc(struct adapter *);
249 static void	em_disable_promisc(struct adapter *);
250 static void	em_set_multi(struct adapter *);
251 static void	em_update_link_status(struct adapter *);
252 static void	em_refresh_mbufs(struct rx_ring *, int);
253 static void	em_register_vlan(void *, struct ifnet *, u16);
254 static void	em_unregister_vlan(void *, struct ifnet *, u16);
255 static void	em_setup_vlan_hw_support(struct adapter *);
256 static int	em_xmit(struct tx_ring *, struct mbuf **);
257 static int	em_dma_malloc(struct adapter *, bus_size_t,
258 		    struct em_dma_alloc *, int);
259 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
260 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
261 static void	em_print_nvm_info(struct adapter *);
262 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
263 static void	em_print_debug_info(struct adapter *);
264 static int 	em_is_valid_ether_addr(u8 *);
265 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
266 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
267 		    const char *, struct em_int_delay_info *, int, int);
268 /* Management and WOL Support */
269 static void	em_init_manageability(struct adapter *);
270 static void	em_release_manageability(struct adapter *);
271 static void     em_get_hw_control(struct adapter *);
272 static void     em_release_hw_control(struct adapter *);
273 static void	em_get_wakeup(device_t);
274 static void     em_enable_wakeup(device_t);
275 static int	em_enable_phy_wakeup(struct adapter *);
276 static void	em_led_func(void *, int);
277 static void	em_disable_aspm(struct adapter *);
278 
279 static int	em_irq_fast(void *);
280 
281 /* MSIX handlers */
282 static void	em_msix_tx(void *);
283 static void	em_msix_rx(void *);
284 static void	em_msix_link(void *);
285 static void	em_handle_tx(void *context, int pending);
286 static void	em_handle_rx(void *context, int pending);
287 static void	em_handle_link(void *context, int pending);
288 
289 static void	em_set_sysctl_value(struct adapter *, const char *,
290 		    const char *, int *, int);
291 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
292 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
293 
294 static __inline void em_rx_discard(struct rx_ring *, int);
295 
296 #ifdef DEVICE_POLLING
297 static poll_handler_t em_poll;
298 #endif /* DEVICE_POLLING */
299 
300 /*********************************************************************
301  *  FreeBSD Device Interface Entry Points
302  *********************************************************************/
303 
304 static device_method_t em_methods[] = {
305 	/* Device interface */
306 	DEVMETHOD(device_probe, em_probe),
307 	DEVMETHOD(device_attach, em_attach),
308 	DEVMETHOD(device_detach, em_detach),
309 	DEVMETHOD(device_shutdown, em_shutdown),
310 	DEVMETHOD(device_suspend, em_suspend),
311 	DEVMETHOD(device_resume, em_resume),
312 	{0, 0}
313 };
314 
315 static driver_t em_driver = {
316 	"em", em_methods, sizeof(struct adapter),
317 };
318 
319 devclass_t em_devclass;
320 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
321 MODULE_DEPEND(em, pci, 1, 1, 1);
322 MODULE_DEPEND(em, ether, 1, 1, 1);
323 
324 /*********************************************************************
325  *  Tunable default values.
326  *********************************************************************/
327 
328 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
329 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
330 #define M_TSO_LEN			66
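
/*
 * Illustrative example (added; not in the original source): the
 * interrupt-delay registers count in 1.024 usec units, which is what
 * these macros convert, with rounding:
 *	EM_TICKS_TO_USECS(8) = (1024 * 8 + 500) / 1000 = 8 usecs
 *	EM_USECS_TO_TICKS(8) = (1000 * 8 + 512) / 1024 = 8 ticks
 */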
331 
332 /* Allow common code without TSO */
333 #ifndef CSUM_TSO
334 #define CSUM_TSO	0
335 #endif
336 
337 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
338 
339 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
340 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
341 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
342 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
343 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
344     0, "Default transmit interrupt delay in usecs");
345 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
346     0, "Default receive interrupt delay in usecs");
347 
348 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
349 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
350 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
351 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
352 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
353     &em_tx_abs_int_delay_dflt, 0,
354     "Default transmit interrupt delay limit in usecs");
355 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
356     &em_rx_abs_int_delay_dflt, 0,
357     "Default receive interrupt delay limit in usecs");
358 
359 static int em_rxd = EM_DEFAULT_RXD;
360 static int em_txd = EM_DEFAULT_TXD;
361 TUNABLE_INT("hw.em.rxd", &em_rxd);
362 TUNABLE_INT("hw.em.txd", &em_txd);
363 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
364     "Number of receive descriptors per queue");
365 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
366     "Number of transmit descriptors per queue");
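
/*
 * Usage sketch (added; not in the original source): CTLFLAG_RDTUN
 * sysctls such as these are seeded from loader tunables, so they are
 * normally set in /boot/loader.conf, e.g.:
 *	hw.em.rxd=2048
 *	hw.em.txd=2048
 * and then inspected (read-only) at runtime via `sysctl hw.em`.
 */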
367 
368 static int em_smart_pwr_down = FALSE;
369 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
370 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
371     0, "Set to true to leave smart power down enabled on newer adapters");
372 
373 /* Controls whether promiscuous also shows bad packets */
374 static int em_debug_sbp = FALSE;
375 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
376 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
377     "Show bad packets in promiscuous mode");
378 
379 static int em_enable_msix = TRUE;
380 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
381 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
382     "Enable MSI-X interrupts");
383 
384 /* How many packets rxeof tries to clean at a time */
385 static int em_rx_process_limit = 100;
386 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
387 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
388     &em_rx_process_limit, 0,
389     "Maximum number of received packets to process "
390     "at a time, -1 means unlimited");
391 
392 /* Energy efficient ethernet - default to OFF */
393 static int eee_setting = 1;
394 TUNABLE_INT("hw.em.eee_setting", &eee_setting);
395 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
396     "Disable Energy Efficient Ethernet");
397 
398 /* Global used in WOL setup with multiport cards */
399 static int global_quad_port_a = 0;
400 
401 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
402 #include <dev/netmap/if_em_netmap.h>
403 #endif /* DEV_NETMAP */
404 
405 /*********************************************************************
406  *  Device identification routine
407  *
408  *  em_probe determines if the driver should be loaded on
409  *  adapter based on PCI vendor/device id of the adapter.
410  *
411  *  return BUS_PROBE_DEFAULT on success, positive on failure
412  *********************************************************************/
413 
414 static int
415 em_probe(device_t dev)
416 {
417 	char		adapter_name[60];
418 	u16		pci_vendor_id = 0;
419 	u16		pci_device_id = 0;
420 	u16		pci_subvendor_id = 0;
421 	u16		pci_subdevice_id = 0;
422 	em_vendor_info_t *ent;
423 
424 	INIT_DEBUGOUT("em_probe: begin");
425 
426 	pci_vendor_id = pci_get_vendor(dev);
427 	if (pci_vendor_id != EM_VENDOR_ID)
428 		return (ENXIO);
429 
430 	pci_device_id = pci_get_device(dev);
431 	pci_subvendor_id = pci_get_subvendor(dev);
432 	pci_subdevice_id = pci_get_subdevice(dev);
433 
434 	ent = em_vendor_info_array;
435 	while (ent->vendor_id != 0) {
436 		if ((pci_vendor_id == ent->vendor_id) &&
437 		    (pci_device_id == ent->device_id) &&
438 
439 		    ((pci_subvendor_id == ent->subvendor_id) ||
440 		    (ent->subvendor_id == PCI_ANY_ID)) &&
441 
442 		    ((pci_subdevice_id == ent->subdevice_id) ||
443 		    (ent->subdevice_id == PCI_ANY_ID))) {
444 			sprintf(adapter_name, "%s %s",
445 				em_strings[ent->index],
446 				em_driver_version);
447 			device_set_desc_copy(dev, adapter_name);
448 			return (BUS_PROBE_DEFAULT);
449 		}
450 		ent++;
451 	}
452 
453 	return (ENXIO);
454 }
455 
456 /*********************************************************************
457  *  Device initialization routine
458  *
459  *  The attach entry point is called when the driver is being loaded.
460  *  This routine identifies the type of hardware, allocates all resources
461  *  and initializes the hardware.
462  *
463  *  return 0 on success, positive on failure
464  *********************************************************************/
465 
466 static int
467 em_attach(device_t dev)
468 {
469 	struct adapter	*adapter;
470 	struct e1000_hw	*hw;
471 	int		error = 0;
472 
473 	INIT_DEBUGOUT("em_attach: begin");
474 
475 	if (resource_disabled("em", device_get_unit(dev))) {
476 		device_printf(dev, "Disabled by device hint\n");
477 		return (ENXIO);
478 	}
479 
480 	adapter = device_get_softc(dev);
481 	adapter->dev = adapter->osdep.dev = dev;
482 	hw = &adapter->hw;
483 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
484 
485 	/* SYSCTL stuff */
486 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
487 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
488 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
489 	    em_sysctl_nvm_info, "I", "NVM Information");
490 
491 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
492 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
493 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
494 	    em_sysctl_debug_info, "I", "Debug Information");
495 
496 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
497 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
498 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
499 	    em_set_flowcntl, "I", "Flow Control");
500 
501 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
502 
503 	/* Determine hardware and mac info */
504 	em_identify_hardware(adapter);
505 
506 	/* Setup PCI resources */
507 	if (em_allocate_pci_resources(adapter)) {
508 		device_printf(dev, "Allocation of PCI resources failed\n");
509 		error = ENXIO;
510 		goto err_pci;
511 	}
512 
513 	/*
514 	** For ICH8 and family we need to
515 	** map the flash memory, and this
516 	** must happen after the MAC is
517 	** identified
518 	*/
519 	if ((hw->mac.type == e1000_ich8lan) ||
520 	    (hw->mac.type == e1000_ich9lan) ||
521 	    (hw->mac.type == e1000_ich10lan) ||
522 	    (hw->mac.type == e1000_pchlan) ||
523 	    (hw->mac.type == e1000_pch2lan)) {
524 		int rid = EM_BAR_TYPE_FLASH;
525 		adapter->flash = bus_alloc_resource_any(dev,
526 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
527 		if (adapter->flash == NULL) {
528 			device_printf(dev, "Mapping of Flash failed\n");
529 			error = ENXIO;
530 			goto err_pci;
531 		}
532 		/* This is used in the shared code */
533 		hw->flash_address = (u8 *)adapter->flash;
534 		adapter->osdep.flash_bus_space_tag =
535 		    rman_get_bustag(adapter->flash);
536 		adapter->osdep.flash_bus_space_handle =
537 		    rman_get_bushandle(adapter->flash);
538 	}
539 
540 	/* Do Shared Code initialization */
541 	if (e1000_setup_init_funcs(hw, TRUE)) {
542 		device_printf(dev, "Setup of Shared code failed\n");
543 		error = ENXIO;
544 		goto err_pci;
545 	}
546 
547 	e1000_get_bus_info(hw);
548 
549 	/* Set up some sysctls for the tunable interrupt delays */
550 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
551 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
552 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
553 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
554 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
555 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
556 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
557 	    "receive interrupt delay limit in usecs",
558 	    &adapter->rx_abs_int_delay,
559 	    E1000_REGISTER(hw, E1000_RADV),
560 	    em_rx_abs_int_delay_dflt);
561 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
562 	    "transmit interrupt delay limit in usecs",
563 	    &adapter->tx_abs_int_delay,
564 	    E1000_REGISTER(hw, E1000_TADV),
565 	    em_tx_abs_int_delay_dflt);
566 
567 	/* Sysctl for limiting the amount of work done in the taskqueue */
568 	em_set_sysctl_value(adapter, "rx_processing_limit",
569 	    "max number of rx packets to process", &adapter->rx_process_limit,
570 	    em_rx_process_limit);
571 
572 	/*
573 	 * Validate number of transmit and receive descriptors. It
574 	 * must not exceed the hardware maximum, and must be a
575 	 * multiple of EM_DBA_ALIGN.
576 	 */
577 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
578 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
579 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
580 		    EM_DEFAULT_TXD, em_txd);
581 		adapter->num_tx_desc = EM_DEFAULT_TXD;
582 	} else
583 		adapter->num_tx_desc = em_txd;
584 
585 	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
586 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
587 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
588 		    EM_DEFAULT_RXD, em_rxd);
589 		adapter->num_rx_desc = EM_DEFAULT_RXD;
590 	} else
591 		adapter->num_rx_desc = em_rxd;
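
	/*
	 * Worked example (added; not in the original source; assumes
	 * EM_DBA_ALIGN is 128 as defined in if_em.h): legacy descriptors
	 * are 16 bytes, so the checks above pass when (count * 16) is a
	 * multiple of 128, i.e. the count is a multiple of 8. hw.em.txd=1024
	 * is accepted; hw.em.txd=1001 falls back to EM_DEFAULT_TXD.
	 */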
592 
593 	hw->mac.autoneg = DO_AUTO_NEG;
594 	hw->phy.autoneg_wait_to_complete = FALSE;
595 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
596 
597 	/* Copper options */
598 	if (hw->phy.media_type == e1000_media_type_copper) {
599 		hw->phy.mdix = AUTO_ALL_MODES;
600 		hw->phy.disable_polarity_correction = FALSE;
601 		hw->phy.ms_type = EM_MASTER_SLAVE;
602 	}
603 
604 	/*
605 	 * Set the frame limits assuming
606 	 * standard ethernet sized frames.
607 	 */
608 	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
609 	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
610 
611 	/*
612 	 * This controls when hardware reports transmit completion
613 	 * status.
614 	 */
615 	hw->mac.report_tx_early = 1;
616 
617 	/*
618 	** Get queue/ring memory
619 	*/
620 	if (em_allocate_queues(adapter)) {
621 		error = ENOMEM;
622 		goto err_pci;
623 	}
624 
625 	/* Allocate multicast array memory. */
626 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
627 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
628 	if (adapter->mta == NULL) {
629 		device_printf(dev, "Can not allocate multicast setup array\n");
630 		error = ENOMEM;
631 		goto err_late;
632 	}
633 
634 	/* Check SOL/IDER usage */
635 	if (e1000_check_reset_block(hw))
636 		device_printf(dev, "PHY reset is blocked"
637 		    " due to SOL/IDER session.\n");
638 
639 	/* Sysctl for setting Energy Efficient Ethernet */
640 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
641 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
642 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
643 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
644 	    adapter, 0, em_sysctl_eee, "I",
645 	    "Disable Energy Efficient Ethernet");
646 
647 	/*
648 	** Start from a known state; this is
649 	** important for reading the NVM and
650 	** MAC address.
651 	*/
652 	e1000_reset_hw(hw);
653 
654 
655 	/* Make sure we have a good EEPROM before we read from it */
656 	if (e1000_validate_nvm_checksum(hw) < 0) {
657 		/*
658 		** Some PCI-E parts fail the first check due to
659 		** the link being in a sleep state; call it again,
660 		** and if it fails a second time it's a real issue.
661 		*/
662 		if (e1000_validate_nvm_checksum(hw) < 0) {
663 			device_printf(dev,
664 			    "The EEPROM Checksum Is Not Valid\n");
665 			error = EIO;
666 			goto err_late;
667 		}
668 	}
669 
670 	/* Copy the permanent MAC address out of the EEPROM */
671 	if (e1000_read_mac_addr(hw) < 0) {
672 		device_printf(dev, "EEPROM read error while reading MAC"
673 		    " address\n");
674 		error = EIO;
675 		goto err_late;
676 	}
677 
678 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
679 		device_printf(dev, "Invalid MAC address\n");
680 		error = EIO;
681 		goto err_late;
682 	}
683 
684 	/*
685 	**  Do interrupt configuration
686 	*/
687 	if (adapter->msix > 1) /* Do MSIX */
688 		error = em_allocate_msix(adapter);
689 	else  /* MSI or Legacy */
690 		error = em_allocate_legacy(adapter);
691 	if (error)
692 		goto err_late;
693 
694 	/*
695 	 * Get Wake-on-Lan and Management info for later use
696 	 */
697 	em_get_wakeup(dev);
698 
699 	/* Setup OS specific network interface */
700 	if (em_setup_interface(dev, adapter) != 0)
701 		goto err_late;
702 
703 	em_reset(adapter);
704 
705 	/* Initialize statistics */
706 	em_update_stats_counters(adapter);
707 
708 	hw->mac.get_link_status = 1;
709 	em_update_link_status(adapter);
710 
711 	/* Register for VLAN events */
712 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
713 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
714 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
715 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
716 
717 	em_add_hw_stats(adapter);
718 
719 	/* Non-AMT based hardware can now take control from firmware */
720 	if (adapter->has_manage && !adapter->has_amt)
721 		em_get_hw_control(adapter);
722 
723 	/* Tell the stack that the interface is not active */
724 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
725 	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
726 
727 	adapter->led_dev = led_create(em_led_func, adapter,
728 	    device_get_nameunit(dev));
729 #ifdef DEV_NETMAP
730 	em_netmap_attach(adapter);
731 #endif /* DEV_NETMAP */
732 
733 	INIT_DEBUGOUT("em_attach: end");
734 
735 	return (0);
736 
737 err_late:
738 	em_free_transmit_structures(adapter);
739 	em_free_receive_structures(adapter);
740 	em_release_hw_control(adapter);
741 	if (adapter->ifp != NULL)
742 		if_free(adapter->ifp);
743 err_pci:
744 	em_free_pci_resources(adapter);
745 	free(adapter->mta, M_DEVBUF);
746 	EM_CORE_LOCK_DESTROY(adapter);
747 
748 	return (error);
749 }
750 
751 /*********************************************************************
752  *  Device removal routine
753  *
754  *  The detach entry point is called when the driver is being removed.
755  *  This routine stops the adapter and deallocates all the resources
756  *  that were allocated for driver operation.
757  *
758  *  return 0 on success, positive on failure
759  *********************************************************************/
760 
761 static int
762 em_detach(device_t dev)
763 {
764 	struct adapter	*adapter = device_get_softc(dev);
765 	struct ifnet	*ifp = adapter->ifp;
766 
767 	INIT_DEBUGOUT("em_detach: begin");
768 
769 	/* Make sure VLANs are not using the driver */
770 	if (adapter->ifp->if_vlantrunk != NULL) {
771 		device_printf(dev, "VLAN in use, detach first\n");
772 		return (EBUSY);
773 	}
774 
775 #ifdef DEVICE_POLLING
776 	if (ifp->if_capenable & IFCAP_POLLING)
777 		ether_poll_deregister(ifp);
778 #endif
779 
780 	if (adapter->led_dev != NULL)
781 		led_destroy(adapter->led_dev);
782 
783 	EM_CORE_LOCK(adapter);
784 	adapter->in_detach = 1;
785 	em_stop(adapter);
786 	EM_CORE_UNLOCK(adapter);
787 	EM_CORE_LOCK_DESTROY(adapter);
788 
789 	e1000_phy_hw_reset(&adapter->hw);
790 
791 	em_release_manageability(adapter);
792 	em_release_hw_control(adapter);
793 
794 	/* Unregister VLAN events */
795 	if (adapter->vlan_attach != NULL)
796 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
797 	if (adapter->vlan_detach != NULL)
798 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
799 
800 	ether_ifdetach(adapter->ifp);
801 	callout_drain(&adapter->timer);
802 
803 #ifdef DEV_NETMAP
804 	netmap_detach(ifp);
805 #endif /* DEV_NETMAP */
806 
807 	em_free_pci_resources(adapter);
808 	bus_generic_detach(dev);
809 	if_free(ifp);
810 
811 	em_free_transmit_structures(adapter);
812 	em_free_receive_structures(adapter);
813 
814 	em_release_hw_control(adapter);
815 	free(adapter->mta, M_DEVBUF);
816 
817 	return (0);
818 }
819 
820 /*********************************************************************
821  *
822  *  Shutdown entry point
823  *
824  **********************************************************************/
825 
826 static int
827 em_shutdown(device_t dev)
828 {
829 	return em_suspend(dev);
830 }
831 
832 /*
833  * Suspend/resume device methods.
834  */
835 static int
836 em_suspend(device_t dev)
837 {
838 	struct adapter *adapter = device_get_softc(dev);
839 
840 	EM_CORE_LOCK(adapter);
841 
842 	em_release_manageability(adapter);
843 	em_release_hw_control(adapter);
844 	em_enable_wakeup(dev);
845 
846 	EM_CORE_UNLOCK(adapter);
847 
848 	return bus_generic_suspend(dev);
849 }
850 
851 static int
852 em_resume(device_t dev)
853 {
854 	struct adapter *adapter = device_get_softc(dev);
855 	struct tx_ring	*txr = adapter->tx_rings;
856 	struct ifnet *ifp = adapter->ifp;
857 
858 	EM_CORE_LOCK(adapter);
859 	if (adapter->hw.mac.type == e1000_pch2lan)
860 		e1000_resume_workarounds_pchlan(&adapter->hw);
861 	em_init_locked(adapter);
862 	em_init_manageability(adapter);
863 
864 	if ((ifp->if_flags & IFF_UP) &&
865 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
866 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
867 			EM_TX_LOCK(txr);
868 #ifdef EM_MULTIQUEUE
869 			if (!drbr_empty(ifp, txr->br))
870 				em_mq_start_locked(ifp, txr, NULL);
871 #else
872 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
873 				em_start_locked(ifp, txr);
874 #endif
875 			EM_TX_UNLOCK(txr);
876 		}
877 	}
878 	EM_CORE_UNLOCK(adapter);
879 
880 	return bus_generic_resume(dev);
881 }
882 
883 
884 #ifdef EM_MULTIQUEUE
885 /*********************************************************************
886  *  Multiqueue Transmit routines
887  *
888  *  em_mq_start is called by the stack to initiate a transmit.
889  *  However, if the hardware is busy, the driver can queue the
890  *  request rather than do an immediate send; it is that queueing,
891  *  rather than the multiple tx queues, that is the advantage here.
892  **********************************************************************/
893 static int
894 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
895 {
896 	struct adapter  *adapter = txr->adapter;
897 	struct mbuf	*next;
898 	int		err = 0, enq = 0;
899 
900 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
901 	    IFF_DRV_RUNNING || adapter->link_active == 0) {
902 		if (m != NULL)
903 			err = drbr_enqueue(ifp, txr->br, m);
904 		return (err);
905 	}
906 
907 	enq = 0;
908 	if (m == NULL) {
909 		next = drbr_dequeue(ifp, txr->br);
910 	} else if (drbr_needs_enqueue(ifp, txr->br)) {
911 		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
912 			return (err);
913 		next = drbr_dequeue(ifp, txr->br);
914 	} else
915 		next = m;
916 
917 	/* Process the queue */
918 	while (next != NULL) {
919 		if ((err = em_xmit(txr, &next)) != 0) {
920 			if (next != NULL)
921 				err = drbr_enqueue(ifp, txr->br, next);
922 			break;
923 		}
924 		enq++;
925 		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
926 		ETHER_BPF_MTAP(ifp, next);
927 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
928 			break;
929 		next = drbr_dequeue(ifp, txr->br);
930 	}
931 
932 	if (enq > 0) {
933 		/* Set the watchdog */
934 		txr->queue_status = EM_QUEUE_WORKING;
935 		txr->watchdog_time = ticks;
936 	}
937 
938 	if (txr->tx_avail < EM_MAX_SCATTER)
939 		em_txeof(txr);
940 	if (txr->tx_avail < EM_MAX_SCATTER)
941 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
942 	return (err);
943 }
944 
945 /*
946 ** Multiqueue capable stack interface
947 */
948 static int
949 em_mq_start(struct ifnet *ifp, struct mbuf *m)
950 {
951 	struct adapter	*adapter = ifp->if_softc;
952 	struct tx_ring	*txr = adapter->tx_rings;
953 	int 		error;
954 
955 	if (EM_TX_TRYLOCK(txr)) {
956 		error = em_mq_start_locked(ifp, txr, m);
957 		EM_TX_UNLOCK(txr);
958 	} else
959 		error = drbr_enqueue(ifp, txr->br, m);
960 
961 	return (error);
962 }
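
/*
 * Design note (added; not in the original source): the EM_TX_TRYLOCK
 * above keeps concurrent senders from blocking on the TX lock; a frame
 * that loses the race is parked in the buf_ring and drained later by
 * whichever context next runs em_mq_start_locked with the lock held.
 */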
963 
964 /*
965 ** Flush all ring buffers
966 */
967 static void
968 em_qflush(struct ifnet *ifp)
969 {
970 	struct adapter  *adapter = ifp->if_softc;
971 	struct tx_ring  *txr = adapter->tx_rings;
972 	struct mbuf     *m;
973 
974 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
975 		EM_TX_LOCK(txr);
976 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
977 			m_freem(m);
978 		EM_TX_UNLOCK(txr);
979 	}
980 	if_qflush(ifp);
981 }
982 #else  /* !EM_MULTIQUEUE */
983 
984 static void
985 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
986 {
987 	struct adapter	*adapter = ifp->if_softc;
988 	struct mbuf	*m_head;
989 
990 	EM_TX_LOCK_ASSERT(txr);
991 
992 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
993 	    IFF_DRV_RUNNING)
994 		return;
995 
996 	if (!adapter->link_active)
997 		return;
998 
999 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1000 		/* Call cleanup if the number of TX descriptors is low */
1001 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1002 			em_txeof(txr);
1003 		if (txr->tx_avail < EM_MAX_SCATTER) {
1004 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1005 			break;
1006 		}
1007 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1008 		if (m_head == NULL)
1009 			break;
1010 		/*
1011 		 *  Encapsulation can modify our pointer, and/or make it
1012 		 *  NULL on failure.  In that event, we can't requeue.
1013 		 */
1014 		if (em_xmit(txr, &m_head)) {
1015 			if (m_head == NULL)
1016 				break;
1017 			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1018 			break;
1019 		}
1020 
1021 		/* Send a copy of the frame to the BPF listener */
1022 		ETHER_BPF_MTAP(ifp, m_head);
1023 
1024 		/* Set timeout in case hardware has problems transmitting. */
1025 		txr->watchdog_time = ticks;
1026 		txr->queue_status = EM_QUEUE_WORKING;
1027 	}
1028 
1029 	return;
1030 }
1031 
1032 static void
1033 em_start(struct ifnet *ifp)
1034 {
1035 	struct adapter	*adapter = ifp->if_softc;
1036 	struct tx_ring	*txr = adapter->tx_rings;
1037 
1038 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1039 		EM_TX_LOCK(txr);
1040 		em_start_locked(ifp, txr);
1041 		EM_TX_UNLOCK(txr);
1042 	}
1043 	return;
1044 }
1045 #endif /* EM_MULTIQUEUE */
1046 
1047 /*********************************************************************
1048  *  Ioctl entry point
1049  *
1050  *  em_ioctl is called when the user wants to configure the
1051  *  interface.
1052  *
1053  *  return 0 on success, positive on failure
1054  **********************************************************************/
1055 
1056 static int
1057 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1058 {
1059 	struct adapter	*adapter = ifp->if_softc;
1060 	struct ifreq	*ifr = (struct ifreq *)data;
1061 #if defined(INET) || defined(INET6)
1062 	struct ifaddr	*ifa = (struct ifaddr *)data;
1063 #endif
1064 	bool		avoid_reset = FALSE;
1065 	int		error = 0;
1066 
1067 	if (adapter->in_detach)
1068 		return (error);
1069 
1070 	switch (command) {
1071 	case SIOCSIFADDR:
1072 #ifdef INET
1073 		if (ifa->ifa_addr->sa_family == AF_INET)
1074 			avoid_reset = TRUE;
1075 #endif
1076 #ifdef INET6
1077 		if (ifa->ifa_addr->sa_family == AF_INET6)
1078 			avoid_reset = TRUE;
1079 #endif
1080 		/*
1081 		** Calling init results in link renegotiation,
1082 		** so we avoid doing it when possible.
1083 		*/
1084 		if (avoid_reset) {
1085 			ifp->if_flags |= IFF_UP;
1086 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1087 				em_init(adapter);
1088 #ifdef INET
1089 			if (!(ifp->if_flags & IFF_NOARP))
1090 				arp_ifinit(ifp, ifa);
1091 #endif
1092 		} else
1093 			error = ether_ioctl(ifp, command, data);
1094 		break;
1095 	case SIOCSIFMTU:
1096 	    {
1097 		int max_frame_size;
1098 
1099 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1100 
1101 		EM_CORE_LOCK(adapter);
1102 		switch (adapter->hw.mac.type) {
1103 		case e1000_82571:
1104 		case e1000_82572:
1105 		case e1000_ich9lan:
1106 		case e1000_ich10lan:
1107 		case e1000_pch2lan:
1108 		case e1000_82574:
1109 		case e1000_82583:
1110 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1111 			max_frame_size = 9234;
1112 			break;
1113 		case e1000_pchlan:
1114 			max_frame_size = 4096;
1115 			break;
1116 			/* Adapters that do not support jumbo frames */
1117 		case e1000_ich8lan:
1118 			max_frame_size = ETHER_MAX_LEN;
1119 			break;
1120 		default:
1121 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1122 		}
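		/*
		 * Illustrative note (added; not in the original source):
		 * the check below turns the frame limit into an MTU bound;
		 * e.g. a 9234-byte frame limit allows an MTU of up to
		 * 9234 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4) = 9216.
		 */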
1123 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1124 		    ETHER_CRC_LEN) {
1125 			EM_CORE_UNLOCK(adapter);
1126 			error = EINVAL;
1127 			break;
1128 		}
1129 
1130 		ifp->if_mtu = ifr->ifr_mtu;
1131 		adapter->max_frame_size =
1132 		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1133 		em_init_locked(adapter);
1134 		EM_CORE_UNLOCK(adapter);
1135 		break;
1136 	    }
1137 	case SIOCSIFFLAGS:
1138 		IOCTL_DEBUGOUT("ioctl rcv'd: "
1139 		    "SIOCSIFFLAGS (Set Interface Flags)");
1140 		EM_CORE_LOCK(adapter);
1141 		if (ifp->if_flags & IFF_UP) {
1142 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1143 				if ((ifp->if_flags ^ adapter->if_flags) &
1144 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1145 					em_disable_promisc(adapter);
1146 					em_set_promisc(adapter);
1147 				}
1148 			} else
1149 				em_init_locked(adapter);
1150 		} else
1151 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1152 				em_stop(adapter);
1153 		adapter->if_flags = ifp->if_flags;
1154 		EM_CORE_UNLOCK(adapter);
1155 		break;
1156 	case SIOCADDMULTI:
1157 	case SIOCDELMULTI:
1158 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1159 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1160 			EM_CORE_LOCK(adapter);
1161 			em_disable_intr(adapter);
1162 			em_set_multi(adapter);
1163 #ifdef DEVICE_POLLING
1164 			if (!(ifp->if_capenable & IFCAP_POLLING))
1165 #endif
1166 				em_enable_intr(adapter);
1167 			EM_CORE_UNLOCK(adapter);
1168 		}
1169 		break;
1170 	case SIOCSIFMEDIA:
1171 		/* Check SOL/IDER usage */
1172 		EM_CORE_LOCK(adapter);
1173 		if (e1000_check_reset_block(&adapter->hw)) {
1174 			EM_CORE_UNLOCK(adapter);
1175 			device_printf(adapter->dev, "Media change is"
1176 			    " blocked due to SOL/IDER session.\n");
1177 			break;
1178 		}
1179 		EM_CORE_UNLOCK(adapter);
1180 		/* FALLTHROUGH */
1181 	case SIOCGIFMEDIA:
1182 		IOCTL_DEBUGOUT("ioctl rcv'd: "
1183 		    "SIOCxIFMEDIA (Get/Set Interface Media)");
1184 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1185 		break;
1186 	case SIOCSIFCAP:
1187 	    {
1188 		int mask, reinit;
1189 
1190 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1191 		reinit = 0;
1192 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1193 #ifdef DEVICE_POLLING
1194 		if (mask & IFCAP_POLLING) {
1195 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1196 				error = ether_poll_register(em_poll, ifp);
1197 				if (error)
1198 					return (error);
1199 				EM_CORE_LOCK(adapter);
1200 				em_disable_intr(adapter);
1201 				ifp->if_capenable |= IFCAP_POLLING;
1202 				EM_CORE_UNLOCK(adapter);
1203 			} else {
1204 				error = ether_poll_deregister(ifp);
1205 				/* Enable interrupt even in error case */
1206 				EM_CORE_LOCK(adapter);
1207 				em_enable_intr(adapter);
1208 				ifp->if_capenable &= ~IFCAP_POLLING;
1209 				EM_CORE_UNLOCK(adapter);
1210 			}
1211 		}
1212 #endif
1213 		if (mask & IFCAP_HWCSUM) {
1214 			ifp->if_capenable ^= IFCAP_HWCSUM;
1215 			reinit = 1;
1216 		}
1217 		if (mask & IFCAP_TSO4) {
1218 			ifp->if_capenable ^= IFCAP_TSO4;
1219 			reinit = 1;
1220 		}
1221 		if (mask & IFCAP_VLAN_HWTAGGING) {
1222 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1223 			reinit = 1;
1224 		}
1225 		if (mask & IFCAP_VLAN_HWFILTER) {
1226 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1227 			reinit = 1;
1228 		}
1229 		if (mask & IFCAP_VLAN_HWTSO) {
1230 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1231 			reinit = 1;
1232 		}
1233 		if ((mask & IFCAP_WOL) &&
1234 		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1235 			if (mask & IFCAP_WOL_MCAST)
1236 				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1237 			if (mask & IFCAP_WOL_MAGIC)
1238 				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1239 		}
1240 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1241 			em_init(adapter);
1242 		VLAN_CAPABILITIES(ifp);
1243 		break;
1244 	    }
1245 
1246 	default:
1247 		error = ether_ioctl(ifp, command, data);
1248 		break;
1249 	}
1250 
1251 	return (error);
1252 }
1253 
1254 
1255 /*********************************************************************
1256  *  Init entry point
1257  *
1258  *  This routine is used in two ways. It is used by the stack as
1259  *  the init entry point in the network interface structure. It is
1260  *  also used by the driver as a hw/sw initialization routine to
1261  *  get to a consistent state.
1262  *
1263  *  This routine returns nothing; errors are logged internally.
1264  **********************************************************************/
1265 
1266 static void
1267 em_init_locked(struct adapter *adapter)
1268 {
1269 	struct ifnet	*ifp = adapter->ifp;
1270 	device_t	dev = adapter->dev;
1271 
1272 	INIT_DEBUGOUT("em_init: begin");
1273 
1274 	EM_CORE_LOCK_ASSERT(adapter);
1275 
1276 	em_disable_intr(adapter);
1277 	callout_stop(&adapter->timer);
1278 
1279 	/* Get the latest mac address, User can use a LAA */
1280 	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1281 	    ETHER_ADDR_LEN);
1282 
1283 	/* Put the address into the Receive Address Array */
1284 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1285 
1286 	/*
1287 	 * With the 82571 adapter, RAR[0] may be overwritten
1288 	 * when the other port is reset. We make a duplicate
1289 	 * in RAR[14] for that eventuality; this ensures
1290 	 * the interface continues to function.
1291 	 */
1292 	if (adapter->hw.mac.type == e1000_82571) {
1293 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1294 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1295 		    E1000_RAR_ENTRIES - 1);
1296 	}
1297 
1298 	/* Initialize the hardware */
1299 	em_reset(adapter);
1300 	em_update_link_status(adapter);
1301 
1302 	/* Setup VLAN support, basic and offload if available */
1303 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1304 
1305 	/* Set hardware offload abilities */
1306 	ifp->if_hwassist = 0;
1307 	if (ifp->if_capenable & IFCAP_TXCSUM)
1308 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1309 	if (ifp->if_capenable & IFCAP_TSO4)
1310 		ifp->if_hwassist |= CSUM_TSO;
1311 
1312 	/* Configure for OS presence */
1313 	em_init_manageability(adapter);
1314 
1315 	/* Prepare transmit descriptors and buffers */
1316 	em_setup_transmit_structures(adapter);
1317 	em_initialize_transmit_unit(adapter);
1318 
1319 	/* Setup Multicast table */
1320 	em_set_multi(adapter);
1321 
1322 	/*
1323 	** Figure out the desired mbuf
1324 	** pool for doing jumbos
1325 	*/
1326 	if (adapter->max_frame_size <= 2048)
1327 		adapter->rx_mbuf_sz = MCLBYTES;
1328 	else if (adapter->max_frame_size <= 4096)
1329 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1330 	else
1331 		adapter->rx_mbuf_sz = MJUM9BYTES;
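
	/*
	 * Illustrative example (added; not in the original source): a
	 * standard 1500-byte MTU gives a 1518-byte max frame and 2k
	 * (MCLBYTES) clusters, while a 9000-byte MTU gives 9018 bytes
	 * and selects 9k (MJUM9BYTES) clusters.
	 */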
1332 
1333 	/* Prepare receive descriptors and buffers */
1334 	if (em_setup_receive_structures(adapter)) {
1335 		device_printf(dev, "Could not setup receive structures\n");
1336 		em_stop(adapter);
1337 		return;
1338 	}
1339 	em_initialize_receive_unit(adapter);
1340 
1341 	/* Use real VLAN Filter support? */
1342 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1343 		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1344 			/* Use real VLAN Filter support */
1345 			em_setup_vlan_hw_support(adapter);
1346 		else {
1347 			u32 ctrl;
1348 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1349 			ctrl |= E1000_CTRL_VME;
1350 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1351 		}
1352 	}
1353 
1354 	/* Don't lose promiscuous settings */
1355 	em_set_promisc(adapter);
1356 
1357 	/* Set the interface as ACTIVE */
1358 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1359 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1360 
1361 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1362 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1363 
1364 	/* MSI/X configuration for 82574 */
1365 	if (adapter->hw.mac.type == e1000_82574) {
1366 		int tmp;
1367 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1368 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1369 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1370 		/* Set the IVAR - interrupt vector routing. */
1371 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1372 	}
1373 
1374 #ifdef DEVICE_POLLING
1375 	/*
1376 	 * Only enable interrupts if we are not polling; make sure
1377 	 * they are off otherwise.
1378 	 */
1379 	if (ifp->if_capenable & IFCAP_POLLING)
1380 		em_disable_intr(adapter);
1381 	else
1382 #endif /* DEVICE_POLLING */
1383 		em_enable_intr(adapter);
1384 
1385 	/* AMT based hardware can now take control from firmware */
1386 	if (adapter->has_manage && adapter->has_amt)
1387 		em_get_hw_control(adapter);
1388 }
1389 
1390 static void
1391 em_init(void *arg)
1392 {
1393 	struct adapter *adapter = arg;
1394 
1395 	EM_CORE_LOCK(adapter);
1396 	em_init_locked(adapter);
1397 	EM_CORE_UNLOCK(adapter);
1398 }
1399 
1400 
1401 #ifdef DEVICE_POLLING
1402 /*********************************************************************
1403  *
1404  *  Legacy polling routine: note this only works with single queue
1405  *
1406  *********************************************************************/
1407 static int
1408 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1409 {
1410 	struct adapter *adapter = ifp->if_softc;
1411 	struct tx_ring	*txr = adapter->tx_rings;
1412 	struct rx_ring	*rxr = adapter->rx_rings;
1413 	u32		reg_icr;
1414 	int		rx_done;
1415 
1416 	EM_CORE_LOCK(adapter);
1417 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1418 		EM_CORE_UNLOCK(adapter);
1419 		return (0);
1420 	}
1421 
1422 	if (cmd == POLL_AND_CHECK_STATUS) {
1423 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1424 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1425 			callout_stop(&adapter->timer);
1426 			adapter->hw.mac.get_link_status = 1;
1427 			em_update_link_status(adapter);
1428 			callout_reset(&adapter->timer, hz,
1429 			    em_local_timer, adapter);
1430 		}
1431 	}
1432 	EM_CORE_UNLOCK(adapter);
1433 
1434 	em_rxeof(rxr, count, &rx_done);
1435 
1436 	EM_TX_LOCK(txr);
1437 	em_txeof(txr);
1438 #ifdef EM_MULTIQUEUE
1439 	if (!drbr_empty(ifp, txr->br))
1440 		em_mq_start_locked(ifp, txr, NULL);
1441 #else
1442 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1443 		em_start_locked(ifp, txr);
1444 #endif
1445 	EM_TX_UNLOCK(txr);
1446 
1447 	return (rx_done);
1448 }
1449 #endif /* DEVICE_POLLING */
1450 
1451 
1452 /*********************************************************************
1453  *
1454  *  Fast Legacy/MSI Combined Interrupt Service routine
1455  *
1456  *********************************************************************/
1457 static int
1458 em_irq_fast(void *arg)
1459 {
1460 	struct adapter	*adapter = arg;
1461 	struct ifnet	*ifp;
1462 	u32		reg_icr;
1463 
1464 	ifp = adapter->ifp;
1465 
1466 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1467 
1468 	/* Hot eject?  */
1469 	if (reg_icr == 0xffffffff)
1470 		return FILTER_STRAY;
1471 
1472 	/* Definitely not our interrupt.  */
1473 	if (reg_icr == 0x0)
1474 		return FILTER_STRAY;
1475 
1476 	/*
1477 	 * Starting with the 82571 chip, ICR bit 31 (INT_ASSERTED)
1478 	 * indicates whether the interrupt belongs to us.
1479 	 */
1480 	if (adapter->hw.mac.type >= e1000_82571 &&
1481 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1482 		return FILTER_STRAY;
1483 
1484 	em_disable_intr(adapter);
1485 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1486 
1487 	/* Link status change */
1488 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1489 		adapter->hw.mac.get_link_status = 1;
1490 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1491 	}
1492 
1493 	if (reg_icr & E1000_ICR_RXO)
1494 		adapter->rx_overruns++;
1495 	return FILTER_HANDLED;
1496 }
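
/*
 * Note (added; not in the original source): as a fast (filter) handler
 * this runs in interrupt context, so it only masks the interrupt and
 * defers the actual RX/TX cleanup to em_handle_que via the taskqueue.
 */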
1497 
1498 /* Combined RX/TX handler, used by Legacy and MSI */
1499 static void
1500 em_handle_que(void *context, int pending)
1501 {
1502 	struct adapter	*adapter = context;
1503 	struct ifnet	*ifp = adapter->ifp;
1504 	struct tx_ring	*txr = adapter->tx_rings;
1505 	struct rx_ring	*rxr = adapter->rx_rings;
1506 
1507 
1508 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1509 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1510 		EM_TX_LOCK(txr);
1511 		em_txeof(txr);
1512 #ifdef EM_MULTIQUEUE
1513 		if (!drbr_empty(ifp, txr->br))
1514 			em_mq_start_locked(ifp, txr, NULL);
1515 #else
1516 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1517 			em_start_locked(ifp, txr);
1518 #endif
1519 		EM_TX_UNLOCK(txr);
1520 		if (more) {
1521 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1522 			return;
1523 		}
1524 	}
1525 
1526 	em_enable_intr(adapter);
1527 	return;
1528 }
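
/*
 * Note (added; not in the original source): when em_rxeof reports more
 * work pending, the task requeues itself and returns with interrupts
 * still masked; em_enable_intr runs only once a pass completes with no
 * backlog, so bursts drain in the taskqueue without interrupt churn.
 */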
1529 
1530 
1531 /*********************************************************************
1532  *
1533  *  MSIX Interrupt Service Routines
1534  *
1535  **********************************************************************/
1536 static void
1537 em_msix_tx(void *arg)
1538 {
1539 	struct tx_ring *txr = arg;
1540 	struct adapter *adapter = txr->adapter;
1541 	struct ifnet	*ifp = adapter->ifp;
1542 
1543 	++txr->tx_irq;
1544 	EM_TX_LOCK(txr);
1545 	em_txeof(txr);
1546 #ifdef EM_MULTIQUEUE
1547 	if (!drbr_empty(ifp, txr->br))
1548 		em_mq_start_locked(ifp, txr, NULL);
1549 #else
1550 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1551 		em_start_locked(ifp, txr);
1552 #endif
1553 	/* Reenable this interrupt */
1554 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1555 	EM_TX_UNLOCK(txr);
1556 	return;
1557 }
1558 
1559 /*********************************************************************
1560  *
1561  *  MSIX RX Interrupt Service routine
1562  *
1563  **********************************************************************/
1564 
1565 static void
1566 em_msix_rx(void *arg)
1567 {
1568 	struct rx_ring	*rxr = arg;
1569 	struct adapter	*adapter = rxr->adapter;
1570 	bool		more;
1571 
1572 	++rxr->rx_irq;
1573 	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1574 		return;
1575 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1576 	if (more)
1577 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1578 	else
1579 		/* Reenable this interrupt */
1580 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1581 	return;
1582 }
1583 
1584 /*********************************************************************
1585  *
1586  *  MSIX Link Fast Interrupt Service routine
1587  *
1588  **********************************************************************/
1589 static void
1590 em_msix_link(void *arg)
1591 {
1592 	struct adapter	*adapter = arg;
1593 	u32		reg_icr;
1594 
1595 	++adapter->link_irq;
1596 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1597 
1598 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1599 		adapter->hw.mac.get_link_status = 1;
1600 		em_handle_link(adapter, 0);
1601 	} else
1602 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1603 		    EM_MSIX_LINK | E1000_IMS_LSC);
1604 	return;
1605 }
1606 
1607 static void
1608 em_handle_rx(void *context, int pending)
1609 {
1610 	struct rx_ring	*rxr = context;
1611 	struct adapter	*adapter = rxr->adapter;
1612 	bool		more;
1613 
1614 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1615 	if (more)
1616 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1617 	else
1618 		/* Reenable this interrupt */
1619 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1620 }
1621 
1622 static void
1623 em_handle_tx(void *context, int pending)
1624 {
1625 	struct tx_ring	*txr = context;
1626 	struct adapter	*adapter = txr->adapter;
1627 	struct ifnet	*ifp = adapter->ifp;
1628 
1629 	EM_TX_LOCK(txr);
1630 	em_txeof(txr);
1631 #ifdef EM_MULTIQUEUE
1632 	if (!drbr_empty(ifp, txr->br))
1633 		em_mq_start_locked(ifp, txr, NULL);
1634 #else
1635 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1636 		em_start_locked(ifp, txr);
1637 #endif
1638 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1639 	EM_TX_UNLOCK(txr);
1640 }
1641 
1642 static void
1643 em_handle_link(void *context, int pending)
1644 {
1645 	struct adapter	*adapter = context;
1646 	struct tx_ring	*txr = adapter->tx_rings;
1647 	struct ifnet *ifp = adapter->ifp;
1648 
1649 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1650 		return;
1651 
1652 	EM_CORE_LOCK(adapter);
1653 	callout_stop(&adapter->timer);
1654 	em_update_link_status(adapter);
1655 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1656 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1657 	    EM_MSIX_LINK | E1000_IMS_LSC);
1658 	if (adapter->link_active) {
1659 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1660 			EM_TX_LOCK(txr);
1661 #ifdef EM_MULTIQUEUE
1662 			if (!drbr_empty(ifp, txr->br))
1663 				em_mq_start_locked(ifp, txr, NULL);
1664 #else
1665 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1666 				em_start_locked(ifp, txr);
1667 #endif
1668 			EM_TX_UNLOCK(txr);
1669 		}
1670 	}
1671 	EM_CORE_UNLOCK(adapter);
1672 }
1673 
1674 
1675 /*********************************************************************
1676  *
1677  *  Media Ioctl callback
1678  *
1679  *  This routine is called whenever the user queries the status of
1680  *  the interface using ifconfig.
1681  *
1682  **********************************************************************/
1683 static void
1684 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1685 {
1686 	struct adapter *adapter = ifp->if_softc;
1687 	u_char fiber_type = IFM_1000_SX;
1688 
1689 	INIT_DEBUGOUT("em_media_status: begin");
1690 
1691 	EM_CORE_LOCK(adapter);
1692 	em_update_link_status(adapter);
1693 
1694 	ifmr->ifm_status = IFM_AVALID;
1695 	ifmr->ifm_active = IFM_ETHER;
1696 
1697 	if (!adapter->link_active) {
1698 		EM_CORE_UNLOCK(adapter);
1699 		return;
1700 	}
1701 
1702 	ifmr->ifm_status |= IFM_ACTIVE;
1703 
1704 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1705 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1706 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1707 	} else {
1708 		switch (adapter->link_speed) {
1709 		case 10:
1710 			ifmr->ifm_active |= IFM_10_T;
1711 			break;
1712 		case 100:
1713 			ifmr->ifm_active |= IFM_100_TX;
1714 			break;
1715 		case 1000:
1716 			ifmr->ifm_active |= IFM_1000_T;
1717 			break;
1718 		}
1719 		if (adapter->link_duplex == FULL_DUPLEX)
1720 			ifmr->ifm_active |= IFM_FDX;
1721 		else
1722 			ifmr->ifm_active |= IFM_HDX;
1723 	}
1724 	EM_CORE_UNLOCK(adapter);
1725 }
1726 
1727 /*********************************************************************
1728  *
1729  *  Media Ioctl callback
1730  *
1731  *  This routine is called when the user changes speed/duplex using
1732  *  the media/mediaopt options with ifconfig.
1733  *
1734  **********************************************************************/
1735 static int
1736 em_media_change(struct ifnet *ifp)
1737 {
1738 	struct adapter *adapter = ifp->if_softc;
1739 	struct ifmedia  *ifm = &adapter->media;
1740 
1741 	INIT_DEBUGOUT("em_media_change: begin");
1742 
1743 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1744 		return (EINVAL);
1745 
1746 	EM_CORE_LOCK(adapter);
1747 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1748 	case IFM_AUTO:
1749 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1750 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1751 		break;
1752 	case IFM_1000_LX:
1753 	case IFM_1000_SX:
1754 	case IFM_1000_T:
1755 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1756 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1757 		break;
1758 	case IFM_100_TX:
1759 		adapter->hw.mac.autoneg = FALSE;
1760 		adapter->hw.phy.autoneg_advertised = 0;
1761 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1762 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1763 		else
1764 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1765 		break;
1766 	case IFM_10_T:
1767 		adapter->hw.mac.autoneg = FALSE;
1768 		adapter->hw.phy.autoneg_advertised = 0;
1769 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1770 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1771 		else
1772 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1773 		break;
1774 	default:
1775 		device_printf(adapter->dev, "Unsupported media type\n");
1776 	}
1777 
1778 	em_init_locked(adapter);
1779 	EM_CORE_UNLOCK(adapter);
1780 
1781 	return (0);
1782 }
1783 
1784 /*********************************************************************
1785  *
1786  *  This routine maps the mbufs to tx descriptors.
1787  *
1788  *  return 0 on success, positive on failure
1789  **********************************************************************/
1790 
1791 static int
1792 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1793 {
1794 	struct adapter		*adapter = txr->adapter;
1795 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1796 	bus_dmamap_t		map;
1797 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1798 	struct e1000_tx_desc	*ctxd = NULL;
1799 	struct mbuf		*m_head;
1800 	struct ether_header	*eh;
1801 	struct ip		*ip = NULL;
1802 	struct tcphdr		*tp = NULL;
1803 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1804 	int			ip_off, poff;
1805 	int			nsegs, i, j, first, last = 0;
1806 	int			error, do_tso, tso_desc = 0, remap = 1;
1807 
1808 retry:
1809 	m_head = *m_headp;
1810 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1811 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1812 	ip_off = poff = 0;
1813 
1814 	/*
1815 	 * Intel recommends that the entire IP/TCP header reside in a
1816 	 * single buffer. If multiple descriptors are used to describe
1817 	 * the IP/TCP headers, each descriptor should describe one or
1818 	 * more complete headers; descriptors referencing only parts of
1819 	 * headers are not supported. If all layer headers are not
1820 	 * coalesced into a single buffer, each buffer should not cross
1821 	 * a 4KB boundary, or be larger than the maximum read request
1822 	 * size. The controller also requires modifying the IP/TCP
1823 	 * headers to make TSO work, so we first get a writable mbuf
1824 	 * chain, then coalesce the ethernet/IP/TCP headers into a
1825 	 * single buffer to meet the controller's requirements. This
1826 	 * also simplifies checksum offloading, which has similar rules.
1827 	 */
1828 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1829 		if (do_tso || (m_head->m_next != NULL &&
1830 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1831 			if (M_WRITABLE(*m_headp) == 0) {
1832 				m_head = m_dup(*m_headp, M_DONTWAIT);
1833 				m_freem(*m_headp);
1834 				if (m_head == NULL) {
1835 					*m_headp = NULL;
1836 					return (ENOBUFS);
1837 				}
1838 				*m_headp = m_head;
1839 			}
1840 		}
1841 		/*
1842 		 * XXX
1843 		 * Assume IPv4, we don't have TSO/checksum offload support
1844 		 * for IPv6 yet.
1845 		 */
1846 		ip_off = sizeof(struct ether_header);
1847 		m_head = m_pullup(m_head, ip_off);
1848 		if (m_head == NULL) {
1849 			*m_headp = NULL;
1850 			return (ENOBUFS);
1851 		}
1852 		eh = mtod(m_head, struct ether_header *);
1853 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1854 			ip_off = sizeof(struct ether_vlan_header);
1855 			m_head = m_pullup(m_head, ip_off);
1856 			if (m_head == NULL) {
1857 				*m_headp = NULL;
1858 				return (ENOBUFS);
1859 			}
1860 		}
1861 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1862 		if (m_head == NULL) {
1863 			*m_headp = NULL;
1864 			return (ENOBUFS);
1865 		}
1866 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1867 		poff = ip_off + (ip->ip_hl << 2);
1868 		if (do_tso) {
1869 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1870 			if (m_head == NULL) {
1871 				*m_headp = NULL;
1872 				return (ENOBUFS);
1873 			}
1874 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1875 			/*
1876 			 * TSO workaround:
1877 			 *   pull 4 more bytes of data into it.
1878 			 */
1879 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1880 			if (m_head == NULL) {
1881 				*m_headp = NULL;
1882 				return (ENOBUFS);
1883 			}
1884 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1885 			ip->ip_len = 0;
1886 			ip->ip_sum = 0;
1887 			/*
1888 			 * The pseudo TCP checksum does not include the TCP
1889 			 * payload length, so the driver must recompute it here
1890 			 * to match what the hardware expects; this follows
1891 			 * Microsoft's Large Send specification.
1892 			 */
1893 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1894 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1895 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1896 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1897 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1898 			if (m_head == NULL) {
1899 				*m_headp = NULL;
1900 				return (ENOBUFS);
1901 			}
1902 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1903 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1904 			if (m_head == NULL) {
1905 				*m_headp = NULL;
1906 				return (ENOBUFS);
1907 			}
1908 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1909 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1910 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1911 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1912 			if (m_head == NULL) {
1913 				*m_headp = NULL;
1914 				return (ENOBUFS);
1915 			}
1916 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1917 		}
1918 		*m_headp = m_head;
1919 	}
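
	/*
	 * Note: on failure m_pullup(9) frees the chain and returns
	 * NULL, which is why each failure path above only clears
	 * *m_headp before returning ENOBUFS.
	 */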
1920 
1921 	/*
1922 	 * Map the packet for DMA
1923 	 *
1924 	 * Capture the first descriptor index;
1925 	 * this descriptor will store the index
1926 	 * of the EOP, which is the only one that
1927 	 * now gets a DONE bit writeback.
1928 	 */
1929 	first = txr->next_avail_desc;
1930 	tx_buffer = &txr->tx_buffers[first];
1931 	tx_buffer_mapped = tx_buffer;
1932 	map = tx_buffer->map;
1933 
1934 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1935 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1936 
1937 	/*
1938 	 * There are two types of errors we can (try) to handle:
1939 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1940 	 *   out of segments.  Defragment the mbuf chain and try again.
1941 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1942 	 *   at this point in time.  Defer sending and try again later.
1943 	 * All other errors, in particular EINVAL, are fatal and prevent the
1944 	 * mbuf chain from ever going through.  Drop it and report error.
1945 	 */
1946 	if (error == EFBIG && remap) {
1947 		struct mbuf *m;
1948 
1949 		m = m_defrag(*m_headp, M_DONTWAIT);
1950 		if (m == NULL) {
1951 			adapter->mbuf_alloc_failed++;
1952 			m_freem(*m_headp);
1953 			*m_headp = NULL;
1954 			return (ENOBUFS);
1955 		}
1956 		*m_headp = m;
1957 
1958 		/* Try it again, but only once */
1959 		remap = 0;
1960 		goto retry;
1961 	} else if (error == ENOMEM) {
1962 		adapter->no_tx_dma_setup++;
1963 		return (error);
1964 	} else if (error != 0) {
1965 		adapter->no_tx_dma_setup++;
1966 		m_freem(*m_headp);
1967 		*m_headp = NULL;
1968 		return (error);
1969 	}
1970 
1971 	/*
1972 	 * TSO Hardware workaround, if this packet is not
1973 	 * TSO, and is only a single descriptor long, and
1974 	 * it follows a TSO burst, then we need to add a
1975 	 * sentinel descriptor to prevent premature writeback.
1976 	 */
1977 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1978 		if (nsegs == 1)
1979 			tso_desc = TRUE;
1980 		txr->tx_tso = FALSE;
1981 	}
1982 
1983 	if (nsegs > (txr->tx_avail - 2)) {
1984 		txr->no_desc_avail++;
1985 		bus_dmamap_unload(txr->txtag, map);
1986 		return (ENOBUFS);
1987 	}
1988 	m_head = *m_headp;
1989 
1990 	/* Do hardware assists */
1991 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1992 		em_tso_setup(txr, m_head, ip_off, ip, tp,
1993 		    &txd_upper, &txd_lower);
1994 		/* we need to make a final sentinel transmit desc */
1995 		tso_desc = TRUE;
1996 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1997 		em_transmit_checksum_setup(txr, m_head,
1998 		    ip_off, ip, &txd_upper, &txd_lower);
1999 
2000 	if (m_head->m_flags & M_VLANTAG) {
2001 		/* Set the vlan id. */
2002 		txd_upper |=
2003 		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2004 		/* Tell hardware to add tag */
2005 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2006 	}
2007 
2008 	i = txr->next_avail_desc;
2009 
2010 	/* Set up our transmit descriptors */
2011 	for (j = 0; j < nsegs; j++) {
2012 		bus_size_t seg_len;
2013 		bus_addr_t seg_addr;
2014 
2015 		tx_buffer = &txr->tx_buffers[i];
2016 		ctxd = &txr->tx_base[i];
2017 		seg_addr = segs[j].ds_addr;
2018 		seg_len  = segs[j].ds_len;
2019 		/*
2020 		** TSO Workaround:
2021 		** If this is the last descriptor, we want to
2022 		** split it so we have a small final sentinel
2023 		*/
2024 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2025 			seg_len -= 4;
2026 			ctxd->buffer_addr = htole64(seg_addr);
2027 			ctxd->lower.data = htole32(
2028 			adapter->txd_cmd | txd_lower | seg_len);
2029 			ctxd->upper.data =
2030 			    htole32(txd_upper);
2031 			if (++i == adapter->num_tx_desc)
2032 				i = 0;
2033 			/* Now make the sentinel */
2034 			++txd_used; /* using an extra txd */
2035 			ctxd = &txr->tx_base[i];
2036 			tx_buffer = &txr->tx_buffers[i];
2037 			ctxd->buffer_addr =
2038 			    htole64(seg_addr + seg_len);
2039 			ctxd->lower.data = htole32(
2040 			adapter->txd_cmd | txd_lower | 4);
2041 			ctxd->upper.data =
2042 			    htole32(txd_upper);
2043 			last = i;
2044 			if (++i == adapter->num_tx_desc)
2045 				i = 0;
2046 		} else {
2047 			ctxd->buffer_addr = htole64(seg_addr);
2048 			ctxd->lower.data = htole32(
2049 			adapter->txd_cmd | txd_lower | seg_len);
2050 			ctxd->upper.data =
2051 			    htole32(txd_upper);
2052 			last = i;
2053 			if (++i == adapter->num_tx_desc)
2054 				i = 0;
2055 		}
2056 		tx_buffer->m_head = NULL;
2057 		tx_buffer->next_eop = -1;
2058 	}
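
	/*
	 * Illustration of the split above: with the sentinel required
	 * and a final segment of, say, 1000 bytes, the loop emits a
	 * 996-byte descriptor followed by a separate 4-byte sentinel
	 * descriptor, so the hardware cannot report DONE before the
	 * entire packet has been fetched.
	 */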
2059 
2060 	txr->next_avail_desc = i;
2061 	txr->tx_avail -= nsegs;
2062 	if (tso_desc) /* TSO used an extra for sentinel */
2063 		txr->tx_avail -= txd_used;
2064 
2065 	tx_buffer->m_head = m_head;
2066 	/*
2067 	** Here we swap the map so the last descriptor,
2068 	** which gets the completion interrupt has the
2069 	** real map, and the first descriptor gets the
2070 	** unused map from this descriptor.
2071 	*/
2072 	tx_buffer_mapped->map = tx_buffer->map;
2073 	tx_buffer->map = map;
2074 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2075 
2076 	/*
2077 	 * Last Descriptor of Packet
2078 	 * needs End Of Packet (EOP)
2079 	 * and Report Status (RS)
2080 	 */
2081 	ctxd->lower.data |=
2082 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2083 	/*
2084 	 * Keep track in the first buffer which
2085 	 * descriptor will be written back
2086 	 */
2087 	tx_buffer = &txr->tx_buffers[first];
2088 	tx_buffer->next_eop = last;
2089 	/* Update the watchdog time early and often */
2090 	txr->watchdog_time = ticks;
2091 
2092 	/*
2093 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2094 	 * that this frame is available to transmit.
2095 	 */
2096 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2097 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2098 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2099 
2100 	return (0);
2101 }
2102 
2103 static void
2104 em_set_promisc(struct adapter *adapter)
2105 {
2106 	struct ifnet	*ifp = adapter->ifp;
2107 	u32		reg_rctl;
2108 
2109 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2110 
2111 	if (ifp->if_flags & IFF_PROMISC) {
2112 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2113 		/* Turn this on if you want to see bad packets */
2114 		if (em_debug_sbp)
2115 			reg_rctl |= E1000_RCTL_SBP;
2116 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2117 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2118 		reg_rctl |= E1000_RCTL_MPE;
2119 		reg_rctl &= ~E1000_RCTL_UPE;
2120 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2121 	}
2122 }
2123 
2124 static void
2125 em_disable_promisc(struct adapter *adapter)
2126 {
2127 	u32	reg_rctl;
2128 
2129 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2130 
2131 	reg_rctl &=  (~E1000_RCTL_UPE);
2132 	reg_rctl &=  (~E1000_RCTL_MPE);
2133 	reg_rctl &=  (~E1000_RCTL_SBP);
2134 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2135 }
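
/*
 * For reference: RCTL_UPE enables unicast promiscuous mode, RCTL_MPE
 * multicast promiscuous mode, and RCTL_SBP the storing of bad packets;
 * em_disable_promisc() simply clears all three.
 */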
2136 
2137 
2138 /*********************************************************************
2139  *  Multicast Update
2140  *
2141  *  This routine is called whenever the multicast address list is updated.
2142  *
2143  **********************************************************************/
2144 
2145 static void
2146 em_set_multi(struct adapter *adapter)
2147 {
2148 	struct ifnet	*ifp = adapter->ifp;
2149 	struct ifmultiaddr *ifma;
2150 	u32 reg_rctl = 0;
2151 	u8  *mta; /* Multicast array memory */
2152 	int mcnt = 0;
2153 
2154 	IOCTL_DEBUGOUT("em_set_multi: begin");
2155 
2156 	mta = adapter->mta;
2157 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2158 
2159 	if (adapter->hw.mac.type == e1000_82542 &&
2160 	    adapter->hw.revision_id == E1000_REVISION_2) {
2161 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2162 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2163 			e1000_pci_clear_mwi(&adapter->hw);
2164 		reg_rctl |= E1000_RCTL_RST;
2165 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2166 		msec_delay(5);
2167 	}
2168 
2169 #if __FreeBSD_version < 800000
2170 	IF_ADDR_LOCK(ifp);
2171 #else
2172 	if_maddr_rlock(ifp);
2173 #endif
2174 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2175 		if (ifma->ifma_addr->sa_family != AF_LINK)
2176 			continue;
2177 
2178 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2179 			break;
2180 
2181 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2182 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2183 		mcnt++;
2184 	}
2185 #if __FreeBSD_version < 800000
2186 	IF_ADDR_UNLOCK(ifp);
2187 #else
2188 	if_maddr_runlock(ifp);
2189 #endif
2190 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2191 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2192 		reg_rctl |= E1000_RCTL_MPE;
2193 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2194 	} else
2195 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2196 
2197 	if (adapter->hw.mac.type == e1000_82542 &&
2198 	    adapter->hw.revision_id == E1000_REVISION_2) {
2199 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2200 		reg_rctl &= ~E1000_RCTL_RST;
2201 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2202 		msec_delay(5);
2203 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2204 			e1000_pci_set_mwi(&adapter->hw);
2205 	}
2206 }
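
/*
 * Note on the 82542 rev 2 handling above: that controller requires the
 * receiver to be held in reset (RCTL_RST) and MWI to be disabled while
 * the multicast table array is rewritten, hence the bracketing RCTL
 * writes and the msec_delay(5) settle times.
 */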
2207 
2208 
2209 /*********************************************************************
2210  *  Timer routine
2211  *
2212  *  This routine checks for link status and updates statistics.
2213  *
2214  **********************************************************************/
2215 
2216 static void
2217 em_local_timer(void *arg)
2218 {
2219 	struct adapter	*adapter = arg;
2220 	struct ifnet	*ifp = adapter->ifp;
2221 	struct tx_ring	*txr = adapter->tx_rings;
2222 	struct rx_ring	*rxr = adapter->rx_rings;
2223 	u32		trigger;
2224 
2225 	EM_CORE_LOCK_ASSERT(adapter);
2226 
2227 	em_update_link_status(adapter);
2228 	em_update_stats_counters(adapter);
2229 
2230 	/* Reset LAA into RAR[0] on 82571 */
2231 	if ((adapter->hw.mac.type == e1000_82571) &&
2232 	    e1000_get_laa_state_82571(&adapter->hw))
2233 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2234 
2235 	/* Mask to use in the irq trigger */
2236 	if (adapter->msix_mem)
2237 		trigger = rxr->ims; /* RX for 82574 */
2238 	else
2239 		trigger = E1000_ICS_RXDMT0;
2240 
2241 	/*
2242 	** Check on the state of the TX queue(s); this
2243 	** can be done without the lock because it's read-only
2244 	** and the HUNG state will be static if set.
2245 	*/
2246 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2247 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2248 		    (adapter->pause_frames == 0))
2249 			goto hung;
2250 		/* Schedule a TX tasklet if needed */
2251 		if (txr->tx_avail <= EM_MAX_SCATTER)
2252 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2253 	}
2254 
2255 	adapter->pause_frames = 0;
2256 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2257 #ifndef DEVICE_POLLING
2258 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2259 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2260 #endif
2261 	return;
2262 hung:
2263 	/* Looks like we're hung */
2264 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2265 	device_printf(adapter->dev,
2266 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2267 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2268 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2269 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2270 	    "Next TX to Clean = %d\n",
2271 	    txr->me, txr->tx_avail, txr->next_to_clean);
2272 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2273 	adapter->watchdog_events++;
2274 	adapter->pause_frames = 0;
2275 	em_init_locked(adapter);
2276 }
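
/*
 * The pause_frames test above keeps the watchdog from firing while the
 * link partner is flow-controlling us: a queue may legitimately sit in
 * the HUNG state when XOFFs are being received, so the reset path is
 * only taken when no pause frames were counted during the interval.
 */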
2277 
2278 
2279 static void
2280 em_update_link_status(struct adapter *adapter)
2281 {
2282 	struct e1000_hw *hw = &adapter->hw;
2283 	struct ifnet *ifp = adapter->ifp;
2284 	device_t dev = adapter->dev;
2285 	struct tx_ring *txr = adapter->tx_rings;
2286 	u32 link_check = 0;
2287 
2288 	/* Get the cached link value or read phy for real */
2289 	switch (hw->phy.media_type) {
2290 	case e1000_media_type_copper:
2291 		if (hw->mac.get_link_status) {
2292 			/* Do the work to read phy */
2293 			e1000_check_for_link(hw);
2294 			link_check = !hw->mac.get_link_status;
2295 			if (link_check) /* ESB2 fix */
2296 				e1000_cfg_on_link_up(hw);
2297 		} else
2298 			link_check = TRUE;
2299 		break;
2300 	case e1000_media_type_fiber:
2301 		e1000_check_for_link(hw);
2302 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2303                                  E1000_STATUS_LU);
2304 		break;
2305 	case e1000_media_type_internal_serdes:
2306 		e1000_check_for_link(hw);
2307 		link_check = adapter->hw.mac.serdes_has_link;
2308 		break;
2309 	default:
2310 	case e1000_media_type_unknown:
2311 		break;
2312 	}
2313 
2314 	/* Now check for a transition */
2315 	if (link_check && (adapter->link_active == 0)) {
2316 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2317 		    &adapter->link_duplex);
2318 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2319 		if ((adapter->link_speed != SPEED_1000) &&
2320 		    ((hw->mac.type == e1000_82571) ||
2321 		    (hw->mac.type == e1000_82572))) {
2322 			int tarc0;
2323 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2324 			tarc0 &= ~SPEED_MODE_BIT;
2325 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2326 		}
2327 		if (bootverbose)
2328 			device_printf(dev, "Link is up %d Mbps %s\n",
2329 			    adapter->link_speed,
2330 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2331 			    "Full Duplex" : "Half Duplex"));
2332 		adapter->link_active = 1;
2333 		adapter->smartspeed = 0;
2334 		ifp->if_baudrate = adapter->link_speed * 1000000;
2335 		if_link_state_change(ifp, LINK_STATE_UP);
2336 	} else if (!link_check && (adapter->link_active == 1)) {
2337 		ifp->if_baudrate = adapter->link_speed = 0;
2338 		adapter->link_duplex = 0;
2339 		if (bootverbose)
2340 			device_printf(dev, "Link is Down\n");
2341 		adapter->link_active = 0;
2342 		/* Link down, disable watchdog */
2343 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2344 			txr->queue_status = EM_QUEUE_IDLE;
2345 		if_link_state_change(ifp, LINK_STATE_DOWN);
2346 	}
2347 }
2348 
2349 /*********************************************************************
2350  *
2351  *  This routine disables all traffic on the adapter by issuing a
2352  *  global reset on the MAC and deallocates TX/RX buffers.
2353  *
2354  *  This routine should always be called with BOTH the CORE
2355  *  and TX locks.
2356  **********************************************************************/
2357 
2358 static void
2359 em_stop(void *arg)
2360 {
2361 	struct adapter	*adapter = arg;
2362 	struct ifnet	*ifp = adapter->ifp;
2363 	struct tx_ring	*txr = adapter->tx_rings;
2364 
2365 	EM_CORE_LOCK_ASSERT(adapter);
2366 
2367 	INIT_DEBUGOUT("em_stop: begin");
2368 
2369 	em_disable_intr(adapter);
2370 	callout_stop(&adapter->timer);
2371 
2372 	/* Tell the stack that the interface is no longer active */
2373 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2374 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2375 
2376 	/* Unarm watchdog timer. */
2377 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2378 		EM_TX_LOCK(txr);
2379 		txr->queue_status = EM_QUEUE_IDLE;
2380 		EM_TX_UNLOCK(txr);
2381 	}
2382 
2383 	e1000_reset_hw(&adapter->hw);
2384 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2385 
2386 	e1000_led_off(&adapter->hw);
2387 	e1000_cleanup_led(&adapter->hw);
2388 }
2389 
2390 
2391 /*********************************************************************
2392  *
2393  *  Determine hardware revision.
2394  *
2395  **********************************************************************/
2396 static void
2397 em_identify_hardware(struct adapter *adapter)
2398 {
2399 	device_t dev = adapter->dev;
2400 
2401 	/* Make sure our PCI config space has the necessary stuff set */
2402 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2403 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2404 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2405 		device_printf(dev, "Memory Access and/or Bus Master bits "
2406 		    "were not set!\n");
2407 		adapter->hw.bus.pci_cmd_word |=
2408 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2409 		pci_write_config(dev, PCIR_COMMAND,
2410 		    adapter->hw.bus.pci_cmd_word, 2);
2411 	}
2412 
2413 	/* Save off the information about this board */
2414 	adapter->hw.vendor_id = pci_get_vendor(dev);
2415 	adapter->hw.device_id = pci_get_device(dev);
2416 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2417 	adapter->hw.subsystem_vendor_id =
2418 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2419 	adapter->hw.subsystem_device_id =
2420 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2421 
2422 	/* Do Shared Code Init and Setup */
2423 	if (e1000_set_mac_type(&adapter->hw)) {
2424 		device_printf(dev, "Setup init failure\n");
2425 		return;
2426 	}
2427 }
2428 
2429 static int
2430 em_allocate_pci_resources(struct adapter *adapter)
2431 {
2432 	device_t	dev = adapter->dev;
2433 	int		rid;
2434 
2435 	rid = PCIR_BAR(0);
2436 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2437 	    &rid, RF_ACTIVE);
2438 	if (adapter->memory == NULL) {
2439 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2440 		return (ENXIO);
2441 	}
2442 	adapter->osdep.mem_bus_space_tag =
2443 	    rman_get_bustag(adapter->memory);
2444 	adapter->osdep.mem_bus_space_handle =
2445 	    rman_get_bushandle(adapter->memory);
2446 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2447 
2448 	/* Default to a single queue */
2449 	adapter->num_queues = 1;
2450 
2451 	/*
2452 	 * Setup MSI/X or MSI if PCI Express
2453 	 */
2454 	adapter->msix = em_setup_msix(adapter);
2455 
2456 	adapter->hw.back = &adapter->osdep;
2457 
2458 	return (0);
2459 }
2460 
2461 /*********************************************************************
2462  *
2463  *  Setup the Legacy or MSI Interrupt handler
2464  *
2465  **********************************************************************/
2466 int
2467 em_allocate_legacy(struct adapter *adapter)
2468 {
2469 	device_t dev = adapter->dev;
2470 	struct tx_ring	*txr = adapter->tx_rings;
2471 	int error, rid = 0;
2472 
2473 	/* Manually turn off all interrupts */
2474 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2475 
2476 	if (adapter->msix == 1) /* using MSI */
2477 		rid = 1;
2478 	/* We allocate a single interrupt resource */
2479 	adapter->res = bus_alloc_resource_any(dev,
2480 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2481 	if (adapter->res == NULL) {
2482 		device_printf(dev, "Unable to allocate bus resource: "
2483 		    "interrupt\n");
2484 		return (ENXIO);
2485 	}
2486 
2487 	/*
2488 	 * Allocate a fast interrupt and the associated
2489 	 * deferred processing contexts.
2490 	 */
2491 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2492 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2493 	    taskqueue_thread_enqueue, &adapter->tq);
2494 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2495 	    device_get_nameunit(adapter->dev));
2496 	/* Use a TX only tasklet for local timer */
2497 	/* Use a TX-only tasklet, kicked from the local timer */
2498 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2499 	    taskqueue_thread_enqueue, &txr->tq);
2500 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2501 	    device_get_nameunit(adapter->dev));
2502 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2503 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2504 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2505 		device_printf(dev, "Failed to register fast interrupt "
2506 			    "handler: %d\n", error);
2507 		taskqueue_free(adapter->tq);
2508 		adapter->tq = NULL;
2509 		return (error);
2510 	}
2511 
2512 	return (0);
2513 }
2514 
2515 /*********************************************************************
2516  *
2517  *  Setup the MSIX Interrupt handlers
2518  *   This is not really Multiqueue, rather
2519  *   it's just separate interrupt vectors
2520  *   for TX, RX, and Link.
2521  *
2522  **********************************************************************/
2523 int
2524 em_allocate_msix(struct adapter *adapter)
2525 {
2526 	device_t	dev = adapter->dev;
2527 	struct		tx_ring *txr = adapter->tx_rings;
2528 	struct		rx_ring *rxr = adapter->rx_rings;
2529 	int		error, rid, vector = 0;
2530 
2531 
2532 	/* Make sure all interrupts are disabled */
2533 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2534 
2535 	/* First set up ring resources */
2536 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2537 
2538 		/* RX ring */
2539 		rid = vector + 1;
2540 
2541 		rxr->res = bus_alloc_resource_any(dev,
2542 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2543 		if (rxr->res == NULL) {
2544 			device_printf(dev,
2545 			    "Unable to allocate bus resource: "
2546 			    "RX MSIX Interrupt %d\n", i);
2547 			return (ENXIO);
2548 		}
2549 		if ((error = bus_setup_intr(dev, rxr->res,
2550 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2551 		    rxr, &rxr->tag)) != 0) {
2552 			device_printf(dev, "Failed to register RX handler\n");
2553 			return (error);
2554 		}
2555 #if __FreeBSD_version >= 800504
2556 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2557 #endif
2558 		rxr->msix = vector++; /* NOTE increment vector for TX */
2559 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2560 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2561 		    taskqueue_thread_enqueue, &rxr->tq);
2562 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2563 		    device_get_nameunit(adapter->dev));
2564 		/*
2565 		** Set the bit to enable interrupt
2566 		** in E1000_IMS -- bits 20 and 21
2567 		** are for RX0 and RX1, note this has
2568 		** NOTHING to do with the MSIX vector
2569 		*/
2570 		rxr->ims = 1 << (20 + i);
2571 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2572 
2573 		/* TX ring */
2574 		rid = vector + 1;
2575 		txr->res = bus_alloc_resource_any(dev,
2576 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2577 		if (txr->res == NULL) {
2578 			device_printf(dev,
2579 			    "Unable to allocate bus resource: "
2580 			    "TX MSIX Interrupt %d\n", i);
2581 			return (ENXIO);
2582 		}
2583 		if ((error = bus_setup_intr(dev, txr->res,
2584 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2585 		    txr, &txr->tag)) != 0) {
2586 			device_printf(dev, "Failed to register TX handler\n");
2587 			return (error);
2588 		}
2589 #if __FreeBSD_version >= 800504
2590 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2591 #endif
2592 		txr->msix = vector++; /* Increment vector for next pass */
2593 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2594 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2595 		    taskqueue_thread_enqueue, &txr->tq);
2596 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2597 		    device_get_nameunit(adapter->dev));
2598 		/*
2599 		** Set the bit to enable interrupt
2600 		** in E1000_IMS -- bits 22 and 23
2601 		** are for TX0 and TX1, note this has
2602 		** NOTHING to do with the MSIX vector
2603 		*/
2604 		txr->ims = 1 << (22 + i);
2605 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2606 	}
2607 
2608 	/* Link interrupt */
2609 	++rid;
2610 	adapter->res = bus_alloc_resource_any(dev,
2611 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2612 	if (!adapter->res) {
2613 		device_printf(dev, "Unable to allocate "
2614 		    "bus resource: Link interrupt [%d]\n", rid);
2615 		return (ENXIO);
2616 	}
2617 	/* Set the link handler function */
2618 	error = bus_setup_intr(dev, adapter->res,
2619 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2620 	    em_msix_link, adapter, &adapter->tag);
2621 	if (error) {
2622 		adapter->res = NULL;
2623 		device_printf(dev, "Failed to register LINK handler\n");
2624 		return (error);
2625 	}
2626 #if __FreeBSD_version >= 800504
2627 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2628 #endif
2629 	adapter->linkvec = vector;
2630 	adapter->ivars |=  (8 | vector) << 16;
2631 	adapter->ivars |= 0x80000000;
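
	/*
	 * Worked example for the single-queue case: the loop above
	 * assigned vector 0 to RX and vector 1 to TX, and the link
	 * vector is 2, so:
	 *   ivars  = (8 | 0) << 0  = 0x00000008   RX0  -> vector 0
	 *   ivars |= (8 | 1) << 8  = 0x00000900   TX0  -> vector 1
	 *   ivars |= (8 | 2) << 16 = 0x000A0000   link -> vector 2
	 *   ivars |= 0x80000000                   enable bit
	 * giving IVAR = 0x800A0908 when later written to the hardware.
	 */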
2632 
2633 	return (0);
2634 }
2635 
2636 
2637 static void
2638 em_free_pci_resources(struct adapter *adapter)
2639 {
2640 	device_t	dev = adapter->dev;
2641 	struct tx_ring	*txr;
2642 	struct rx_ring	*rxr;
2643 	int		rid;
2644 
2645 
2646 	/*
2647 	** Release all the queue interrupt resources:
2648 	*/
2649 	for (int i = 0; i < adapter->num_queues; i++) {
2650 		txr = &adapter->tx_rings[i];
2651 		rxr = &adapter->rx_rings[i];
2652 		/* an early abort? */
2653 		if ((txr == NULL) || (rxr == NULL))
2654 			break;
2655 		rid = txr->msix + 1;
2656 		if (txr->tag != NULL) {
2657 			bus_teardown_intr(dev, txr->res, txr->tag);
2658 			txr->tag = NULL;
2659 		}
2660 		if (txr->res != NULL)
2661 			bus_release_resource(dev, SYS_RES_IRQ,
2662 			    rid, txr->res);
2663 		rid = rxr->msix + 1;
2664 		if (rxr->tag != NULL) {
2665 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2666 			rxr->tag = NULL;
2667 		}
2668 		if (rxr->res != NULL)
2669 			bus_release_resource(dev, SYS_RES_IRQ,
2670 			    rid, rxr->res);
2671 	}
2672 
2673 	if (adapter->linkvec) /* we are doing MSIX */
2674 		rid = adapter->linkvec + 1;
2675 	else
2676 		rid = (adapter->msix != 0) ? 1 : 0;
2677 
2678 	if (adapter->tag != NULL) {
2679 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2680 		adapter->tag = NULL;
2681 	}
2682 
2683 	if (adapter->res != NULL)
2684 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2685 
2686 
2687 	if (adapter->msix)
2688 		pci_release_msi(dev);
2689 
2690 	if (adapter->msix_mem != NULL)
2691 		bus_release_resource(dev, SYS_RES_MEMORY,
2692 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2693 
2694 	if (adapter->memory != NULL)
2695 		bus_release_resource(dev, SYS_RES_MEMORY,
2696 		    PCIR_BAR(0), adapter->memory);
2697 
2698 	if (adapter->flash != NULL)
2699 		bus_release_resource(dev, SYS_RES_MEMORY,
2700 		    EM_FLASH, adapter->flash);
2701 }
2702 
2703 /*
2704  * Setup MSI or MSI/X
2705  */
2706 static int
2707 em_setup_msix(struct adapter *adapter)
2708 {
2709 	device_t dev = adapter->dev;
2710 	int val = 0;
2711 
2712 	/*
2713 	** Setup MSI/X for Hartwell: tests have shown
2714 	** use of two queues to be unstable, and to
2715 	** provide no great gain anyway, so we simply
2716 	** separate the interrupts and use a single queue.
2717 	*/
2718 	if ((adapter->hw.mac.type == e1000_82574) &&
2719 	    (em_enable_msix == TRUE)) {
2720 		/* Map the MSIX BAR */
2721 		int rid = PCIR_BAR(EM_MSIX_BAR);
2722 		adapter->msix_mem = bus_alloc_resource_any(dev,
2723 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2724 		if (!adapter->msix_mem) {
2725 			/* May not be enabled */
2726 			device_printf(adapter->dev,
2727 			    "Unable to map MSIX table\n");
2728 			goto msi;
2729 		}
2730 		val = pci_msix_count(dev);
2731 		/* We only need 3 vectors */
2732 		if (val > 3)
2733 			val = 3;
2734 		if ((val != 3) && (val != 5)) {
2735 			bus_release_resource(dev, SYS_RES_MEMORY,
2736 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2737 			adapter->msix_mem = NULL;
2738 			device_printf(adapter->dev,
2739 			    "MSIX: incorrect vectors, using MSI\n");
2740 			goto msi;
2741 		}
2742 
2743 		if (pci_alloc_msix(dev, &val) == 0) {
2744 			device_printf(adapter->dev,
2745 			    "Using MSIX interrupts "
2746 			    "with %d vectors\n", val);
2747 		}
2748 
2749 		return (val);
2750 	}
2751 msi:
2752 	val = pci_msi_count(dev);
2753 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2754 		adapter->msix = 1;
2755 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2756 		return (val);
2757 	}
2758 	/* Should only happen due to manual configuration */
2759 	device_printf(adapter->dev, "No MSI/MSI-X, using a Legacy IRQ\n");
2760 	return (0);
2761 }
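
/*
 * To summarize, em_setup_msix() returns the number of vectors obtained:
 * 3 for MSIX on the 82574, 1 for plain MSI, and 0 when the caller must
 * fall back to a legacy INTx interrupt; em_allocate_pci_resources()
 * stores the result in adapter->msix to steer later interrupt setup.
 */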
2762 
2763 
2764 /*********************************************************************
2765  *
2766  *  Initialize the hardware to a configuration
2767  *  as specified by the adapter structure.
2768  *
2769  **********************************************************************/
2770 static void
2771 em_reset(struct adapter *adapter)
2772 {
2773 	device_t	dev = adapter->dev;
2774 	struct ifnet	*ifp = adapter->ifp;
2775 	struct e1000_hw	*hw = &adapter->hw;
2776 	u16		rx_buffer_size;
2777 	u32		pba;
2778 
2779 	INIT_DEBUGOUT("em_reset: begin");
2780 
2781 	/* Set up smart power down as default off on newer adapters. */
2782 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2783 	    hw->mac.type == e1000_82572)) {
2784 		u16 phy_tmp = 0;
2785 
2786 		/* Speed up time to link by disabling smart power down. */
2787 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2788 		phy_tmp &= ~IGP02E1000_PM_SPD;
2789 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2790 	}
2791 
2792 	/*
2793 	 * Packet Buffer Allocation (PBA)
2794 	 * Writing PBA sets the receive portion of the buffer;
2795 	 * the remainder is used for the transmit buffer.
2796 	 */
2797 	switch (hw->mac.type) {
2798 	/* Total Packet Buffer on these is 48K */
2799 	case e1000_82571:
2800 	case e1000_82572:
2801 	case e1000_80003es2lan:
2802 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2803 		break;
2804 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2805 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2806 		break;
2807 	case e1000_82574:
2808 	case e1000_82583:
2809 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2810 		break;
2811 	case e1000_ich8lan:
2812 		pba = E1000_PBA_8K;
2813 		break;
2814 	case e1000_ich9lan:
2815 	case e1000_ich10lan:
2816 		/* Boost Receive side for jumbo frames */
2817 		if (adapter->max_frame_size > 4096)
2818 			pba = E1000_PBA_14K;
2819 		else
2820 			pba = E1000_PBA_10K;
2821 		break;
2822 	case e1000_pchlan:
2823 	case e1000_pch2lan:
2824 		pba = E1000_PBA_26K;
2825 		break;
2826 	default:
2827 		if (adapter->max_frame_size > 8192)
2828 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2829 		else
2830 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2831 	}
2832 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2833 
2834 	/*
2835 	 * These parameters control the automatic generation (Tx) and
2836 	 * response (Rx) to Ethernet PAUSE frames.
2837 	 * - High water mark should allow for at least two frames to be
2838 	 *   received after sending an XOFF.
2839 	 * - Low water mark works best when it is very near the high water mark.
2840 	 *   This allows the receiver to restart by sending XON when it has
2841 	 *   drained a bit. Here we use an arbitrary value of 1500, which will
2842 	 *   restart after one full frame is pulled from the buffer. There
2843 	 *   could be several smaller frames in the buffer and if so they will
2844 	 *   not trigger the XON until their total number reduces the buffer
2845 	 *   by 1500.
2846 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2847 	 */
2848 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2849 	hw->fc.high_water = rx_buffer_size -
2850 	    roundup2(adapter->max_frame_size, 1024);
2851 	hw->fc.low_water = hw->fc.high_water - 1500;
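
	/*
	 * Worked example, assuming a 32K RX allocation and a standard
	 * 1518-byte maximum frame: rx_buffer_size = 32 << 10 = 32768,
	 * roundup2(1518, 1024) = 2048, so high_water = 32768 - 2048 =
	 * 30720 and low_water = 30720 - 1500 = 29220 bytes.
	 */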
2852 
2853 	if (adapter->fc) /* locally set flow control value? */
2854 		hw->fc.requested_mode = adapter->fc;
2855 	else
2856 		hw->fc.requested_mode = e1000_fc_full;
2857 
2858 	if (hw->mac.type == e1000_80003es2lan)
2859 		hw->fc.pause_time = 0xFFFF;
2860 	else
2861 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2862 
2863 	hw->fc.send_xon = TRUE;
2864 
2865 	/* Device specific overrides/settings */
2866 	switch (hw->mac.type) {
2867 	case e1000_pchlan:
2868 		/* Workaround: no TX flow ctrl for PCH */
2869 		hw->fc.requested_mode = e1000_fc_rx_pause;
2870 		hw->fc.pause_time = 0xFFFF; /* override */
2871 		if (ifp->if_mtu > ETHERMTU) {
2872 			hw->fc.high_water = 0x3500;
2873 			hw->fc.low_water = 0x1500;
2874 		} else {
2875 			hw->fc.high_water = 0x5000;
2876 			hw->fc.low_water = 0x3000;
2877 		}
2878 		hw->fc.refresh_time = 0x1000;
2879 		break;
2880 	case e1000_pch2lan:
2881 		hw->fc.high_water = 0x5C20;
2882 		hw->fc.low_water = 0x5048;
2883 		hw->fc.pause_time = 0x0650;
2884 		hw->fc.refresh_time = 0x0400;
2885 		/* Jumbos need adjusted PBA */
2886 		if (ifp->if_mtu > ETHERMTU)
2887 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2888 		else
2889 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2890 		break;
2891 	case e1000_ich9lan:
2892 	case e1000_ich10lan:
2893 		if (ifp->if_mtu > ETHERMTU) {
2894 			hw->fc.high_water = 0x2800;
2895 			hw->fc.low_water = hw->fc.high_water - 8;
2896 			break;
2897 		}
2898 		/* else fall thru */
2899 	default:
2900 		if (hw->mac.type == e1000_80003es2lan)
2901 			hw->fc.pause_time = 0xFFFF;
2902 		break;
2903 	}
2904 
2905 	/* Issue a global reset */
2906 	e1000_reset_hw(hw);
2907 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2908 	em_disable_aspm(adapter);
2909 	/* and a re-init */
2910 	if (e1000_init_hw(hw) < 0) {
2911 		device_printf(dev, "Hardware Initialization Failed\n");
2912 		return;
2913 	}
2914 
2915 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2916 	e1000_get_phy_info(hw);
2917 	e1000_check_for_link(hw);
2918 	return;
2919 }
2920 
2921 /*********************************************************************
2922  *
2923  *  Setup networking device structure and register an interface.
2924  *
2925  **********************************************************************/
2926 static int
2927 em_setup_interface(device_t dev, struct adapter *adapter)
2928 {
2929 	struct ifnet   *ifp;
2930 
2931 	INIT_DEBUGOUT("em_setup_interface: begin");
2932 
2933 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2934 	if (ifp == NULL) {
2935 		device_printf(dev, "cannot allocate ifnet structure\n");
2936 		return (-1);
2937 	}
2938 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2939 	ifp->if_init =  em_init;
2940 	ifp->if_softc = adapter;
2941 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2942 	ifp->if_ioctl = em_ioctl;
2943 #ifdef EM_MULTIQUEUE
2944 	/* Multiqueue stack interface */
2945 	ifp->if_transmit = em_mq_start;
2946 	ifp->if_qflush = em_qflush;
2947 #else
2948 	ifp->if_start = em_start;
2949 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2950 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2951 	IFQ_SET_READY(&ifp->if_snd);
2952 #endif
2953 
2954 	ether_ifattach(ifp, adapter->hw.mac.addr);
2955 
2956 	ifp->if_capabilities = ifp->if_capenable = 0;
2957 
2958 
2959 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2960 	ifp->if_capabilities |= IFCAP_TSO4;
2961 	/*
2962 	 * Tell the upper layer(s) we
2963 	 * support full VLAN capability
2964 	 */
2965 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2966 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2967 			     |  IFCAP_VLAN_HWTSO
2968 			     |  IFCAP_VLAN_MTU;
2969 	ifp->if_capenable = ifp->if_capabilities;
2970 
2971 	/*
2972 	** Don't turn this on by default: if vlans are
2973 	** created on another pseudo device (e.g. lagg),
2974 	** then vlan events are not passed through, breaking
2975 	** operation, but with HW FILTER off it works. If
2976 	** you use vlans directly on the em driver you can
2977 	** enable this and get full hardware tag filtering.
2978 	*/
2979 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2980 
2981 #ifdef DEVICE_POLLING
2982 	ifp->if_capabilities |= IFCAP_POLLING;
2983 #endif
2984 
2985 	/* Enable only WOL MAGIC by default */
2986 	if (adapter->wol) {
2987 		ifp->if_capabilities |= IFCAP_WOL;
2988 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2989 	}
2990 
2991 	/*
2992 	 * Specify the media types supported by this adapter and register
2993 	 * callbacks to update media and link information
2994 	 */
2995 	ifmedia_init(&adapter->media, IFM_IMASK,
2996 	    em_media_change, em_media_status);
2997 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2998 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2999 		u_char fiber_type = IFM_1000_SX;	/* default type */
3000 
3001 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3002 			    0, NULL);
3003 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3004 	} else {
3005 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3006 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3007 			    0, NULL);
3008 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3009 			    0, NULL);
3010 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3011 			    0, NULL);
3012 		if (adapter->hw.phy.type != e1000_phy_ife) {
3013 			ifmedia_add(&adapter->media,
3014 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3015 			ifmedia_add(&adapter->media,
3016 				IFM_ETHER | IFM_1000_T, 0, NULL);
3017 		}
3018 	}
3019 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3020 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3021 	return (0);
3022 }
3023 
3024 
3025 /*
3026  * Manage DMA'able memory.
3027  */
3028 static void
3029 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3030 {
3031 	if (error)
3032 		return;
3033 	*(bus_addr_t *) arg = segs[0].ds_addr;
3034 }
3035 
3036 static int
3037 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3038         struct em_dma_alloc *dma, int mapflags)
3039 {
3040 	int error;
3041 
3042 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3043 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3044 				BUS_SPACE_MAXADDR,	/* lowaddr */
3045 				BUS_SPACE_MAXADDR,	/* highaddr */
3046 				NULL, NULL,		/* filter, filterarg */
3047 				size,			/* maxsize */
3048 				1,			/* nsegments */
3049 				size,			/* maxsegsize */
3050 				0,			/* flags */
3051 				NULL,			/* lockfunc */
3052 				NULL,			/* lockarg */
3053 				&dma->dma_tag);
3054 	if (error) {
3055 		device_printf(adapter->dev,
3056 		    "%s: bus_dma_tag_create failed: %d\n",
3057 		    __func__, error);
3058 		goto fail_0;
3059 	}
3060 
3061 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3062 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3063 	if (error) {
3064 		device_printf(adapter->dev,
3065 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3066 		    __func__, (uintmax_t)size, error);
3067 		goto fail_2;
3068 	}
3069 
3070 	dma->dma_paddr = 0;
3071 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3072 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3073 	if (error || dma->dma_paddr == 0) {
3074 		device_printf(adapter->dev,
3075 		    "%s: bus_dmamap_load failed: %d\n",
3076 		    __func__, error);
3077 		goto fail_3;
3078 	}
3079 
3080 	return (0);
3081 
3082 fail_3:
3083 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3084 fail_2:
3085 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3086 	bus_dma_tag_destroy(dma->dma_tag);
3087 fail_0:
3088 	dma->dma_map = NULL;
3089 	dma->dma_tag = NULL;
3090 
3091 	return (error);
3092 }
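
/*
 * Typical usage, as in em_allocate_queues() below (a minimal sketch):
 *
 *	tsize = roundup2(adapter->num_tx_desc *
 *	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
 *	if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		... handle ENOMEM ...
 *	txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
 *
 * On success, dma_vaddr and dma_paddr hold the KVA and bus address of
 * one contiguous, EM_DBA_ALIGN-aligned region suitable for descriptor
 * rings.
 */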
3093 
3094 static void
3095 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3096 {
3097 	if (dma->dma_tag == NULL)
3098 		return;
3099 	if (dma->dma_map != NULL) {
3100 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3101 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3102 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3103 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3104 		dma->dma_map = NULL;
3105 	}
3106 	bus_dma_tag_destroy(dma->dma_tag);
3107 	dma->dma_tag = NULL;
3108 }
3109 
3110 
3111 /*********************************************************************
3112  *
3113  *  Allocate memory for the transmit and receive rings, and then
3114  *  the descriptors associated with each, called only once at attach.
3115  *
3116  **********************************************************************/
3117 static int
3118 em_allocate_queues(struct adapter *adapter)
3119 {
3120 	device_t		dev = adapter->dev;
3121 	struct tx_ring		*txr = NULL;
3122 	struct rx_ring		*rxr = NULL;
3123 	int rsize, tsize, error = E1000_SUCCESS;
3124 	int txconf = 0, rxconf = 0;
3125 
3126 
3127 	/* Allocate the TX ring struct memory */
3128 	if (!(adapter->tx_rings =
3129 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3130 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3131 		device_printf(dev, "Unable to allocate TX ring memory\n");
3132 		error = ENOMEM;
3133 		goto fail;
3134 	}
3135 
3136 	/* Now allocate the RX */
3137 	if (!(adapter->rx_rings =
3138 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3139 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3140 		device_printf(dev, "Unable to allocate RX ring memory\n");
3141 		error = ENOMEM;
3142 		goto rx_fail;
3143 	}
3144 
3145 	tsize = roundup2(adapter->num_tx_desc *
3146 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3147 	/*
3148 	 * Now set up the TX queues, txconf is needed to handle the
3149 	 * possibility that things fail midcourse and we need to
3150 	 * undo memory gracefully
3151 	 */
3152 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3153 		/* Set up some basics */
3154 		txr = &adapter->tx_rings[i];
3155 		txr->adapter = adapter;
3156 		txr->me = i;
3157 
3158 		/* Initialize the TX lock */
3159 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3160 		    device_get_nameunit(dev), txr->me);
3161 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3162 
3163 		if (em_dma_malloc(adapter, tsize,
3164 			&txr->txdma, BUS_DMA_NOWAIT)) {
3165 			device_printf(dev,
3166 			    "Unable to allocate TX Descriptor memory\n");
3167 			error = ENOMEM;
3168 			goto err_tx_desc;
3169 		}
3170 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3171 		bzero((void *)txr->tx_base, tsize);
3172 
3173 		if (em_allocate_transmit_buffers(txr)) {
3174 			device_printf(dev,
3175 			    "Critical Failure setting up transmit buffers\n");
3176 			error = ENOMEM;
3177 			goto err_tx_desc;
3178 		}
3179 #if __FreeBSD_version >= 800000
3180 		/* Allocate a buf ring */
3181 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3182 		    M_WAITOK, &txr->tx_mtx);
3183 #endif
3184 	}
3185 
3186 	/*
3187 	 * Next the RX queues...
3188 	 */
3189 	rsize = roundup2(adapter->num_rx_desc *
3190 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3191 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3192 		rxr = &adapter->rx_rings[i];
3193 		rxr->adapter = adapter;
3194 		rxr->me = i;
3195 
3196 		/* Initialize the RX lock */
3197 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3198 		    device_get_nameunit(dev), rxr->me);
3199 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3200 
3201 		if (em_dma_malloc(adapter, rsize,
3202 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3203 			device_printf(dev,
3204 			    "Unable to allocate RX Descriptor memory\n");
3205 			error = ENOMEM;
3206 			goto err_rx_desc;
3207 		}
3208 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3209 		bzero((void *)rxr->rx_base, rsize);
3210 
3211 		/* Allocate receive buffers for the ring */
3212 		if (em_allocate_receive_buffers(rxr)) {
3213 			device_printf(dev,
3214 			    "Critical Failure setting up receive buffers\n");
3215 			error = ENOMEM;
3216 			goto err_rx_desc;
3217 		}
3218 	}
3219 
3220 	return (0);
3221 
3222 err_rx_desc:
3223 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3224 		em_dma_free(adapter, &rxr->rxdma);
3225 err_tx_desc:
3226 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3227 		em_dma_free(adapter, &txr->txdma);
3228 	free(adapter->rx_rings, M_DEVBUF);
3229 rx_fail:
3230 #if __FreeBSD_version >= 800000
3231 	buf_ring_free(txr->br, M_DEVBUF);
3232 #endif
3233 	free(adapter->tx_rings, M_DEVBUF);
3234 fail:
3235 	return (error);
3236 }
3237 
3238 
3239 /*********************************************************************
3240  *
3241  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3242  *  the information needed to transmit a packet on the wire. This is
3243  *  called only once at attach, setup is done every reset.
3244  *
3245  **********************************************************************/
3246 static int
3247 em_allocate_transmit_buffers(struct tx_ring *txr)
3248 {
3249 	struct adapter *adapter = txr->adapter;
3250 	device_t dev = adapter->dev;
3251 	struct em_buffer *txbuf;
3252 	int error, i;
3253 
3254 	/*
3255 	 * Setup DMA descriptor areas.
3256 	 */
3257 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3258 			       1, 0,			/* alignment, bounds */
3259 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3260 			       BUS_SPACE_MAXADDR,	/* highaddr */
3261 			       NULL, NULL,		/* filter, filterarg */
3262 			       EM_TSO_SIZE,		/* maxsize */
3263 			       EM_MAX_SCATTER,		/* nsegments */
3264 			       PAGE_SIZE,		/* maxsegsize */
3265 			       0,			/* flags */
3266 			       NULL,			/* lockfunc */
3267 			       NULL,			/* lockfuncarg */
3268 			       &txr->txtag))) {
3269 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3270 		goto fail;
3271 	}
3272 
3273 	if (!(txr->tx_buffers =
3274 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3275 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3276 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3277 		error = ENOMEM;
3278 		goto fail;
3279 	}
3280 
3281 	/* Create the descriptor buffer dma maps */
3282 	txbuf = txr->tx_buffers;
3283 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3284 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3285 		if (error != 0) {
3286 			device_printf(dev, "Unable to create TX DMA map\n");
3287 			goto fail;
3288 		}
3289 	}
3290 
3291 	return 0;
3292 fail:
3293 	/* We free all, it handles case where we are in the middle */
3294 	em_free_transmit_structures(adapter);
3295 	return (error);
3296 }
3297 
3298 /*********************************************************************
3299  *
3300  *  Initialize a transmit ring.
3301  *
3302  **********************************************************************/
3303 static void
3304 em_setup_transmit_ring(struct tx_ring *txr)
3305 {
3306 	struct adapter *adapter = txr->adapter;
3307 	struct em_buffer *txbuf;
3308 	int i;
3309 #ifdef DEV_NETMAP
3310 	struct netmap_adapter *na = NA(adapter->ifp);
3311 	struct netmap_slot *slot;
3312 #endif /* DEV_NETMAP */
3313 
3314 	/* Clear the old descriptor contents */
3315 	EM_TX_LOCK(txr);
3316 #ifdef DEV_NETMAP
3317 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3318 #endif /* DEV_NETMAP */
3319 
3320 	bzero((void *)txr->tx_base,
3321 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3322 	/* Reset indices */
3323 	txr->next_avail_desc = 0;
3324 	txr->next_to_clean = 0;
3325 
3326 	/* Free any existing tx buffers. */
3327 	txbuf = txr->tx_buffers;
3328 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3329 		if (txbuf->m_head != NULL) {
3330 			bus_dmamap_sync(txr->txtag, txbuf->map,
3331 			    BUS_DMASYNC_POSTWRITE);
3332 			bus_dmamap_unload(txr->txtag, txbuf->map);
3333 			m_freem(txbuf->m_head);
3334 			txbuf->m_head = NULL;
3335 		}
3336 #ifdef DEV_NETMAP
3337 		if (slot) {
3338 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3339 			uint64_t paddr;
3340 			void *addr;
3341 
3342 			addr = PNMB(slot + si, &paddr);
3343 			txr->tx_base[i].buffer_addr = htole64(paddr);
3344 			/* reload the map for netmap mode */
3345 			netmap_load_map(txr->txtag, txbuf->map, addr);
3346 		}
3347 #endif /* DEV_NETMAP */
3348 
3349 		/* clear the watch index */
3350 		txbuf->next_eop = -1;
3351 	}
3352 
3353 	/* Set number of descriptors available */
3354 	txr->tx_avail = adapter->num_tx_desc;
3355 	txr->queue_status = EM_QUEUE_IDLE;
3356 
3357 	/* Clear checksum offload context. */
3358 	txr->last_hw_offload = 0;
3359 	txr->last_hw_ipcss = 0;
3360 	txr->last_hw_ipcso = 0;
3361 	txr->last_hw_tucss = 0;
3362 	txr->last_hw_tucso = 0;
3363 
3364 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3365 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3366 	EM_TX_UNLOCK(txr);
3367 }
3368 
3369 /*********************************************************************
3370  *
3371  *  Initialize all transmit rings.
3372  *
3373  **********************************************************************/
3374 static void
3375 em_setup_transmit_structures(struct adapter *adapter)
3376 {
3377 	struct tx_ring *txr = adapter->tx_rings;
3378 
3379 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3380 		em_setup_transmit_ring(txr);
3381 
3382 	return;
3383 }
3384 
3385 /*********************************************************************
3386  *
3387  *  Enable transmit unit.
3388  *
3389  **********************************************************************/
3390 static void
3391 em_initialize_transmit_unit(struct adapter *adapter)
3392 {
3393 	struct tx_ring	*txr = adapter->tx_rings;
3394 	struct e1000_hw	*hw = &adapter->hw;
3395 	u32	tctl, tarc, tipg = 0;
3396 
3397 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3398 
3399 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3400 		u64 bus_addr = txr->txdma.dma_paddr;
3401 		/* Base and Len of TX Ring */
3402 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3403 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3404 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3405 	    	    (u32)(bus_addr >> 32));
3406 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3407 	    	    (u32)bus_addr);
3408 		/* Init the HEAD/TAIL indices */
3409 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3410 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3411 
3412 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3413 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3414 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3415 
3416 		txr->queue_status = EM_QUEUE_IDLE;
3417 	}
3418 
3419 	/* Set the default values for the Tx Inter Packet Gap timer */
3420 	switch (adapter->hw.mac.type) {
3421 	case e1000_80003es2lan:
3422 		tipg = DEFAULT_82543_TIPG_IPGR1;
3423 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3424 		    E1000_TIPG_IPGR2_SHIFT;
3425 		break;
3426 	default:
3427 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3428 		    (adapter->hw.phy.media_type ==
3429 		    e1000_media_type_internal_serdes))
3430 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3431 		else
3432 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3433 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3434 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3435 	}
3436 
3437 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3438 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3439 
3440 	if (adapter->hw.mac.type >= e1000_82540)
3441 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3442 		    adapter->tx_abs_int_delay.value);
3443 
3444 	if ((adapter->hw.mac.type == e1000_82571) ||
3445 	    (adapter->hw.mac.type == e1000_82572)) {
3446 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3447 		tarc |= SPEED_MODE_BIT;
3448 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3449 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3450 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3451 		tarc |= 1;
3452 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3453 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3454 		tarc |= 1;
3455 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3456 	}
3457 
3458 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3459 	if (adapter->tx_int_delay.value > 0)
3460 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3461 
3462 	/* Program the Transmit Control Register */
3463 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3464 	tctl &= ~E1000_TCTL_CT;
3465 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3466 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3467 
3468 	if (adapter->hw.mac.type >= e1000_82571)
3469 		tctl |= E1000_TCTL_MULR;
3470 
3471 	/* This write will effectively turn on the transmit unit. */
3472 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3473 
3474 }
3475 
3476 
3477 /*********************************************************************
3478  *
3479  *  Free all transmit rings.
3480  *
3481  **********************************************************************/
3482 static void
3483 em_free_transmit_structures(struct adapter *adapter)
3484 {
3485 	struct tx_ring *txr = adapter->tx_rings;
3486 
3487 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3488 		EM_TX_LOCK(txr);
3489 		em_free_transmit_buffers(txr);
3490 		em_dma_free(adapter, &txr->txdma);
3491 		EM_TX_UNLOCK(txr);
3492 		EM_TX_LOCK_DESTROY(txr);
3493 	}
3494 
3495 	free(adapter->tx_rings, M_DEVBUF);
3496 }
3497 
3498 /*********************************************************************
3499  *
3500  *  Free transmit ring related data structures.
3501  *
3502  **********************************************************************/
3503 static void
3504 em_free_transmit_buffers(struct tx_ring *txr)
3505 {
3506 	struct adapter		*adapter = txr->adapter;
3507 	struct em_buffer	*txbuf;
3508 
3509 	INIT_DEBUGOUT("free_transmit_ring: begin");
3510 
3511 	if (txr->tx_buffers == NULL)
3512 		return;
3513 
3514 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3515 		txbuf = &txr->tx_buffers[i];
3516 		if (txbuf->m_head != NULL) {
3517 			bus_dmamap_sync(txr->txtag, txbuf->map,
3518 			    BUS_DMASYNC_POSTWRITE);
3519 			bus_dmamap_unload(txr->txtag,
3520 			    txbuf->map);
3521 			m_freem(txbuf->m_head);
3522 			txbuf->m_head = NULL;
3523 			if (txbuf->map != NULL) {
3524 				bus_dmamap_destroy(txr->txtag,
3525 				    txbuf->map);
3526 				txbuf->map = NULL;
3527 			}
3528 		} else if (txbuf->map != NULL) {
3529 			bus_dmamap_unload(txr->txtag,
3530 			    txbuf->map);
3531 			bus_dmamap_destroy(txr->txtag,
3532 			    txbuf->map);
3533 			txbuf->map = NULL;
3534 		}
3535 	}
3536 #if __FreeBSD_version >= 800000
3537 	if (txr->br != NULL)
3538 		buf_ring_free(txr->br, M_DEVBUF);
3539 #endif
3540 	if (txr->tx_buffers != NULL) {
3541 		free(txr->tx_buffers, M_DEVBUF);
3542 		txr->tx_buffers = NULL;
3543 	}
3544 	if (txr->txtag != NULL) {
3545 		bus_dma_tag_destroy(txr->txtag);
3546 		txr->txtag = NULL;
3547 	}
3548 	return;
3549 }
3550 
3551 
3552 /*********************************************************************
3553  *  The offload context is protocol specific (TCP/UDP) and thus
3554  *  only needs to be set when the protocol changes. A context
3555  *  change comes at a performance cost, however, and offload might
3556  *  be better just disabled. The reason arises in the way in which
3557  *  the controller supports pipelined requests from the Tx data
3558  *  DMA. Up to four requests can be pipelined, and they may
3559  *  belong to the same packet or to multiple packets. However all
3560  *  requests for one packet are issued before a request is issued
3561  *  for a subsequent packet, and if a request for the next packet
3562  *  requires a context change, that request will be stalled
3563  *  until the previous request completes. This means setting up
3564  *  a new context effectively disables pipelined Tx data DMA,
3565  *  which in turn greatly slows down performance when sending
3566  *  small frames.
3567  **********************************************************************/
3568 static void
3569 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3570     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3571 {
3572 	struct adapter			*adapter = txr->adapter;
3573 	struct e1000_context_desc	*TXD = NULL;
3574 	struct em_buffer		*tx_buffer;
3575 	int				cur, hdr_len;
3576 	u32				cmd = 0;
3577 	u16				offload = 0;
3578 	u8				ipcso, ipcss, tucso, tucss;
3579 
3580 	ipcss = ipcso = tucss = tucso = 0;
3581 	hdr_len = ip_off + (ip->ip_hl << 2);
3582 	cur = txr->next_avail_desc;
3583 
3584 	/* Setup of IP header checksum. */
3585 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3586 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3587 		offload |= CSUM_IP;
3588 		ipcss = ip_off;
3589 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3590 		/*
3591 		 * Start offset for header checksum calculation.
3592 		 * End offset for header checksum calculation.
3593 		 * Offset of place to put the checksum.
3594 		 */
3595 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3596 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3597 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3598 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3599 		cmd |= E1000_TXD_CMD_IP;
3600 	}
3601 
3602 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3603  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3604  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3605  		offload |= CSUM_TCP;
3606  		tucss = hdr_len;
3607  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3608  		/*
3609  		 * Setting up a new checksum offload context for every
3610  		 * frame takes a lot of processing time on the hardware.
3611  		 * It also hurts performance for small frames, so avoid
3612  		 * it if the driver can reuse the previously configured
3613  		 * checksum offload context.
3614  		 */
3615  		if (txr->last_hw_offload == offload) {
3616  			if (offload & CSUM_IP) {
3617  				if (txr->last_hw_ipcss == ipcss &&
3618  				    txr->last_hw_ipcso == ipcso &&
3619  				    txr->last_hw_tucss == tucss &&
3620  				    txr->last_hw_tucso == tucso)
3621  					return;
3622  			} else {
3623  				if (txr->last_hw_tucss == tucss &&
3624  				    txr->last_hw_tucso == tucso)
3625  					return;
3626  			}
3627   		}
3628  		txr->last_hw_offload = offload;
3629  		txr->last_hw_tucss = tucss;
3630  		txr->last_hw_tucso = tucso;
3631  		/*
3632  		 * Start offset for payload checksum calculation.
3633  		 * End offset for payload checksum calculation.
3634  		 * Offset of place to put the checksum.
3635  		 */
3636 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3637  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3638  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3639  		TXD->upper_setup.tcp_fields.tucso = tucso;
3640  		cmd |= E1000_TXD_CMD_TCP;
3641  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3642  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3643  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3644  		tucss = hdr_len;
3645  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
 		offload |= CSUM_UDP;
3646  		/*
3647  		 * Setting up a new checksum offload context for every
3648  		 * frame takes a lot of processing time on the hardware.
3649  		 * It also hurts performance for small frames, so avoid
3650  		 * it if the driver can reuse the previously configured
3651  		 * checksum offload context.
3652  		 */
3653  		if (txr->last_hw_offload == offload) {
3654  			if (offload & CSUM_IP) {
3655  				if (txr->last_hw_ipcss == ipcss &&
3656  				    txr->last_hw_ipcso == ipcso &&
3657  				    txr->last_hw_tucss == tucss &&
3658  				    txr->last_hw_tucso == tucso)
3659  					return;
3660  			} else {
3661  				if (txr->last_hw_tucss == tucss &&
3662  				    txr->last_hw_tucso == tucso)
3663  					return;
3664  			}
3665  		}
3666  		txr->last_hw_offload = offload;
3667  		txr->last_hw_tucss = tucss;
3668  		txr->last_hw_tucso = tucso;
3669  		/*
3670  		 * Start offset for payload checksum calculation.
3671  		 * End offset for payload checksum calculation.
3672  		 * Offset of place to put the checksum.
3673  		 */
3674 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3675  		TXD->upper_setup.tcp_fields.tucss = tucss;
3676  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3677  		TXD->upper_setup.tcp_fields.tucso = tucso;
3678   	}
3679 
3680  	if (offload & CSUM_IP) {
3681  		txr->last_hw_ipcss = ipcss;
3682  		txr->last_hw_ipcso = ipcso;
3683   	}
3684 
3685 	TXD->tcp_seg_setup.data = htole32(0);
3686 	TXD->cmd_and_length =
3687 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3688 	tx_buffer = &txr->tx_buffers[cur];
3689 	tx_buffer->m_head = NULL;
3690 	tx_buffer->next_eop = -1;
3691 
3692 	if (++cur == adapter->num_tx_desc)
3693 		cur = 0;
3694 
3695 	txr->tx_avail--;
3696 	txr->next_avail_desc = cur;
3697 }
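
/*
 * Illustrative sketch, not driver code: a fragment showing how a
 * transmit path could invoke the helper above. The mbuf 'm' and the
 * assumption that the IP header directly follows a standard Ethernet
 * header are hypothetical; the real caller derives ip_off from the
 * parsed link-layer header.
 */
#if 0
	struct ip *ip = (struct ip *)(mtod(m, caddr_t) + ETHER_HDR_LEN);
	u32 txd_upper = 0, txd_lower = 0;

	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
		em_transmit_checksum_setup(txr, m, ETHER_HDR_LEN, ip,
		    &txd_upper, &txd_lower);
#endif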
3698 
3699 
3700 /**********************************************************************
3701  *
3702  *  Setup work for hardware segmentation offload (TSO)
3703  *
3704  **********************************************************************/
3705 static void
3706 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3707     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3708 {
3709 	struct adapter			*adapter = txr->adapter;
3710 	struct e1000_context_desc	*TXD;
3711 	struct em_buffer		*tx_buffer;
3712 	int cur, hdr_len;
3713 
3714 	/*
3715 	 * In theory we could reuse the same TSO context if and only if
3716 	 * the frame is the same type (IP/TCP) and has the same MSS.
3717 	 * However, checking whether a frame has the same IP/TCP
3718 	 * structure is hard, so just ignore that and always
3719 	 * re-establish a new TSO context.
3720 	 */
3721 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3722 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3723 		      E1000_TXD_DTYP_D |	/* Data descr type */
3724 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3725 
3726 	/* IP and/or TCP header checksum calculation and insertion. */
3727 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3728 
3729 	cur = txr->next_avail_desc;
3730 	tx_buffer = &txr->tx_buffers[cur];
3731 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3732 
3733 	/*
3734 	 * Start offset for header checksum calculation.
3735 	 * End offset for header checksum calculation.
3736 	 * Offset of place to put the checksum.
3737 	 */
3738 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3739 	TXD->lower_setup.ip_fields.ipcse =
3740 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3741 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3742 	/*
3743 	 * Start offset for payload checksum calculation.
3744 	 * End offset for payload checksum calculation.
3745 	 * Offset of place to put the checksum.
3746 	 */
3747 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3748 	TXD->upper_setup.tcp_fields.tucse = 0;
3749 	TXD->upper_setup.tcp_fields.tucso =
3750 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3751 	/*
3752 	 * Payload size per packet w/o any headers.
3753 	 * Length of all headers up to payload.
3754 	 */
3755 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3756 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3757 
3758 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3759 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3760 				E1000_TXD_CMD_TSE |	/* TSE context */
3761 				E1000_TXD_CMD_IP |	/* Do IP csum */
3762 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3763 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3764 
3765 	tx_buffer->m_head = NULL;
3766 	tx_buffer->next_eop = -1;
3767 
3768 	if (++cur == adapter->num_tx_desc)
3769 		cur = 0;
3770 
3771 	txr->tx_avail--;
3772 	txr->next_avail_desc = cur;
3773 	txr->tx_tso = TRUE;
3774 }
3775 
3776 
3777 /**********************************************************************
3778  *
3779  *  Examine each tx_buffer in the used queue. If the hardware is done
3780  *  processing the packet then free associated resources. The
3781  *  tx_buffer is put back on the free queue.
3782  *
3783  **********************************************************************/
3784 static void
3785 em_txeof(struct tx_ring *txr)
3786 {
3787 	struct adapter	*adapter = txr->adapter;
3788 	int first, last, done, processed;
3789 	struct em_buffer *tx_buffer;
3790 	struct e1000_tx_desc *tx_desc, *eop_desc;
3791 	struct ifnet   *ifp = adapter->ifp;
3792 
3793 	EM_TX_LOCK_ASSERT(txr);
3794 #ifdef DEV_NETMAP
3795 	if (ifp->if_capenable & IFCAP_NETMAP) {
3796 		struct netmap_adapter *na = NA(ifp);
3797 
3798 		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3799 		EM_TX_UNLOCK(txr);
3800 		EM_CORE_LOCK(adapter);
3801 		selwakeuppri(&na->tx_si, PI_NET);
3802 		EM_CORE_UNLOCK(adapter);
3803 		EM_TX_LOCK(txr);
3804 		return;
3805 	}
3806 #endif /* DEV_NETMAP */
3807 
3808 	/* No work, make sure watchdog is off */
3809 	if (txr->tx_avail == adapter->num_tx_desc) {
3810 		txr->queue_status = EM_QUEUE_IDLE;
3811 		return;
3812 	}
3813 
3814 	processed = 0;
3815 	first = txr->next_to_clean;
3816 	tx_desc = &txr->tx_base[first];
3817 	tx_buffer = &txr->tx_buffers[first];
3818 	last = tx_buffer->next_eop;
3819 	eop_desc = &txr->tx_base[last];
3820 
3821 	/*
3822 	 * Get the index of the first descriptor AFTER
3823 	 * the EOP of the first packet, so that we can
3824 	 * do a simple comparison in the inner while
3825 	 * loop below.
3826 	 */
3827 	if (++last == adapter->num_tx_desc)
3828  		last = 0;
3829 	done = last;
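	/*
	 * Example with hypothetical numbers: with 1024 descriptors and
	 * the first packet's EOP at index 1023, 'done' wraps to 0 and
	 * the inner loop below cleans 'first' up through 1023.
	 */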
3830 
3831 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3832 	    BUS_DMASYNC_POSTREAD);
3833 
3834 	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3835 		/* We clean the range of the packet */
3836 		while (first != done) {
3837 			tx_desc->upper.data = 0;
3838 			tx_desc->lower.data = 0;
3839 			tx_desc->buffer_addr = 0;
3840 			++txr->tx_avail;
3841 			++processed;
3842 
3843 			if (tx_buffer->m_head) {
3844 				bus_dmamap_sync(txr->txtag,
3845 				    tx_buffer->map,
3846 				    BUS_DMASYNC_POSTWRITE);
3847 				bus_dmamap_unload(txr->txtag,
3848 				    tx_buffer->map);
3849 				m_freem(tx_buffer->m_head);
3850 				tx_buffer->m_head = NULL;
3851 			}
3852 			tx_buffer->next_eop = -1;
3853 			txr->watchdog_time = ticks;
3854 
3855 			if (++first == adapter->num_tx_desc)
3856 				first = 0;
3857 
3858 			tx_buffer = &txr->tx_buffers[first];
3859 			tx_desc = &txr->tx_base[first];
3860 		}
3861 		++ifp->if_opackets;
3862 		/* See if we can continue to the next packet */
3863 		last = tx_buffer->next_eop;
3864 		if (last != -1) {
3865         		eop_desc = &txr->tx_base[last];
3866 			/* Get new done point */
3867 			if (++last == adapter->num_tx_desc) last = 0;
3868 			done = last;
3869 		} else
3870 			break;
3871 	}
3872 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3873 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3874 
3875 	txr->next_to_clean = first;
3876 
3877 	/*
3878 	** Watchdog calculation: we know there is work
3879 	** outstanding or the early return above would
3880 	** have been taken, so nothing processed for too
3881 	** long indicates a hang. The local timer will
3882 	** examine this and do a reset if needed.
3883 	*/
3884 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3885 		txr->queue_status = EM_QUEUE_HUNG;
3886 
3887 	/*
3888 	 * If we have a minimum free, clear IFF_DRV_OACTIVE
3889 	 * to tell the stack that it is OK to send packets.
3890 	 * Notice that all writes of OACTIVE happen under the
3891 	 * TX lock which, with a single queue, guarantees
3892 	 * sanity.
3893 	 */
3894 	if (txr->tx_avail >= EM_MAX_SCATTER)
3895 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3896 
3897 	/* Disable watchdog if all clean */
3898 	if (txr->tx_avail == adapter->num_tx_desc) {
3899 		txr->queue_status = EM_QUEUE_IDLE;
3900 	}
3901 }
3902 
3903 
3904 /*********************************************************************
3905  *
3906  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3907  *
3908  **********************************************************************/
3909 static void
3910 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3911 {
3912 	struct adapter		*adapter = rxr->adapter;
3913 	struct mbuf		*m;
3914 	bus_dma_segment_t	segs[1];
3915 	struct em_buffer	*rxbuf;
3916 	int			i, j, error, nsegs;
3917 	bool			cleaned = FALSE;
3918 
3919 	i = j = rxr->next_to_refresh;
3920 	/*
3921 	** Get one descriptor beyond
3922 	** our work mark to control
3923 	** the loop.
3924 	*/
3925 	if (++j == adapter->num_rx_desc)
3926 		j = 0;
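	/*
	 * Example with hypothetical numbers: with 1024 descriptors and
	 * next_to_refresh == 1023, 'j' wraps to 0 here; the loop below
	 * then refreshes descriptors until 'j' reaches 'limit'.
	 */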
3927 
3928 	while (j != limit) {
3929 		rxbuf = &rxr->rx_buffers[i];
3930 		if (rxbuf->m_head == NULL) {
3931 			m = m_getjcl(M_DONTWAIT, MT_DATA,
3932 			    M_PKTHDR, adapter->rx_mbuf_sz);
3933 			/*
3934 			** If we have a temporary resource shortage
3935 			** that causes a failure, just abort refresh
3936 			** for now, we will return to this point when
3937 			** reinvoked from em_rxeof.
3938 			*/
3939 			if (m == NULL)
3940 				goto update;
3941 		} else
3942 			m = rxbuf->m_head;
3943 
3944 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3945 		m->m_flags |= M_PKTHDR;
3946 		m->m_data = m->m_ext.ext_buf;
3947 
3948 		/* Use bus_dma machinery to setup the memory mapping  */
3949 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3950 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3951 		if (error != 0) {
3952 			printf("Refresh mbufs: hdr dmamap load"
3953 			    " failure - %d\n", error);
3954 			m_free(m);
3955 			rxbuf->m_head = NULL;
3956 			goto update;
3957 		}
3958 		rxbuf->m_head = m;
3959 		bus_dmamap_sync(rxr->rxtag,
3960 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3961 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3962 		cleaned = TRUE;
3963 
3964 		i = j; /* Next is precalculated for us */
3965 		rxr->next_to_refresh = i;
3966 		/* Calculate next controlling index */
3967 		if (++j == adapter->num_rx_desc)
3968 			j = 0;
3969 	}
3970 update:
3971 	/*
3972 	** Update the tail pointer only if we refreshed,
3973 	** and only as far as we have refreshed.
3974 	*/
3975 	if (cleaned)
3976 		E1000_WRITE_REG(&adapter->hw,
3977 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3978 
3979 	return;
3980 }
3981 
3982 
3983 /*********************************************************************
3984  *
3985  *  Allocate memory for rx_buffer structures. Since we use one
3986  *  rx_buffer per received packet, the maximum number of rx_buffer's
3987  *  that we'll need is equal to the number of receive descriptors
3988  *  that we've allocated.
3989  *
3990  **********************************************************************/
3991 static int
3992 em_allocate_receive_buffers(struct rx_ring *rxr)
3993 {
3994 	struct adapter		*adapter = rxr->adapter;
3995 	device_t		dev = adapter->dev;
3996 	struct em_buffer	*rxbuf;
3997 	int			error;
3998 
3999 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4000 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4001 	if (rxr->rx_buffers == NULL) {
4002 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4003 		return (ENOMEM);
4004 	}
4005 
4006 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4007 				1, 0,			/* alignment, bounds */
4008 				BUS_SPACE_MAXADDR,	/* lowaddr */
4009 				BUS_SPACE_MAXADDR,	/* highaddr */
4010 				NULL, NULL,		/* filter, filterarg */
4011 				MJUM9BYTES,		/* maxsize */
4012 				1,			/* nsegments */
4013 				MJUM9BYTES,		/* maxsegsize */
4014 				0,			/* flags */
4015 				NULL,			/* lockfunc */
4016 				NULL,			/* lockarg */
4017 				&rxr->rxtag);
4018 	if (error) {
4019 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4020 		    __func__, error);
4021 		goto fail;
4022 	}
4023 
4024 	rxbuf = rxr->rx_buffers;
4025 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4027 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4028 		    &rxbuf->map);
4029 		if (error) {
4030 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4031 			    __func__, error);
4032 			goto fail;
4033 		}
4034 	}
4035 
4036 	return (0);
4037 
4038 fail:
4039 	em_free_receive_structures(adapter);
4040 	return (error);
4041 }
4042 
4043 
4044 /*********************************************************************
4045  *
4046  *  Initialize a receive ring and its buffers.
4047  *
4048  **********************************************************************/
4049 static int
4050 em_setup_receive_ring(struct rx_ring *rxr)
4051 {
4052 	struct	adapter 	*adapter = rxr->adapter;
4053 	struct em_buffer	*rxbuf;
4054 	bus_dma_segment_t	seg[1];
4055 	int			rsize, nsegs, error = 0;
4056 #ifdef DEV_NETMAP
4057 	struct netmap_adapter *na = NA(adapter->ifp);
4058 	struct netmap_slot *slot;
4059 #endif
4060 
4061 
4062 	/* Clear the ring contents */
4063 	EM_RX_LOCK(rxr);
4064 	rsize = roundup2(adapter->num_rx_desc *
4065 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4066 	bzero((void *)rxr->rx_base, rsize);
4067 #ifdef DEV_NETMAP
4068 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4069 #endif
4070 
4071 	/*
4072 	** Free current RX buffer structs and their mbufs
4073 	*/
4074 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4075 		rxbuf = &rxr->rx_buffers[i];
4076 		if (rxbuf->m_head != NULL) {
4077 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4078 			    BUS_DMASYNC_POSTREAD);
4079 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4080 			m_freem(rxbuf->m_head);
4081 			rxbuf->m_head = NULL; /* mark as freed */
4082 		}
4083 	}
4084 
4085 	/* Now replenish the mbufs */
4086 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4087 		rxbuf = &rxr->rx_buffers[j];
4088 #ifdef DEV_NETMAP
4089 		if (slot) {
4090 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4091 			uint64_t paddr;
4092 			void *addr;
4093 
4094 			addr = PNMB(slot + si, &paddr);
4095 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4096 			/* Update descriptor */
4097 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4098 			continue;
4099 		}
4100 #endif /* DEV_NETMAP */
4101 		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4102 		    M_PKTHDR, adapter->rx_mbuf_sz);
4103 		if (rxbuf->m_head == NULL) {
4104 			error = ENOBUFS;
4105 			goto fail;
4106 		}
4107 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4108 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4109 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4110 
4111 		/* Get the memory mapping */
4112 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4113 		    rxbuf->map, rxbuf->m_head, seg,
4114 		    &nsegs, BUS_DMA_NOWAIT);
4115 		if (error != 0) {
4116 			m_freem(rxbuf->m_head);
4117 			rxbuf->m_head = NULL;
4118 			goto fail;
4119 		}
4120 		bus_dmamap_sync(rxr->rxtag,
4121 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4122 
4123 		/* Update descriptor */
4124 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4125 	}
4126 	rxr->next_to_check = 0;
4127 	rxr->next_to_refresh = 0;
4128 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4129 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4130 
4131 fail:
4132 	EM_RX_UNLOCK(rxr);
4133 	return (error);
4134 }
4135 
4136 /*********************************************************************
4137  *
4138  *  Initialize all receive rings.
4139  *
4140  **********************************************************************/
4141 static int
4142 em_setup_receive_structures(struct adapter *adapter)
4143 {
4144 	struct rx_ring *rxr = adapter->rx_rings;
4145 	int q;
4146 
4147 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4148 		if (em_setup_receive_ring(rxr))
4149 			goto fail;
4150 
4151 	return (0);
4152 fail:
4153 	/*
4154 	 * Free RX buffers allocated so far, we will only handle
4155 	 * the rings that completed, the failing case will have
4156 	 * cleaned up for itself. 'q' failed, so it's the terminus.
4157 	 */
4158 	for (int i = 0; i < q; ++i) {
4159 		rxr = &adapter->rx_rings[i];
4160 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4161 			struct em_buffer *rxbuf;
4162 			rxbuf = &rxr->rx_buffers[n];
4163 			if (rxbuf->m_head != NULL) {
4164 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4165 			  	  BUS_DMASYNC_POSTREAD);
4166 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4167 				m_freem(rxbuf->m_head);
4168 				rxbuf->m_head = NULL;
4169 			}
4170 		}
4171 		rxr->next_to_check = 0;
4172 		rxr->next_to_refresh = 0;
4173 	}
4174 
4175 	return (ENOBUFS);
4176 }
4177 
4178 /*********************************************************************
4179  *
4180  *  Free all receive rings.
4181  *
4182  **********************************************************************/
4183 static void
4184 em_free_receive_structures(struct adapter *adapter)
4185 {
4186 	struct rx_ring *rxr = adapter->rx_rings;
4187 
4188 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4189 		em_free_receive_buffers(rxr);
4190 		/* Free the ring memory as well */
4191 		em_dma_free(adapter, &rxr->rxdma);
4192 		EM_RX_LOCK_DESTROY(rxr);
4193 	}
4194 
4195 	free(adapter->rx_rings, M_DEVBUF);
4196 }
4197 
4198 
4199 /*********************************************************************
4200  *
4201  *  Free receive ring data structures
4202  *
4203  **********************************************************************/
4204 static void
4205 em_free_receive_buffers(struct rx_ring *rxr)
4206 {
4207 	struct adapter		*adapter = rxr->adapter;
4208 	struct em_buffer	*rxbuf = NULL;
4209 
4210 	INIT_DEBUGOUT("free_receive_buffers: begin");
4211 
4212 	if (rxr->rx_buffers != NULL) {
4213 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4214 			rxbuf = &rxr->rx_buffers[i];
4215 			if (rxbuf->map != NULL) {
4216 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4217 				    BUS_DMASYNC_POSTREAD);
4218 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4219 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4220 			}
4221 			if (rxbuf->m_head != NULL) {
4222 				m_freem(rxbuf->m_head);
4223 				rxbuf->m_head = NULL;
4224 			}
4225 		}
4226 		free(rxr->rx_buffers, M_DEVBUF);
4227 		rxr->rx_buffers = NULL;
4228 		rxr->next_to_check = 0;
4229 		rxr->next_to_refresh = 0;
4230 	}
4231 
4232 	if (rxr->rxtag != NULL) {
4233 		bus_dma_tag_destroy(rxr->rxtag);
4234 		rxr->rxtag = NULL;
4235 	}
4236 
4237 	return;
4238 }
4239 
4240 
4241 /*********************************************************************
4242  *
4243  *  Enable receive unit.
4244  *
4245  **********************************************************************/
4246 #define MAX_INTS_PER_SEC	8000
4247 #define DEFAULT_ITR	     (1000000000/(MAX_INTS_PER_SEC * 256))
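
/*
 * Worked out (assuming the 256ns ITR granularity of this hardware):
 * DEFAULT_ITR = 10^9 / (8000 * 256) = 488, so the hardware enforces a
 * minimum of 488 * 256ns ~= 125us between interrupts, i.e. at most
 * ~8000 interrupts per second.
 */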
4248 
4249 static void
4250 em_initialize_receive_unit(struct adapter *adapter)
4251 {
4252 	struct rx_ring	*rxr = adapter->rx_rings;
4253 	struct ifnet	*ifp = adapter->ifp;
4254 	struct e1000_hw	*hw = &adapter->hw;
4255 	u64	bus_addr;
4256 	u32	rctl, rxcsum;
4257 
4258 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4259 
4260 	/*
4261 	 * Make sure receives are disabled while setting
4262 	 * up the descriptor ring
4263 	 */
4264 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4265 	/* Do not disable if ever enabled on this hardware */
4266 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4267 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4268 
4269 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4270 	    adapter->rx_abs_int_delay.value);
4271 	/*
4272 	 * Set the interrupt throttling rate. Value is calculated
4273 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4274 	 */
4275 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4276 
4277 	/*
4278 	** When using MSIX interrupts we need to throttle
4279 	** using the EITR register (82574 only)
4280 	*/
4281 	if (hw->mac.type == e1000_82574) {
4282 		for (int i = 0; i < 4; i++)
4283 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4284 			    DEFAULT_ITR);
4285 		/* Disable accelerated acknowledge */
4286 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4287 	}
4288 
4289 	if (ifp->if_capenable & IFCAP_RXCSUM) {
4290 		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4291 		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4292 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4293 	}
4294 
4295 	/*
4296 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4297 	** long latencies are observed, like Lenovo X60. This
4298 	** change eliminates the problem, but since having positive
4299 	** values in RDTR is a known source of problems on other
4300 	** platforms another solution is being sought.
4301 	*/
4302 	if (hw->mac.type == e1000_82573)
4303 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4304 
4305 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4306 		/* Setup the Base and Length of the Rx Descriptor Ring */
4307 		bus_addr = rxr->rxdma.dma_paddr;
4308 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4309 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4310 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4311 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4312 		/* Setup the Head and Tail Descriptor Pointers */
4313 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4314 #ifdef DEV_NETMAP
4315 		/*
4316 		 * an init() while a netmap client is active must
4317 		 * preserve the rx buffers passed to userspace.
4318 		 * In this driver it means we adjust RDT to
4319 		 * something different from na->num_rx_desc - 1.
4320 		 */
4321 		if (ifp->if_capenable & IFCAP_NETMAP) {
4322 			struct netmap_adapter *na = NA(adapter->ifp);
4323 			struct netmap_kring *kring = &na->rx_rings[i];
4324 			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
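			/*
			 * Example with hypothetical numbers: with 1024
			 * slots and 100 buffers still held by userspace,
			 * RDT is written as 1024 - 1 - 100 = 923.
			 */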
4325 
4326 			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4327 		} else
4328 #endif /* DEV_NETMAP */
4329 		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4330 	}
4331 
4332 	/* Set PTHRESH for improved jumbo performance */
4333 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4334 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4335 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4336 	    (ifp->if_mtu > ETHERMTU)) {
4337 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
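		/*
		 * The low bits of RXDCTL hold the prefetch threshold
		 * (PTHRESH); OR'ing in 3 raises it, which helps jumbo
		 * frame throughput on these MACs.
		 */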
4338 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4339 	}
4340 
4341 	if (adapter->hw.mac.type == e1000_pch2lan) {
4342 		if (ifp->if_mtu > ETHERMTU)
4343 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4344 		else
4345 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4346 	}
4347 
4348 	/* Setup the Receive Control Register */
4349 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4350 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4351 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4352 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4353 
4354 	/* Strip the CRC */
4355 	rctl |= E1000_RCTL_SECRC;
4356 
4357 	/* Make sure VLAN Filters are off */
4358 	rctl &= ~E1000_RCTL_VFE;
4359 	rctl &= ~E1000_RCTL_SBP;
4360 
4361 	if (adapter->rx_mbuf_sz == MCLBYTES)
4362 		rctl |= E1000_RCTL_SZ_2048;
4363 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4364 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4365 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4366 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
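
	/*
	 * Note on the mapping above: sizes beyond 2048 require BSEX
	 * (buffer size extension), which rescales the BSIZE encoding
	 * by 16x, e.g. the 256-byte code becomes 4096 with BSEX set.
	 */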
4367 
4368 	if (ifp->if_mtu > ETHERMTU)
4369 		rctl |= E1000_RCTL_LPE;
4370 	else
4371 		rctl &= ~E1000_RCTL_LPE;
4372 
4373 	/* Write out the settings */
4374 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4375 
4376 	return;
4377 }
4378 
4379 
4380 /*********************************************************************
4381  *
4382  *  This routine executes in interrupt context. It replenishes
4383  *  the mbufs in the descriptor ring and sends data which has
4384  *  been DMA'ed into host memory to the upper layer.
4385  *
4386  *  We loop at most count times if count is > 0, or until done if
4387  *  count < 0.
4388  *
4389  *  For polling we also now return the number of cleaned packets
4390  *********************************************************************/
4391 static bool
4392 em_rxeof(struct rx_ring *rxr, int count, int *done)
4393 {
4394 	struct adapter		*adapter = rxr->adapter;
4395 	struct ifnet		*ifp = adapter->ifp;
4396 	struct mbuf		*mp, *sendmp;
4397 	u8			status = 0;
4398 	u16 			len;
4399 	int			i, processed, rxdone = 0;
4400 	bool			eop;
4401 	struct e1000_rx_desc	*cur;
4402 
4403 	EM_RX_LOCK(rxr);
4404 
4405 #ifdef DEV_NETMAP
4406 	if (ifp->if_capenable & IFCAP_NETMAP) {
4407 		struct netmap_adapter *na = NA(ifp);
4408 
4409 		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4410 		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4411 		EM_RX_UNLOCK(rxr);
4412 		EM_CORE_LOCK(adapter);
4413 		selwakeuppri(&na->rx_si, PI_NET);
4414 		EM_CORE_UNLOCK(adapter);
4415 		return (0);
4416 	}
4417 #endif /* DEV_NETMAP */
4418 
4419 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4420 
4421 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4422 			break;
4423 
4424 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4425 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4426 
4427 		cur = &rxr->rx_base[i];
4428 		status = cur->status;
4429 		mp = sendmp = NULL;
4430 
4431 		if ((status & E1000_RXD_STAT_DD) == 0)
4432 			break;
4433 
4434 		len = le16toh(cur->length);
4435 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4436 
4437 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4438 		    (rxr->discard == TRUE)) {
4439 			ifp->if_ierrors++;
4440 			++rxr->rx_discarded;
4441 			if (!eop) /* Catch subsequent segs */
4442 				rxr->discard = TRUE;
4443 			else
4444 				rxr->discard = FALSE;
4445 			em_rx_discard(rxr, i);
4446 			goto next_desc;
4447 		}
4448 
4449 		/* Assign correct length to the current fragment */
4450 		mp = rxr->rx_buffers[i].m_head;
4451 		mp->m_len = len;
4452 
4453 		/* Trigger for refresh */
4454 		rxr->rx_buffers[i].m_head = NULL;
4455 
4456 		/* First segment? */
4457 		if (rxr->fmp == NULL) {
4458 			mp->m_pkthdr.len = len;
4459 			rxr->fmp = rxr->lmp = mp;
4460 		} else {
4461 			/* Chain mbuf's together */
4462 			mp->m_flags &= ~M_PKTHDR;
4463 			rxr->lmp->m_next = mp;
4464 			rxr->lmp = mp;
4465 			rxr->fmp->m_pkthdr.len += len;
4466 		}
4467 
4468 		if (eop) {
4469 			--count;
4470 			sendmp = rxr->fmp;
4471 			sendmp->m_pkthdr.rcvif = ifp;
4472 			ifp->if_ipackets++;
4473 			em_receive_checksum(cur, sendmp);
4474 #ifndef __NO_STRICT_ALIGNMENT
4475 			if (adapter->max_frame_size >
4476 			    (MCLBYTES - ETHER_ALIGN) &&
4477 			    em_fixup_rx(rxr) != 0)
4478 				goto skip;
4479 #endif
4480 			if (status & E1000_RXD_STAT_VP) {
4481 				sendmp->m_pkthdr.ether_vtag =
4482 				    le16toh(cur->special);
4483 				sendmp->m_flags |= M_VLANTAG;
4484 			}
4485 #ifndef __NO_STRICT_ALIGNMENT
4486 skip:
4487 #endif
4488 			rxr->fmp = rxr->lmp = NULL;
4489 		}
4490 next_desc:
4491 		/* Zero out the receive descriptors status. */
4492 		cur->status = 0;
4493 		++rxdone;	/* cumulative for POLL */
4494 		++processed;
4495 
4496 		/* Advance our pointers to the next descriptor. */
4497 		if (++i == adapter->num_rx_desc)
4498 			i = 0;
4499 
4500 		/* Send to the stack */
4501 		if (sendmp != NULL) {
4502 			rxr->next_to_check = i;
4503 			EM_RX_UNLOCK(rxr);
4504 			(*ifp->if_input)(ifp, sendmp);
4505 			EM_RX_LOCK(rxr);
4506 			i = rxr->next_to_check;
4507 		}
4508 
4509 		/* Only refresh mbufs every 8 descriptors */
4510 		if (processed == 8) {
4511 			em_refresh_mbufs(rxr, i);
4512 			processed = 0;
4513 		}
4514 	}
4515 
4516 	/* Catch any remaining refresh work */
4517 	if (e1000_rx_unrefreshed(rxr))
4518 		em_refresh_mbufs(rxr, i);
4519 
4520 	rxr->next_to_check = i;
4521 	if (done != NULL)
4522 		*done = rxdone;
4523 	EM_RX_UNLOCK(rxr);
4524 
4525 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4526 }
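
/*
 * Illustrative sketch, not driver code: how a DEVICE_POLLING-style
 * handler could consume the count returned through the 'done'
 * argument. The function name and the single-queue assumption are
 * hypothetical.
 */
#if 0
static int
em_poll_example(struct ifnet *ifp, int count)
{
	struct adapter *adapter = ifp->if_softc;
	int rx_done = 0;

	em_rxeof(adapter->rx_rings, count, &rx_done);
	return (rx_done);	/* number of packets cleaned */
}
#endif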
4527 
4528 static __inline void
4529 em_rx_discard(struct rx_ring *rxr, int i)
4530 {
4531 	struct em_buffer	*rbuf;
4532 
4533 	rbuf = &rxr->rx_buffers[i];
4534 	/* Free any previous pieces */
4535 	if (rxr->fmp != NULL) {
4536 		rxr->fmp->m_flags |= M_PKTHDR;
4537 		m_freem(rxr->fmp);
4538 		rxr->fmp = NULL;
4539 		rxr->lmp = NULL;
4540 	}
4541 	/*
4542 	** Free buffer and allow em_refresh_mbufs()
4543 	** to clean up and recharge the buffer.
4544 	*/
4545 	if (rbuf->m_head) {
4546 		m_free(rbuf->m_head);
4547 		rbuf->m_head = NULL;
4548 	}
4549 	return;
4550 }
4551 
4552 #ifndef __NO_STRICT_ALIGNMENT
4553 /*
4554  * When jumbo frames are enabled we should realign the entire payload on
4555  * architectures with strict alignment. This is a serious design mistake in
4556  * the 8254x, as it nullifies DMA operations: the 8254x only allows RX
4557  * buffer sizes of 2048/4096/8192/16384, while what we really want is
4558  * 2048 - ETHER_ALIGN to keep the payload aligned. On architectures without
4559  * strict alignment restrictions the 8254x still performs unaligned memory
4560  * accesses, which reduce performance too. To avoid copying an entire frame
4561  * to realign it, we allocate a new mbuf, copy the ethernet header into it,
4562  * and prepend it onto the existing mbuf chain.
4563  *
4564  * Be aware: best performance from the 8254x is achieved only when jumbo
4565  * frames are not used at all on architectures with strict alignment.
4566  */
4567 static int
4568 em_fixup_rx(struct rx_ring *rxr)
4569 {
4570 	struct adapter *adapter = rxr->adapter;
4571 	struct mbuf *m, *n;
4572 	int error;
4573 
4574 	error = 0;
4575 	m = rxr->fmp;
4576 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4577 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4578 		m->m_data += ETHER_HDR_LEN;
4579 	} else {
4580 		MGETHDR(n, M_DONTWAIT, MT_DATA);
4581 		if (n != NULL) {
4582 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4583 			m->m_data += ETHER_HDR_LEN;
4584 			m->m_len -= ETHER_HDR_LEN;
4585 			n->m_len = ETHER_HDR_LEN;
4586 			M_MOVE_PKTHDR(n, m);
4587 			n->m_next = m;
4588 			rxr->fmp = n;
4589 		} else {
4590 			adapter->dropped_pkts++;
4591 			m_freem(rxr->fmp);
4592 			rxr->fmp = NULL;
4593 			error = ENOMEM;
4594 		}
4595 	}
4596 
4597 	return (error);
4598 }
4599 #endif
4600 
4601 /*********************************************************************
4602  *
4603  *  Verify that the hardware indicated that the checksum is valid.
4604  *  Inform the stack about the status of the checksum so that the
4605  *  stack doesn't spend time verifying it.
4606  *
4607  *********************************************************************/
4608 static void
4609 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4610 {
4611 	/* The "Ignore Checksum Indication" bit is set; report nothing */
4612 	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4613 		mp->m_pkthdr.csum_flags = 0;
4614 		return;
4615 	}
4616 
4617 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4618 		/* Did it pass? */
4619 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4620 			/* IP Checksum Good */
4621 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4622 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4623 
4624 		} else {
4625 			mp->m_pkthdr.csum_flags = 0;
4626 		}
4627 	}
4628 
4629 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4630 		/* Did it pass? */
4631 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4632 			mp->m_pkthdr.csum_flags |=
4633 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4634 			mp->m_pkthdr.csum_data = htons(0xffff);
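			/*
			 * csum_data == 0xffff together with
			 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR tells the
			 * stack that the full checksum, including the
			 * pseudo-header, has already been verified.
			 */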
4635 		}
4636 	}
4637 }
4638 
4639 /*
4640  * This routine is run via a vlan
4641  * config EVENT
4642  */
4643 static void
4644 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4645 {
4646 	struct adapter	*adapter = ifp->if_softc;
4647 	u32		index, bit;
4648 
4649 	if (ifp->if_softc !=  arg)   /* Not our event */
4650 		return;
4651 
4652 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4653                 return;
4654 
4655 	EM_CORE_LOCK(adapter);
4656 	index = (vtag >> 5) & 0x7F;
4657 	bit = vtag & 0x1F;
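	/*
	 * Worked example: vtag 291 (0x123) gives index = 291 >> 5 = 9
	 * and bit = 291 & 0x1F = 3, so bit 3 of shadow_vfta[9] tracks
	 * this VLAN.
	 */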
4658 	adapter->shadow_vfta[index] |= (1 << bit);
4659 	++adapter->num_vlans;
4660 	/* Re-init to load the changes */
4661 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4662 		em_init_locked(adapter);
4663 	EM_CORE_UNLOCK(adapter);
4664 }
4665 
4666 /*
4667  * This routine is run via a vlan
4668  * unconfig EVENT
4669  */
4670 static void
4671 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4672 {
4673 	struct adapter	*adapter = ifp->if_softc;
4674 	u32		index, bit;
4675 
4676 	if (ifp->if_softc !=  arg)
4677 		return;
4678 
4679 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4680                 return;
4681 
4682 	EM_CORE_LOCK(adapter);
4683 	index = (vtag >> 5) & 0x7F;
4684 	bit = vtag & 0x1F;
4685 	adapter->shadow_vfta[index] &= ~(1 << bit);
4686 	--adapter->num_vlans;
4687 	/* Re-init to load the changes */
4688 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4689 		em_init_locked(adapter);
4690 	EM_CORE_UNLOCK(adapter);
4691 }
4692 
4693 static void
4694 em_setup_vlan_hw_support(struct adapter *adapter)
4695 {
4696 	struct e1000_hw *hw = &adapter->hw;
4697 	u32             reg;
4698 
4699 	/*
4700 	** We get here via init_locked, meaning a soft
4701 	** reset; that has already cleared the VFTA and
4702 	** other state, so if no vlans have been
4703 	** registered, do nothing.
4704 	*/
4705 	if (adapter->num_vlans == 0)
4706                 return;
4707 
4708 	/*
4709 	** A soft reset zeroes out the VFTA, so
4710 	** we need to repopulate it now.
4711 	*/
4712 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4713                 if (adapter->shadow_vfta[i] != 0)
4714 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4715                             i, adapter->shadow_vfta[i]);
4716 
4717 	reg = E1000_READ_REG(hw, E1000_CTRL);
4718 	reg |= E1000_CTRL_VME;
4719 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4720 
4721 	/* Enable the Filter Table */
4722 	reg = E1000_READ_REG(hw, E1000_RCTL);
4723 	reg &= ~E1000_RCTL_CFIEN;
4724 	reg |= E1000_RCTL_VFE;
4725 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4726 }
4727 
4728 static void
4729 em_enable_intr(struct adapter *adapter)
4730 {
4731 	struct e1000_hw *hw = &adapter->hw;
4732 	u32 ims_mask = IMS_ENABLE_MASK;
4733 
4734 	if (hw->mac.type == e1000_82574) {
4735 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4736 		ims_mask |= EM_MSIX_MASK;
4737 	}
4738 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4739 }
4740 
4741 static void
4742 em_disable_intr(struct adapter *adapter)
4743 {
4744 	struct e1000_hw *hw = &adapter->hw;
4745 
4746 	if (hw->mac.type == e1000_82574)
4747 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4748 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4749 }
4750 
4751 /*
4752  * Bit of a misnomer, what this really means is
4753  * to enable OS management of the system... aka
4754  * to disable special hardware management features
4755  */
4756 static void
4757 em_init_manageability(struct adapter *adapter)
4758 {
4759 	/* A shared code workaround */
4760 #define E1000_82542_MANC2H E1000_MANC2H
4761 	if (adapter->has_manage) {
4762 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4763 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4764 
4765 		/* disable hardware interception of ARP */
4766 		manc &= ~(E1000_MANC_ARP_EN);
4767 
4768                 /* enable receiving management packets to the host */
4769 		manc |= E1000_MANC_EN_MNG2HOST;
4770 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4771 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4772 		manc2h |= E1000_MNG2HOST_PORT_623;
4773 		manc2h |= E1000_MNG2HOST_PORT_664;
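		/*
		 * Bits 5 and 6 forward RMCP traffic (UDP ports 623 and
		 * 664, used by IPMI/ASF-style management) up to the host.
		 */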
4774 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4775 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4776 	}
4777 }
4778 
4779 /*
4780  * Give control back to hardware management
4781  * controller if there is one.
4782  */
4783 static void
4784 em_release_manageability(struct adapter *adapter)
4785 {
4786 	if (adapter->has_manage) {
4787 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4788 
4789 		/* re-enable hardware interception of ARP */
4790 		manc |= E1000_MANC_ARP_EN;
4791 		manc &= ~E1000_MANC_EN_MNG2HOST;
4792 
4793 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4794 	}
4795 }
4796 
4797 /*
4798  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4799  * For ASF and Pass Through versions of f/w this means
4800  * that the driver is loaded. For AMT version type f/w
4801  * this means that the network i/f is open.
4802  */
4803 static void
4804 em_get_hw_control(struct adapter *adapter)
4805 {
4806 	u32 ctrl_ext, swsm;
4807 
4808 	if (adapter->hw.mac.type == e1000_82573) {
4809 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4810 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4811 		    swsm | E1000_SWSM_DRV_LOAD);
4812 		return;
4813 	}
4814 	/* else */
4815 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4816 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4817 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4818 	return;
4819 }
4820 
4821 /*
4822  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4823  * For ASF and Pass Through versions of f/w this means that
4824  * the driver is no longer loaded. For AMT versions of the
4825  * f/w this means that the network i/f is closed.
4826  */
4827 static void
4828 em_release_hw_control(struct adapter *adapter)
4829 {
4830 	u32 ctrl_ext, swsm;
4831 
4832 	if (!adapter->has_manage)
4833 		return;
4834 
4835 	if (adapter->hw.mac.type == e1000_82573) {
4836 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4837 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4838 		    swsm & ~E1000_SWSM_DRV_LOAD);
4839 		return;
4840 	}
4841 	/* else */
4842 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4843 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4844 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4845 	return;
4846 }
4847 
4848 static int
4849 em_is_valid_ether_addr(u8 *addr)
4850 {
4851 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4852 
4853 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4854 		return (FALSE);
4855 	}
4856 
4857 	return (TRUE);
4858 }
4859 
4860 /*
4861 ** Parse the interface capabilities with regard
4862 ** to both system management and wake-on-lan for
4863 ** later use.
4864 */
4865 static void
4866 em_get_wakeup(device_t dev)
4867 {
4868 	struct adapter	*adapter = device_get_softc(dev);
4869 	u16		eeprom_data = 0, device_id, apme_mask;
4870 
4871 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4872 	apme_mask = EM_EEPROM_APME;
4873 
4874 	switch (adapter->hw.mac.type) {
4875 	case e1000_82573:
4876 	case e1000_82583:
4877 		adapter->has_amt = TRUE;
4878 		/* Falls thru */
4879 	case e1000_82571:
4880 	case e1000_82572:
4881 	case e1000_80003es2lan:
4882 		if (adapter->hw.bus.func == 1) {
4883 			e1000_read_nvm(&adapter->hw,
4884 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4885 			break;
4886 		} else
4887 			e1000_read_nvm(&adapter->hw,
4888 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4889 		break;
4890 	case e1000_ich8lan:
4891 	case e1000_ich9lan:
4892 	case e1000_ich10lan:
4893 	case e1000_pchlan:
4894 	case e1000_pch2lan:
4895 		apme_mask = E1000_WUC_APME;
4896 		adapter->has_amt = TRUE;
4897 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4898 		break;
4899 	default:
4900 		e1000_read_nvm(&adapter->hw,
4901 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4902 		break;
4903 	}
4904 	if (eeprom_data & apme_mask)
4905 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4906 	/*
4907          * We have the eeprom settings, now apply the special cases
4908          * where the eeprom may be wrong or the board won't support
4909          * wake on lan on a particular port
4910 	 */
4911 	device_id = pci_get_device(dev);
4912         switch (device_id) {
4913 	case E1000_DEV_ID_82571EB_FIBER:
4914 		/* Wake events only supported on port A for dual fiber
4915 		 * regardless of eeprom setting */
4916 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4917 		    E1000_STATUS_FUNC_1)
4918 			adapter->wol = 0;
4919 		break;
4920 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4921 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4922 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4923                 /* if quad port adapter, disable WoL on all but port A */
4924 		if (global_quad_port_a != 0)
4925 			adapter->wol = 0;
4926 		/* Reset for multiple quad port adapters */
4927 		if (++global_quad_port_a == 4)
4928 			global_quad_port_a = 0;
4929                 break;
4930 	}
4931 	return;
4932 }
4933 
4934 
4935 /*
4936  * Enable PCI Wake On Lan capability
4937  */
4938 static void
4939 em_enable_wakeup(device_t dev)
4940 {
4941 	struct adapter	*adapter = device_get_softc(dev);
4942 	struct ifnet	*ifp = adapter->ifp;
4943 	u32		pmc, ctrl, ctrl_ext, rctl;
4944 	u16     	status;
4945 
4946 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4947 		return;
4948 
4949 	/* Advertise the wakeup capability */
4950 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4951 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4952 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4953 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4954 
4955 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4956 	    (adapter->hw.mac.type == e1000_pchlan) ||
4957 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4958 	    (adapter->hw.mac.type == e1000_ich10lan))
4959 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4960 
4961 	/* Keep the laser running on Fiber adapters */
4962 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4963 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4964 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4965 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4966 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4967 	}
4968 
4969 	/*
4970 	** Determine type of Wakeup: note that wol
4971 	** is set with all bits on by default.
4972 	*/
4973 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4974 		adapter->wol &= ~E1000_WUFC_MAG;
4975 
4976 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4977 		adapter->wol &= ~E1000_WUFC_MC;
4978 	else {
4979 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4980 		rctl |= E1000_RCTL_MPE;
4981 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4982 	}
4983 
4984 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4985 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4986 		if (em_enable_phy_wakeup(adapter))
4987 			return;
4988 	} else {
4989 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4990 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4991 	}
4992 
4993 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4994 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4995 
4996         /* Request PME */
4997         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4998 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4999 	if (ifp->if_capenable & IFCAP_WOL)
5000 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5001         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5002 
5003 	return;
5004 }
5005 
5006 /*
5007 ** WOL in the newer chipset interfaces (pchlan)
5008 ** requires things to be copied into the PHY
5009 */
5010 static int
5011 em_enable_phy_wakeup(struct adapter *adapter)
5012 {
5013 	struct e1000_hw *hw = &adapter->hw;
5014 	u32 mreg, ret = 0;
5015 	u16 preg;
5016 
5017 	/* copy MAC RARs to PHY RARs */
5018 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5019 
5020 	/* copy MAC MTA to PHY MTA */
5021 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5022 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5023 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5024 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5025 		    (u16)((mreg >> 16) & 0xFFFF));
5026 	}
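	/*
	 * Each 32-bit MTA entry is written as two 16-bit halves above
	 * because the PHY (BM) wakeup registers are only 16 bits wide.
	 */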
5027 
5028 	/* configure PHY Rx Control register */
5029 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5030 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5031 	if (mreg & E1000_RCTL_UPE)
5032 		preg |= BM_RCTL_UPE;
5033 	if (mreg & E1000_RCTL_MPE)
5034 		preg |= BM_RCTL_MPE;
5035 	preg &= ~(BM_RCTL_MO_MASK);
5036 	if (mreg & E1000_RCTL_MO_3)
5037 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5038 				<< BM_RCTL_MO_SHIFT);
5039 	if (mreg & E1000_RCTL_BAM)
5040 		preg |= BM_RCTL_BAM;
5041 	if (mreg & E1000_RCTL_PMCF)
5042 		preg |= BM_RCTL_PMCF;
5043 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5044 	if (mreg & E1000_CTRL_RFCE)
5045 		preg |= BM_RCTL_RFCE;
5046 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5047 
5048 	/* enable PHY wakeup in MAC register */
5049 	E1000_WRITE_REG(hw, E1000_WUC,
5050 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5051 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5052 
5053 	/* configure and enable PHY wakeup in PHY registers */
5054 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5055 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5056 
5057 	/* activate PHY wakeup */
5058 	ret = hw->phy.ops.acquire(hw);
5059 	if (ret) {
5060 		printf("Could not acquire PHY\n");
5061 		return ret;
5062 	}
5063 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5064 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5065 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5066 	if (ret) {
5067 		printf("Could not read PHY page 769\n");
5068 		goto out;
5069 	}
5070 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5071 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5072 	if (ret)
5073 		printf("Could not set PHY Host Wakeup bit\n");
5074 out:
5075 	hw->phy.ops.release(hw);
5076 
5077 	return ret;
5078 }
5079 
5080 static void
5081 em_led_func(void *arg, int onoff)
5082 {
5083 	struct adapter	*adapter = arg;
5084 
5085 	EM_CORE_LOCK(adapter);
5086 	if (onoff) {
5087 		e1000_setup_led(&adapter->hw);
5088 		e1000_led_on(&adapter->hw);
5089 	} else {
5090 		e1000_led_off(&adapter->hw);
5091 		e1000_cleanup_led(&adapter->hw);
5092 	}
5093 	EM_CORE_UNLOCK(adapter);
5094 }
5095 
5096 /*
5097 ** Disable the L0S and L1 LINK states
5098 */
5099 static void
5100 em_disable_aspm(struct adapter *adapter)
5101 {
5102 	int		base, reg;
5103 	u16		link_cap, link_ctrl;
5104 	device_t	dev = adapter->dev;
5105 
5106 	switch (adapter->hw.mac.type) {
5107 		case e1000_82573:
5108 		case e1000_82574:
5109 		case e1000_82583:
5110 			break;
5111 		default:
5112 			return;
5113 	}
5114 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5115 		return;
5116 	reg = base + PCIR_EXPRESS_LINK_CAP;
5117 	link_cap = pci_read_config(dev, reg, 2);
5118 	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
5119 		return;
5120 	reg = base + PCIR_EXPRESS_LINK_CTL;
5121 	link_ctrl = pci_read_config(dev, reg, 2);
5122 	link_ctrl &= 0xFFFC; /* clear the ASPM control field (bits 1:0) */
5123 	pci_write_config(dev, reg, link_ctrl, 2);
5124 	return;
5125 }
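
/*
 * For reference, PCIe Link Control bits 1:0 form the ASPM Control field:
 * 00b = disabled, 01b = L0s, 10b = L1, 11b = L0s and L1.  Masking with
 * 0xFFFC therefore disables both states.
 */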
5126 
5127 /**********************************************************************
5128  *
5129  *  Update the board statistics counters.
5130  *
5131  **********************************************************************/
5132 static void
5133 em_update_stats_counters(struct adapter *adapter)
5134 {
5135 	struct ifnet   *ifp;
5136 
5137 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5138 	    (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5139 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5140 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5141 	}
5142 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5143 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5144 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5145 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5146 
5147 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5148 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5149 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5150 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5151 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5152 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5153 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5154 	/*
5155 	** For watchdog management we need to know if we have been
5156 	** paused during the last interval, so capture that here.
5157 	*/
5158 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5159 	adapter->stats.xoffrxc += adapter->pause_frames;
5160 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5161 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5162 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5163 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5164 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5165 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5166 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5167 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5168 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5169 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5170 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5171 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5172 
5173 	/* For the 64-bit byte counters the low dword must be read first. */
5174 	/* Both registers clear on the read of the high dword */
5175 
5176 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5177 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5178 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5179 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5180 
5181 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5182 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5183 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5184 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5185 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5186 
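	/*
	 * Note: only the high dwords of TOR/TOT are read here; per the
	 * comment above, the high-dword read clears the counter, so any
	 * low-order octets accumulated since the last read are dropped.
	 */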
5187 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5188 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5189 
5190 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5191 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5192 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5193 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5194 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5195 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5196 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5197 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5198 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5199 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5200 
5201 	/* Interrupt Counts */
5202 
5203 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5204 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5205 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5206 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5207 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5208 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5209 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5210 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5211 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5212 
5213 	if (adapter->hw.mac.type >= e1000_82543) {
5214 		adapter->stats.algnerrc +=
5215 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5216 		adapter->stats.rxerrc +=
5217 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5218 		adapter->stats.tncrs +=
5219 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5220 		adapter->stats.cexterr +=
5221 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5222 		adapter->stats.tsctc +=
5223 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5224 		adapter->stats.tsctfc +=
5225 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5226 	}
5227 	ifp = adapter->ifp;
5228 
5229 	ifp->if_collisions = adapter->stats.colc;
5230 
5231 	/* Rx Errors */
5232 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5233 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5234 	    adapter->stats.ruc + adapter->stats.roc +
5235 	    adapter->stats.mpc + adapter->stats.cexterr;
5236 
5237 	/* Tx Errors */
5238 	ifp->if_oerrors = adapter->stats.ecol +
5239 	    adapter->stats.latecol + adapter->watchdog_events;
5240 }
5241 
5242 /* Export a single 32-bit register via a read-only sysctl. */
5243 static int
5244 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5245 {
5246 	struct adapter *adapter;
5247 	u_int val;
5248 
5249 	adapter = oidp->oid_arg1;
5250 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5251 	return (sysctl_handle_int(oidp, &val, 0, req));
5252 }
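
/*
 * The register offset rides in oid_arg2, so this one handler backs every
 * register-valued sysctl; em_add_hw_stats() below wires it up, e.g.:
 *
 *	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
 *	    CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
 *	    em_sysctl_reg_handler, "IU", "Device Control Register");
 */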
5253 
5254 /*
5255  * Add sysctl variables, one per statistic, to the system.
5256  */
5257 static void
5258 em_add_hw_stats(struct adapter *adapter)
5259 {
5260 	device_t dev = adapter->dev;
5261 
5262 	struct tx_ring *txr = adapter->tx_rings;
5263 	struct rx_ring *rxr = adapter->rx_rings;
5264 
5265 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5266 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5267 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5268 	struct e1000_hw_stats *stats = &adapter->stats;
5269 
5270 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5271 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5272 
5273 #define QUEUE_NAME_LEN 32
5274 	char namebuf[QUEUE_NAME_LEN];
5275 
5276 	/* Driver Statistics */
5277 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5278 			CTLFLAG_RD, &adapter->link_irq,
5279 			"Link MSIX IRQ Handled");
5280 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5281 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5282 			 "Std mbuf allocation failed");
5283 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5284 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5285 			 "Std mbuf cluster allocation failed");
5286 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5287 			CTLFLAG_RD, &adapter->dropped_pkts,
5288 			"Driver dropped packets");
5289 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5290 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5291 			"Driver tx dma failure in xmit");
5292 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5293 			CTLFLAG_RD, &adapter->rx_overruns,
5294 			"RX overruns");
5295 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5296 			CTLFLAG_RD, &adapter->watchdog_events,
5297 			"Watchdog timeouts");
5298 
5299 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5300 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5301 			em_sysctl_reg_handler, "IU",
5302 			"Device Control Register");
5303 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5304 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5305 			em_sysctl_reg_handler, "IU",
5306 			"Receiver Control Register");
5307 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5308 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5309 			"Flow Control High Watermark");
5310 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5311 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5312 			"Flow Control Low Watermark");
5313 
5314 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5315 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5316 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5317 					    CTLFLAG_RD, NULL, "Queue Name");
5318 		queue_list = SYSCTL_CHILDREN(queue_node);
5319 
5320 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5321 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5322 				E1000_TDH(txr->me),
5323 				em_sysctl_reg_handler, "IU",
5324 				"Transmit Descriptor Head");
5325 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5326 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5327 				E1000_TDT(txr->me),
5328 				em_sysctl_reg_handler, "IU",
5329 				"Transmit Descriptor Tail");
5330 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5331 				CTLFLAG_RD, &txr->tx_irq,
5332 				"Queue MSI-X Transmit Interrupts");
5333 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5334 				CTLFLAG_RD, &txr->no_desc_avail,
5335 				"Queue No Descriptor Available");
5336 
5337 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5338 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5339 				E1000_RDH(rxr->me),
5340 				em_sysctl_reg_handler, "IU",
5341 				"Receive Descriptor Head");
5342 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5343 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5344 				E1000_RDT(rxr->me),
5345 				em_sysctl_reg_handler, "IU",
5346 				"Receive Descriptor Tail");
5347 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5348 				CTLFLAG_RD, &rxr->rx_irq,
5349 				"Queue MSI-X Receive Interrupts");
5350 	}
5351 
5352 	/* MAC stats get their own sub node */
5353 
5354 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5355 				    CTLFLAG_RD, NULL, "Statistics");
5356 	stat_list = SYSCTL_CHILDREN(stat_node);
5357 
5358 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5359 			CTLFLAG_RD, &stats->ecol,
5360 			"Excessive collisions");
5361 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5362 			CTLFLAG_RD, &stats->scc,
5363 			"Single collisions");
5364 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5365 			CTLFLAG_RD, &stats->mcc,
5366 			"Multiple collisions");
5367 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5368 			CTLFLAG_RD, &stats->latecol,
5369 			"Late collisions");
5370 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5371 			CTLFLAG_RD, &stats->colc,
5372 			"Collision Count");
5373 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5374 			CTLFLAG_RD, &adapter->stats.symerrs,
5375 			"Symbol Errors");
5376 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5377 			CTLFLAG_RD, &adapter->stats.sec,
5378 			"Sequence Errors");
5379 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5380 			CTLFLAG_RD, &adapter->stats.dc,
5381 			"Defer Count");
5382 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5383 			CTLFLAG_RD, &adapter->stats.mpc,
5384 			"Missed Packets");
5385 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5386 			CTLFLAG_RD, &adapter->stats.rnbc,
5387 			"Receive No Buffers");
5388 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5389 			CTLFLAG_RD, &adapter->stats.ruc,
5390 			"Receive Undersize");
5391 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5392 			CTLFLAG_RD, &adapter->stats.rfc,
5393 			"Fragmented Packets Received");
5394 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5395 			CTLFLAG_RD, &adapter->stats.roc,
5396 			"Oversized Packets Received");
5397 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5398 			CTLFLAG_RD, &adapter->stats.rjc,
5399 			"Received Jabber");
5400 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5401 			CTLFLAG_RD, &adapter->stats.rxerrc,
5402 			"Receive Errors");
5403 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5404 			CTLFLAG_RD, &adapter->stats.crcerrs,
5405 			"CRC errors");
5406 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5407 			CTLFLAG_RD, &adapter->stats.algnerrc,
5408 			"Alignment Errors");
5409 	/* On 82575 these are collision counts */
5410 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5411 			CTLFLAG_RD, &adapter->stats.cexterr,
5412 			"Collision/Carrier extension errors");
5413 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5414 			CTLFLAG_RD, &adapter->stats.xonrxc,
5415 			"XON Received");
5416 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5417 			CTLFLAG_RD, &adapter->stats.xontxc,
5418 			"XON Transmitted");
5419 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5420 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5421 			"XOFF Received");
5422 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5423 			CTLFLAG_RD, &adapter->stats.xofftxc,
5424 			"XOFF Transmitted");
5425 
5426 	/* Packet Reception Stats */
5427 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5428 			CTLFLAG_RD, &adapter->stats.tpr,
5429 			"Total Packets Received");
5430 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5431 			CTLFLAG_RD, &adapter->stats.gprc,
5432 			"Good Packets Received");
5433 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5434 			CTLFLAG_RD, &adapter->stats.bprc,
5435 			"Broadcast Packets Received");
5436 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5437 			CTLFLAG_RD, &adapter->stats.mprc,
5438 			"Multicast Packets Received");
5439 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5440 			CTLFLAG_RD, &adapter->stats.prc64,
5441 			"64 byte frames received");
5442 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5443 			CTLFLAG_RD, &adapter->stats.prc127,
5444 			"65-127 byte frames received");
5445 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5446 			CTLFLAG_RD, &adapter->stats.prc255,
5447 			"128-255 byte frames received");
5448 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5449 			CTLFLAG_RD, &adapter->stats.prc511,
5450 			"256-511 byte frames received");
5451 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5452 			CTLFLAG_RD, &adapter->stats.prc1023,
5453 			"512-1023 byte frames received");
5454 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5455 			CTLFLAG_RD, &adapter->stats.prc1522,
5456 			"1024-1522 byte frames received");
5457 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5458 			CTLFLAG_RD, &adapter->stats.gorc,
5459 			"Good Octets Received");
5460 
5461 	/* Packet Transmission Stats */
5462 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5463 			CTLFLAG_RD, &adapter->stats.gotc,
5464 			"Good Octets Transmitted");
5465 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5466 			CTLFLAG_RD, &adapter->stats.tpt,
5467 			"Total Packets Transmitted");
5468 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5469 			CTLFLAG_RD, &adapter->stats.gptc,
5470 			"Good Packets Transmitted");
5471 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5472 			CTLFLAG_RD, &adapter->stats.bptc,
5473 			"Broadcast Packets Transmitted");
5474 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5475 			CTLFLAG_RD, &adapter->stats.mptc,
5476 			"Multicast Packets Transmitted");
5477 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5478 			CTLFLAG_RD, &adapter->stats.ptc64,
5479 			"64 byte frames transmitted");
5480 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5481 			CTLFLAG_RD, &adapter->stats.ptc127,
5482 			"65-127 byte frames transmitted");
5483 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5484 			CTLFLAG_RD, &adapter->stats.ptc255,
5485 			"128-255 byte frames transmitted");
5486 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5487 			CTLFLAG_RD, &adapter->stats.ptc511,
5488 			"256-511 byte frames transmitted");
5489 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5490 			CTLFLAG_RD, &adapter->stats.ptc1023,
5491 			"512-1023 byte frames transmitted");
5492 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5493 			CTLFLAG_RD, &adapter->stats.ptc1522,
5494 			"1024-1522 byte frames transmitted");
5495 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5496 			CTLFLAG_RD, &adapter->stats.tsctc,
5497 			"TSO Contexts Transmitted");
5498 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5499 			CTLFLAG_RD, &adapter->stats.tsctfc,
5500 			"TSO Contexts Failed");
5501 
5503 	/* Interrupt Stats */
5504 
5505 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5506 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5507 	int_list = SYSCTL_CHILDREN(int_node);
5508 
5509 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5510 			CTLFLAG_RD, &adapter->stats.iac,
5511 			"Interrupt Assertion Count");
5512 
5513 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5514 			CTLFLAG_RD, &adapter->stats.icrxptc,
5515 			"Interrupt Cause Rx Pkt Timer Expire Count");
5516 
5517 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5518 			CTLFLAG_RD, &adapter->stats.icrxatc,
5519 			"Interrupt Cause Rx Abs Timer Expire Count");
5520 
5521 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5522 			CTLFLAG_RD, &adapter->stats.ictxptc,
5523 			"Interrupt Cause Tx Pkt Timer Expire Count");
5524 
5525 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5526 			CTLFLAG_RD, &adapter->stats.ictxatc,
5527 			"Interrupt Cause Tx Abs Timer Expire Count");
5528 
5529 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5530 			CTLFLAG_RD, &adapter->stats.ictxqec,
5531 			"Interrupt Cause Tx Queue Empty Count");
5532 
5533 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5534 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5535 			"Interrupt Cause Tx Queue Min Thresh Count");
5536 
5537 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5538 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5539 			"Interrupt Cause Rx Desc Min Thresh Count");
5540 
5541 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5542 			CTLFLAG_RD, &adapter->stats.icrxoc,
5543 			"Interrupt Cause Receiver Overrun Count");
5544 }
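
/*
 * The resulting tree is browsable with sysctl(8), e.g. for unit 0:
 *
 *	sysctl dev.em.0.mac_stats
 *	sysctl dev.em.0.queue0.txd_head
 *	sysctl dev.em.0.interrupts
 */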
5545 
5546 /**********************************************************************
5547  *
5548  *  This routine provides a way to dump out the adapter EEPROM,
5549  *  often a useful debug/service tool. Only the first 32 words are
5550  *  dumped; the fields that matter live within that range.
5551  *
5552  **********************************************************************/
5553 static int
5554 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5555 {
5556 	struct adapter *adapter = (struct adapter *)arg1;
5557 	int error;
5558 	int result;
5559 
5560 	result = -1;
5561 	error = sysctl_handle_int(oidp, &result, 0, req);
5562 
5563 	if (error || !req->newptr)
5564 		return (error);
5565 
5566 	/*
5567 	 * This value will cause a hex dump of the
5568 	 * first 32 16-bit words of the EEPROM to
5569 	 * the screen.
5570 	 */
5571 	if (result == 1)
5572 		em_print_nvm_info(adapter);
5573 
5574 	return (error);
5575 }
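
/*
 * Example (assuming unit 0 and that the handler is attached under the
 * node name "nvm"):
 *
 *	sysctl dev.em.0.nvm=1
 */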
5576 
5577 static void
5578 em_print_nvm_info(struct adapter *adapter)
5579 {
5580 	u16	eeprom_data;
5581 	int	i, j, row = 0;
5582 
5583 	/* It's a bit crude, but it gets the job done */
5584 	printf("\nInterface EEPROM Dump:\n");
5585 	printf("Offset\n0x0000  ");
5586 	for (i = 0, j = 0; i < 32; i++, j++) {
5587 		if (j == 8) { /* Make the offset block */
5588 			j = 0; ++row;
5589 			printf("\n0x00%x0  ", row);
5590 		}
5591 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5592 		printf("%04x ", eeprom_data);
5593 	}
5594 	printf("\n");
5595 }
5596 
5597 static int
5598 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5599 {
5600 	struct em_int_delay_info *info;
5601 	struct adapter *adapter;
5602 	u32 regval;
5603 	int error, usecs, ticks;
5604 
5605 	info = (struct em_int_delay_info *)arg1;
5606 	usecs = info->value;
5607 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5608 	if (error != 0 || req->newptr == NULL)
5609 		return (error);
5610 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5611 		return (EINVAL);
5612 	info->value = usecs;
5613 	ticks = EM_USECS_TO_TICKS(usecs);
5614 
5615 	adapter = info->adapter;
5616 
5617 	EM_CORE_LOCK(adapter);
5618 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5619 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5620 	/* Handle a few special cases. */
5621 	switch (info->offset) {
5622 	case E1000_RDTR:
5623 		break;
5624 	case E1000_TIDV:
5625 		if (ticks == 0) {
5626 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5627 			/* Don't write 0 into the TIDV register. */
5628 			regval++;
5629 		} else
5630 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5631 		break;
5632 	}
5633 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5634 	EM_CORE_UNLOCK(adapter);
5635 	return (0);
5636 }
5637 
5638 static void
5639 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5640 	const char *description, struct em_int_delay_info *info,
5641 	int offset, int value)
5642 {
5643 	info->adapter = adapter;
5644 	info->offset = offset;
5645 	info->value = value;
5646 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5647 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5648 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5649 	    info, 0, em_sysctl_int_delay, "I", description);
5650 }
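
/*
 * A sketch of how the attach path is expected to use this helper (the
 * sysctl name and default shown here are illustrative; see em_attach()
 * for the actual values):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs",
 *	    &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR),
 *	    em_rx_int_delay_dflt);
 */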
5651 
5652 static void
5653 em_set_sysctl_value(struct adapter *adapter, const char *name,
5654 	const char *description, int *limit, int value)
5655 {
5656 	*limit = value;
5657 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5658 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5659 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5660 }
5661 
5662 
5663 /*
5664 ** Set flow control using sysctl:
5665 ** Flow control values:
5666 **      0 - off
5667 **      1 - rx pause
5668 **      2 - tx pause
5669 **      3 - full
5670 */
5671 static int
5672 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5673 {
5674 	int		error, input;
5675 	struct adapter	*adapter = (struct adapter *) arg1;
5676 
5677 	input = adapter->fc;	/* default to the current per-unit setting */
5678 	error = sysctl_handle_int(oidp, &input, 0, req);
5679 
5680 	if ((error) || (req->newptr == NULL))
5681 		return (error);
5682 
5683 	if (input == adapter->fc) /* no change? */
5684 		return (error);
5685 
5686 	switch (input) {
5687 	case e1000_fc_rx_pause:
5688 	case e1000_fc_tx_pause:
5689 	case e1000_fc_full:
5690 	case e1000_fc_none:
5691 		adapter->hw.fc.requested_mode = input;
5692 		adapter->fc = input;
5693 		break;
5694 	default:
5695 		/* Do nothing */
5696 		return (error);
5697 	}
5698 
5699 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5700 	e1000_force_mac_fc(&adapter->hw);
5701 	return (error);
5702 }
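
/*
 * Example (assuming unit 0 and that the handler is attached under the
 * node name "fc"):
 *
 *	sysctl dev.em.0.fc=3	# request full (rx+tx) flow control
 */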
5703 
5704 /*
5705 ** Manage Energy Efficient Ethernet:
5706 ** Control values:
5707 **     0/1 - enabled/disabled
5708 */
5709 static int
5710 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5711 {
5712 	struct adapter *adapter = (struct adapter *) arg1;
5713 	int		error, value;
5714 
5715 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5716 	error = sysctl_handle_int(oidp, &value, 0, req);
5717 	if (error || req->newptr == NULL)
5718 		return (error);
5719 	EM_CORE_LOCK(adapter);
5720 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5721 	em_init_locked(adapter);
5722 	EM_CORE_UNLOCK(adapter);
5723 	return (0);
5724 }
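
/*
 * Example (assuming unit 0 and that the handler is attached under the
 * node name "eee_control"; the value maps onto eee_disable, so 1 here
 * disables EEE and 0 enables it):
 *
 *	sysctl dev.em.0.eee_control=1
 */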
5725 
5726 static int
5727 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5728 {
5729 	struct adapter *adapter;
5730 	int error;
5731 	int result;
5732 
5733 	result = -1;
5734 	error = sysctl_handle_int(oidp, &result, 0, req);
5735 
5736 	if (error || !req->newptr)
5737 		return (error);
5738 
5739 	if (result == 1) {
5740 		adapter = (struct adapter *)arg1;
5741 		em_print_debug_info(adapter);
5742 	}
5743 
5744 	return (error);
5745 }
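
/*
 * Example (assuming unit 0 and that the handler is attached under the
 * node name "debug"):
 *
 *	sysctl dev.em.0.debug=1
 */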
5746 
5747 /*
5748 ** This routine is meant to be fluid; add whatever is
5749 ** needed for debugging a problem.  -jfv
5750 */
5751 static void
5752 em_print_debug_info(struct adapter *adapter)
5753 {
5754 	device_t dev = adapter->dev;
5755 	struct tx_ring *txr = adapter->tx_rings;
5756 	struct rx_ring *rxr = adapter->rx_rings;
5757 
5758 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5759 		printf("Interface is RUNNING ");
5760 	else
5761 		printf("Interface is NOT RUNNING ");
5762 
5763 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5764 		printf("and INACTIVE\n");
5765 	else
5766 		printf("and ACTIVE\n");
5767 
5768 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5769 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5770 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5771 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5772 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5773 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5774 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5775 	device_printf(dev, "TX descriptors avail = %d\n",
5776 	    txr->tx_avail);
5777 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5778 	    txr->no_desc_avail);
5779 	device_printf(dev, "RX discarded packets = %ld\n",
5780 	    rxr->rx_discarded);
5781 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5782 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5783 }
5784