xref: /freebsd/sys/dev/e1000/if_em.c (revision a0e793cbf1951d07fc47a0d9ea389d7dacba5213)
/******************************************************************************

  Copyright (c) 2001-2014, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.4.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load the driver on.
 *  The last field stores an index into e1000_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_drv_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

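/*
 * The interrupt-delay registers (TIDV/RDTR and friends) tick in units
 * of 1.024 usec (1024 ns), so these macros convert between register
 * ticks and microseconds, rounding to the nearest whole unit.
 */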
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

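/*
 * The ITR register counts in 256 ns increments, so DEFAULT_ITR works
 * out to 10^9 / (8000 * 256) = 488, i.e. roughly MAX_INTS_PER_SEC
 * interrupts per second at most.
 */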
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/*
 * Energy Efficient Ethernet - default to OFF.  This value is copied
 * into hw->dev_spec.ich8lan.eee_disable in em_attach(), so a non-zero
 * setting disables EEE.
 */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet when non-zero");
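
/*
 * All of the CTLFLAG_RDTUN sysctls above are also loader tunables and
 * can be set at boot, e.g. in /boot/loader.conf (illustrative values
 * only):
 *
 *	hw.em.rxd=2048
 *	hw.em.txd=2048
 *	hw.em.rx_process_limit=-1
 *	hw.em.enable_msix=1
 */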

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the adapter,
 *  based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  It
	 * must not exceed the hardware maximum and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state: this is important for
	** reading the NVM and MAC address from the hardware.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it is a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free_drv(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister_drv(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach_drv(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free_drv(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can buffer the request
 *  rather than performing an immediate send.  That buffering, rather
 *  than multiple hardware TX queues, is the advantage this path
 *  provides.
 **********************************************************************/
static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	if (m != NULL) {
		err = drbr_enqueue(ifp, txr->br, m);
		if (err)
			return (err);
	}

	/* Process the queue */
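	/*
	 * drbr discipline: peek at the head of the ring and only advance
	 * past an mbuf once em_xmit() accepts it.  On failure, a NULL
	 * 'next' means em_xmit() consumed (freed) the mbuf, so advance;
	 * otherwise put it back for a later retry.
	 */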
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_incobytes(ifp, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_incomcasts(ifp, 1);
		if_etherbpfmtap(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

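	/*
	 * If descriptors are running low, first try to reclaim completed
	 * ones; if a full scatter list still will not fit, mark the queue
	 * inactive so the stack stops handing us packets.
	 */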
	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if the number of free TX descriptors is low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		if_etherbpfmtap(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit_drv(ifp, ifa);
#endif
		} else
			error = ether_ioctl_drv(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl_drv(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register_drv(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister_drv(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp, IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp, IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl_drv(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest MAC address; the user can use a LAA */
	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset.  We make a duplicate
	 * in RAR[14] for that eventuality; this assures that
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	if (if_getcapenable(ifp) & IFCAP_TSO4)
		if_sethwassistbits(ifp, CSUM_TSO, 0);

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

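	/*
	 * Reading ICR acknowledges the interrupt: the register is
	 * read-to-clear in legacy/MSI operation.
	 */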
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}

/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(if_t ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = if_getsoftc(ifp);
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
1745  *  media/mediaopt options with ifconfig.
1746  *
1747  **********************************************************************/
1748 static int
1749 em_media_change(if_t ifp)
1750 {
1751 	struct adapter *adapter = if_getsoftc(ifp);
1752 	struct ifmedia  *ifm = &adapter->media;
1753 
1754 	INIT_DEBUGOUT("em_media_change: begin");
1755 
1756 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1757 		return (EINVAL);
1758 
1759 	EM_CORE_LOCK(adapter);
1760 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1761 	case IFM_AUTO:
1762 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1763 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1764 		break;
1765 	case IFM_1000_LX:
1766 	case IFM_1000_SX:
1767 	case IFM_1000_T:
1768 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1769 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1770 		break;
1771 	case IFM_100_TX:
1772 		adapter->hw.mac.autoneg = FALSE;
1773 		adapter->hw.phy.autoneg_advertised = 0;
1774 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1775 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1776 		else
1777 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1778 		break;
1779 	case IFM_10_T:
1780 		adapter->hw.mac.autoneg = FALSE;
1781 		adapter->hw.phy.autoneg_advertised = 0;
1782 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1783 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1784 		else
1785 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1786 		break;
1787 	default:
1788 		device_printf(adapter->dev, "Unsupported media type\n");
1789 	}
1790 
1791 	em_init_locked(adapter);
1792 	EM_CORE_UNLOCK(adapter);
1793 
1794 	return (0);
1795 }
1796 
1797 /*********************************************************************
1798  *
1799  *  This routine maps the mbufs to tx descriptors.
1800  *
1801  *  return 0 on success, positive on failure
1802  **********************************************************************/
1803 
1804 static int
1805 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1806 {
1807 	struct adapter		*adapter = txr->adapter;
1808 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1809 	bus_dmamap_t		map;
1810 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1811 	struct e1000_tx_desc	*ctxd = NULL;
1812 	struct mbuf		*m_head;
1813 	struct ether_header	*eh;
1814 	struct ip		*ip = NULL;
1815 	struct tcphdr		*tp = NULL;
1816 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1817 	int			ip_off, poff;
1818 	int			nsegs, i, j, first, last = 0;
1819 	int			error, do_tso, tso_desc = 0, remap = 1;
1820 
1821 	m_head = *m_headp;
1822 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1823 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1824 	ip_off = poff = 0;
1825 
1826 	/*
1827 	 * Intel recommends that the entire IP/TCP header reside in a single
1828 	 * buffer. If multiple descriptors are used to describe the IP and
1829 	 * TCP header, each descriptor should describe one or more
1830 	 * complete headers; descriptors referencing only parts of headers
1831 	 * are not supported. If all layer headers are not coalesced into
1832 	 * a single buffer, each buffer should not cross a 4KB boundary,
1833 	 * or be larger than the maximum read request size.
1834 	 * The controller also requires modifying the IP/TCP header to make
1835 	 * TSO work, so we first get a writable mbuf chain, then coalesce the
1836 	 * ethernet/IP/TCP headers into a single buffer to meet the
1837 	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1838 	 * offloading, which has similar restrictions.
1839 	 */
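	/*
	 * For illustration, this is the layout the m_pullup() calls below
	 * arrange (a sketch assuming an untagged IPv4 frame; a VLAN header
	 * or IP options shift the offsets):
	 *
	 *   mtod(m_head, char *) + 0      -> struct ether_header (14 bytes)
	 *   mtod(m_head, char *) + ip_off -> struct ip     (ip_hl << 2 bytes)
	 *   mtod(m_head, char *) + poff   -> struct tcphdr (th_off << 2 bytes)
	 *
	 * e.g. ip_off = 14 and poff = 34 with a minimal 20-byte IP header.
	 */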
1840 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1841 		if (do_tso || (m_head->m_next != NULL &&
1842 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1843 			if (M_WRITABLE(*m_headp) == 0) {
1844 				m_head = m_dup(*m_headp, M_NOWAIT);
1845 				m_freem(*m_headp);
1846 				if (m_head == NULL) {
1847 					*m_headp = NULL;
1848 					return (ENOBUFS);
1849 				}
1850 				*m_headp = m_head;
1851 			}
1852 		}
1853 		/*
1854 		 * XXX
1855 		 * Assume IPv4, we don't have TSO/checksum offload support
1856 		 * for IPv6 yet.
1857 		 */
1858 		ip_off = sizeof(struct ether_header);
1859 		m_head = m_pullup(m_head, ip_off);
1860 		if (m_head == NULL) {
1861 			*m_headp = NULL;
1862 			return (ENOBUFS);
1863 		}
1864 		eh = mtod(m_head, struct ether_header *);
1865 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1866 			ip_off = sizeof(struct ether_vlan_header);
1867 			m_head = m_pullup(m_head, ip_off);
1868 			if (m_head == NULL) {
1869 				*m_headp = NULL;
1870 				return (ENOBUFS);
1871 			}
1872 		}
1873 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1874 		if (m_head == NULL) {
1875 			*m_headp = NULL;
1876 			return (ENOBUFS);
1877 		}
1878 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1879 		poff = ip_off + (ip->ip_hl << 2);
1880 		if (do_tso) {
1881 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1882 			if (m_head == NULL) {
1883 				*m_headp = NULL;
1884 				return (ENOBUFS);
1885 			}
1886 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1887 			/*
1888 			 * TSO workaround:
1889 			 *   pull 4 more bytes of data into the first mbuf.
1890 			 */
1891 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1892 			if (m_head == NULL) {
1893 				*m_headp = NULL;
1894 				return (ENOBUFS);
1895 			}
1896 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1897 			ip->ip_len = 0;
1898 			ip->ip_sum = 0;
1899 			/*
1900 			 * The pseudo TCP checksum does not include the TCP
1901 			 * payload length, so the driver must recompute the
1902 			 * checksum here as the hardware expects to see it,
1903 			 * per Microsoft's Large Send specification.
1904 			 */
1905 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1906 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1907 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1908 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1909 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1910 			if (m_head == NULL) {
1911 				*m_headp = NULL;
1912 				return (ENOBUFS);
1913 			}
1914 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1915 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1916 			if (m_head == NULL) {
1917 				*m_headp = NULL;
1918 				return (ENOBUFS);
1919 			}
1920 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1921 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1922 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1923 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1924 			if (m_head == NULL) {
1925 				*m_headp = NULL;
1926 				return (ENOBUFS);
1927 			}
1928 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1929 		}
1930 		*m_headp = m_head;
1931 	}
1932 
1933 	/*
1934 	 * Map the packet for DMA
1935 	 *
1936 	 * Capture the first descriptor index,
1937 	 * this descriptor will have the index
1938 	 * of the EOP which is the only one that
1939 	 * now gets a DONE bit writeback.
1940 	 */
1941 	first = txr->next_avail_desc;
1942 	tx_buffer = &txr->tx_buffers[first];
1943 	tx_buffer_mapped = tx_buffer;
1944 	map = tx_buffer->map;
1945 
1946 retry:
1947 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1948 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1949 
1950 	/*
1951 	 * There are two types of errors we can (try) to handle:
1952 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1953 	 *   out of segments.  Defragment the mbuf chain and try again.
1954 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1955 	 *   at this point in time.  Defer sending and try again later.
1956 	 * All other errors, in particular EINVAL, are fatal and prevent the
1957 	 * mbuf chain from ever going through.  Drop it and report error.
1958 	 */
1959 	if (error == EFBIG && remap) {
1960 		struct mbuf *m;
1961 
1962 		m = m_defrag(*m_headp, M_NOWAIT);
1963 		if (m == NULL) {
1964 			adapter->mbuf_alloc_failed++;
1965 			m_freem(*m_headp);
1966 			*m_headp = NULL;
1967 			return (ENOBUFS);
1968 		}
1969 		*m_headp = m;
1970 
1971 		/* Try it again, but only once */
1972 		remap = 0;
1973 		goto retry;
1974 	} else if (error == ENOMEM) {
1975 		adapter->no_tx_dma_setup++;
1976 		return (error);
1977 	} else if (error != 0) {
1978 		adapter->no_tx_dma_setup++;
1979 		m_freem(*m_headp);
1980 		*m_headp = NULL;
1981 		return (error);
1982 	}
1983 
1984 	/*
1985 	 * TSO Hardware workaround, if this packet is not
1986 	 * TSO, and is only a single descriptor long, and
1987 	 * it follows a TSO burst, then we need to add a
1988 	 * sentinel descriptor to prevent premature writeback.
1989 	 */
1990 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1991 		if (nsegs == 1)
1992 			tso_desc = TRUE;
1993 		txr->tx_tso = FALSE;
1994 	}
1995 
1996 	if (nsegs > (txr->tx_avail - 2)) {
1997 		txr->no_desc_avail++;
1998 		bus_dmamap_unload(txr->txtag, map);
1999 		return (ENOBUFS);
2000 	}
2001 	m_head = *m_headp;
2002 
2003 	/* Do hardware assists */
2004 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2005 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2006 		    &txd_upper, &txd_lower);
2007 		/* we need to make a final sentinel transmit desc */
2008 		tso_desc = TRUE;
2009 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2010 		em_transmit_checksum_setup(txr, m_head,
2011 		    ip_off, ip, &txd_upper, &txd_lower);
2012 
2013 	if (m_head->m_flags & M_VLANTAG) {
2014 		/* Set the vlan id. */
2015 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2016 		/* Tell hardware to add tag */
2017 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2018 	}
2019 
2020 	i = txr->next_avail_desc;
2021 
2022 	/* Set up our transmit descriptors */
2023 	for (j = 0; j < nsegs; j++) {
2024 		bus_size_t seg_len;
2025 		bus_addr_t seg_addr;
2026 
2027 		tx_buffer = &txr->tx_buffers[i];
2028 		ctxd = &txr->tx_base[i];
2029 		seg_addr = segs[j].ds_addr;
2030 		seg_len  = segs[j].ds_len;
2031 		/*
2032 		** TSO Workaround:
2033 		** If this is the last descriptor, we want to
2034 		** split it so we have a small final sentinel
2035 		*/
2036 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2037 			seg_len -= 4;
2038 			ctxd->buffer_addr = htole64(seg_addr);
2039 			ctxd->lower.data = htole32(
2040 			adapter->txd_cmd | txd_lower | seg_len);
2041 			ctxd->upper.data =
2042 			    htole32(txd_upper);
2043 			if (++i == adapter->num_tx_desc)
2044 				i = 0;
2045 			/* Now make the sentinel */
2046 			++txd_used; /* using an extra txd */
2047 			ctxd = &txr->tx_base[i];
2048 			tx_buffer = &txr->tx_buffers[i];
2049 			ctxd->buffer_addr =
2050 			    htole64(seg_addr + seg_len);
2051 			ctxd->lower.data = htole32(
2052 			adapter->txd_cmd | txd_lower | 4);
2053 			ctxd->upper.data =
2054 			    htole32(txd_upper);
2055 			last = i;
2056 			if (++i == adapter->num_tx_desc)
2057 				i = 0;
2058 		} else {
2059 			ctxd->buffer_addr = htole64(seg_addr);
2060 			ctxd->lower.data = htole32(
2061 			adapter->txd_cmd | txd_lower | seg_len);
2062 			ctxd->upper.data =
2063 			    htole32(txd_upper);
2064 			last = i;
2065 			if (++i == adapter->num_tx_desc)
2066 				i = 0;
2067 		}
2068 		tx_buffer->m_head = NULL;
2069 		tx_buffer->next_eop = -1;
2070 	}
2071 
2072 	txr->next_avail_desc = i;
2073 	txr->tx_avail -= nsegs;
2074 	if (tso_desc) /* TSO used an extra for sentinel */
2075 		txr->tx_avail -= txd_used;
2076 
2077 	tx_buffer->m_head = m_head;
2078 	/*
2079 	** Here we swap the map so the last descriptor,
2080 	** which gets the completion interrupt, has the
2081 	** real map, and the first descriptor gets the
2082 	** unused map from this descriptor.
2083 	*/
2084 	tx_buffer_mapped->map = tx_buffer->map;
2085 	tx_buffer->map = map;
2086 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2087 
2088 	/*
2089 	 * Last Descriptor of Packet
2090 	 * needs End Of Packet (EOP)
2091 	 * and Report Status (RS)
2092 	 */
2093 	ctxd->lower.data |=
2094 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2095 	/*
2096 	 * Keep track in the first buffer which
2097 	 * descriptor will be written back
2098 	 */
2099 	tx_buffer = &txr->tx_buffers[first];
2100 	tx_buffer->next_eop = last;
2101 	/* Update the watchdog time early and often */
2102 	txr->watchdog_time = ticks;
2103 
2104 	/*
2105 	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2106 	 * that this frame is available to transmit.
2107 	 */
2108 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2109 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2110 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2111 
2112 	return (0);
2113 }
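#if 0
/*
 * Illustrative (not compiled) sketch of how a start routine typically
 * drives em_xmit() above: dequeue a frame, hand it to em_xmit(), and on
 * failure either requeue the untouched chain or give up if em_xmit()
 * already consumed it.  This only sketches the calling convention under
 * the drvapi ifnet accessors used elsewhere in this file; it is not the
 * driver's actual em_start_locked().
 */
static void
em_xmit_example_loop(if_t ifp, struct tx_ring *txr)
{
	struct mbuf *m_head;

	while (!if_sendq_empty(ifp)) {
		if (txr->tx_avail <= EM_MAX_SCATTER)
			break;	/* ring nearly full, try again later */
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 * em_xmit() may defrag/replace the chain; on error it has
		 * either freed the mbuf (and set it to NULL) or left it
		 * intact so it can be requeued for a later retry.
		 */
		if (em_xmit(txr, &m_head) != 0) {
			if (m_head != NULL)
				if_sendq_prepend(ifp, m_head);
			break;
		}
		/* A copy of the frame goes to any BPF listeners. */
		if_etherbpfmtap(ifp, m_head);
	}
}
#endif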
2114 
2115 static void
2116 em_set_promisc(struct adapter *adapter)
2117 {
2118 	if_t ifp = adapter->ifp;
2119 	u32		reg_rctl;
2120 
2121 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2122 
2123 	if (if_getflags(ifp) & IFF_PROMISC) {
2124 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2125 		/* Turn this on if you want to see bad packets */
2126 		if (em_debug_sbp)
2127 			reg_rctl |= E1000_RCTL_SBP;
2128 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2129 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2130 		reg_rctl |= E1000_RCTL_MPE;
2131 		reg_rctl &= ~E1000_RCTL_UPE;
2132 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2133 	}
2134 }
2135 
2136 static void
2137 em_disable_promisc(struct adapter *adapter)
2138 {
2139 	if_t		ifp = adapter->ifp;
2140 	u32		reg_rctl;
2141 	int		mcnt = 0;
2142 
2143 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2144 	reg_rctl &=  (~E1000_RCTL_UPE);
2145 	if (if_getflags(ifp) & IFF_ALLMULTI)
2146 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2147 	else
2148 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2149 	/* Don't disable if in MAX groups */
2150 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2151 		reg_rctl &=  (~E1000_RCTL_MPE);
2152 	reg_rctl &=  (~E1000_RCTL_SBP);
2153 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2154 }
2155 
2156 
2157 /*********************************************************************
2158  *  Multicast Update
2159  *
2160  *  This routine is called whenever the multicast address list is updated.
2161  *
2162  **********************************************************************/
2163 
2164 static void
2165 em_set_multi(struct adapter *adapter)
2166 {
2167 	if_t ifp = adapter->ifp;
2168 	u32 reg_rctl = 0;
2169 	u8  *mta; /* Multicast array memory */
2170 	int mcnt = 0;
2171 
2172 	IOCTL_DEBUGOUT("em_set_multi: begin");
2173 
2174 	mta = adapter->mta;
2175 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2176 
2177 	if (adapter->hw.mac.type == e1000_82542 &&
2178 	    adapter->hw.revision_id == E1000_REVISION_2) {
2179 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2180 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2181 			e1000_pci_clear_mwi(&adapter->hw);
2182 		reg_rctl |= E1000_RCTL_RST;
2183 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2184 		msec_delay(5);
2185 	}
2186 
2187 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2188 
2189 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2190 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2191 		reg_rctl |= E1000_RCTL_MPE;
2192 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2193 	} else
2194 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2195 
2196 	if (adapter->hw.mac.type == e1000_82542 &&
2197 	    adapter->hw.revision_id == E1000_REVISION_2) {
2198 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2199 		reg_rctl &= ~E1000_RCTL_RST;
2200 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2201 		msec_delay(5);
2202 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2203 			e1000_pci_set_mwi(&adapter->hw);
2204 	}
2205 }
2206 
2207 
2208 /*********************************************************************
2209  *  Timer routine
2210  *
2211  *  This routine checks for link status and updates statistics.
2212  *
2213  **********************************************************************/
2214 
2215 static void
2216 em_local_timer(void *arg)
2217 {
2218 	struct adapter	*adapter = arg;
2219 	if_t ifp = adapter->ifp;
2220 	struct tx_ring	*txr = adapter->tx_rings;
2221 	struct rx_ring	*rxr = adapter->rx_rings;
2222 	u32		trigger;
2223 
2224 	EM_CORE_LOCK_ASSERT(adapter);
2225 
2226 	em_update_link_status(adapter);
2227 	em_update_stats_counters(adapter);
2228 
2229 	/* Reset LAA into RAR[0] on 82571 */
2230 	if ((adapter->hw.mac.type == e1000_82571) &&
2231 	    e1000_get_laa_state_82571(&adapter->hw))
2232 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2233 
2234 	/* Mask to use in the irq trigger */
2235 	if (adapter->msix_mem)
2236 		trigger = rxr->ims;
2237 	else
2238 		trigger = E1000_ICS_RXDMT0;
2239 
2240 	/*
2241 	** Check on the state of the TX queue(s); this
2242 	** can be done without the lock because it is read-only
2243 	** and the HUNG state will be static if set.
2244 	*/
2245 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2246 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2247 		    (adapter->pause_frames == 0))
2248 			goto hung;
2249 		/* Schedule a TX tasklet if needed */
2250 		if (txr->tx_avail <= EM_MAX_SCATTER)
2251 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2252 	}
2253 
2254 	adapter->pause_frames = 0;
2255 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2256 #ifndef DEVICE_POLLING
2257 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2258 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2259 #endif
2260 	return;
2261 hung:
2262 	/* Looks like we're hung */
2263 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2264 	device_printf(adapter->dev,
2265 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2266 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2267 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2268 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2269 	    "Next TX to Clean = %d\n",
2270 	    txr->me, txr->tx_avail, txr->next_to_clean);
2271 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2272 	adapter->watchdog_events++;
2273 	adapter->pause_frames = 0;
2274 	em_init_locked(adapter);
2275 }
2276 
2277 
2278 static void
2279 em_update_link_status(struct adapter *adapter)
2280 {
2281 	struct e1000_hw *hw = &adapter->hw;
2282 	if_t ifp = adapter->ifp;
2283 	device_t dev = adapter->dev;
2284 	struct tx_ring *txr = adapter->tx_rings;
2285 	u32 link_check = 0;
2286 
2287 	/* Get the cached link value or read phy for real */
2288 	switch (hw->phy.media_type) {
2289 	case e1000_media_type_copper:
2290 		if (hw->mac.get_link_status) {
2291 			/* Do the work to read phy */
2292 			e1000_check_for_link(hw);
2293 			link_check = !hw->mac.get_link_status;
2294 			if (link_check) /* ESB2 fix */
2295 				e1000_cfg_on_link_up(hw);
2296 		} else
2297 			link_check = TRUE;
2298 		break;
2299 	case e1000_media_type_fiber:
2300 		e1000_check_for_link(hw);
2301 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2302 		    E1000_STATUS_LU);
2303 		break;
2304 	case e1000_media_type_internal_serdes:
2305 		e1000_check_for_link(hw);
2306 		link_check = adapter->hw.mac.serdes_has_link;
2307 		break;
2308 	default:
2309 	case e1000_media_type_unknown:
2310 		break;
2311 	}
2312 
2313 	/* Now check for a transition */
2314 	if (link_check && (adapter->link_active == 0)) {
2315 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2316 		    &adapter->link_duplex);
2317 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2318 		if ((adapter->link_speed != SPEED_1000) &&
2319 		    ((hw->mac.type == e1000_82571) ||
2320 		    (hw->mac.type == e1000_82572))) {
2321 			int tarc0;
2322 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2323 			tarc0 &= ~SPEED_MODE_BIT;
2324 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2325 		}
2326 		if (bootverbose)
2327 			device_printf(dev, "Link is up %d Mbps %s\n",
2328 			    adapter->link_speed,
2329 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2330 			    "Full Duplex" : "Half Duplex"));
2331 		adapter->link_active = 1;
2332 		adapter->smartspeed = 0;
2333 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2334 		if_linkstate_change_drv(ifp, LINK_STATE_UP);
2335 	} else if (!link_check && (adapter->link_active == 1)) {
2336 		if_setbaudrate(ifp, 0);
2337 		adapter->link_speed = 0;
2338 		adapter->link_duplex = 0;
2339 		if (bootverbose)
2340 			device_printf(dev, "Link is Down\n");
2341 		adapter->link_active = 0;
2342 		/* Link down, disable watchdog */
2343 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2344 			txr->queue_status = EM_QUEUE_IDLE;
2345 		if_linkstate_change_drv(ifp, LINK_STATE_DOWN);
2346 	}
2347 }
2348 
2349 /*********************************************************************
2350  *
2351  *  This routine disables all traffic on the adapter by issuing a
2352  *  global reset on the MAC and deallocates TX/RX buffers.
2353  *
2354  *  This routine should always be called with BOTH the CORE
2355  *  and TX locks.
2356  **********************************************************************/
2357 
2358 static void
2359 em_stop(void *arg)
2360 {
2361 	struct adapter	*adapter = arg;
2362 	if_t ifp = adapter->ifp;
2363 	struct tx_ring	*txr = adapter->tx_rings;
2364 
2365 	EM_CORE_LOCK_ASSERT(adapter);
2366 
2367 	INIT_DEBUGOUT("em_stop: begin");
2368 
2369 	em_disable_intr(adapter);
2370 	callout_stop(&adapter->timer);
2371 
2372 	/* Tell the stack that the interface is no longer active */
2373 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2374 
2375 	/* Unarm watchdog timer. */
2376 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2377 		EM_TX_LOCK(txr);
2378 		txr->queue_status = EM_QUEUE_IDLE;
2379 		EM_TX_UNLOCK(txr);
2380 	}
2381 
2382 	e1000_reset_hw(&adapter->hw);
2383 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2384 
2385 	e1000_led_off(&adapter->hw);
2386 	e1000_cleanup_led(&adapter->hw);
2387 }
2388 
2389 
2390 /*********************************************************************
2391  *
2392  *  Determine hardware revision.
2393  *
2394  **********************************************************************/
2395 static void
2396 em_identify_hardware(struct adapter *adapter)
2397 {
2398 	device_t dev = adapter->dev;
2399 
2400 	/* Make sure our PCI config space has the necessary stuff set */
2401 	pci_enable_busmaster(dev);
2402 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2403 
2404 	/* Save off the information about this board */
2405 	adapter->hw.vendor_id = pci_get_vendor(dev);
2406 	adapter->hw.device_id = pci_get_device(dev);
2407 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2408 	adapter->hw.subsystem_vendor_id =
2409 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2410 	adapter->hw.subsystem_device_id =
2411 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2412 
2413 	/* Do Shared Code Init and Setup */
2414 	if (e1000_set_mac_type(&adapter->hw)) {
2415 		device_printf(dev, "Setup init failure\n");
2416 		return;
2417 	}
2418 }
2419 
2420 static int
2421 em_allocate_pci_resources(struct adapter *adapter)
2422 {
2423 	device_t	dev = adapter->dev;
2424 	int		rid;
2425 
2426 	rid = PCIR_BAR(0);
2427 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2428 	    &rid, RF_ACTIVE);
2429 	if (adapter->memory == NULL) {
2430 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2431 		return (ENXIO);
2432 	}
2433 	adapter->osdep.mem_bus_space_tag =
2434 	    rman_get_bustag(adapter->memory);
2435 	adapter->osdep.mem_bus_space_handle =
2436 	    rman_get_bushandle(adapter->memory);
2437 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2438 
2439 	/* Default to a single queue */
2440 	adapter->num_queues = 1;
2441 
2442 	/*
2443 	 * Setup MSI/X or MSI if PCI Express
2444 	 */
2445 	adapter->msix = em_setup_msix(adapter);
2446 
2447 	adapter->hw.back = &adapter->osdep;
2448 
2449 	return (0);
2450 }
2451 
2452 /*********************************************************************
2453  *
2454  *  Setup the Legacy or MSI Interrupt handler
2455  *
2456  **********************************************************************/
2457 int
2458 em_allocate_legacy(struct adapter *adapter)
2459 {
2460 	device_t dev = adapter->dev;
2461 	struct tx_ring	*txr = adapter->tx_rings;
2462 	int error, rid = 0;
2463 
2464 	/* Manually turn off all interrupts */
2465 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2466 
2467 	if (adapter->msix == 1) /* using MSI */
2468 		rid = 1;
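	/*
	 * Note: with MSI the message's SYS_RES_IRQ rid is allocated
	 * starting at 1, while the legacy INTx line is rid 0, hence
	 * the adjustment above.
	 */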
2469 	/* We allocate a single interrupt resource */
2470 	adapter->res = bus_alloc_resource_any(dev,
2471 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2472 	if (adapter->res == NULL) {
2473 		device_printf(dev, "Unable to allocate bus resource: "
2474 		    "interrupt\n");
2475 		return (ENXIO);
2476 	}
2477 
2478 	/*
2479 	 * Allocate a fast interrupt and the associated
2480 	 * deferred processing contexts.
2481 	 */
2482 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2483 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2484 	    taskqueue_thread_enqueue, &adapter->tq);
2485 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2486 	    device_get_nameunit(adapter->dev));
2487 	/* Use a TX only tasklet for local timer */
2488 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2489 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2490 	    taskqueue_thread_enqueue, &txr->tq);
2491 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2492 	    device_get_nameunit(adapter->dev));
2493 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2494 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2495 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2496 		device_printf(dev, "Failed to register fast interrupt "
2497 			    "handler: %d\n", error);
2498 		taskqueue_free(adapter->tq);
2499 		adapter->tq = NULL;
2500 		return (error);
2501 	}
2502 
2503 	return (0);
2504 }
2505 
2506 /*********************************************************************
2507  *
2508  *  Setup the MSIX Interrupt handlers
2509  *   This is not really Multiqueue, rather
2510  *   This is not really multiqueue; rather,
2511  *   it is just separate interrupt vectors
2512  *
2513  **********************************************************************/
2514 int
2515 em_allocate_msix(struct adapter *adapter)
2516 {
2517 	device_t	dev = adapter->dev;
2518 	struct		tx_ring *txr = adapter->tx_rings;
2519 	struct		rx_ring *rxr = adapter->rx_rings;
2520 	int		error, rid, vector = 0;
2521 
2522 
2523 	/* Make sure all interrupts are disabled */
2524 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2525 
2526 	/* First set up ring resources */
2527 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2528 
2529 		/* RX ring */
2530 		rid = vector + 1;
2531 
2532 		rxr->res = bus_alloc_resource_any(dev,
2533 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2534 		if (rxr->res == NULL) {
2535 			device_printf(dev,
2536 			    "Unable to allocate bus resource: "
2537 			    "RX MSIX Interrupt %d\n", i);
2538 			return (ENXIO);
2539 		}
2540 		if ((error = bus_setup_intr(dev, rxr->res,
2541 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2542 		    rxr, &rxr->tag)) != 0) {
2543 			device_printf(dev, "Failed to register RX handler");
2544 			return (error);
2545 		}
2546 #if __FreeBSD_version >= 800504
2547 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2548 #endif
2549 		rxr->msix = vector++; /* NOTE increment vector for TX */
2550 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2551 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2552 		    taskqueue_thread_enqueue, &rxr->tq);
2553 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2554 		    device_get_nameunit(adapter->dev));
2555 		/*
2556 		** Set the bit to enable interrupt
2557 		** in E1000_IMS -- bits 20 and 21
2558 		** are for RX0 and RX1, note this has
2559 		** NOTHING to do with the MSIX vector
2560 		*/
2561 		rxr->ims = 1 << (20 + i);
2562 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2563 
2564 		/* TX ring */
2565 		rid = vector + 1;
2566 		txr->res = bus_alloc_resource_any(dev,
2567 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2568 		if (txr->res == NULL) {
2569 			device_printf(dev,
2570 			    "Unable to allocate bus resource: "
2571 			    "TX MSIX Interrupt %d\n", i);
2572 			return (ENXIO);
2573 		}
2574 		if ((error = bus_setup_intr(dev, txr->res,
2575 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2576 		    txr, &txr->tag)) != 0) {
2577 			device_printf(dev, "Failed to register TX handler");
2578 			return (error);
2579 		}
2580 #if __FreeBSD_version >= 800504
2581 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2582 #endif
2583 		txr->msix = vector++; /* Increment vector for next pass */
2584 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2585 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2586 		    taskqueue_thread_enqueue, &txr->tq);
2587 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2588 		    device_get_nameunit(adapter->dev));
2589 		/*
2590 		** Set the bit to enable interrupt
2591 		** in E1000_IMS -- bits 22 and 23
2592 		** are for TX0 and TX1, note this has
2593 		** NOTHING to do with the MSIX vector
2594 		*/
2595 		txr->ims = 1 << (22 + i);
2596 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2597 	}
2598 
2599 	/* Link interrupt */
2600 	++rid;
2601 	adapter->res = bus_alloc_resource_any(dev,
2602 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2603 	if (!adapter->res) {
2604 		device_printf(dev,"Unable to allocate "
2605 		    "bus resource: Link interrupt [%d]\n", rid);
2606 		return (ENXIO);
2607 	}
2608 	/* Set the link handler function */
2609 	error = bus_setup_intr(dev, adapter->res,
2610 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2611 	    em_msix_link, adapter, &adapter->tag);
2612 	if (error) {
2613 		adapter->res = NULL;
2614 		device_printf(dev, "Failed to register LINK handler");
2615 		return (error);
2616 	}
2617 #if __FreeBSD_version >= 800504
2618 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2619 #endif
2620 	adapter->linkvec = vector;
2621 	adapter->ivars |=  (8 | vector) << 16;
2622 	adapter->ivars |= 0x80000000;
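	/*
	 * A sketch of how the accumulated ivars value decodes on the
	 * 82574 (as assembled above; the datasheet is authoritative):
	 *   bits  3:0   RxQ0 = (8 | rx vector), the 8 marks the entry valid
	 *   bits  7:4   RxQ1
	 *   bits 11:8   TxQ0
	 *   bits 15:12  TxQ1
	 *   bits 19:16  other causes (link), assigned just above
	 *   bit  31     set above as the 82574 requires for MSIX operation
	 */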
2623 
2624 	return (0);
2625 }
2626 
2627 
2628 static void
2629 em_free_pci_resources(struct adapter *adapter)
2630 {
2631 	device_t	dev = adapter->dev;
2632 	struct tx_ring	*txr;
2633 	struct rx_ring	*rxr;
2634 	int		rid;
2635 
2636 
2637 	/*
2638 	** Release all the queue interrupt resources:
2639 	*/
2640 	for (int i = 0; i < adapter->num_queues; i++) {
2641 		txr = &adapter->tx_rings[i];
2642 		rxr = &adapter->rx_rings[i];
2643 		/* an early abort? */
2644 		if ((txr == NULL) || (rxr == NULL))
2645 			break;
2646 		rid = txr->msix +1;
2647 		if (txr->tag != NULL) {
2648 			bus_teardown_intr(dev, txr->res, txr->tag);
2649 			txr->tag = NULL;
2650 		}
2651 		if (txr->res != NULL)
2652 			bus_release_resource(dev, SYS_RES_IRQ,
2653 			    rid, txr->res);
2654 		rid = rxr->msix +1;
2655 		if (rxr->tag != NULL) {
2656 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2657 			rxr->tag = NULL;
2658 		}
2659 		if (rxr->res != NULL)
2660 			bus_release_resource(dev, SYS_RES_IRQ,
2661 			    rid, rxr->res);
2662 	}
2663 
2664 	if (adapter->linkvec) /* we are doing MSIX */
2665 		rid = adapter->linkvec + 1;
2666 	else
2667 		rid = (adapter->msix != 0) ? 1 : 0;
2668 
2669 	if (adapter->tag != NULL) {
2670 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2671 		adapter->tag = NULL;
2672 	}
2673 
2674 	if (adapter->res != NULL)
2675 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2676 
2677 
2678 	if (adapter->msix)
2679 		pci_release_msi(dev);
2680 
2681 	if (adapter->msix_mem != NULL)
2682 		bus_release_resource(dev, SYS_RES_MEMORY,
2683 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2684 
2685 	if (adapter->memory != NULL)
2686 		bus_release_resource(dev, SYS_RES_MEMORY,
2687 		    PCIR_BAR(0), adapter->memory);
2688 
2689 	if (adapter->flash != NULL)
2690 		bus_release_resource(dev, SYS_RES_MEMORY,
2691 		    EM_FLASH, adapter->flash);
2692 }
2693 
2694 /*
2695  * Setup MSI or MSI/X
2696  */
2697 static int
2698 em_setup_msix(struct adapter *adapter)
2699 {
2700 	device_t dev = adapter->dev;
2701 	int val;
2702 
2703 	/*
2704 	** Setup MSI/X for Hartwell: tests have shown
2705 	** use of two queues to be unstable, and to
2706 	** provide no great gain anyway, so we simply
2707 	** separate the interrupts and use a single queue.
2708 	*/
2709 	if ((adapter->hw.mac.type == e1000_82574) &&
2710 	    (em_enable_msix == TRUE)) {
2711 		/* Map the MSIX BAR */
2712 		int rid = PCIR_BAR(EM_MSIX_BAR);
2713 		adapter->msix_mem = bus_alloc_resource_any(dev,
2714 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2715 		if (adapter->msix_mem == NULL) {
2716 			/* May not be enabled */
2717 			device_printf(adapter->dev,
2718 			    "Unable to map MSIX table\n");
2719 			goto msi;
2720 		}
2721 		val = pci_msix_count(dev);
2722 		/* We only need/want 3 vectors */
2723 		if (val >= 3)
2724 			val = 3;
2725 		else {
2726 			device_printf(adapter->dev,
2727 			    "MSIX: insufficient vectors, using MSI\n");
2728 			goto msi;
2729 		}
2730 
2731 		if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2732 			device_printf(adapter->dev,
2733 			    "Using MSIX interrupts "
2734 			    "with %d vectors\n", val);
2735 			return (val);
2736 		}
2737 
2738 		/*
2739 		** If MSIX alloc failed or provided us with
2740 		** less than needed, free and fall through to MSI
2741 		*/
2742 		pci_release_msi(dev);
2743 	}
2744 msi:
2745 	if (adapter->msix_mem != NULL) {
2746 		bus_release_resource(dev, SYS_RES_MEMORY,
2747 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2748 		adapter->msix_mem = NULL;
2749 	}
2750 	val = 1;
2751 	if (pci_alloc_msi(dev, &val) == 0) {
2752 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2753 		return (val);
2754 	}
2755 	/* Should only happen due to manual configuration */
2756 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2757 	return (0);
2758 }
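/*
 * Note on the return value above: it is stored in adapter->msix by the
 * caller (em_allocate_pci_resources) and encodes the interrupt style in
 * use: 0 = legacy INTx, 1 = MSI, 3 = MSIX with the vectors wired up in
 * em_allocate_msix().
 */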
2759 
2760 
2761 /*********************************************************************
2762  *
2763  *  Initialize the hardware to a configuration
2764  *  as specified by the adapter structure.
2765  *
2766  **********************************************************************/
2767 static void
2768 em_reset(struct adapter *adapter)
2769 {
2770 	device_t	dev = adapter->dev;
2771 	if_t ifp = adapter->ifp;
2772 	struct e1000_hw	*hw = &adapter->hw;
2773 	u16		rx_buffer_size;
2774 	u32		pba;
2775 
2776 	INIT_DEBUGOUT("em_reset: begin");
2777 
2778 	/* Set up smart power down as default off on newer adapters. */
2779 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2780 	    hw->mac.type == e1000_82572)) {
2781 		u16 phy_tmp = 0;
2782 
2783 		/* Speed up time to link by disabling smart power down. */
2784 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2785 		phy_tmp &= ~IGP02E1000_PM_SPD;
2786 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2787 	}
2788 
2789 	/*
2790 	 * Packet Buffer Allocation (PBA)
2791 	 * Writing PBA sets the receive portion of the buffer;
2792 	 * the remainder is used for the transmit buffer.
2793 	 */
2794 	switch (hw->mac.type) {
2795 	/* Total Packet Buffer on these is 48K */
2796 	case e1000_82571:
2797 	case e1000_82572:
2798 	case e1000_80003es2lan:
2799 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2800 		break;
2801 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2802 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2803 		break;
2804 	case e1000_82574:
2805 	case e1000_82583:
2806 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2807 		break;
2808 	case e1000_ich8lan:
2809 		pba = E1000_PBA_8K;
2810 		break;
2811 	case e1000_ich9lan:
2812 	case e1000_ich10lan:
2813 		/* Boost Receive side for jumbo frames */
2814 		if (adapter->hw.mac.max_frame_size > 4096)
2815 			pba = E1000_PBA_14K;
2816 		else
2817 			pba = E1000_PBA_10K;
2818 		break;
2819 	case e1000_pchlan:
2820 	case e1000_pch2lan:
2821 	case e1000_pch_lpt:
2822 		pba = E1000_PBA_26K;
2823 		break;
2824 	default:
2825 		if (adapter->hw.mac.max_frame_size > 8192)
2826 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2827 		else
2828 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2829 	}
2830 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2831 
2832 	/*
2833 	 * These parameters control the automatic generation (Tx) and
2834 	 * response (Rx) to Ethernet PAUSE frames.
2835 	 * - High water mark should allow for at least two frames to be
2836 	 *   received after sending an XOFF.
2837 	 * - Low water mark works best when it is very near the high water mark.
2838 	 *   This allows the receiver to restart by sending XON when it has
2839 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2840 	 *   restart after one full frame is pulled from the buffer. There
2841 	 *   could be several smaller frames in the buffer and if so they will
2842 	 *   not trigger the XON until their total number reduces the buffer
2843 	 *   by 1500.
2844 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2845 	 */
2846 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2847 	hw->fc.high_water = rx_buffer_size -
2848 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2849 	hw->fc.low_water = hw->fc.high_water - 1500;
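	/*
	 * Worked example (a sketch assuming E1000_PBA_32K, i.e. 32 KB of
	 * RX packet buffer, and a standard 1522-byte max frame):
	 *   rx_buffer_size = 32 << 10                    = 32768
	 *   high_water     = 32768 - roundup2(1522, 1024)
	 *                  = 32768 - 2048                 = 30720
	 *   low_water      = 30720 - 1500                 = 29220 bytes
	 */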
2850 
2851 	if (adapter->fc) /* locally set flow control value? */
2852 		hw->fc.requested_mode = adapter->fc;
2853 	else
2854 		hw->fc.requested_mode = e1000_fc_full;
2855 
2856 	if (hw->mac.type == e1000_80003es2lan)
2857 		hw->fc.pause_time = 0xFFFF;
2858 	else
2859 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2860 
2861 	hw->fc.send_xon = TRUE;
2862 
2863 	/* Device specific overrides/settings */
2864 	switch (hw->mac.type) {
2865 	case e1000_pchlan:
2866 		/* Workaround: no TX flow ctrl for PCH */
2867 		hw->fc.requested_mode = e1000_fc_rx_pause;
2868 		hw->fc.pause_time = 0xFFFF; /* override */
2869 		if (if_getmtu(ifp) > ETHERMTU) {
2870 			hw->fc.high_water = 0x3500;
2871 			hw->fc.low_water = 0x1500;
2872 		} else {
2873 			hw->fc.high_water = 0x5000;
2874 			hw->fc.low_water = 0x3000;
2875 		}
2876 		hw->fc.refresh_time = 0x1000;
2877 		break;
2878 	case e1000_pch2lan:
2879 	case e1000_pch_lpt:
2880 		hw->fc.high_water = 0x5C20;
2881 		hw->fc.low_water = 0x5048;
2882 		hw->fc.pause_time = 0x0650;
2883 		hw->fc.refresh_time = 0x0400;
2884 		/* Jumbos need adjusted PBA */
2885 		if (if_getmtu(ifp) > ETHERMTU)
2886 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2887 		else
2888 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2889 		break;
2890 	case e1000_ich9lan:
2891 	case e1000_ich10lan:
2892 		if (if_getmtu(ifp) > ETHERMTU) {
2893 			hw->fc.high_water = 0x2800;
2894 			hw->fc.low_water = hw->fc.high_water - 8;
2895 			break;
2896 		}
2897 		/* else fall thru */
2898 	default:
2899 		if (hw->mac.type == e1000_80003es2lan)
2900 			hw->fc.pause_time = 0xFFFF;
2901 		break;
2902 	}
2903 
2904 	/* Issue a global reset */
2905 	e1000_reset_hw(hw);
2906 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2907 	em_disable_aspm(adapter);
2908 	/* and a re-init */
2909 	if (e1000_init_hw(hw) < 0) {
2910 		device_printf(dev, "Hardware Initialization Failed\n");
2911 		return;
2912 	}
2913 
2914 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2915 	e1000_get_phy_info(hw);
2916 	e1000_check_for_link(hw);
2917 	return;
2918 }
2919 
2920 /*********************************************************************
2921  *
2922  *  Setup networking device structure and register an interface.
2923  *
2924  **********************************************************************/
2925 static int
2926 em_setup_interface(device_t dev, struct adapter *adapter)
2927 {
2928 	if_t ifp;
2929 
2930 	INIT_DEBUGOUT("em_setup_interface: begin");
2931 
2932 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2933 	if (ifp == NULL) {
2934 		device_printf(dev, "cannot allocate ifnet structure\n");
2935 		return (-1);
2936 	}
2937 	if_initname_drv(ifp, device_get_name(dev), device_get_unit(dev));
2938 	if_setdev(ifp, dev);
2939 	if_setinitfn(ifp, em_init);
2940 	if_setsoftc(ifp, adapter);
2941 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2942 	if_setioctlfn(ifp, em_ioctl);
2943 #ifdef EM_MULTIQUEUE
2944 	/* Multiqueue stack interface */
2945 	if_settransmitfn(ifp, em_mq_start);
2946 	if_setqflushfn(ifp, em_qflush);
2947 #else
2948 	if_setstartfn(ifp, em_start);
2949 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
2950 	if_setsendqready(ifp);
2951 #endif
2952 
2953 	ether_ifattach_drv(ifp, adapter->hw.mac.addr);
2954 
2955 	if_setcapabilities(ifp, 0);
2956 	if_setcapenable(ifp, 0);
2957 
2958 
2959 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
2960 	    IFCAP_TSO4, 0);
2961 	/*
2962 	 * Tell the upper layer(s) we
2963 	 * support full VLAN capability
2964 	 */
2965 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2966 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
2967 	    IFCAP_VLAN_MTU, 0);
2968 	if_setcapenable(ifp, if_getcapabilities(ifp));
2969 
2970 	/*
2971 	** Don't turn this on by default: if vlans are
2972 	** created on another pseudo device (e.g. lagg)
2973 	** then vlan events are not passed through, breaking
2974 	** operation, but with HW FILTER off it works. If
2975 	** using vlans directly on the em driver you can
2976 	** enable this and get full hardware tag filtering.
2977 	*/
2978 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
2979 
2980 #ifdef DEVICE_POLLING
2981 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
2982 #endif
2983 
2984 	/* Enable only WOL MAGIC by default */
2985 	if (adapter->wol) {
2986 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
2987 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
2988 	}
2989 
2990 	/*
2991 	 * Specify the media types supported by this adapter and register
2992 	 * callbacks to update media and link information
2993 	 */
2994 	ifmedia_init_drv(&adapter->media, IFM_IMASK,
2995 	    em_media_change, em_media_status);
2996 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2997 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2998 		u_char fiber_type = IFM_1000_SX;	/* default type */
2999 
3000 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3001 			    0, NULL);
3002 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3003 	} else {
3004 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3005 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3006 			    0, NULL);
3007 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3008 			    0, NULL);
3009 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3010 			    0, NULL);
3011 		if (adapter->hw.phy.type != e1000_phy_ife) {
3012 			ifmedia_add(&adapter->media,
3013 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3014 			ifmedia_add(&adapter->media,
3015 				IFM_ETHER | IFM_1000_T, 0, NULL);
3016 		}
3017 	}
3018 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3019 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3020 	return (0);
3021 }
3022 
3023 
3024 /*
3025  * Manage DMA'able memory.
3026  */
3027 static void
3028 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3029 {
3030 	if (error)
3031 		return;
3032 	*(bus_addr_t *) arg = segs[0].ds_addr;
3033 }
3034 
3035 static int
3036 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3037         struct em_dma_alloc *dma, int mapflags)
3038 {
3039 	int error;
3040 
3041 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3042 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3043 				BUS_SPACE_MAXADDR,	/* lowaddr */
3044 				BUS_SPACE_MAXADDR,	/* highaddr */
3045 				NULL, NULL,		/* filter, filterarg */
3046 				size,			/* maxsize */
3047 				1,			/* nsegments */
3048 				size,			/* maxsegsize */
3049 				0,			/* flags */
3050 				NULL,			/* lockfunc */
3051 				NULL,			/* lockarg */
3052 				&dma->dma_tag);
3053 	if (error) {
3054 		device_printf(adapter->dev,
3055 		    "%s: bus_dma_tag_create failed: %d\n",
3056 		    __func__, error);
3057 		goto fail_0;
3058 	}
3059 
3060 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3061 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3062 	if (error) {
3063 		device_printf(adapter->dev,
3064 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3065 		    __func__, (uintmax_t)size, error);
3066 		goto fail_2;
3067 	}
3068 
3069 	dma->dma_paddr = 0;
3070 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3071 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3072 	if (error || dma->dma_paddr == 0) {
3073 		device_printf(adapter->dev,
3074 		    "%s: bus_dmamap_load failed: %d\n",
3075 		    __func__, error);
3076 		goto fail_3;
3077 	}
3078 
3079 	return (0);
3080 
3081 fail_3:
3082 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3083 fail_2:
3084 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3085 	bus_dma_tag_destroy(dma->dma_tag);
3086 fail_0:
3087 	dma->dma_tag = NULL;
3088 
3089 	return (error);
3090 }
3091 
3092 static void
3093 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3094 {
3095 	if (dma->dma_tag == NULL)
3096 		return;
3097 	if (dma->dma_paddr != 0) {
3098 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3099 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3100 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3101 		dma->dma_paddr = 0;
3102 	}
3103 	if (dma->dma_vaddr != NULL) {
3104 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3105 		dma->dma_vaddr = NULL;
3106 	}
3107 	bus_dma_tag_destroy(dma->dma_tag);
3108 	dma->dma_tag = NULL;
3109 }
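#if 0
/*
 * Illustrative (not compiled) sketch of the em_dma_malloc()/em_dma_free()
 * pairing used throughout this file, e.g. for a descriptor ring: allocate
 * a DMA-safe region, use dma_vaddr/dma_paddr while it is loaded, then
 * release it on teardown.  The one-page size here is only for the example.
 */
static int
em_dma_example(struct adapter *adapter)
{
	struct em_dma_alloc ring;
	int error;

	/* One page worth of descriptors, loaded with BUS_DMA_NOWAIT. */
	error = em_dma_malloc(adapter, PAGE_SIZE, &ring, BUS_DMA_NOWAIT);
	if (error != 0)
		return (error);

	/* ring.dma_vaddr is the KVA, ring.dma_paddr the bus address. */
	bzero(ring.dma_vaddr, PAGE_SIZE);

	em_dma_free(adapter, &ring);
	return (0);
}
#endif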
3110 
3111 
3112 /*********************************************************************
3113  *
3114  *  Allocate memory for the transmit and receive rings, and then
3115  *  the descriptors associated with each, called only once at attach.
3116  *
3117  **********************************************************************/
3118 static int
3119 em_allocate_queues(struct adapter *adapter)
3120 {
3121 	device_t		dev = adapter->dev;
3122 	struct tx_ring		*txr = NULL;
3123 	struct rx_ring		*rxr = NULL;
3124 	int rsize, tsize, error = E1000_SUCCESS;
3125 	int txconf = 0, rxconf = 0;
3126 
3127 
3128 	/* Allocate the TX ring struct memory */
3129 	if (!(adapter->tx_rings =
3130 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3131 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3132 		device_printf(dev, "Unable to allocate TX ring memory\n");
3133 		error = ENOMEM;
3134 		goto fail;
3135 	}
3136 
3137 	/* Now allocate the RX */
3138 	if (!(adapter->rx_rings =
3139 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3140 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3141 		device_printf(dev, "Unable to allocate RX ring memory\n");
3142 		error = ENOMEM;
3143 		goto rx_fail;
3144 	}
3145 
3146 	tsize = roundup2(adapter->num_tx_desc *
3147 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3148 	/*
3149 	 * Now set up the TX queues; txconf is needed to handle the
3150 	 * possibility that things fail midcourse and we need to
3151 	 * unwind the allocations gracefully
3152 	 */
3153 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3154 		/* Set up some basics */
3155 		txr = &adapter->tx_rings[i];
3156 		txr->adapter = adapter;
3157 		txr->me = i;
3158 
3159 		/* Initialize the TX lock */
3160 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3161 		    device_get_nameunit(dev), txr->me);
3162 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3163 
3164 		if (em_dma_malloc(adapter, tsize,
3165 			&txr->txdma, BUS_DMA_NOWAIT)) {
3166 			device_printf(dev,
3167 			    "Unable to allocate TX Descriptor memory\n");
3168 			error = ENOMEM;
3169 			goto err_tx_desc;
3170 		}
3171 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3172 		bzero((void *)txr->tx_base, tsize);
3173 
3174 		if (em_allocate_transmit_buffers(txr)) {
3175 			device_printf(dev,
3176 			    "Critical Failure setting up transmit buffers\n");
3177 			error = ENOMEM;
3178 			goto err_tx_desc;
3179 		}
3180 #if __FreeBSD_version >= 800000
3181 		/* Allocate a buf ring */
3182 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3183 		    M_WAITOK, &txr->tx_mtx);
3184 #endif
3185 	}
3186 
3187 	/*
3188 	 * Next the RX queues...
3189 	 */
3190 	rsize = roundup2(adapter->num_rx_desc *
3191 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3192 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3193 		rxr = &adapter->rx_rings[i];
3194 		rxr->adapter = adapter;
3195 		rxr->me = i;
3196 
3197 		/* Initialize the RX lock */
3198 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3199 		    device_get_nameunit(dev), rxr->me);
3200 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3201 
3202 		if (em_dma_malloc(adapter, rsize,
3203 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3204 			device_printf(dev,
3205 			    "Unable to allocate RX Descriptor memory\n");
3206 			error = ENOMEM;
3207 			goto err_rx_desc;
3208 		}
3209 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3210 		bzero((void *)rxr->rx_base, rsize);
3211 
3212 		/* Allocate receive buffers for the ring */
3213 		if (em_allocate_receive_buffers(rxr)) {
3214 			device_printf(dev,
3215 			    "Critical Failure setting up receive buffers\n");
3216 			error = ENOMEM;
3217 			goto err_rx_desc;
3218 		}
3219 	}
3220 
3221 	return (0);
3222 
3223 err_rx_desc:
3224 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3225 		em_dma_free(adapter, &rxr->rxdma);
3226 err_tx_desc:
3227 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3228 		em_dma_free(adapter, &txr->txdma);
3229 	free(adapter->rx_rings, M_DEVBUF);
3230 rx_fail:
3231 #if __FreeBSD_version >= 800000
3232 	buf_ring_free(txr->br, M_DEVBUF);
3233 #endif
3234 	free(adapter->tx_rings, M_DEVBUF);
3235 fail:
3236 	return (error);
3237 }
3238 
3239 
3240 /*********************************************************************
3241  *
3242  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3243  *  the information needed to transmit a packet on the wire. This is
3244  *  called only once at attach, setup is done every reset.
3245  *
3246  **********************************************************************/
3247 static int
3248 em_allocate_transmit_buffers(struct tx_ring *txr)
3249 {
3250 	struct adapter *adapter = txr->adapter;
3251 	device_t dev = adapter->dev;
3252 	struct em_buffer *txbuf;
3253 	int error, i;
3254 
3255 	/*
3256 	 * Setup DMA descriptor areas.
3257 	 */
3258 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3259 			       1, 0,			/* alignment, bounds */
3260 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3261 			       BUS_SPACE_MAXADDR,	/* highaddr */
3262 			       NULL, NULL,		/* filter, filterarg */
3263 			       EM_TSO_SIZE,		/* maxsize */
3264 			       EM_MAX_SCATTER,		/* nsegments */
3265 			       PAGE_SIZE,		/* maxsegsize */
3266 			       0,			/* flags */
3267 			       NULL,			/* lockfunc */
3268 			       NULL,			/* lockfuncarg */
3269 			       &txr->txtag))) {
3270 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3271 		goto fail;
3272 	}
3273 
3274 	if (!(txr->tx_buffers =
3275 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3276 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3277 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3278 		error = ENOMEM;
3279 		goto fail;
3280 	}
3281 
3282 	/* Create the descriptor buffer dma maps */
3283 	txbuf = txr->tx_buffers;
3284 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3285 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3286 		if (error != 0) {
3287 			device_printf(dev, "Unable to create TX DMA map\n");
3288 			goto fail;
3289 		}
3290 	}
3291 
3292 	return 0;
3293 fail:
3294 	/* We free all; this handles the case where we are in the middle */
3295 	em_free_transmit_structures(adapter);
3296 	return (error);
3297 }
3298 
3299 /*********************************************************************
3300  *
3301  *  Initialize a transmit ring.
3302  *
3303  **********************************************************************/
3304 static void
3305 em_setup_transmit_ring(struct tx_ring *txr)
3306 {
3307 	struct adapter *adapter = txr->adapter;
3308 	struct em_buffer *txbuf;
3309 	int i;
3310 #ifdef DEV_NETMAP
3311 	struct netmap_slot *slot;
3312 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3313 #endif /* DEV_NETMAP */
3314 
3315 	/* Clear the old descriptor contents */
3316 	EM_TX_LOCK(txr);
3317 #ifdef DEV_NETMAP
3318 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3319 #endif /* DEV_NETMAP */
3320 
3321 	bzero((void *)txr->tx_base,
3322 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3323 	/* Reset indices */
3324 	txr->next_avail_desc = 0;
3325 	txr->next_to_clean = 0;
3326 
3327 	/* Free any existing tx buffers. */
3328 	txbuf = txr->tx_buffers;
3329 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3330 		if (txbuf->m_head != NULL) {
3331 			bus_dmamap_sync(txr->txtag, txbuf->map,
3332 			    BUS_DMASYNC_POSTWRITE);
3333 			bus_dmamap_unload(txr->txtag, txbuf->map);
3334 			m_freem(txbuf->m_head);
3335 			txbuf->m_head = NULL;
3336 		}
3337 #ifdef DEV_NETMAP
3338 		if (slot) {
3339 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3340 			uint64_t paddr;
3341 			void *addr;
3342 
3343 			addr = PNMB(slot + si, &paddr);
3344 			txr->tx_base[i].buffer_addr = htole64(paddr);
3345 			/* reload the map for netmap mode */
3346 			netmap_load_map(txr->txtag, txbuf->map, addr);
3347 		}
3348 #endif /* DEV_NETMAP */
3349 
3350 		/* clear the watch index */
3351 		txbuf->next_eop = -1;
3352 	}
3353 
3354 	/* Set number of descriptors available */
3355 	txr->tx_avail = adapter->num_tx_desc;
3356 	txr->queue_status = EM_QUEUE_IDLE;
3357 
3358 	/* Clear checksum offload context. */
3359 	txr->last_hw_offload = 0;
3360 	txr->last_hw_ipcss = 0;
3361 	txr->last_hw_ipcso = 0;
3362 	txr->last_hw_tucss = 0;
3363 	txr->last_hw_tucso = 0;
3364 
3365 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3366 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3367 	EM_TX_UNLOCK(txr);
3368 }
3369 
3370 /*********************************************************************
3371  *
3372  *  Initialize all transmit rings.
3373  *
3374  **********************************************************************/
3375 static void
3376 em_setup_transmit_structures(struct adapter *adapter)
3377 {
3378 	struct tx_ring *txr = adapter->tx_rings;
3379 
3380 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3381 		em_setup_transmit_ring(txr);
3382 
3383 	return;
3384 }
3385 
3386 /*********************************************************************
3387  *
3388  *  Enable transmit unit.
3389  *
3390  **********************************************************************/
3391 static void
3392 em_initialize_transmit_unit(struct adapter *adapter)
3393 {
3394 	struct tx_ring	*txr = adapter->tx_rings;
3395 	struct e1000_hw	*hw = &adapter->hw;
3396 	u32	tctl, tarc, tipg = 0;
3397 
3398 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3399 
3400 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3401 		u64 bus_addr = txr->txdma.dma_paddr;
3402 		/* Base and Len of TX Ring */
3403 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3404 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3405 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3406 	    	    (u32)(bus_addr >> 32));
3407 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3408 	    	    (u32)bus_addr);
3409 		/* Init the HEAD/TAIL indices */
3410 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3411 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3412 
3413 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3414 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3415 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3416 
3417 		txr->queue_status = EM_QUEUE_IDLE;
3418 	}
3419 
3420 	/* Set the default values for the Tx Inter Packet Gap timer */
3421 	switch (adapter->hw.mac.type) {
3422 	case e1000_80003es2lan:
3423 		tipg = DEFAULT_82543_TIPG_IPGR1;
3424 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3425 		    E1000_TIPG_IPGR2_SHIFT;
3426 		break;
3427 	default:
3428 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3429 		    (adapter->hw.phy.media_type ==
3430 		    e1000_media_type_internal_serdes))
3431 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3432 		else
3433 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3434 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3435 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3436 	}
3437 
3438 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3439 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3440 
3441 	if (adapter->hw.mac.type >= e1000_82540)
3442 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3443 		    adapter->tx_abs_int_delay.value);
3444 
3445 	if ((adapter->hw.mac.type == e1000_82571) ||
3446 	    (adapter->hw.mac.type == e1000_82572)) {
3447 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3448 		tarc |= SPEED_MODE_BIT;
3449 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3450 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3451 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3452 		tarc |= 1;
3453 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3454 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3455 		tarc |= 1;
3456 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3457 	}
3458 
3459 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3460 	if (adapter->tx_int_delay.value > 0)
3461 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3462 
3463 	/* Program the Transmit Control Register */
3464 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3465 	tctl &= ~E1000_TCTL_CT;
3466 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3467 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3468 
3469 	if (adapter->hw.mac.type >= e1000_82571)
3470 		tctl |= E1000_TCTL_MULR;
3471 
3472 	/* This write will effectively turn on the transmit unit. */
3473 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3474 
3475 }
3476 
3477 
3478 /*********************************************************************
3479  *
3480  *  Free all transmit rings.
3481  *
3482  **********************************************************************/
3483 static void
3484 em_free_transmit_structures(struct adapter *adapter)
3485 {
3486 	struct tx_ring *txr = adapter->tx_rings;
3487 
3488 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3489 		EM_TX_LOCK(txr);
3490 		em_free_transmit_buffers(txr);
3491 		em_dma_free(adapter, &txr->txdma);
3492 		EM_TX_UNLOCK(txr);
3493 		EM_TX_LOCK_DESTROY(txr);
3494 	}
3495 
3496 	free(adapter->tx_rings, M_DEVBUF);
3497 }
3498 
3499 /*********************************************************************
3500  *
3501  *  Free transmit ring related data structures.
3502  *
3503  **********************************************************************/
3504 static void
3505 em_free_transmit_buffers(struct tx_ring *txr)
3506 {
3507 	struct adapter		*adapter = txr->adapter;
3508 	struct em_buffer	*txbuf;
3509 
3510 	INIT_DEBUGOUT("free_transmit_ring: begin");
3511 
3512 	if (txr->tx_buffers == NULL)
3513 		return;
3514 
3515 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3516 		txbuf = &txr->tx_buffers[i];
3517 		if (txbuf->m_head != NULL) {
3518 			bus_dmamap_sync(txr->txtag, txbuf->map,
3519 			    BUS_DMASYNC_POSTWRITE);
3520 			bus_dmamap_unload(txr->txtag,
3521 			    txbuf->map);
3522 			m_freem(txbuf->m_head);
3523 			txbuf->m_head = NULL;
3524 			if (txbuf->map != NULL) {
3525 				bus_dmamap_destroy(txr->txtag,
3526 				    txbuf->map);
3527 				txbuf->map = NULL;
3528 			}
3529 		} else if (txbuf->map != NULL) {
3530 			bus_dmamap_unload(txr->txtag,
3531 			    txbuf->map);
3532 			bus_dmamap_destroy(txr->txtag,
3533 			    txbuf->map);
3534 			txbuf->map = NULL;
3535 		}
3536 	}
3537 #if __FreeBSD_version >= 800000
3538 	if (txr->br != NULL)
3539 		buf_ring_free(txr->br, M_DEVBUF);
3540 #endif
3541 	if (txr->tx_buffers != NULL) {
3542 		free(txr->tx_buffers, M_DEVBUF);
3543 		txr->tx_buffers = NULL;
3544 	}
3545 	if (txr->txtag != NULL) {
3546 		bus_dma_tag_destroy(txr->txtag);
3547 		txr->txtag = NULL;
3548 	}
3549 	return;
3550 }
3551 
3552 
3553 /*********************************************************************
3554  *  The offload context is protocol specific (TCP/UDP) and thus
3555  *  only needs to be set when the protocol changes. A context
3556  *  change can be a performance detriment, however, and it might
3557  *  be better just to disable the feature. The reason lies in the
3558  *  way in which the controller supports pipelined requests from
3559  *  the Tx data DMA. Up to four requests can be pipelined, and
3560  *  they may belong to the same packet or to multiple packets.
3561  *  However, all requests for one packet are issued before a
3562  *  request is issued for a subsequent packet, and if a request
3563  *  for the next packet requires a context change, that request
3564  *  will be stalled until the previous request completes. This
3565  *  means setting up a new context effectively disables pipelined
3566  *  Tx data DMA, which in turn greatly slows down performance
3567  *  when sending small frames.
3568  **********************************************************************/
3569 static void
3570 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3571     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3572 {
3573 	struct adapter			*adapter = txr->adapter;
3574 	struct e1000_context_desc	*TXD = NULL;
3575 	struct em_buffer		*tx_buffer;
3576 	int				cur, hdr_len;
3577 	u32				cmd = 0;
3578 	u16				offload = 0;
3579 	u8				ipcso, ipcss, tucso, tucss;
3580 
3581 	ipcss = ipcso = tucss = tucso = 0;
3582 	hdr_len = ip_off + (ip->ip_hl << 2);
3583 	cur = txr->next_avail_desc;
3584 
3585 	/* Setup of IP header checksum. */
3586 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3587 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3588 		offload |= CSUM_IP;
3589 		ipcss = ip_off;
3590 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3591 		/*
3592 		 * Start offset for header checksum calculation.
3593 		 * End offset for header checksum calculation.
3594 		 * Offset of place to put the checksum.
3595 		 */
3596 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3597 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3598 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3599 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3600 		cmd |= E1000_TXD_CMD_IP;
3601 	}
3602 
3603 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3604  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3605  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3606  		offload |= CSUM_TCP;
3607  		tucss = hdr_len;
3608  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3609  		/*
3610  		 * Setting up a new checksum offload context for every frame
3611  		 * takes a lot of processing time for the hardware. It also
3612  		 * reduces performance a lot for small frames, so avoid
3613  		 * it if the driver can reuse the previously configured checksum
3614  		 * offload context.
3615  		 */
3616  		if (txr->last_hw_offload == offload) {
3617  			if (offload & CSUM_IP) {
3618  				if (txr->last_hw_ipcss == ipcss &&
3619  				    txr->last_hw_ipcso == ipcso &&
3620  				    txr->last_hw_tucss == tucss &&
3621  				    txr->last_hw_tucso == tucso)
3622  					return;
3623  			} else {
3624  				if (txr->last_hw_tucss == tucss &&
3625  				    txr->last_hw_tucso == tucso)
3626  					return;
3627  			}
3628   		}
3629  		txr->last_hw_offload = offload;
3630  		txr->last_hw_tucss = tucss;
3631  		txr->last_hw_tucso = tucso;
3632  		/*
3633  		 * Start offset for payload checksum calculation.
3634  		 * End offset for payload checksum calculation.
3635  		 * Offset of place to put the checksum.
3636  		 */
3637 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3638  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3639  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3640  		TXD->upper_setup.tcp_fields.tucso = tucso;
3641  		cmd |= E1000_TXD_CMD_TCP;
3642  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3643  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3644  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;
3645  		tucss = hdr_len;
3646  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3647  		/*
3648  		 * Setting up a new checksum offload context for every frame
3649  		 * takes a lot of processing time for the hardware. It also
3650  		 * reduces performance a lot for small frames, so avoid
3651  		 * it if the driver can reuse the previously configured checksum
3652  		 * offload context.
3653  		 */
3654  		if (txr->last_hw_offload == offload) {
3655  			if (offload & CSUM_IP) {
3656  				if (txr->last_hw_ipcss == ipcss &&
3657  				    txr->last_hw_ipcso == ipcso &&
3658  				    txr->last_hw_tucss == tucss &&
3659  				    txr->last_hw_tucso == tucso)
3660  					return;
3661  			} else {
3662  				if (txr->last_hw_tucss == tucss &&
3663  				    txr->last_hw_tucso == tucso)
3664  					return;
3665  			}
3666  		}
3667  		txr->last_hw_offload = offload;
3668  		txr->last_hw_tucss = tucss;
3669  		txr->last_hw_tucso = tucso;
3670  		/*
3671  		 * Start offset for payload checksum calculation.
3672  		 * End offset for payload checksum calculation.
3673  		 * Offset of place to put the checksum.
3674  		 */
3675 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3676  		TXD->upper_setup.tcp_fields.tucss = tucss;
3677  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3678  		TXD->upper_setup.tcp_fields.tucso = tucso;
3679   	}
3680 
3681  	if (offload & CSUM_IP) {
3682  		txr->last_hw_ipcss = ipcss;
3683  		txr->last_hw_ipcso = ipcso;
3684   	}
3685 
3686 	TXD->tcp_seg_setup.data = htole32(0);
3687 	TXD->cmd_and_length =
3688 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3689 	tx_buffer = &txr->tx_buffers[cur];
3690 	tx_buffer->m_head = NULL;
3691 	tx_buffer->next_eop = -1;
3692 
3693 	if (++cur == adapter->num_tx_desc)
3694 		cur = 0;
3695 
3696 	txr->tx_avail--;
3697 	txr->next_avail_desc = cur;
3698 }
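/*
 * For illustration of the offset math above, assuming a standard
 * 14-byte Ethernet header (ip_off = 14) and a minimal IPv4 header
 * (ip->ip_hl = 5, so ip_hl << 2 = 20):
 *
 *	hdr_len = 14 + 20 = 34
 *	ipcso   = 14 + offsetof(struct ip, ip_sum)     = 14 + 10 = 24
 *	tucso   = 34 + offsetof(struct tcphdr, th_sum) = 34 + 16 = 50
 *	  (UDP:   34 + offsetof(struct udphdr, uh_sum) = 34 +  6 = 40)
 */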
3699 
3700 
3701 /**********************************************************************
3702  *
3703  *  Setup work for hardware segmentation offload (TSO)
3704  *
3705  **********************************************************************/
3706 static void
3707 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3708     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3709 {
3710 	struct adapter			*adapter = txr->adapter;
3711 	struct e1000_context_desc	*TXD;
3712 	struct em_buffer		*tx_buffer;
3713 	int cur, hdr_len;
3714 
3715 	/*
3716 	 * In theory we could reuse the same TSO context if and only if
3717 	 * the frame is the same type (IP/TCP) and has the same MSS.
3718 	 * However, checking whether a frame has the same IP/TCP
3719 	 * structure is hard, so just ignore that and always establish
3720 	 * a new TSO context.
3721 	 */
3722 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3723 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3724 		      E1000_TXD_DTYP_D |	/* Data descr type */
3725 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3726 
3727 	/* IP and/or TCP header checksum calculation and insertion. */
3728 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3729 
3730 	cur = txr->next_avail_desc;
3731 	tx_buffer = &txr->tx_buffers[cur];
3732 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3733 
3734 	/*
3735 	 * Start offset for header checksum calculation.
3736 	 * End offset for header checksum calculation.
3737 	 * Offset of place to put the checksum.
3738 	 */
3739 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3740 	TXD->lower_setup.ip_fields.ipcse =
3741 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3742 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3743 	/*
3744 	 * Start offset for payload checksum calculation.
3745 	 * End offset for payload checksum calculation.
3746 	 * Offset of place to put the checksum.
3747 	 */
3748 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3749 	TXD->upper_setup.tcp_fields.tucse = 0;
3750 	TXD->upper_setup.tcp_fields.tucso =
3751 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3752 	/*
3753 	 * Payload size per packet w/o any headers.
3754 	 * Length of all headers up to payload.
3755 	 */
3756 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3757 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3758 
3759 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3760 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3761 				E1000_TXD_CMD_TSE |	/* TSE context */
3762 				E1000_TXD_CMD_IP |	/* Do IP csum */
3763 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3764 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3765 
3766 	tx_buffer->m_head = NULL;
3767 	tx_buffer->next_eop = -1;
3768 
3769 	if (++cur == adapter->num_tx_desc)
3770 		cur = 0;
3771 
3772 	txr->tx_avail--;
3773 	txr->next_avail_desc = cur;
3774 	txr->tx_tso = TRUE;
3775 }
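/*
 * For illustration, assuming ip_off = 14, a minimal IPv4 header
 * (ip_hl = 5) and a minimal TCP header (th_off = 5): hdr_len is
 * 14 + 20 + 20 = 54, so a 9054-byte TSO frame would program a
 * context with hdr_len = 54, mss = m_pkthdr.tso_segsz, and
 * 9054 - 54 = 9000 bytes of payload for the MAC to segment.
 */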
3776 
3777 
3778 /**********************************************************************
3779  *
3780  *  Examine each tx_buffer in the used queue. If the hardware is done
3781  *  processing the packet then free associated resources. The
3782  *  tx_buffer is put back on the free queue.
3783  *
3784  **********************************************************************/
3785 static void
3786 em_txeof(struct tx_ring *txr)
3787 {
3788 	struct adapter	*adapter = txr->adapter;
3789         int first, last, done, processed;
3790         struct em_buffer *tx_buffer;
3791         struct e1000_tx_desc   *tx_desc, *eop_desc;
3792 	if_t ifp = adapter->ifp;
3793 
3794 	EM_TX_LOCK_ASSERT(txr);
3795 #ifdef DEV_NETMAP
3796 	if (netmap_tx_irq(ifp, txr->me))
3797 		return;
3798 #endif /* DEV_NETMAP */
3799 
3800 	/* No work, make sure watchdog is off */
3801         if (txr->tx_avail == adapter->num_tx_desc) {
3802 		txr->queue_status = EM_QUEUE_IDLE;
3803                 return;
3804 	}
3805 
3806 	processed = 0;
3807         first = txr->next_to_clean;
3808         tx_desc = &txr->tx_base[first];
3809         tx_buffer = &txr->tx_buffers[first];
3810 	last = tx_buffer->next_eop;
3811         eop_desc = &txr->tx_base[last];
3812 
3813 	/*
3814 	 * Get the index of the first descriptor
3815 	 * AFTER the EOP of the first packet, so
3816 	 * that the inner while loop can use a
3817 	 * simple comparison against 'done'.
3818 	 */
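	/*
	 * E.g. with num_tx_desc = 1024 and the first packet's EOP in
	 * slot 1023, ++last wraps to 0 and 'done' becomes 0, so the
	 * cleanup loop below covers slots first..1023 and then stops.
	 */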
3819 	if (++last == adapter->num_tx_desc)
3820  		last = 0;
3821 	done = last;
3822 
3823         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3824             BUS_DMASYNC_POSTREAD);
3825 
3826         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3827 		/* We clean the range of the packet */
3828 		while (first != done) {
3829                 	tx_desc->upper.data = 0;
3830                 	tx_desc->lower.data = 0;
3831                 	tx_desc->buffer_addr = 0;
3832                 	++txr->tx_avail;
3833 			++processed;
3834 
3835 			if (tx_buffer->m_head) {
3836 				bus_dmamap_sync(txr->txtag,
3837 				    tx_buffer->map,
3838 				    BUS_DMASYNC_POSTWRITE);
3839 				bus_dmamap_unload(txr->txtag,
3840 				    tx_buffer->map);
3841                         	m_freem(tx_buffer->m_head);
3842                         	tx_buffer->m_head = NULL;
3843                 	}
3844 			tx_buffer->next_eop = -1;
3845 			txr->watchdog_time = ticks;
3846 
3847 	                if (++first == adapter->num_tx_desc)
3848 				first = 0;
3849 
3850 	                tx_buffer = &txr->tx_buffers[first];
3851 			tx_desc = &txr->tx_base[first];
3852 		}
3853 		if_incopackets(ifp, 1);
3854 		/* See if we can continue to the next packet */
3855 		last = tx_buffer->next_eop;
3856 		if (last != -1) {
3857         		eop_desc = &txr->tx_base[last];
3858 			/* Get new done point */
3859 			if (++last == adapter->num_tx_desc) last = 0;
3860 			done = last;
3861 		} else
3862 			break;
3863         }
3864         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3865             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3866 
3867         txr->next_to_clean = first;
3868 
3869 	/*
3870 	** Watchdog calculation: we know there's
3871 	** work outstanding or the first return
3872 	** would have been taken, so nothing processed
3873 	** for too long indicates a hang. The local
3874 	** timer will examine this and reset if needed.
3875 	*/
3876 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3877 		txr->queue_status = EM_QUEUE_HUNG;
3878 
3879         /*
3880          * If we have a minimum free, clear IFF_DRV_OACTIVE
3881          * to tell the stack that it is OK to send packets.
3882 	 * Notice that all writes of OACTIVE happen under the
3883 	 * TX lock which, with a single queue, guarantees
3884 	 * sanity.
3885          */
3886         if (txr->tx_avail >= EM_MAX_SCATTER)
3887 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
3888 
3889 	/* Disable watchdog if all clean */
3890 	if (txr->tx_avail == adapter->num_tx_desc) {
3891 		txr->queue_status = EM_QUEUE_IDLE;
3892 	}
3893 }
3894 
3895 
3896 /*********************************************************************
3897  *
3898  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3899  *
3900  **********************************************************************/
3901 static void
3902 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3903 {
3904 	struct adapter		*adapter = rxr->adapter;
3905 	struct mbuf		*m;
3906 	bus_dma_segment_t	segs[1];
3907 	struct em_buffer	*rxbuf;
3908 	int			i, j, error, nsegs;
3909 	bool			cleaned = FALSE;
3910 
3911 	i = j = rxr->next_to_refresh;
3912 	/*
3913 	** Get one descriptor beyond
3914 	** our work mark to control
3915 	** the loop.
3916 	*/
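	/*
	 * E.g. with next_to_refresh = 5 and limit = 8: i starts at 5
	 * and j at 6; slots 5 and 6 are refreshed, and the loop exits
	 * with next_to_refresh = 7, one descriptor short of 'limit',
	 * which becomes the new tail (RDT) value below.
	 */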
3917 	if (++j == adapter->num_rx_desc)
3918 		j = 0;
3919 
3920 	while (j != limit) {
3921 		rxbuf = &rxr->rx_buffers[i];
3922 		if (rxbuf->m_head == NULL) {
3923 			m = m_getjcl(M_NOWAIT, MT_DATA,
3924 			    M_PKTHDR, adapter->rx_mbuf_sz);
3925 			/*
3926 			** If we have a temporary resource shortage
3927 			** that causes a failure, just abort refresh
3928 			** for now; we will return to this point when
3929 			** reinvoked from em_rxeof.
3930 			*/
3931 			if (m == NULL)
3932 				goto update;
3933 		} else
3934 			m = rxbuf->m_head;
3935 
3936 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3937 		m->m_flags |= M_PKTHDR;
3938 		m->m_data = m->m_ext.ext_buf;
3939 
3940 		/* Use bus_dma machinery to setup the memory mapping  */
3941 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3942 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3943 		if (error != 0) {
3944 			printf("Refresh mbufs: hdr dmamap load"
3945 			    " failure - %d\n", error);
3946 			m_free(m);
3947 			rxbuf->m_head = NULL;
3948 			goto update;
3949 		}
3950 		rxbuf->m_head = m;
3951 		bus_dmamap_sync(rxr->rxtag,
3952 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3953 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3954 		cleaned = TRUE;
3955 
3956 		i = j; /* Next is precalculated for us */
3957 		rxr->next_to_refresh = i;
3958 		/* Calculate next controlling index */
3959 		if (++j == adapter->num_rx_desc)
3960 			j = 0;
3961 	}
3962 update:
3963 	/*
3964 	** Update the tail pointer only if, and
3965 	** only as far as, we have refreshed.
3966 	*/
3967 	if (cleaned)
3968 		E1000_WRITE_REG(&adapter->hw,
3969 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3970 
3971 	return;
3972 }
3973 
3974 
3975 /*********************************************************************
3976  *
3977  *  Allocate memory for rx_buffer structures. Since we use one
3978  *  rx_buffer per received packet, the maximum number of rx_buffers
3979  *  that we'll need is equal to the number of receive descriptors
3980  *  that we've allocated.
3981  *
3982  **********************************************************************/
3983 static int
3984 em_allocate_receive_buffers(struct rx_ring *rxr)
3985 {
3986 	struct adapter		*adapter = rxr->adapter;
3987 	device_t		dev = adapter->dev;
3988 	struct em_buffer	*rxbuf;
3989 	int			error;
3990 
3991 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3992 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3993 	if (rxr->rx_buffers == NULL) {
3994 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3995 		return (ENOMEM);
3996 	}
3997 
3998 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3999 				1, 0,			/* alignment, bounds */
4000 				BUS_SPACE_MAXADDR,	/* lowaddr */
4001 				BUS_SPACE_MAXADDR,	/* highaddr */
4002 				NULL, NULL,		/* filter, filterarg */
4003 				MJUM9BYTES,		/* maxsize */
4004 				1,			/* nsegments */
4005 				MJUM9BYTES,		/* maxsegsize */
4006 				0,			/* flags */
4007 				NULL,			/* lockfunc */
4008 				NULL,			/* lockarg */
4009 				&rxr->rxtag);
4010 	if (error) {
4011 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4012 		    __func__, error);
4013 		goto fail;
4014 	}
4015 
4016 	rxbuf = rxr->rx_buffers;
4017 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4018 		rxbuf = &rxr->rx_buffers[i];
4019 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4020 		if (error) {
4021 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4022 			    __func__, error);
4023 			goto fail;
4024 		}
4025 	}
4026 
4027 	return (0);
4028 
4029 fail:
4030 	em_free_receive_structures(adapter);
4031 	return (error);
4032 }
4033 
4034 
4035 /*********************************************************************
4036  *
4037  *  Initialize a receive ring and its buffers.
4038  *
4039  **********************************************************************/
4040 static int
4041 em_setup_receive_ring(struct rx_ring *rxr)
4042 {
4043 	struct	adapter 	*adapter = rxr->adapter;
4044 	struct em_buffer	*rxbuf;
4045 	bus_dma_segment_t	seg[1];
4046 	int			rsize, nsegs, error = 0;
4047 #ifdef DEV_NETMAP
4048 	struct netmap_slot *slot;
4049 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4050 #endif
4051 
4052 
4053 	/* Clear the ring contents */
4054 	EM_RX_LOCK(rxr);
4055 	rsize = roundup2(adapter->num_rx_desc *
4056 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4057 	bzero((void *)rxr->rx_base, rsize);
4058 #ifdef DEV_NETMAP
4059 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4060 #endif
4061 
4062 	/*
4063 	** Free current RX buffer structs and their mbufs
4064 	*/
4065 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4066 		rxbuf = &rxr->rx_buffers[i];
4067 		if (rxbuf->m_head != NULL) {
4068 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4069 			    BUS_DMASYNC_POSTREAD);
4070 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4071 			m_freem(rxbuf->m_head);
4072 			rxbuf->m_head = NULL; /* mark as freed */
4073 		}
4074 	}
4075 
4076 	/* Now replenish the mbufs */
4077         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4078 		rxbuf = &rxr->rx_buffers[j];
4079 #ifdef DEV_NETMAP
4080 		if (slot) {
4081 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4082 			uint64_t paddr;
4083 			void *addr;
4084 
4085 			addr = PNMB(slot + si, &paddr);
4086 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4087 			/* Update descriptor */
4088 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4089 			continue;
4090 		}
4091 #endif /* DEV_NETMAP */
4092 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4093 		    M_PKTHDR, adapter->rx_mbuf_sz);
4094 		if (rxbuf->m_head == NULL) {
4095 			error = ENOBUFS;
4096 			goto fail;
4097 		}
4098 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4099 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4100 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4101 
4102 		/* Get the memory mapping */
4103 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4104 		    rxbuf->map, rxbuf->m_head, seg,
4105 		    &nsegs, BUS_DMA_NOWAIT);
4106 		if (error != 0) {
4107 			m_freem(rxbuf->m_head);
4108 			rxbuf->m_head = NULL;
4109 			goto fail;
4110 		}
4111 		bus_dmamap_sync(rxr->rxtag,
4112 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4113 
4114 		/* Update descriptor */
4115 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4116 	}
4117 	rxr->next_to_check = 0;
4118 	rxr->next_to_refresh = 0;
4119 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4120 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4121 
4122 fail:
4123 	EM_RX_UNLOCK(rxr);
4124 	return (error);
4125 }
4126 
4127 /*********************************************************************
4128  *
4129  *  Initialize all receive rings.
4130  *
4131  **********************************************************************/
4132 static int
4133 em_setup_receive_structures(struct adapter *adapter)
4134 {
4135 	struct rx_ring *rxr = adapter->rx_rings;
4136 	int q;
4137 
4138 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4139 		if (em_setup_receive_ring(rxr))
4140 			goto fail;
4141 
4142 	return (0);
4143 fail:
4144 	/*
4145 	 * Free the RX buffers allocated so far; we only handle
4146 	 * the rings that completed, since the failing case will have
4147 	 * cleaned up for itself. 'q' failed, so it's the terminus.
4148 	 */
4149 	for (int i = 0; i < q; ++i) {
4150 		rxr = &adapter->rx_rings[i];
4151 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4152 			struct em_buffer *rxbuf;
4153 			rxbuf = &rxr->rx_buffers[n];
4154 			if (rxbuf->m_head != NULL) {
4155 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4156 			  	  BUS_DMASYNC_POSTREAD);
4157 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4158 				m_freem(rxbuf->m_head);
4159 				rxbuf->m_head = NULL;
4160 			}
4161 		}
4162 		rxr->next_to_check = 0;
4163 		rxr->next_to_refresh = 0;
4164 	}
4165 
4166 	return (ENOBUFS);
4167 }
4168 
4169 /*********************************************************************
4170  *
4171  *  Free all receive rings.
4172  *
4173  **********************************************************************/
4174 static void
4175 em_free_receive_structures(struct adapter *adapter)
4176 {
4177 	struct rx_ring *rxr = adapter->rx_rings;
4178 
4179 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4180 		em_free_receive_buffers(rxr);
4181 		/* Free the ring memory as well */
4182 		em_dma_free(adapter, &rxr->rxdma);
4183 		EM_RX_LOCK_DESTROY(rxr);
4184 	}
4185 
4186 	free(adapter->rx_rings, M_DEVBUF);
4187 }
4188 
4189 
4190 /*********************************************************************
4191  *
4192  *  Free receive ring data structures
4193  *
4194  **********************************************************************/
4195 static void
4196 em_free_receive_buffers(struct rx_ring *rxr)
4197 {
4198 	struct adapter		*adapter = rxr->adapter;
4199 	struct em_buffer	*rxbuf = NULL;
4200 
4201 	INIT_DEBUGOUT("free_receive_buffers: begin");
4202 
4203 	if (rxr->rx_buffers != NULL) {
4204 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4205 			rxbuf = &rxr->rx_buffers[i];
4206 			if (rxbuf->map != NULL) {
4207 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4208 				    BUS_DMASYNC_POSTREAD);
4209 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4210 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4211 			}
4212 			if (rxbuf->m_head != NULL) {
4213 				m_freem(rxbuf->m_head);
4214 				rxbuf->m_head = NULL;
4215 			}
4216 		}
4217 		free(rxr->rx_buffers, M_DEVBUF);
4218 		rxr->rx_buffers = NULL;
4219 		rxr->next_to_check = 0;
4220 		rxr->next_to_refresh = 0;
4221 	}
4222 
4223 	if (rxr->rxtag != NULL) {
4224 		bus_dma_tag_destroy(rxr->rxtag);
4225 		rxr->rxtag = NULL;
4226 	}
4227 
4228 	return;
4229 }
4230 
4231 
4232 /*********************************************************************
4233  *
4234  *  Enable receive unit.
4235  *
4236  **********************************************************************/
4237 
4238 static void
4239 em_initialize_receive_unit(struct adapter *adapter)
4240 {
4241 	struct rx_ring	*rxr = adapter->rx_rings;
4242 	if_t ifp = adapter->ifp;
4243 	struct e1000_hw	*hw = &adapter->hw;
4244 	u64	bus_addr;
4245 	u32	rctl, rxcsum;
4246 
4247 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4248 
4249 	/*
4250 	 * Make sure receives are disabled while setting
4251 	 * up the descriptor ring
4252 	 */
4253 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4254 	/* Do not disable if ever enabled on this hardware */
4255 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4256 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4257 
4258 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4259 	    adapter->rx_abs_int_delay.value);
4260 	/*
4261 	 * Set the interrupt throttling rate. Value is calculated
4262 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4263 	 */
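	/*
	 * E.g. assuming the driver's default MAX_INTS_PER_SEC of 8000,
	 * DEFAULT_ITR = 10^9 / (8000 * 256) ~= 488 in 256ns units,
	 * i.e. at most one interrupt every 125us.
	 */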
4264 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4265 
4266 	/*
4267 	** When using MSIX interrupts we need to throttle
4268 	** using the EITR register (82574 only)
4269 	*/
4270 	if (hw->mac.type == e1000_82574) {
4271 		for (int i = 0; i < 4; i++)
4272 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4273 			    DEFAULT_ITR);
4274 		/* Disable accelerated acknowledge */
4275 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4276 	}
4277 
4278 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4279 	if (if_getcapenable(ifp) & IFCAP_RXCSUM)
4280 		rxcsum |= E1000_RXCSUM_TUOFL;
4281 	else
4282 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4283 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4284 
4285 	/*
4286 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4287 	** long latencies are observed, like Lenovo X60. This
4288 	** change eliminates the problem, but since having positive
4289 	** values in RDTR is a known source of problems on other
4290 	** platforms another solution is being sought.
4291 	*/
4292 	if (hw->mac.type == e1000_82573)
4293 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4294 
4295 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4296 		/* Setup the Base and Length of the Rx Descriptor Ring */
4297 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4298 
4299 		bus_addr = rxr->rxdma.dma_paddr;
4300 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4301 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4302 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4303 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4304 		/* Setup the Head and Tail Descriptor Pointers */
4305 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4306 #ifdef DEV_NETMAP
4307 		/*
4308 		 * an init() while a netmap client is active must
4309 		 * preserve the rx buffers passed to userspace.
4310 		 */
4311 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4312 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4313 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4314 		}
4315 #endif /* DEV_NETMAP */
4316 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4317 	}
4318 
4319 	/* Set PTHRESH for improved jumbo performance */
4320 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4321 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4322 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4323 	    (if_getmtu(ifp) > ETHERMTU)) {
4324 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4325 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4326 	}
4327 
4328 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4329 		if (if_getmtu(ifp) > ETHERMTU)
4330 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4331 		else
4332 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4333 	}
4334 
4335 	/* Setup the Receive Control Register */
4336 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4337 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4338 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4339 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4340 
4341         /* Strip the CRC */
4342         rctl |= E1000_RCTL_SECRC;
4343 
4344         /* Make sure VLAN Filters are off */
4345         rctl &= ~E1000_RCTL_VFE;
4346 	rctl &= ~E1000_RCTL_SBP;
4347 
4348 	if (adapter->rx_mbuf_sz == MCLBYTES)
4349 		rctl |= E1000_RCTL_SZ_2048;
4350 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4351 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4352 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4353 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4354 
4355 	if (if_getmtu(ifp) > ETHERMTU)
4356 		rctl |= E1000_RCTL_LPE;
4357 	else
4358 		rctl &= ~E1000_RCTL_LPE;
4359 
4360 	/* Write out the settings */
4361 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4362 
4363 	return;
4364 }
4365 
4366 
4367 /*********************************************************************
4368  *
4369  *  This routine executes in interrupt context. It replenishes
4370  *  the mbufs in the descriptor ring and sends data which has
4371  *  been dma'ed into host memory to the upper layer.
4372  *
4373  *  We loop at most count times if count is > 0, or until done if
4374  *  count < 0.
4375  *
4376  *  For polling we also now return the number of cleaned packets
4377  *********************************************************************/
4378 static bool
4379 em_rxeof(struct rx_ring *rxr, int count, int *done)
4380 {
4381 	struct adapter		*adapter = rxr->adapter;
4382 	if_t ifp = adapter->ifp;
4383 	struct mbuf		*mp, *sendmp;
4384 	u8			status = 0;
4385 	u16 			len;
4386 	int			i, processed, rxdone = 0;
4387 	bool			eop;
4388 	struct e1000_rx_desc	*cur;
4389 
4390 	EM_RX_LOCK(rxr);
4391 
4392 #ifdef DEV_NETMAP
4393 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4394 		EM_RX_UNLOCK(rxr);
4395 		return (FALSE);
4396 	}
4397 #endif /* DEV_NETMAP */
4398 
4399 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4400 
4401 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4402 			break;
4403 
4404 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4405 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4406 
4407 		cur = &rxr->rx_base[i];
4408 		status = cur->status;
4409 		mp = sendmp = NULL;
4410 
4411 		if ((status & E1000_RXD_STAT_DD) == 0)
4412 			break;
4413 
4414 		len = le16toh(cur->length);
4415 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4416 
4417 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4418 		    (rxr->discard == TRUE)) {
4419 			adapter->dropped_pkts++;
4420 			++rxr->rx_discarded;
4421 			if (!eop) /* Catch subsequent segs */
4422 				rxr->discard = TRUE;
4423 			else
4424 				rxr->discard = FALSE;
4425 			em_rx_discard(rxr, i);
4426 			goto next_desc;
4427 		}
4428 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4429 
4430 		/* Assign correct length to the current fragment */
4431 		mp = rxr->rx_buffers[i].m_head;
4432 		mp->m_len = len;
4433 
4434 		/* Trigger for refresh */
4435 		rxr->rx_buffers[i].m_head = NULL;
4436 
4437 		/* First segment? */
4438 		if (rxr->fmp == NULL) {
4439 			mp->m_pkthdr.len = len;
4440 			rxr->fmp = rxr->lmp = mp;
4441 		} else {
4442 			/* Chain mbuf's together */
4443 			mp->m_flags &= ~M_PKTHDR;
4444 			rxr->lmp->m_next = mp;
4445 			rxr->lmp = mp;
4446 			rxr->fmp->m_pkthdr.len += len;
4447 		}
4448 
4449 		if (eop) {
4450 			--count;
4451 			sendmp = rxr->fmp;
4452 			if_setrcvif(sendmp, ifp);
4453 			if_incipackets(ifp, 1);
4454 			em_receive_checksum(cur, sendmp);
4455 #ifndef __NO_STRICT_ALIGNMENT
4456 			if (adapter->hw.mac.max_frame_size >
4457 			    (MCLBYTES - ETHER_ALIGN) &&
4458 			    em_fixup_rx(rxr) != 0)
4459 				goto skip;
4460 #endif
4461 			if (status & E1000_RXD_STAT_VP) {
4462 				if_setvtag(sendmp,
4463 				    le16toh(cur->special));
4464 				sendmp->m_flags |= M_VLANTAG;
4465 			}
4466 #ifndef __NO_STRICT_ALIGNMENT
4467 skip:
4468 #endif
4469 			rxr->fmp = rxr->lmp = NULL;
4470 		}
4471 next_desc:
4472 		/* Zero out the receive descriptors status. */
4473 		cur->status = 0;
4474 		++rxdone;	/* cumulative for POLL */
4475 		++processed;
4476 
4477 		/* Advance our pointers to the next descriptor. */
4478 		if (++i == adapter->num_rx_desc)
4479 			i = 0;
4480 
4481 		/* Send to the stack */
4482 		if (sendmp != NULL) {
4483 			rxr->next_to_check = i;
4484 			EM_RX_UNLOCK(rxr);
4485 			if_input(ifp, sendmp);
4486 			EM_RX_LOCK(rxr);
4487 			i = rxr->next_to_check;
4488 		}
4489 
4490 		/* Only refresh mbufs every 8 descriptors */
4491 		if (processed == 8) {
4492 			em_refresh_mbufs(rxr, i);
4493 			processed = 0;
4494 		}
4495 	}
4496 
4497 	/* Catch any remaining refresh work */
4498 	if (e1000_rx_unrefreshed(rxr))
4499 		em_refresh_mbufs(rxr, i);
4500 
4501 	rxr->next_to_check = i;
4502 	if (done != NULL)
4503 		*done = rxdone;
4504 	EM_RX_UNLOCK(rxr);
4505 
4506 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4507 }
4508 
4509 static __inline void
4510 em_rx_discard(struct rx_ring *rxr, int i)
4511 {
4512 	struct em_buffer	*rbuf;
4513 
4514 	rbuf = &rxr->rx_buffers[i];
4515 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4516 
4517 	/* Free any previous pieces */
4518 	if (rxr->fmp != NULL) {
4519 		rxr->fmp->m_flags |= M_PKTHDR;
4520 		m_freem(rxr->fmp);
4521 		rxr->fmp = NULL;
4522 		rxr->lmp = NULL;
4523 	}
4524 	/*
4525 	** Free buffer and allow em_refresh_mbufs()
4526 	** to clean up and recharge buffer.
4527 	*/
4528 	if (rbuf->m_head) {
4529 		m_free(rbuf->m_head);
4530 		rbuf->m_head = NULL;
4531 	}
4532 	return;
4533 }
4534 
4535 #ifndef __NO_STRICT_ALIGNMENT
4536 /*
4537  * When jumbo frames are enabled we should realign the entire payload on
4538  * architectures with strict alignment. This is a serious design mistake of the
4539  * 8254x as it nullifies the benefit of DMA operations. The 8254x only allows RX
4540  * buffer sizes of 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN
4541  * so that the payload is aligned. On architectures without strict alignment
4542  * restrictions the 8254x still performs unaligned memory accesses, which reduce
4543  * performance as well. To avoid copying an entire frame just to align it, we
4544  * allocate a new mbuf, copy the ethernet header into it, and prepend the new
4545  * mbuf to the existing mbuf chain.
4546  *
4547  * Be aware that the best performance of the 8254x is achieved only when jumbo
4548  * frames are not used at all on architectures with strict alignment.
4549  */
4550 static int
4551 em_fixup_rx(struct rx_ring *rxr)
4552 {
4553 	struct adapter *adapter = rxr->adapter;
4554 	struct mbuf *m, *n;
4555 	int error;
4556 
4557 	error = 0;
4558 	m = rxr->fmp;
4559 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4560 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4561 		m->m_data += ETHER_HDR_LEN;
4562 	} else {
4563 		MGETHDR(n, M_NOWAIT, MT_DATA);
4564 		if (n != NULL) {
4565 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4566 			m->m_data += ETHER_HDR_LEN;
4567 			m->m_len -= ETHER_HDR_LEN;
4568 			n->m_len = ETHER_HDR_LEN;
4569 			M_MOVE_PKTHDR(n, m);
4570 			n->m_next = m;
4571 			rxr->fmp = n;
4572 		} else {
4573 			adapter->dropped_pkts++;
4574 			m_freem(rxr->fmp);
4575 			rxr->fmp = NULL;
4576 			error = ENOMEM;
4577 		}
4578 	}
4579 
4580 	return (error);
4581 }
4582 #endif
4583 
4584 /*********************************************************************
4585  *
4586  *  Verify that the hardware indicated that the checksum is valid.
4587  *  Inform the stack about the checksum status so that the stack
4588  *  doesn't spend time verifying the checksum itself.
4589  *
4590  *********************************************************************/
4591 static void
4592 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4593 {
4594 	mp->m_pkthdr.csum_flags = 0;
4595 
4596 	/* Ignore Checksum bit is set */
4597 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4598 		return;
4599 
4600 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4601 		return;
4602 
4603 	/* IP Checksum Good? */
4604 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4605 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4606 
4607 	/* TCP or UDP checksum */
4608 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4609 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4610 		mp->m_pkthdr.csum_data = htons(0xffff);
4611 	}
4612 }
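/*
 * Note: CSUM_DATA_VALID | CSUM_PSEUDO_HDR together with
 * csum_data = 0xffff tell the stack that the TCP/UDP checksum,
 * pseudo-header included, has already been verified, so no
 * software checksum pass is needed.
 */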
4613 
4614 /*
4615  * This routine is run via a vlan
4616  * config EVENT
4617  */
4618 static void
4619 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4620 {
4621 	struct adapter	*adapter = if_getsoftc(ifp);
4622 	u32		index, bit;
4623 
4624 	if ((void *)adapter != arg)	/* Not our event */
4625 		return;
4626 
4627 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4628                 return;
4629 
4630 	EM_CORE_LOCK(adapter);
4631 	index = (vtag >> 5) & 0x7F;
4632 	bit = vtag & 0x1F;
4633 	adapter->shadow_vfta[index] |= (1 << bit);
4634 	++adapter->num_vlans;
4635 	/* Re-init to load the changes */
4636 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4637 		em_init_locked(adapter);
4638 	EM_CORE_UNLOCK(adapter);
4639 }
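/*
 * E.g. vtag 100 above: index = (100 >> 5) & 0x7F = 3 and
 * bit = 100 & 0x1F = 4, so bit 4 of shadow_vfta[3] is set;
 * the 128 32-bit entries together cover all 4096 VLAN IDs.
 */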
4640 
4641 /*
4642  * This routine is run via a vlan
4643  * unconfig EVENT
4644  */
4645 static void
4646 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4647 {
4648 	struct adapter	*adapter = if_getsoftc(ifp);
4649 	u32		index, bit;
4650 
4651 	if (adapter != arg)
4652 		return;
4653 
4654 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4655                 return;
4656 
4657 	EM_CORE_LOCK(adapter);
4658 	index = (vtag >> 5) & 0x7F;
4659 	bit = vtag & 0x1F;
4660 	adapter->shadow_vfta[index] &= ~(1 << bit);
4661 	--adapter->num_vlans;
4662 	/* Re-init to load the changes */
4663 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4664 		em_init_locked(adapter);
4665 	EM_CORE_UNLOCK(adapter);
4666 }
4667 
4668 static void
4669 em_setup_vlan_hw_support(struct adapter *adapter)
4670 {
4671 	struct e1000_hw *hw = &adapter->hw;
4672 	u32             reg;
4673 
4674 	/*
4675 	** We get here thru init_locked, meaning
4676 	** a soft reset, which has already cleared
4677 	** the VFTA and other state; so if no
4678 	** VLANs have been registered, do nothing.
4679 	*/
4680 	if (adapter->num_vlans == 0)
4681                 return;
4682 
4683 	/*
4684 	** A soft reset zeroes out the VFTA, so
4685 	** we need to repopulate it now.
4686 	*/
4687 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4688                 if (adapter->shadow_vfta[i] != 0)
4689 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4690                             i, adapter->shadow_vfta[i]);
4691 
4692 	reg = E1000_READ_REG(hw, E1000_CTRL);
4693 	reg |= E1000_CTRL_VME;
4694 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4695 
4696 	/* Enable the Filter Table */
4697 	reg = E1000_READ_REG(hw, E1000_RCTL);
4698 	reg &= ~E1000_RCTL_CFIEN;
4699 	reg |= E1000_RCTL_VFE;
4700 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4701 }
4702 
4703 static void
4704 em_enable_intr(struct adapter *adapter)
4705 {
4706 	struct e1000_hw *hw = &adapter->hw;
4707 	u32 ims_mask = IMS_ENABLE_MASK;
4708 
4709 	if (hw->mac.type == e1000_82574) {
4710 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4711 		ims_mask |= EM_MSIX_MASK;
4712 	}
4713 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4714 }
4715 
4716 static void
4717 em_disable_intr(struct adapter *adapter)
4718 {
4719 	struct e1000_hw *hw = &adapter->hw;
4720 
4721 	if (hw->mac.type == e1000_82574)
4722 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4723 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4724 }
4725 
4726 /*
4727  * Bit of a misnomer: what this really means is
4728  * to enable OS management of the system, i.e.
4729  * to disable special hardware management features.
4730  */
4731 static void
4732 em_init_manageability(struct adapter *adapter)
4733 {
4734 	/* A shared code workaround */
4735 #define E1000_82542_MANC2H E1000_MANC2H
4736 	if (adapter->has_manage) {
4737 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4738 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4739 
4740 		/* disable hardware interception of ARP */
4741 		manc &= ~(E1000_MANC_ARP_EN);
4742 
4743                 /* enable receiving management packets to the host */
4744 		manc |= E1000_MANC_EN_MNG2HOST;
4745 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4746 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4747 		manc2h |= E1000_MNG2HOST_PORT_623;
4748 		manc2h |= E1000_MNG2HOST_PORT_664;
4749 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4750 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4751 	}
4752 }
4753 
4754 /*
4755  * Give control back to hardware management
4756  * controller if there is one.
4757  */
4758 static void
4759 em_release_manageability(struct adapter *adapter)
4760 {
4761 	if (adapter->has_manage) {
4762 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4763 
4764 		/* re-enable hardware interception of ARP */
4765 		manc |= E1000_MANC_ARP_EN;
4766 		manc &= ~E1000_MANC_EN_MNG2HOST;
4767 
4768 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4769 	}
4770 }
4771 
4772 /*
4773  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4774  * For ASF and Pass Through versions of f/w this means
4775  * that the driver is loaded. For AMT versions of the f/w
4776  * this means that the network i/f is open.
4777  */
4778 static void
4779 em_get_hw_control(struct adapter *adapter)
4780 {
4781 	u32 ctrl_ext, swsm;
4782 
4783 	if (adapter->hw.mac.type == e1000_82573) {
4784 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4785 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4786 		    swsm | E1000_SWSM_DRV_LOAD);
4787 		return;
4788 	}
4789 	/* else */
4790 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4791 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4792 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4793 	return;
4794 }
4795 
4796 /*
4797  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4798  * For ASF and Pass Through versions of f/w this means that
4799  * the driver is no longer loaded. For AMT versions of the
4800  * f/w this means that the network i/f is closed.
4801  */
4802 static void
4803 em_release_hw_control(struct adapter *adapter)
4804 {
4805 	u32 ctrl_ext, swsm;
4806 
4807 	if (!adapter->has_manage)
4808 		return;
4809 
4810 	if (adapter->hw.mac.type == e1000_82573) {
4811 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4812 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4813 		    swsm & ~E1000_SWSM_DRV_LOAD);
4814 		return;
4815 	}
4816 	/* else */
4817 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4818 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4819 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4820 	return;
4821 }
4822 
4823 static int
4824 em_is_valid_ether_addr(u8 *addr)
4825 {
4826 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4827 
4828 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4829 		return (FALSE);
4830 	}
4831 
4832 	return (TRUE);
4833 }
4834 
4835 /*
4836 ** Parse the interface capabilities with regard
4837 ** to both system management and wake-on-lan for
4838 ** later use.
4839 */
4840 static void
4841 em_get_wakeup(device_t dev)
4842 {
4843 	struct adapter	*adapter = device_get_softc(dev);
4844 	u16		eeprom_data = 0, device_id, apme_mask;
4845 
4846 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4847 	apme_mask = EM_EEPROM_APME;
4848 
4849 	switch (adapter->hw.mac.type) {
4850 	case e1000_82573:
4851 	case e1000_82583:
4852 		adapter->has_amt = TRUE;
4853 		/* Falls thru */
4854 	case e1000_82571:
4855 	case e1000_82572:
4856 	case e1000_80003es2lan:
4857 		if (adapter->hw.bus.func == 1) {
4858 			e1000_read_nvm(&adapter->hw,
4859 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4860 			break;
4861 		} else
4862 			e1000_read_nvm(&adapter->hw,
4863 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4864 		break;
4865 	case e1000_ich8lan:
4866 	case e1000_ich9lan:
4867 	case e1000_ich10lan:
4868 	case e1000_pchlan:
4869 	case e1000_pch2lan:
4870 		apme_mask = E1000_WUC_APME;
4871 		adapter->has_amt = TRUE;
4872 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4873 		break;
4874 	default:
4875 		e1000_read_nvm(&adapter->hw,
4876 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4877 		break;
4878 	}
4879 	if (eeprom_data & apme_mask)
4880 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4881 	/*
4882          * We have the eeprom settings; now apply the special cases
4883          * where the eeprom may be wrong or the board won't support
4884          * wake on lan on a particular port.
4885 	 */
4886 	device_id = pci_get_device(dev);
4887         switch (device_id) {
4888 	case E1000_DEV_ID_82571EB_FIBER:
4889 		/* Wake events only supported on port A for dual fiber
4890 		 * regardless of eeprom setting */
4891 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4892 		    E1000_STATUS_FUNC_1)
4893 			adapter->wol = 0;
4894 		break;
4895 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4896 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4897 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4898                 /* if quad port adapter, disable WoL on all but port A */
4899 		if (global_quad_port_a != 0)
4900 			adapter->wol = 0;
4901 		/* Reset for multiple quad port adapters */
4902 		if (++global_quad_port_a == 4)
4903 			global_quad_port_a = 0;
4904                 break;
4905 	}
4906 	return;
4907 }
4908 
4909 
4910 /*
4911  * Enable PCI Wake On Lan capability
4912  */
4913 static void
4914 em_enable_wakeup(device_t dev)
4915 {
4916 	struct adapter	*adapter = device_get_softc(dev);
4917 	if_t ifp = adapter->ifp;
4918 	u32		pmc, ctrl, ctrl_ext, rctl;
4919 	u16     	status;
4920 
4921 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4922 		return;
4923 
4924 	/* Advertise the wakeup capability */
4925 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4926 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4927 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4928 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4929 
4930 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4931 	    (adapter->hw.mac.type == e1000_pchlan) ||
4932 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4933 	    (adapter->hw.mac.type == e1000_ich10lan))
4934 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4935 
4936 	/* Keep the laser running on Fiber adapters */
4937 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4938 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4939 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4940 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4941 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4942 	}
4943 
4944 	/*
4945 	** Determine type of Wakeup: note that wol
4946 	** is set with all bits on by default.
4947 	*/
4948 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
4949 		adapter->wol &= ~E1000_WUFC_MAG;
4950 
4951 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
4952 		adapter->wol &= ~E1000_WUFC_MC;
4953 	else {
4954 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4955 		rctl |= E1000_RCTL_MPE;
4956 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4957 	}
4958 
4959 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4960 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4961 		if (em_enable_phy_wakeup(adapter))
4962 			return;
4963 	} else {
4964 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4965 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4966 	}
4967 
4968 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4969 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4970 
4971         /* Request PME */
4972         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4973 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4974 	if (if_getcapenable(ifp) & IFCAP_WOL)
4975 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4976         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4977 
4978 	return;
4979 }
4980 
4981 /*
4982 ** WOL in the newer chipset interfaces (pchlan)
4983 ** requires things to be copied into the phy
4984 */
4985 static int
4986 em_enable_phy_wakeup(struct adapter *adapter)
4987 {
4988 	struct e1000_hw *hw = &adapter->hw;
4989 	u32 mreg, ret = 0;
4990 	u16 preg;
4991 
4992 	/* copy MAC RARs to PHY RARs */
4993 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4994 
4995 	/* copy MAC MTA to PHY MTA */
4996 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4997 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4998 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4999 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5000 		    (u16)((mreg >> 16) & 0xFFFF));
5001 	}
5002 
5003 	/* configure PHY Rx Control register */
5004 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5005 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5006 	if (mreg & E1000_RCTL_UPE)
5007 		preg |= BM_RCTL_UPE;
5008 	if (mreg & E1000_RCTL_MPE)
5009 		preg |= BM_RCTL_MPE;
5010 	preg &= ~(BM_RCTL_MO_MASK);
5011 	if (mreg & E1000_RCTL_MO_3)
5012 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5013 				<< BM_RCTL_MO_SHIFT);
5014 	if (mreg & E1000_RCTL_BAM)
5015 		preg |= BM_RCTL_BAM;
5016 	if (mreg & E1000_RCTL_PMCF)
5017 		preg |= BM_RCTL_PMCF;
5018 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5019 	if (mreg & E1000_CTRL_RFCE)
5020 		preg |= BM_RCTL_RFCE;
5021 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5022 
5023 	/* enable PHY wakeup in MAC register */
5024 	E1000_WRITE_REG(hw, E1000_WUC,
5025 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5026 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5027 
5028 	/* configure and enable PHY wakeup in PHY registers */
5029 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5030 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5031 
5032 	/* activate PHY wakeup */
5033 	ret = hw->phy.ops.acquire(hw);
5034 	if (ret) {
5035 		printf("Could not acquire PHY\n");
5036 		return ret;
5037 	}
5038 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5039 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5040 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5041 	if (ret) {
5042 		printf("Could not read PHY page 769\n");
5043 		goto out;
5044 	}
5045 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5046 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5047 	if (ret)
5048 		printf("Could not set PHY Host Wakeup bit\n");
5049 out:
5050 	hw->phy.ops.release(hw);
5051 
5052 	return ret;
5053 }
5054 
5055 static void
5056 em_led_func(void *arg, int onoff)
5057 {
5058 	struct adapter	*adapter = arg;
5059 
5060 	EM_CORE_LOCK(adapter);
5061 	if (onoff) {
5062 		e1000_setup_led(&adapter->hw);
5063 		e1000_led_on(&adapter->hw);
5064 	} else {
5065 		e1000_led_off(&adapter->hw);
5066 		e1000_cleanup_led(&adapter->hw);
5067 	}
5068 	EM_CORE_UNLOCK(adapter);
5069 }
5070 
5071 /*
5072 ** Disable the L0S and L1 LINK states
5073 */
5074 static void
5075 em_disable_aspm(struct adapter *adapter)
5076 {
5077 	int		base, reg;
5078 	u16		link_cap, link_ctrl;
5079 	device_t	dev = adapter->dev;
5080 
5081 	switch (adapter->hw.mac.type) {
5082 		case e1000_82573:
5083 		case e1000_82574:
5084 		case e1000_82583:
5085 			break;
5086 		default:
5087 			return;
5088 	}
5089 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5090 		return;
5091 	reg = base + PCIER_LINK_CAP;
5092 	link_cap = pci_read_config(dev, reg, 2);
5093 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5094 		return;
5095 	reg = base + PCIER_LINK_CTL;
5096 	link_ctrl = pci_read_config(dev, reg, 2);
5097 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5098 	pci_write_config(dev, reg, link_ctrl, 2);
5099 	return;
5100 }
5101 
5102 /**********************************************************************
5103  *
5104  *  Update the board statistics counters.
5105  *
5106  **********************************************************************/
5107 static void
5108 em_update_stats_counters(struct adapter *adapter)
5109 {
5110 	if_t ifp;
5111 
5112 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5113 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5114 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5115 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5116 	}
5117 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5118 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5119 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5120 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5121 
5122 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5123 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5124 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5125 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5126 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5127 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5128 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5129 	/*
5130 	** For watchdog management we need to know if we have been
5131 	** paused during the last interval, so capture that here.
5132 	*/
5133 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5134 	adapter->stats.xoffrxc += adapter->pause_frames;
5135 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5136 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5137 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5138 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5139 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5140 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5141 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5142 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5143 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5144 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5145 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5146 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5147 
5148 	/* For the 64-bit byte counters the low dword must be read first. */
5149 	/* Both registers clear on the read of the high dword */
5150 
5151 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5152 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5153 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5154 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5155 
5156 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5157 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5158 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5159 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5160 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5161 
5162 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5163 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5164 
5165 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5166 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5167 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5168 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5169 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5170 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5171 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5172 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5173 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5174 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5175 
5176 	/* Interrupt Counts */
5177 
5178 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5179 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5180 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5181 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5182 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5183 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5184 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5185 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5186 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5187 
5188 	if (adapter->hw.mac.type >= e1000_82543) {
5189 		adapter->stats.algnerrc +=
5190 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5191 		adapter->stats.rxerrc +=
5192 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5193 		adapter->stats.tncrs +=
5194 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5195 		adapter->stats.cexterr +=
5196 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5197 		adapter->stats.tsctc +=
5198 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5199 		adapter->stats.tsctfc +=
5200 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5201 	}
5202 	ifp = adapter->ifp;
5203 
5204 	if_setcollisions(ifp, adapter->stats.colc);
5205 
5206 	/* Rx Errors */
5207 	if_setierrors(ifp, adapter->dropped_pkts + adapter->stats.rxerrc +
5208 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5209 	    adapter->stats.ruc + adapter->stats.roc +
5210 	    adapter->stats.mpc + adapter->stats.cexterr);
5211 
5212 	/* Tx Errors */
5213 	if_setoerrors(ifp, adapter->stats.ecol + adapter->stats.latecol +
5214 	    adapter->watchdog_events);
5215 }
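/*
 * Illustrative helper (a sketch, not used above): the 64-bit octet
 * counters follow a fixed protocol -- the low dword must be read before
 * the high dword, and the read of the high dword clears the pair.  A
 * wrapper makes that ordering explicit:
 */
#ifdef notdef
static inline u64
em_read_stat64(struct e1000_hw *hw, u32 lo_reg, u32 hi_reg)
{
	u64	lo;

	lo = E1000_READ_REG(hw, lo_reg);	/* low dword first */
	return (lo | ((u64)E1000_READ_REG(hw, hi_reg) << 32));
}
#endif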
5216 
5217 /* Export a single 32-bit register via a read-only sysctl. */
5218 static int
5219 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5220 {
5221 	struct adapter *adapter;
5222 	u_int val;
5223 
5224 	adapter = oidp->oid_arg1;
5225 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5226 	return (sysctl_handle_int(oidp, &val, 0, req));
5227 }
5228 
5229 /*
5230  * Add sysctl variables, one per statistic, to the system.
5231  */
5232 static void
5233 em_add_hw_stats(struct adapter *adapter)
5234 {
5235 	device_t dev = adapter->dev;
5236 
5237 	struct tx_ring *txr = adapter->tx_rings;
5238 	struct rx_ring *rxr = adapter->rx_rings;
5239 
5240 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5241 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5242 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5243 	struct e1000_hw_stats *stats = &adapter->stats;
5244 
5245 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5246 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5247 
5248 #define QUEUE_NAME_LEN 32
5249 	char namebuf[QUEUE_NAME_LEN];
5250 
5251 	/* Driver Statistics */
5252 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5253 			CTLFLAG_RD, &adapter->link_irq,
5254 			"Link MSIX IRQ Handled");
5255 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5256 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5257 			 "Std mbuf failed");
5258 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5259 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5260 			 "Std mbuf cluster failed");
5261 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5262 			CTLFLAG_RD, &adapter->dropped_pkts,
5263 			"Driver dropped packets");
5264 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5265 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5266 			"Driver tx dma failure in xmit");
5267 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5268 			CTLFLAG_RD, &adapter->rx_overruns,
5269 			"RX overruns");
5270 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5271 			CTLFLAG_RD, &adapter->watchdog_events,
5272 			"Watchdog timeouts");
5273 
5274 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5275 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5276 			em_sysctl_reg_handler, "IU",
5277 			"Device Control Register");
5278 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5279 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5280 			em_sysctl_reg_handler, "IU",
5281 			"Receiver Control Register");
5282 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5283 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5284 			"Flow Control High Watermark");
5285 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5286 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5287 			"Flow Control Low Watermark");
5288 
5289 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5290 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5291 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5292 					    CTLFLAG_RD, NULL, "Queue Name");
5293 		queue_list = SYSCTL_CHILDREN(queue_node);
5294 
5295 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5296 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5297 				E1000_TDH(txr->me),
5298 				em_sysctl_reg_handler, "IU",
5299 				"Transmit Descriptor Head");
5300 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5301 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5302 				E1000_TDT(txr->me),
5303 				em_sysctl_reg_handler, "IU",
5304 				"Transmit Descriptor Tail");
5305 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5306 				CTLFLAG_RD, &txr->tx_irq,
5307 				"Queue MSI-X Transmit Interrupts");
5308 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5309 				CTLFLAG_RD, &txr->no_desc_avail,
5310 				"Queue No Descriptor Available");
5311 
5312 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5313 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5314 				E1000_RDH(rxr->me),
5315 				em_sysctl_reg_handler, "IU",
5316 				"Receive Descriptor Head");
5317 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5318 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5319 				E1000_RDT(rxr->me),
5320 				em_sysctl_reg_handler, "IU",
5321 				"Receive Descriptor Tail");
5322 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5323 				CTLFLAG_RD, &rxr->rx_irq,
5324 				"Queue MSI-X Receive Interrupts");
5325 	}
5326 
5327 	/* MAC stats get their own sub node */
5328 
5329 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5330 				    CTLFLAG_RD, NULL, "Statistics");
5331 	stat_list = SYSCTL_CHILDREN(stat_node);
5332 
5333 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5334 			CTLFLAG_RD, &stats->ecol,
5335 			"Excessive collisions");
5336 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5337 			CTLFLAG_RD, &stats->scc,
5338 			"Single collisions");
5339 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5340 			CTLFLAG_RD, &stats->mcc,
5341 			"Multiple collisions");
5342 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5343 			CTLFLAG_RD, &stats->latecol,
5344 			"Late collisions");
5345 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5346 			CTLFLAG_RD, &stats->colc,
5347 			"Collision Count");
5348 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5349 			CTLFLAG_RD, &adapter->stats.symerrs,
5350 			"Symbol Errors");
5351 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5352 			CTLFLAG_RD, &adapter->stats.sec,
5353 			"Sequence Errors");
5354 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5355 			CTLFLAG_RD, &adapter->stats.dc,
5356 			"Defer Count");
5357 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5358 			CTLFLAG_RD, &adapter->stats.mpc,
5359 			"Missed Packets");
5360 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5361 			CTLFLAG_RD, &adapter->stats.rnbc,
5362 			"Receive No Buffers");
5363 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5364 			CTLFLAG_RD, &adapter->stats.ruc,
5365 			"Receive Undersize");
5366 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5367 			CTLFLAG_RD, &adapter->stats.rfc,
5368 			 "Fragmented Packets Received");
5369 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5370 			CTLFLAG_RD, &adapter->stats.roc,
5371 			"Oversized Packets Received");
5372 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5373 			CTLFLAG_RD, &adapter->stats.rjc,
5374 			"Received Jabber");
5375 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5376 			CTLFLAG_RD, &adapter->stats.rxerrc,
5377 			"Receive Errors");
5378 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5379 			CTLFLAG_RD, &adapter->stats.crcerrs,
5380 			"CRC errors");
5381 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5382 			CTLFLAG_RD, &adapter->stats.algnerrc,
5383 			"Alignment Errors");
5384 	/* On 82575 these are collision counts */
5385 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5386 			CTLFLAG_RD, &adapter->stats.cexterr,
5387 			"Collision/Carrier extension errors");
5388 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5389 			CTLFLAG_RD, &adapter->stats.xonrxc,
5390 			"XON Received");
5391 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5392 			CTLFLAG_RD, &adapter->stats.xontxc,
5393 			"XON Transmitted");
5394 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5395 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5396 			"XOFF Received");
5397 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5398 			CTLFLAG_RD, &adapter->stats.xofftxc,
5399 			"XOFF Transmitted");
5400 
5401 	/* Packet Reception Stats */
5402 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5403 			CTLFLAG_RD, &adapter->stats.tpr,
5404 			"Total Packets Received");
5405 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5406 			CTLFLAG_RD, &adapter->stats.gprc,
5407 			"Good Packets Received");
5408 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5409 			CTLFLAG_RD, &adapter->stats.bprc,
5410 			"Broadcast Packets Received");
5411 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5412 			CTLFLAG_RD, &adapter->stats.mprc,
5413 			"Multicast Packets Received");
5414 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5415 			CTLFLAG_RD, &adapter->stats.prc64,
5416 			"64 byte frames received");
5417 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5418 			CTLFLAG_RD, &adapter->stats.prc127,
5419 			"65-127 byte frames received");
5420 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5421 			CTLFLAG_RD, &adapter->stats.prc255,
5422 			"128-255 byte frames received");
5423 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5424 			CTLFLAG_RD, &adapter->stats.prc511,
5425 			"256-511 byte frames received");
5426 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5427 			CTLFLAG_RD, &adapter->stats.prc1023,
5428 			"512-1023 byte frames received");
5429 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5430 			CTLFLAG_RD, &adapter->stats.prc1522,
5431 			"1024-1522 byte frames received");
5432 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5433 			CTLFLAG_RD, &adapter->stats.gorc,
5434 			"Good Octets Received");
5435 
5436 	/* Packet Transmission Stats */
5437 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5438 			CTLFLAG_RD, &adapter->stats.gotc,
5439 			"Good Octets Transmitted");
5440 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5441 			CTLFLAG_RD, &adapter->stats.tpt,
5442 			"Total Packets Transmitted");
5443 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5444 			CTLFLAG_RD, &adapter->stats.gptc,
5445 			"Good Packets Transmitted");
5446 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5447 			CTLFLAG_RD, &adapter->stats.bptc,
5448 			"Broadcast Packets Transmitted");
5449 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5450 			CTLFLAG_RD, &adapter->stats.mptc,
5451 			"Multicast Packets Transmitted");
5452 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5453 			CTLFLAG_RD, &adapter->stats.ptc64,
5454 			"64 byte frames transmitted");
5455 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5456 			CTLFLAG_RD, &adapter->stats.ptc127,
5457 			"65-127 byte frames transmitted");
5458 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5459 			CTLFLAG_RD, &adapter->stats.ptc255,
5460 			"128-255 byte frames transmitted");
5461 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5462 			CTLFLAG_RD, &adapter->stats.ptc511,
5463 			"256-511 byte frames transmitted");
5464 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5465 			CTLFLAG_RD, &adapter->stats.ptc1023,
5466 			"512-1023 byte frames transmitted");
5467 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5468 			CTLFLAG_RD, &adapter->stats.ptc1522,
5469 			"1024-1522 byte frames transmitted");
5470 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5471 			CTLFLAG_RD, &adapter->stats.tsctc,
5472 			"TSO Contexts Transmitted");
5473 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5474 			CTLFLAG_RD, &adapter->stats.tsctfc,
5475 			"TSO Contexts Failed");
5476 
5477 
5478 	/* Interrupt Stats */
5479 
5480 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5481 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5482 	int_list = SYSCTL_CHILDREN(int_node);
5483 
5484 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5485 			CTLFLAG_RD, &adapter->stats.iac,
5486 			"Interrupt Assertion Count");
5487 
5488 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5489 			CTLFLAG_RD, &adapter->stats.icrxptc,
5490 			"Interrupt Cause Rx Pkt Timer Expire Count");
5491 
5492 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5493 			CTLFLAG_RD, &adapter->stats.icrxatc,
5494 			"Interrupt Cause Rx Abs Timer Expire Count");
5495 
5496 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5497 			CTLFLAG_RD, &adapter->stats.ictxptc,
5498 			"Interrupt Cause Tx Pkt Timer Expire Count");
5499 
5500 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5501 			CTLFLAG_RD, &adapter->stats.ictxatc,
5502 			"Interrupt Cause Tx Abs Timer Expire Count");
5503 
5504 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5505 			CTLFLAG_RD, &adapter->stats.ictxqec,
5506 			"Interrupt Cause Tx Queue Empty Count");
5507 
5508 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5509 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5510 			"Interrupt Cause Tx Queue Min Thresh Count");
5511 
5512 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5513 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5514 			"Interrupt Cause Rx Desc Min Thresh Count");
5515 
5516 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5517 			CTLFLAG_RD, &adapter->stats.icrxoc,
5518 			"Interrupt Cause Receiver Overrun Count");
5519 }
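/*
 * The statistics registered above surface under the device's sysctl
 * tree and can be inspected from userland, e.g. (unit number and queue
 * index illustrative):
 *
 *	# sysctl dev.em.0.mac_stats.crc_errs
 *	# sysctl dev.em.0.queue0.txd_head
 */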
5520 
5521 /**********************************************************************
5522  *
5523  *  This routine provides a way to dump out the adapter EEPROM,
5524  *  often a useful debug/service tool. Only the first 32 words are
5525  *  dumped; the fields that matter live in that range.
5526  *
5527  **********************************************************************/
5528 static int
5529 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5530 {
5531 	struct adapter *adapter = (struct adapter *)arg1;
5532 	int error;
5533 	int result;
5534 
5535 	result = -1;
5536 	error = sysctl_handle_int(oidp, &result, 0, req);
5537 
5538 	if (error || !req->newptr)
5539 		return (error);
5540 
5541 	/*
5542 	 * This value will cause a hex dump of the
5543 	 * first 32 16-bit words of the EEPROM to
5544 	 * the screen.
5545 	 */
5546 	if (result == 1)
5547 		em_print_nvm_info(adapter);
5548 
5549 	return (error);
5550 }
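/*
 * This handler is typically registered at attach time as an "nvm" node
 * of the device's sysctl tree, so the dump would be triggered with
 * (unit number illustrative):
 *
 *	# sysctl dev.em.0.nvm=1
 */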
5551 
5552 static void
5553 em_print_nvm_info(struct adapter *adapter)
5554 {
5555 	u16	eeprom_data;
5556 	int	i, j, row = 0;
5557 
5558 	/* It's a bit crude, but it gets the job done */
5559 	printf("\nInterface EEPROM Dump:\n");
5560 	printf("Offset\n0x0000  ");
5561 	for (i = 0, j = 0; i < 32; i++, j++) {
5562 		if (j == 8) { /* Make the offset block */
5563 			j = 0; ++row;
5564 			printf("\n0x00%x0  ", row);
5565 		}
5566 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5567 		printf("%04x ", eeprom_data);
5568 	}
5569 	printf("\n");
5570 }
5571 
5572 static int
5573 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5574 {
5575 	struct em_int_delay_info *info;
5576 	struct adapter *adapter;
5577 	u32 regval;
5578 	int error, usecs, ticks;
5579 
5580 	info = (struct em_int_delay_info *)arg1;
5581 	usecs = info->value;
5582 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5583 	if (error != 0 || req->newptr == NULL)
5584 		return (error);
5585 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5586 		return (EINVAL);
5587 	info->value = usecs;
5588 	ticks = EM_USECS_TO_TICKS(usecs);
5589 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5590 		ticks *= 4;
5591 
5592 	adapter = info->adapter;
5593 
5594 	EM_CORE_LOCK(adapter);
5595 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5596 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5597 	/* Handle a few special cases. */
5598 	switch (info->offset) {
5599 	case E1000_RDTR:
5600 		break;
5601 	case E1000_TIDV:
5602 		if (ticks == 0) {
5603 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5604 			/* Don't write 0 into the TIDV register. */
5605 			regval++;
5606 		} else
5607 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5608 		break;
5609 	}
5610 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5611 	EM_CORE_UNLOCK(adapter);
5612 	return (0);
5613 }
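/*
 * Worked example of the conversion above, assuming the if_em.h
 * definition EM_USECS_TO_TICKS(usecs) = ((1000 * (usecs) + 512) / 1024),
 * i.e. one tick is 1.024 us: a request of 100 usecs becomes
 * (100000 + 512) / 1024 = 98 ticks.  For E1000_ITR, whose units are
 * 256 ns (a quarter tick), the value is scaled by 4: 98 * 4 = 392,
 * or roughly 100.4 us.
 */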
5614 
5615 static void
5616 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5617 	const char *description, struct em_int_delay_info *info,
5618 	int offset, int value)
5619 {
5620 	info->adapter = adapter;
5621 	info->offset = offset;
5622 	info->value = value;
5623 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5624 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5625 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5626 	    info, 0, em_sysctl_int_delay, "I", description);
5627 }
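/*
 * Typical invocation (a sketch of the attach-time call; the default
 * em_rx_int_delay_dflt is as used elsewhere in this driver):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
 */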
5628 
5629 static void
5630 em_set_sysctl_value(struct adapter *adapter, const char *name,
5631 	const char *description, int *limit, int value)
5632 {
5633 	*limit = value;
5634 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5635 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5636 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5637 }
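/*
 * Typical invocation (a sketch): publish a tunable limit together with
 * its default, e.g. the receive processing limit:
 *
 *	em_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, em_rx_process_limit);
 */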
5638 
5639 
5640 /*
5641 ** Set flow control using sysctl:
5642 ** Flow control values:
5643 **      0 - off
5644 **      1 - rx pause
5645 **      2 - tx pause
5646 **      3 - full
5647 */
5648 static int
5649 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5650 {
5651 	struct adapter	*adapter = (struct adapter *) arg1;
5652 	int		error;
5653 	int		input = adapter->fc;	/* default to current setting */
5654 
5655 	error = sysctl_handle_int(oidp, &input, 0, req);
5656 
5657 	if ((error) || (req->newptr == NULL))
5658 		return (error);
5659 
5660 	if (input == adapter->fc) /* no change? */
5661 		return (error);
5662 
5663 	switch (input) {
5664 	case e1000_fc_rx_pause:
5665 	case e1000_fc_tx_pause:
5666 	case e1000_fc_full:
5667 	case e1000_fc_none:
5668 		adapter->hw.fc.requested_mode = input;
5669 		adapter->fc = input;
5670 		break;
5671 	default:
5672 		/* Do nothing */
5673 		return (error);
5674 	}
5675 
5676 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5677 	e1000_force_mac_fc(&adapter->hw);
5678 	return (error);
5679 }
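/*
 * The accepted values map directly onto enum e1000_fc_mode:
 * e1000_fc_none (0), e1000_fc_rx_pause (1), e1000_fc_tx_pause (2) and
 * e1000_fc_full (3).  With the handler registered as an "fc" node,
 * full flow control would be requested with (unit number illustrative):
 *
 *	# sysctl dev.em.0.fc=3
 */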
5680 
5681 /*
5682 ** Manage Energy Efficient Ethernet:
5683 ** Control value (the stored flag is "disable"):
5684 **     0 - EEE enabled, 1 - EEE disabled
5685 */
5686 static int
5687 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5688 {
5689 	struct adapter	*adapter = (struct adapter *) arg1;
5690 	int		error, value;
5691 
5692 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5693 	error = sysctl_handle_int(oidp, &value, 0, req);
5694 	if (error || req->newptr == NULL)
5695 		return (error);
5696 	EM_CORE_LOCK(adapter);
5697 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5698 	em_init_locked(adapter);
5699 	EM_CORE_UNLOCK(adapter);
5700 	return (0);
5701 }
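/*
 * Typically registered at attach time as an "eee_control" node.
 * Because the stored value is the disable flag, writing 1 turns EEE
 * off and writing 0 turns it back on (unit number illustrative):
 *
 *	# sysctl dev.em.0.eee_control=1
 */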
5702 
5703 static int
5704 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5705 {
5706 	struct adapter *adapter;
5707 	int error;
5708 	int result;
5709 
5710 	result = -1;
5711 	error = sysctl_handle_int(oidp, &result, 0, req);
5712 
5713 	if (error || !req->newptr)
5714 		return (error);
5715 
5716 	if (result == 1) {
5717 		adapter = (struct adapter *)arg1;
5718 		em_print_debug_info(adapter);
5719         }
5720 
5721 	return (error);
5722 }
5723 
5724 /*
5725 ** This routine is meant to be fluid, add whatever is
5726 ** needed for debugging a problem.  -jfv
5727 */
5728 static void
5729 em_print_debug_info(struct adapter *adapter)
5730 {
5731 	device_t dev = adapter->dev;
5732 	struct tx_ring *txr = adapter->tx_rings;
5733 	struct rx_ring *rxr = adapter->rx_rings;
5734 
5735 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) {
5736 		printf("Interface is RUNNING ");
5737 		if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
5738 			printf("and INACTIVE\n");
5739 		else
5740 			printf("and ACTIVE\n");
5741 	} else
5742 		printf("Interface is NOT RUNNING\n");
5744 
5745 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5746 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5747 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5748 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5749 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5750 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5751 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5752 	device_printf(dev, "TX descriptors avail = %d\n",
5753 	    txr->tx_avail);
5754 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5755 	    txr->no_desc_avail);
5756 	device_printf(dev, "RX discarded packets = %ld\n",
5757 	    rxr->rx_discarded);
5758 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5759 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5760 }
5761