xref: /freebsd/sys/dev/sfxge/sfxge.c (revision d9f0ce31900a48d1a2bfc1c8c86f79d1e831451a)
1 /*-
2  * Copyright (c) 2010-2015 Solarflare Communications Inc.
3  * All rights reserved.
4  *
5  * This software was developed in part by Philip Paeps under contract for
6  * Solarflare Communications, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright notice,
12  *    this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  *    this list of conditions and the following disclaimer in the documentation
15  *    and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * The views and conclusions contained in the software and documentation are
30  * those of the authors and should not be interpreted as representing official
31  * policies, either expressed or implied, of the FreeBSD Project.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/param.h>
38 #include <sys/kernel.h>
39 #include <sys/bus.h>
40 #include <sys/rman.h>
41 #include <sys/lock.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/smp.h>
45 #include <sys/socket.h>
46 #include <sys/taskqueue.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/priv.h>
50 #include <sys/syslog.h>
51 
52 #include <dev/pci/pcireg.h>
53 #include <dev/pci/pcivar.h>
54 
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_var.h>
58 #include <net/if_media.h>
59 #include <net/if_types.h>
60 
61 #include "common/efx.h"
62 
63 #include "sfxge.h"
64 #include "sfxge_rx.h"
65 #include "sfxge_ioc.h"
66 #include "sfxge_version.h"
67 
/*
 * Interface capability sets:
 *  - SFXGE_CAP:        everything advertised in if_capabilities
 *  - SFXGE_CAP_ENABLE: what is switched on by default (all of the above)
 *  - SFXGE_CAP_FIXED:  capabilities that the SIOCSIFCAP handler refuses
 *                      to change
 */
#define	SFXGE_CAP							\
	(IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM |				\
	 IFCAP_RXCSUM | IFCAP_TXCSUM |					\
	 IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6 |			\
	 IFCAP_TSO4 | IFCAP_TSO6 |					\
	 IFCAP_JUMBO_MTU |						\
	 IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWSTATS)

#define	SFXGE_CAP_ENABLE	SFXGE_CAP

#define	SFXGE_CAP_FIXED							\
	(IFCAP_VLAN_MTU | IFCAP_JUMBO_MTU |				\
	 IFCAP_LINKSTATE | IFCAP_HWSTATS)
77 
78 MALLOC_DEFINE(M_SFXGE, "sfxge", "Solarflare 10GigE driver");
79 
80 
81 SYSCTL_NODE(_hw, OID_AUTO, sfxge, CTLFLAG_RD, 0,
82 	    "SFXGE driver parameters");
83 
84 #define	SFXGE_PARAM_RX_RING	SFXGE_PARAM(rx_ring)
85 static int sfxge_rx_ring_entries = SFXGE_NDESCS;
86 TUNABLE_INT(SFXGE_PARAM_RX_RING, &sfxge_rx_ring_entries);
87 SYSCTL_INT(_hw_sfxge, OID_AUTO, rx_ring, CTLFLAG_RDTUN,
88 	   &sfxge_rx_ring_entries, 0,
89 	   "Maximum number of descriptors in a receive ring");
90 
91 #define	SFXGE_PARAM_TX_RING	SFXGE_PARAM(tx_ring)
92 static int sfxge_tx_ring_entries = SFXGE_NDESCS;
93 TUNABLE_INT(SFXGE_PARAM_TX_RING, &sfxge_tx_ring_entries);
94 SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_ring, CTLFLAG_RDTUN,
95 	   &sfxge_tx_ring_entries, 0,
96 	   "Maximum number of descriptors in a transmit ring");
97 
98 #define	SFXGE_PARAM_RESTART_ATTEMPTS	SFXGE_PARAM(restart_attempts)
99 static int sfxge_restart_attempts = 3;
100 TUNABLE_INT(SFXGE_PARAM_RESTART_ATTEMPTS, &sfxge_restart_attempts);
101 SYSCTL_INT(_hw_sfxge, OID_AUTO, restart_attempts, CTLFLAG_RDTUN,
102 	   &sfxge_restart_attempts, 0,
103 	   "Maximum number of attempts to bring interface up after reset");
104 
105 #if EFSYS_OPT_MCDI_LOGGING
106 #define	SFXGE_PARAM_MCDI_LOGGING	SFXGE_PARAM(mcdi_logging)
107 static int sfxge_mcdi_logging = 0;
108 TUNABLE_INT(SFXGE_PARAM_MCDI_LOGGING, &sfxge_mcdi_logging);
109 #endif
110 
/* Reset task handler; declared early because sfxge_create() registers it. */
static void	sfxge_reset(void *arg, int npending);
113 
114 static int
115 sfxge_estimate_rsrc_limits(struct sfxge_softc *sc)
116 {
117 	efx_drv_limits_t limits;
118 	int rc;
119 	unsigned int evq_max;
120 	uint32_t evq_allocated;
121 	uint32_t rxq_allocated;
122 	uint32_t txq_allocated;
123 
124 	/*
125 	 * Limit the number of event queues to:
126 	 *  - number of CPUs
127 	 *  - hardwire maximum RSS channels
128 	 *  - administratively specified maximum RSS channels
129 	 */
130 	evq_max = MIN(mp_ncpus, EFX_MAXRSS);
131 	if (sc->max_rss_channels > 0)
132 		evq_max = MIN(evq_max, sc->max_rss_channels);
133 
134 	memset(&limits, 0, sizeof(limits));
135 
136 	limits.edl_min_evq_count = 1;
137 	limits.edl_max_evq_count = evq_max;
138 	limits.edl_min_txq_count = SFXGE_TXQ_NTYPES;
139 	limits.edl_max_txq_count = evq_max + SFXGE_TXQ_NTYPES - 1;
140 	limits.edl_min_rxq_count = 1;
141 	limits.edl_max_rxq_count = evq_max;
142 
143 	efx_nic_set_drv_limits(sc->enp, &limits);
144 
145 	if ((rc = efx_nic_init(sc->enp)) != 0)
146 		return (rc);
147 
148 	rc = efx_nic_get_vi_pool(sc->enp, &evq_allocated, &rxq_allocated,
149 				 &txq_allocated);
150 	if (rc != 0) {
151 		efx_nic_fini(sc->enp);
152 		return (rc);
153 	}
154 
155 	KASSERT(txq_allocated >= SFXGE_TXQ_NTYPES,
156 		("txq_allocated < SFXGE_TXQ_NTYPES"));
157 
158 	sc->evq_max = MIN(evq_allocated, evq_max);
159 	sc->evq_max = MIN(rxq_allocated, sc->evq_max);
160 	sc->evq_max = MIN(txq_allocated - (SFXGE_TXQ_NTYPES - 1),
161 			  sc->evq_max);
162 
163 	KASSERT(sc->evq_max <= evq_max,
164 		("allocated more than maximum requested"));
165 
166 	/*
167 	 * NIC is kept initialized in the case of success to be able to
168 	 * initialize port to find out media types.
169 	 */
170 	return (0);
171 }
172 
173 static int
174 sfxge_set_drv_limits(struct sfxge_softc *sc)
175 {
176 	efx_drv_limits_t limits;
177 
178 	memset(&limits, 0, sizeof(limits));
179 
180 	/* Limits are strict since take into account initial estimation */
181 	limits.edl_min_evq_count = limits.edl_max_evq_count =
182 	    sc->intr.n_alloc;
183 	limits.edl_min_txq_count = limits.edl_max_txq_count =
184 	    sc->intr.n_alloc + SFXGE_TXQ_NTYPES - 1;
185 	limits.edl_min_rxq_count = limits.edl_max_rxq_count =
186 	    sc->intr.n_alloc;
187 
188 	return (efx_nic_set_drv_limits(sc->enp, &limits));
189 }
190 
191 static int
192 sfxge_start(struct sfxge_softc *sc)
193 {
194 	int rc;
195 
196 	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
197 
198 	if (sc->init_state == SFXGE_STARTED)
199 		return (0);
200 
201 	if (sc->init_state != SFXGE_REGISTERED) {
202 		rc = EINVAL;
203 		goto fail;
204 	}
205 
206 	/* Set required resource limits */
207 	if ((rc = sfxge_set_drv_limits(sc)) != 0)
208 		goto fail;
209 
210 	if ((rc = efx_nic_init(sc->enp)) != 0)
211 		goto fail;
212 
213 	/* Start processing interrupts. */
214 	if ((rc = sfxge_intr_start(sc)) != 0)
215 		goto fail2;
216 
217 	/* Start processing events. */
218 	if ((rc = sfxge_ev_start(sc)) != 0)
219 		goto fail3;
220 
221 	/* Fire up the port. */
222 	if ((rc = sfxge_port_start(sc)) != 0)
223 		goto fail4;
224 
225 	/* Start the receiver side. */
226 	if ((rc = sfxge_rx_start(sc)) != 0)
227 		goto fail5;
228 
229 	/* Start the transmitter side. */
230 	if ((rc = sfxge_tx_start(sc)) != 0)
231 		goto fail6;
232 
233 	sc->init_state = SFXGE_STARTED;
234 
235 	/* Tell the stack we're running. */
236 	sc->ifnet->if_drv_flags |= IFF_DRV_RUNNING;
237 	sc->ifnet->if_drv_flags &= ~IFF_DRV_OACTIVE;
238 
239 	return (0);
240 
241 fail6:
242 	sfxge_rx_stop(sc);
243 
244 fail5:
245 	sfxge_port_stop(sc);
246 
247 fail4:
248 	sfxge_ev_stop(sc);
249 
250 fail3:
251 	sfxge_intr_stop(sc);
252 
253 fail2:
254 	efx_nic_fini(sc->enp);
255 
256 fail:
257 	device_printf(sc->dev, "sfxge_start: %d\n", rc);
258 
259 	return (rc);
260 }
261 
/*
 * ifnet if_init callback: start the adapter under the adapter lock.
 * The callback has no way to report failure, so the result of
 * sfxge_start() is deliberately discarded.
 */
static void
sfxge_if_init(void *arg)
{
	struct sfxge_softc *sc = arg;

	SFXGE_ADAPTER_LOCK(sc);
	(void)sfxge_start(sc);
	SFXGE_ADAPTER_UNLOCK(sc);
}
273 
274 static void
275 sfxge_stop(struct sfxge_softc *sc)
276 {
277 	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
278 
279 	if (sc->init_state != SFXGE_STARTED)
280 		return;
281 
282 	sc->init_state = SFXGE_REGISTERED;
283 
284 	/* Stop the transmitter. */
285 	sfxge_tx_stop(sc);
286 
287 	/* Stop the receiver. */
288 	sfxge_rx_stop(sc);
289 
290 	/* Stop the port. */
291 	sfxge_port_stop(sc);
292 
293 	/* Stop processing events. */
294 	sfxge_ev_stop(sc);
295 
296 	/* Stop processing interrupts. */
297 	sfxge_intr_stop(sc);
298 
299 	efx_nic_fini(sc->enp);
300 
301 	sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING;
302 }
303 
304 
305 static int
306 sfxge_vpd_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
307 {
308 	efx_vpd_value_t value;
309 	int rc = 0;
310 
311 	switch (ioc->u.vpd.op) {
312 	case SFXGE_VPD_OP_GET_KEYWORD:
313 		value.evv_tag = ioc->u.vpd.tag;
314 		value.evv_keyword = ioc->u.vpd.keyword;
315 		rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value);
316 		if (rc != 0)
317 			break;
318 		ioc->u.vpd.len = MIN(ioc->u.vpd.len, value.evv_length);
319 		if (ioc->u.vpd.payload != 0) {
320 			rc = copyout(value.evv_value, ioc->u.vpd.payload,
321 				     ioc->u.vpd.len);
322 		}
323 		break;
324 	case SFXGE_VPD_OP_SET_KEYWORD:
325 		if (ioc->u.vpd.len > sizeof(value.evv_value))
326 			return (EINVAL);
327 		value.evv_tag = ioc->u.vpd.tag;
328 		value.evv_keyword = ioc->u.vpd.keyword;
329 		value.evv_length = ioc->u.vpd.len;
330 		rc = copyin(ioc->u.vpd.payload, value.evv_value, value.evv_length);
331 		if (rc != 0)
332 			break;
333 		rc = efx_vpd_set(sc->enp, sc->vpd_data, sc->vpd_size, &value);
334 		if (rc != 0)
335 			break;
336 		rc = efx_vpd_verify(sc->enp, sc->vpd_data, sc->vpd_size);
337 		if (rc != 0)
338 			break;
339 		rc = efx_vpd_write(sc->enp, sc->vpd_data, sc->vpd_size);
340 		break;
341 	default:
342 		rc = EOPNOTSUPP;
343 		break;
344 	}
345 
346 	return (rc);
347 }
348 
349 static int
350 sfxge_private_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
351 {
352 	switch (ioc->op) {
353 	case SFXGE_MCDI_IOC:
354 		return (sfxge_mcdi_ioctl(sc, ioc));
355 	case SFXGE_NVRAM_IOC:
356 		return (sfxge_nvram_ioctl(sc, ioc));
357 	case SFXGE_VPD_IOC:
358 		return (sfxge_vpd_ioctl(sc, ioc));
359 	default:
360 		return (EOPNOTSUPP);
361 	}
362 }
363 
364 
365 static int
366 sfxge_if_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
367 {
368 	struct sfxge_softc *sc;
369 	struct ifreq *ifr;
370 	sfxge_ioc_t ioc;
371 	int error;
372 
373 	ifr = (struct ifreq *)data;
374 	sc = ifp->if_softc;
375 	error = 0;
376 
377 	switch (command) {
378 	case SIOCSIFFLAGS:
379 		SFXGE_ADAPTER_LOCK(sc);
380 		if (ifp->if_flags & IFF_UP) {
381 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
382 				if ((ifp->if_flags ^ sc->if_flags) &
383 				    (IFF_PROMISC | IFF_ALLMULTI)) {
384 					sfxge_mac_filter_set(sc);
385 				}
386 			} else
387 				sfxge_start(sc);
388 		} else
389 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
390 				sfxge_stop(sc);
391 		sc->if_flags = ifp->if_flags;
392 		SFXGE_ADAPTER_UNLOCK(sc);
393 		break;
394 	case SIOCSIFMTU:
395 		if (ifr->ifr_mtu == ifp->if_mtu) {
396 			/* Nothing to do */
397 			error = 0;
398 		} else if (ifr->ifr_mtu > SFXGE_MAX_MTU) {
399 			error = EINVAL;
400 		} else if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
401 			ifp->if_mtu = ifr->ifr_mtu;
402 			error = 0;
403 		} else {
404 			/* Restart required */
405 			SFXGE_ADAPTER_LOCK(sc);
406 			sfxge_stop(sc);
407 			ifp->if_mtu = ifr->ifr_mtu;
408 			error = sfxge_start(sc);
409 			SFXGE_ADAPTER_UNLOCK(sc);
410 			if (error != 0) {
411 				ifp->if_flags &= ~IFF_UP;
412 				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
413 				if_down(ifp);
414 			}
415 		}
416 		break;
417 	case SIOCADDMULTI:
418 	case SIOCDELMULTI:
419 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
420 			sfxge_mac_filter_set(sc);
421 		break;
422 	case SIOCSIFCAP:
423 	{
424 		int reqcap = ifr->ifr_reqcap;
425 		int capchg_mask;
426 
427 		SFXGE_ADAPTER_LOCK(sc);
428 
429 		/* Capabilities to be changed in accordance with request */
430 		capchg_mask = ifp->if_capenable ^ reqcap;
431 
432 		/*
433 		 * The networking core already rejects attempts to
434 		 * enable capabilities we don't have.  We still have
435 		 * to reject attempts to disable capabilities that we
436 		 * can't (yet) disable.
437 		 */
438 		KASSERT((reqcap & ~ifp->if_capabilities) == 0,
439 		    ("Unsupported capabilities 0x%x requested 0x%x vs "
440 		     "supported 0x%x",
441 		     reqcap & ~ifp->if_capabilities,
442 		     reqcap , ifp->if_capabilities));
443 		if (capchg_mask & SFXGE_CAP_FIXED) {
444 			error = EINVAL;
445 			SFXGE_ADAPTER_UNLOCK(sc);
446 			break;
447 		}
448 
449 		/* Check request before any changes */
450 		if ((capchg_mask & IFCAP_TSO4) &&
451 		    (reqcap & (IFCAP_TSO4 | IFCAP_TXCSUM)) == IFCAP_TSO4) {
452 			error = EAGAIN;
453 			SFXGE_ADAPTER_UNLOCK(sc);
454 			if_printf(ifp, "enable txcsum before tso4\n");
455 			break;
456 		}
457 		if ((capchg_mask & IFCAP_TSO6) &&
458 		    (reqcap & (IFCAP_TSO6 | IFCAP_TXCSUM_IPV6)) == IFCAP_TSO6) {
459 			error = EAGAIN;
460 			SFXGE_ADAPTER_UNLOCK(sc);
461 			if_printf(ifp, "enable txcsum6 before tso6\n");
462 			break;
463 		}
464 
465 		if (reqcap & IFCAP_TXCSUM) {
466 			ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP);
467 		} else {
468 			ifp->if_hwassist &= ~(CSUM_IP | CSUM_TCP | CSUM_UDP);
469 			if (reqcap & IFCAP_TSO4) {
470 				reqcap &= ~IFCAP_TSO4;
471 				if_printf(ifp,
472 				    "tso4 disabled due to -txcsum\n");
473 			}
474 		}
475 		if (reqcap & IFCAP_TXCSUM_IPV6) {
476 			ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
477 		} else {
478 			ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
479 			if (reqcap & IFCAP_TSO6) {
480 				reqcap &= ~IFCAP_TSO6;
481 				if_printf(ifp,
482 				    "tso6 disabled due to -txcsum6\n");
483 			}
484 		}
485 
486 		/*
487 		 * The kernel takes both IFCAP_TSOx and CSUM_TSO into
488 		 * account before using TSO. So, we do not touch
489 		 * checksum flags when IFCAP_TSOx is modified.
490 		 * Note that CSUM_TSO is (CSUM_IP_TSO|CSUM_IP6_TSO),
491 		 * but both bits are set in IPv4 and IPv6 mbufs.
492 		 */
493 
494 		ifp->if_capenable = reqcap;
495 
496 		SFXGE_ADAPTER_UNLOCK(sc);
497 		break;
498 	}
499 	case SIOCSIFMEDIA:
500 	case SIOCGIFMEDIA:
501 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
502 		break;
503 #ifdef SIOCGI2C
504 	case SIOCGI2C:
505 	{
506 		struct ifi2creq i2c;
507 
508 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
509 		if (error != 0)
510 			break;
511 
512 		if (i2c.len > sizeof(i2c.data)) {
513 			error = EINVAL;
514 			break;
515 		}
516 
517 		SFXGE_ADAPTER_LOCK(sc);
518 		error = efx_phy_module_get_info(sc->enp, i2c.dev_addr,
519 						i2c.offset, i2c.len,
520 						&i2c.data[0]);
521 		SFXGE_ADAPTER_UNLOCK(sc);
522 		if (error == 0)
523 			error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
524 		break;
525 	}
526 #endif
527 	case SIOCGPRIVATE_0:
528 		error = priv_check(curthread, PRIV_DRIVER);
529 		if (error != 0)
530 			break;
531 		error = copyin(ifr->ifr_data, &ioc, sizeof(ioc));
532 		if (error != 0)
533 			return (error);
534 		error = sfxge_private_ioctl(sc, &ioc);
535 		if (error == 0) {
536 			error = copyout(&ioc, ifr->ifr_data, sizeof(ioc));
537 		}
538 		break;
539 	default:
540 		error = ether_ioctl(ifp, command, data);
541 	}
542 
543 	return (error);
544 }
545 
546 static void
547 sfxge_ifnet_fini(struct ifnet *ifp)
548 {
549 	struct sfxge_softc *sc = ifp->if_softc;
550 
551 	SFXGE_ADAPTER_LOCK(sc);
552 	sfxge_stop(sc);
553 	SFXGE_ADAPTER_UNLOCK(sc);
554 
555 	ifmedia_removeall(&sc->media);
556 	ether_ifdetach(ifp);
557 	if_free(ifp);
558 }
559 
560 static int
561 sfxge_ifnet_init(struct ifnet *ifp, struct sfxge_softc *sc)
562 {
563 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
564 	device_t dev;
565 	int rc;
566 
567 	dev = sc->dev;
568 	sc->ifnet = ifp;
569 
570 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
571 	ifp->if_init = sfxge_if_init;
572 	ifp->if_softc = sc;
573 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
574 	ifp->if_ioctl = sfxge_if_ioctl;
575 
576 	ifp->if_capabilities = SFXGE_CAP;
577 	ifp->if_capenable = SFXGE_CAP_ENABLE;
578 	ifp->if_hw_tsomax = SFXGE_TSO_MAX_SIZE;
579 	ifp->if_hw_tsomaxsegcount = SFXGE_TX_MAPPING_MAX_SEG;
580 	ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
581 
582 #ifdef SFXGE_LRO
583 	ifp->if_capabilities |= IFCAP_LRO;
584 	ifp->if_capenable |= IFCAP_LRO;
585 #endif
586 
587 	if (encp->enc_hw_tx_insert_vlan_enabled) {
588 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
589 		ifp->if_capenable |= IFCAP_VLAN_HWTAGGING;
590 	}
591 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
592 			   CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
593 
594 	ether_ifattach(ifp, encp->enc_mac_addr);
595 
596 	ifp->if_transmit = sfxge_if_transmit;
597 	ifp->if_qflush = sfxge_if_qflush;
598 
599 	ifp->if_get_counter = sfxge_get_counter;
600 
601 	DBGPRINT(sc->dev, "ifmedia_init");
602 	if ((rc = sfxge_port_ifmedia_init(sc)) != 0)
603 		goto fail;
604 
605 	return (0);
606 
607 fail:
608 	ether_ifdetach(sc->ifnet);
609 	return (rc);
610 }
611 
612 void
613 sfxge_sram_buf_tbl_alloc(struct sfxge_softc *sc, size_t n, uint32_t *idp)
614 {
615 	KASSERT(sc->buffer_table_next + n <=
616 		efx_nic_cfg_get(sc->enp)->enc_buftbl_limit,
617 		("buffer table full"));
618 
619 	*idp = sc->buffer_table_next;
620 	sc->buffer_table_next += n;
621 }
622 
623 static int
624 sfxge_bar_init(struct sfxge_softc *sc)
625 {
626 	efsys_bar_t *esbp = &sc->bar;
627 
628 	esbp->esb_rid = PCIR_BAR(EFX_MEM_BAR);
629 	if ((esbp->esb_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
630 	    &esbp->esb_rid, RF_ACTIVE)) == NULL) {
631 		device_printf(sc->dev, "Cannot allocate BAR region %d\n",
632 		    EFX_MEM_BAR);
633 		return (ENXIO);
634 	}
635 	esbp->esb_tag = rman_get_bustag(esbp->esb_res);
636 	esbp->esb_handle = rman_get_bushandle(esbp->esb_res);
637 
638 	SFXGE_BAR_LOCK_INIT(esbp, device_get_nameunit(sc->dev));
639 
640 	return (0);
641 }
642 
643 static void
644 sfxge_bar_fini(struct sfxge_softc *sc)
645 {
646 	efsys_bar_t *esbp = &sc->bar;
647 
648 	bus_release_resource(sc->dev, SYS_RES_MEMORY, esbp->esb_rid,
649 	    esbp->esb_res);
650 	SFXGE_BAR_LOCK_DESTROY(esbp);
651 }
652 
653 static int
654 sfxge_create(struct sfxge_softc *sc)
655 {
656 	device_t dev;
657 	efx_nic_t *enp;
658 	int error;
659 	char rss_param_name[sizeof(SFXGE_PARAM(%d.max_rss_channels))];
660 #if EFSYS_OPT_MCDI_LOGGING
661 	char mcdi_log_param_name[sizeof(SFXGE_PARAM(%d.mcdi_logging))];
662 #endif
663 
664 	dev = sc->dev;
665 
666 	SFXGE_ADAPTER_LOCK_INIT(sc, device_get_nameunit(sc->dev));
667 
668 	sc->max_rss_channels = 0;
669 	snprintf(rss_param_name, sizeof(rss_param_name),
670 		 SFXGE_PARAM(%d.max_rss_channels),
671 		 (int)device_get_unit(dev));
672 	TUNABLE_INT_FETCH(rss_param_name, &sc->max_rss_channels);
673 #if EFSYS_OPT_MCDI_LOGGING
674 	sc->mcdi_logging = sfxge_mcdi_logging;
675 	snprintf(mcdi_log_param_name, sizeof(mcdi_log_param_name),
676 		 SFXGE_PARAM(%d.mcdi_logging),
677 		 (int)device_get_unit(dev));
678 	TUNABLE_INT_FETCH(mcdi_log_param_name, &sc->mcdi_logging);
679 #endif
680 
681 	sc->stats_node = SYSCTL_ADD_NODE(
682 		device_get_sysctl_ctx(dev),
683 		SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
684 		OID_AUTO, "stats", CTLFLAG_RD, NULL, "Statistics");
685 	if (sc->stats_node == NULL) {
686 		error = ENOMEM;
687 		goto fail;
688 	}
689 
690 	TASK_INIT(&sc->task_reset, 0, sfxge_reset, sc);
691 
692 	(void) pci_enable_busmaster(dev);
693 
694 	/* Initialize DMA mappings. */
695 	DBGPRINT(sc->dev, "dma_init...");
696 	if ((error = sfxge_dma_init(sc)) != 0)
697 		goto fail;
698 
699 	/* Map the device registers. */
700 	DBGPRINT(sc->dev, "bar_init...");
701 	if ((error = sfxge_bar_init(sc)) != 0)
702 		goto fail;
703 
704 	error = efx_family(pci_get_vendor(dev), pci_get_device(dev),
705 	    &sc->family);
706 	KASSERT(error == 0, ("Family should be filtered by sfxge_probe()"));
707 
708 	DBGPRINT(sc->dev, "nic_create...");
709 
710 	/* Create the common code nic object. */
711 	SFXGE_EFSYS_LOCK_INIT(&sc->enp_lock,
712 			      device_get_nameunit(sc->dev), "nic");
713 	if ((error = efx_nic_create(sc->family, (efsys_identifier_t *)sc,
714 	    &sc->bar, &sc->enp_lock, &enp)) != 0)
715 		goto fail3;
716 	sc->enp = enp;
717 
718 	if (!ISP2(sfxge_rx_ring_entries) ||
719 	    (sfxge_rx_ring_entries < EFX_RXQ_MINNDESCS) ||
720 	    (sfxge_rx_ring_entries > EFX_RXQ_MAXNDESCS)) {
721 		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
722 		    SFXGE_PARAM_RX_RING, sfxge_rx_ring_entries,
723 		    EFX_RXQ_MINNDESCS, EFX_RXQ_MAXNDESCS);
724 		error = EINVAL;
725 		goto fail_rx_ring_entries;
726 	}
727 	sc->rxq_entries = sfxge_rx_ring_entries;
728 
729 	if (!ISP2(sfxge_tx_ring_entries) ||
730 	    (sfxge_tx_ring_entries < EFX_TXQ_MINNDESCS) ||
731 	    (sfxge_tx_ring_entries > EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)))) {
732 		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
733 		    SFXGE_PARAM_TX_RING, sfxge_tx_ring_entries,
734 		    EFX_TXQ_MINNDESCS, EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)));
735 		error = EINVAL;
736 		goto fail_tx_ring_entries;
737 	}
738 	sc->txq_entries = sfxge_tx_ring_entries;
739 
740 	/* Initialize MCDI to talk to the microcontroller. */
741 	DBGPRINT(sc->dev, "mcdi_init...");
742 	if ((error = sfxge_mcdi_init(sc)) != 0)
743 		goto fail4;
744 
745 	/* Probe the NIC and build the configuration data area. */
746 	DBGPRINT(sc->dev, "nic_probe...");
747 	if ((error = efx_nic_probe(enp)) != 0)
748 		goto fail5;
749 
750 	SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev),
751 			  SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
752 			  OID_AUTO, "version", CTLFLAG_RD,
753 			  SFXGE_VERSION_STRING, 0,
754 			  "Driver version");
755 
756 	SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
757 			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
758 			OID_AUTO, "phy_type", CTLFLAG_RD,
759 			NULL, efx_nic_cfg_get(enp)->enc_phy_type,
760 			"PHY type");
761 
762 	/* Initialize the NVRAM. */
763 	DBGPRINT(sc->dev, "nvram_init...");
764 	if ((error = efx_nvram_init(enp)) != 0)
765 		goto fail6;
766 
767 	/* Initialize the VPD. */
768 	DBGPRINT(sc->dev, "vpd_init...");
769 	if ((error = efx_vpd_init(enp)) != 0)
770 		goto fail7;
771 
772 	efx_mcdi_new_epoch(enp);
773 
774 	/* Reset the NIC. */
775 	DBGPRINT(sc->dev, "nic_reset...");
776 	if ((error = efx_nic_reset(enp)) != 0)
777 		goto fail8;
778 
779 	/* Initialize buffer table allocation. */
780 	sc->buffer_table_next = 0;
781 
782 	/*
783 	 * Guarantee minimum and estimate maximum number of event queues
784 	 * to take it into account when MSI-X interrupts are allocated.
785 	 * It initializes NIC and keeps it initialized on success.
786 	 */
787 	if ((error = sfxge_estimate_rsrc_limits(sc)) != 0)
788 		goto fail8;
789 
790 	/* Set up interrupts. */
791 	DBGPRINT(sc->dev, "intr_init...");
792 	if ((error = sfxge_intr_init(sc)) != 0)
793 		goto fail9;
794 
795 	/* Initialize event processing state. */
796 	DBGPRINT(sc->dev, "ev_init...");
797 	if ((error = sfxge_ev_init(sc)) != 0)
798 		goto fail11;
799 
800 	/* Initialize port state. */
801 	DBGPRINT(sc->dev, "port_init...");
802 	if ((error = sfxge_port_init(sc)) != 0)
803 		goto fail12;
804 
805 	/* Initialize receive state. */
806 	DBGPRINT(sc->dev, "rx_init...");
807 	if ((error = sfxge_rx_init(sc)) != 0)
808 		goto fail13;
809 
810 	/* Initialize transmit state. */
811 	DBGPRINT(sc->dev, "tx_init...");
812 	if ((error = sfxge_tx_init(sc)) != 0)
813 		goto fail14;
814 
815 	sc->init_state = SFXGE_INITIALIZED;
816 
817 	DBGPRINT(sc->dev, "success");
818 	return (0);
819 
820 fail14:
821 	sfxge_rx_fini(sc);
822 
823 fail13:
824 	sfxge_port_fini(sc);
825 
826 fail12:
827 	sfxge_ev_fini(sc);
828 
829 fail11:
830 	sfxge_intr_fini(sc);
831 
832 fail9:
833 	efx_nic_fini(sc->enp);
834 
835 fail8:
836 	efx_vpd_fini(enp);
837 
838 fail7:
839 	efx_nvram_fini(enp);
840 
841 fail6:
842 	efx_nic_unprobe(enp);
843 
844 fail5:
845 	sfxge_mcdi_fini(sc);
846 
847 fail4:
848 fail_tx_ring_entries:
849 fail_rx_ring_entries:
850 	sc->enp = NULL;
851 	efx_nic_destroy(enp);
852 	SFXGE_EFSYS_LOCK_DESTROY(&sc->enp_lock);
853 
854 fail3:
855 	sfxge_bar_fini(sc);
856 	(void) pci_disable_busmaster(sc->dev);
857 
858 fail:
859 	DBGPRINT(sc->dev, "failed %d", error);
860 	sc->dev = NULL;
861 	SFXGE_ADAPTER_LOCK_DESTROY(sc);
862 	return (error);
863 }
864 
865 static void
866 sfxge_destroy(struct sfxge_softc *sc)
867 {
868 	efx_nic_t *enp;
869 
870 	/* Clean up transmit state. */
871 	sfxge_tx_fini(sc);
872 
873 	/* Clean up receive state. */
874 	sfxge_rx_fini(sc);
875 
876 	/* Clean up port state. */
877 	sfxge_port_fini(sc);
878 
879 	/* Clean up event processing state. */
880 	sfxge_ev_fini(sc);
881 
882 	/* Clean up interrupts. */
883 	sfxge_intr_fini(sc);
884 
885 	/* Tear down common code subsystems. */
886 	efx_nic_reset(sc->enp);
887 	efx_vpd_fini(sc->enp);
888 	efx_nvram_fini(sc->enp);
889 	efx_nic_unprobe(sc->enp);
890 
891 	/* Tear down MCDI. */
892 	sfxge_mcdi_fini(sc);
893 
894 	/* Destroy common code context. */
895 	enp = sc->enp;
896 	sc->enp = NULL;
897 	efx_nic_destroy(enp);
898 
899 	/* Free DMA memory. */
900 	sfxge_dma_fini(sc);
901 
902 	/* Free mapped BARs. */
903 	sfxge_bar_fini(sc);
904 
905 	(void) pci_disable_busmaster(sc->dev);
906 
907 	taskqueue_drain(taskqueue_thread, &sc->task_reset);
908 
909 	/* Destroy the softc lock. */
910 	SFXGE_ADAPTER_LOCK_DESTROY(sc);
911 }
912 
913 static int
914 sfxge_vpd_handler(SYSCTL_HANDLER_ARGS)
915 {
916 	struct sfxge_softc *sc = arg1;
917 	efx_vpd_value_t value;
918 	int rc;
919 
920 	value.evv_tag = arg2 >> 16;
921 	value.evv_keyword = arg2 & 0xffff;
922 	if ((rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value))
923 	    != 0)
924 		return (rc);
925 
926 	return (SYSCTL_OUT(req, value.evv_value, value.evv_length));
927 }
928 
929 static void
930 sfxge_vpd_try_add(struct sfxge_softc *sc, struct sysctl_oid_list *list,
931 		  efx_vpd_tag_t tag, const char *keyword)
932 {
933 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
934 	efx_vpd_value_t value;
935 
936 	/* Check whether VPD tag/keyword is present */
937 	value.evv_tag = tag;
938 	value.evv_keyword = EFX_VPD_KEYWORD(keyword[0], keyword[1]);
939 	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) != 0)
940 		return;
941 
942 	SYSCTL_ADD_PROC(
943 		ctx, list, OID_AUTO, keyword, CTLTYPE_STRING|CTLFLAG_RD,
944 		sc, tag << 16 | EFX_VPD_KEYWORD(keyword[0], keyword[1]),
945 		sfxge_vpd_handler, "A", "");
946 }
947 
948 static int
949 sfxge_vpd_init(struct sfxge_softc *sc)
950 {
951 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
952 	struct sysctl_oid *vpd_node;
953 	struct sysctl_oid_list *vpd_list;
954 	char keyword[3];
955 	efx_vpd_value_t value;
956 	int rc;
957 
958 	if ((rc = efx_vpd_size(sc->enp, &sc->vpd_size)) != 0) {
959 		/*
960 		 * Unpriviledged functions deny VPD access.
961 		 * Simply skip VPD in this case.
962 		 */
963 		if (rc == EACCES)
964 			goto done;
965 		goto fail;
966 	}
967 	sc->vpd_data = malloc(sc->vpd_size, M_SFXGE, M_WAITOK);
968 	if ((rc = efx_vpd_read(sc->enp, sc->vpd_data, sc->vpd_size)) != 0)
969 		goto fail2;
970 
971 	/* Copy ID (product name) into device description, and log it. */
972 	value.evv_tag = EFX_VPD_ID;
973 	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) == 0) {
974 		value.evv_value[value.evv_length] = 0;
975 		device_set_desc_copy(sc->dev, value.evv_value);
976 		device_printf(sc->dev, "%s\n", value.evv_value);
977 	}
978 
979 	vpd_node = SYSCTL_ADD_NODE(
980 		ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
981 		OID_AUTO, "vpd", CTLFLAG_RD, NULL, "Vital Product Data");
982 	vpd_list = SYSCTL_CHILDREN(vpd_node);
983 
984 	/* Add sysctls for all expected and any vendor-defined keywords. */
985 	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "PN");
986 	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "EC");
987 	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "SN");
988 	keyword[0] = 'V';
989 	keyword[2] = 0;
990 	for (keyword[1] = '0'; keyword[1] <= '9'; keyword[1]++)
991 		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
992 	for (keyword[1] = 'A'; keyword[1] <= 'Z'; keyword[1]++)
993 		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
994 
995 done:
996 	return (0);
997 
998 fail2:
999 	free(sc->vpd_data, M_SFXGE);
1000 fail:
1001 	return (rc);
1002 }
1003 
1004 static void
1005 sfxge_vpd_fini(struct sfxge_softc *sc)
1006 {
1007 	free(sc->vpd_data, M_SFXGE);
1008 }
1009 
1010 static void
1011 sfxge_reset(void *arg, int npending)
1012 {
1013 	struct sfxge_softc *sc;
1014 	int rc;
1015 	unsigned attempt;
1016 
1017 	(void)npending;
1018 
1019 	sc = (struct sfxge_softc *)arg;
1020 
1021 	SFXGE_ADAPTER_LOCK(sc);
1022 
1023 	if (sc->init_state != SFXGE_STARTED)
1024 		goto done;
1025 
1026 	sfxge_stop(sc);
1027 	efx_nic_reset(sc->enp);
1028 	for (attempt = 0; attempt < sfxge_restart_attempts; ++attempt) {
1029 		if ((rc = sfxge_start(sc)) == 0)
1030 			goto done;
1031 
1032 		device_printf(sc->dev, "start on reset failed (%d)\n", rc);
1033 		DELAY(100000);
1034 	}
1035 
1036 	device_printf(sc->dev, "reset failed; interface is now stopped\n");
1037 
1038 done:
1039 	SFXGE_ADAPTER_UNLOCK(sc);
1040 }
1041 
1042 void
1043 sfxge_schedule_reset(struct sfxge_softc *sc)
1044 {
1045 	taskqueue_enqueue(taskqueue_thread, &sc->task_reset);
1046 }
1047 
1048 static int
1049 sfxge_attach(device_t dev)
1050 {
1051 	struct sfxge_softc *sc;
1052 	struct ifnet *ifp;
1053 	int error;
1054 
1055 	sc = device_get_softc(dev);
1056 	sc->dev = dev;
1057 
1058 	/* Allocate ifnet. */
1059 	ifp = if_alloc(IFT_ETHER);
1060 	if (ifp == NULL) {
1061 		device_printf(dev, "Couldn't allocate ifnet\n");
1062 		error = ENOMEM;
1063 		goto fail;
1064 	}
1065 	sc->ifnet = ifp;
1066 
1067 	/* Initialize hardware. */
1068 	DBGPRINT(sc->dev, "create nic");
1069 	if ((error = sfxge_create(sc)) != 0)
1070 		goto fail2;
1071 
1072 	/* Create the ifnet for the port. */
1073 	DBGPRINT(sc->dev, "init ifnet");
1074 	if ((error = sfxge_ifnet_init(ifp, sc)) != 0)
1075 		goto fail3;
1076 
1077 	DBGPRINT(sc->dev, "init vpd");
1078 	if ((error = sfxge_vpd_init(sc)) != 0)
1079 		goto fail4;
1080 
1081 	/*
1082 	 * NIC is initialized inside sfxge_create() and kept inialized
1083 	 * to be able to initialize port to discover media types in
1084 	 * sfxge_ifnet_init().
1085 	 */
1086 	efx_nic_fini(sc->enp);
1087 
1088 	sc->init_state = SFXGE_REGISTERED;
1089 
1090 	DBGPRINT(sc->dev, "success");
1091 	return (0);
1092 
1093 fail4:
1094 	sfxge_ifnet_fini(ifp);
1095 fail3:
1096 	efx_nic_fini(sc->enp);
1097 	sfxge_destroy(sc);
1098 
1099 fail2:
1100 	if_free(sc->ifnet);
1101 
1102 fail:
1103 	DBGPRINT(sc->dev, "failed %d", error);
1104 	return (error);
1105 }
1106 
1107 static int
1108 sfxge_detach(device_t dev)
1109 {
1110 	struct sfxge_softc *sc;
1111 
1112 	sc = device_get_softc(dev);
1113 
1114 	sfxge_vpd_fini(sc);
1115 
1116 	/* Destroy the ifnet. */
1117 	sfxge_ifnet_fini(sc->ifnet);
1118 
1119 	/* Tear down hardware. */
1120 	sfxge_destroy(sc);
1121 
1122 	return (0);
1123 }
1124 
1125 static int
1126 sfxge_probe(device_t dev)
1127 {
1128 	uint16_t pci_vendor_id;
1129 	uint16_t pci_device_id;
1130 	efx_family_t family;
1131 	int rc;
1132 
1133 	pci_vendor_id = pci_get_vendor(dev);
1134 	pci_device_id = pci_get_device(dev);
1135 
1136 	DBGPRINT(dev, "PCI ID %04x:%04x", pci_vendor_id, pci_device_id);
1137 	rc = efx_family(pci_vendor_id, pci_device_id, &family);
1138 	if (rc != 0) {
1139 		DBGPRINT(dev, "efx_family fail %d", rc);
1140 		return (ENXIO);
1141 	}
1142 
1143 	if (family == EFX_FAMILY_SIENA) {
1144 		device_set_desc(dev, "Solarflare SFC9000 family");
1145 		return (0);
1146 	}
1147 
1148 	if (family == EFX_FAMILY_HUNTINGTON) {
1149 		device_set_desc(dev, "Solarflare SFC9100 family");
1150 		return (0);
1151 	}
1152 
1153 	DBGPRINT(dev, "impossible controller family %d", family);
1154 	return (ENXIO);
1155 }
1156 
1157 static device_method_t sfxge_methods[] = {
1158 	DEVMETHOD(device_probe,		sfxge_probe),
1159 	DEVMETHOD(device_attach,	sfxge_attach),
1160 	DEVMETHOD(device_detach,	sfxge_detach),
1161 
1162 	DEVMETHOD_END
1163 };
1164 
1165 static devclass_t sfxge_devclass;
1166 
1167 static driver_t sfxge_driver = {
1168 	"sfxge",
1169 	sfxge_methods,
1170 	sizeof(struct sfxge_softc)
1171 };
1172 
1173 DRIVER_MODULE(sfxge, pci, sfxge_driver, sfxge_devclass, 0, 0);
1174