xref: /freebsd/sys/dev/sfxge/sfxge.c (revision 31d62a73c2e6ac0ff413a7a17700ffc7dce254ef)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2010-2016 Solarflare Communications Inc.
5  * All rights reserved.
6  *
7  * This software was developed in part by Philip Paeps under contract for
8  * Solarflare Communications, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright notice,
14  *    this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright notice,
16  *    this list of conditions and the following disclaimer in the documentation
17  *    and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * The views and conclusions contained in the software and documentation are
32  * those of the authors and should not be interpreted as representing official
33  * policies, either expressed or implied, of the FreeBSD Project.
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 
39 #include "opt_rss.h"
40 
41 #include <sys/param.h>
42 #include <sys/kernel.h>
43 #include <sys/bus.h>
44 #include <sys/rman.h>
45 #include <sys/lock.h>
46 #include <sys/module.h>
47 #include <sys/mutex.h>
48 #include <sys/smp.h>
49 #include <sys/socket.h>
50 #include <sys/taskqueue.h>
51 #include <sys/sockio.h>
52 #include <sys/sysctl.h>
53 #include <sys/priv.h>
54 #include <sys/syslog.h>
55 
56 #include <dev/pci/pcireg.h>
57 #include <dev/pci/pcivar.h>
58 
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_var.h>
62 #include <net/if_media.h>
63 #include <net/if_types.h>
64 
65 #ifdef RSS
66 #include <net/rss_config.h>
67 #endif
68 
69 #include "common/efx.h"
70 
71 #include "sfxge.h"
72 #include "sfxge_rx.h"
73 #include "sfxge_ioc.h"
74 #include "sfxge_version.h"
75 
/*
 * Interface capability sets.
 *
 * SFXGE_CAP        - capabilities advertised in if_capabilities.
 * SFXGE_CAP_ENABLE - capabilities switched on by default in if_capenable.
 * SFXGE_CAP_FIXED  - capabilities that SIOCSIFCAP refuses to toggle.
 */
#define	SFXGE_CAP							\
	(IFCAP_VLAN_MTU |						\
	 IFCAP_VLAN_HWCSUM |						\
	 IFCAP_RXCSUM |							\
	 IFCAP_TXCSUM |							\
	 IFCAP_RXCSUM_IPV6 |						\
	 IFCAP_TXCSUM_IPV6 |						\
	 IFCAP_TSO4 |							\
	 IFCAP_TSO6 |							\
	 IFCAP_JUMBO_MTU |						\
	 IFCAP_VLAN_HWTSO |						\
	 IFCAP_LINKSTATE |						\
	 IFCAP_HWSTATS)
#define	SFXGE_CAP_ENABLE	SFXGE_CAP
#define	SFXGE_CAP_FIXED							\
	(IFCAP_VLAN_MTU |						\
	 IFCAP_JUMBO_MTU |						\
	 IFCAP_LINKSTATE |						\
	 IFCAP_HWSTATS)
85 
86 MALLOC_DEFINE(M_SFXGE, "sfxge", "Solarflare 10GigE driver");
87 
88 
89 SYSCTL_NODE(_hw, OID_AUTO, sfxge, CTLFLAG_RD, 0,
90 	    "SFXGE driver parameters");
91 
92 #define	SFXGE_PARAM_RX_RING	SFXGE_PARAM(rx_ring)
93 static int sfxge_rx_ring_entries = SFXGE_NDESCS;
94 TUNABLE_INT(SFXGE_PARAM_RX_RING, &sfxge_rx_ring_entries);
95 SYSCTL_INT(_hw_sfxge, OID_AUTO, rx_ring, CTLFLAG_RDTUN,
96 	   &sfxge_rx_ring_entries, 0,
97 	   "Maximum number of descriptors in a receive ring");
98 
99 #define	SFXGE_PARAM_TX_RING	SFXGE_PARAM(tx_ring)
100 static int sfxge_tx_ring_entries = SFXGE_NDESCS;
101 TUNABLE_INT(SFXGE_PARAM_TX_RING, &sfxge_tx_ring_entries);
102 SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_ring, CTLFLAG_RDTUN,
103 	   &sfxge_tx_ring_entries, 0,
104 	   "Maximum number of descriptors in a transmit ring");
105 
106 #define	SFXGE_PARAM_RESTART_ATTEMPTS	SFXGE_PARAM(restart_attempts)
107 static int sfxge_restart_attempts = 3;
108 TUNABLE_INT(SFXGE_PARAM_RESTART_ATTEMPTS, &sfxge_restart_attempts);
109 SYSCTL_INT(_hw_sfxge, OID_AUTO, restart_attempts, CTLFLAG_RDTUN,
110 	   &sfxge_restart_attempts, 0,
111 	   "Maximum number of attempts to bring interface up after reset");
112 
113 #if EFSYS_OPT_MCDI_LOGGING
114 #define	SFXGE_PARAM_MCDI_LOGGING	SFXGE_PARAM(mcdi_logging)
115 static int sfxge_mcdi_logging = 0;
116 TUNABLE_INT(SFXGE_PARAM_MCDI_LOGGING, &sfxge_mcdi_logging);
117 #endif
118 
119 static void
120 sfxge_reset(void *arg, int npending);
121 
122 static int
123 sfxge_estimate_rsrc_limits(struct sfxge_softc *sc)
124 {
125 	efx_drv_limits_t limits;
126 	int rc;
127 	unsigned int evq_max;
128 	uint32_t evq_allocated;
129 	uint32_t rxq_allocated;
130 	uint32_t txq_allocated;
131 
132 	/*
133 	 * Limit the number of event queues to:
134 	 *  - number of CPUs
135 	 *  - hardwire maximum RSS channels
136 	 *  - administratively specified maximum RSS channels
137 	 */
138 #ifdef RSS
139 	/*
140 	 * Avoid extra limitations so that the number of queues
141 	 * may be configured at administrator's will
142 	 */
143 	evq_max = MIN(MAX(rss_getnumbuckets(), 1), EFX_MAXRSS);
144 #else
145 	evq_max = MIN(mp_ncpus, EFX_MAXRSS);
146 #endif
147 	if (sc->max_rss_channels > 0)
148 		evq_max = MIN(evq_max, sc->max_rss_channels);
149 
150 	memset(&limits, 0, sizeof(limits));
151 
152 	limits.edl_min_evq_count = 1;
153 	limits.edl_max_evq_count = evq_max;
154 	limits.edl_min_txq_count = SFXGE_TXQ_NTYPES;
155 	limits.edl_max_txq_count = evq_max + SFXGE_TXQ_NTYPES - 1;
156 	limits.edl_min_rxq_count = 1;
157 	limits.edl_max_rxq_count = evq_max;
158 
159 	efx_nic_set_drv_limits(sc->enp, &limits);
160 
161 	if ((rc = efx_nic_init(sc->enp)) != 0)
162 		return (rc);
163 
164 	rc = efx_nic_get_vi_pool(sc->enp, &evq_allocated, &rxq_allocated,
165 				 &txq_allocated);
166 	if (rc != 0) {
167 		efx_nic_fini(sc->enp);
168 		return (rc);
169 	}
170 
171 	KASSERT(txq_allocated >= SFXGE_TXQ_NTYPES,
172 		("txq_allocated < SFXGE_TXQ_NTYPES"));
173 
174 	sc->evq_max = MIN(evq_allocated, evq_max);
175 	sc->evq_max = MIN(rxq_allocated, sc->evq_max);
176 	sc->evq_max = MIN(txq_allocated - (SFXGE_TXQ_NTYPES - 1),
177 			  sc->evq_max);
178 
179 	KASSERT(sc->evq_max <= evq_max,
180 		("allocated more than maximum requested"));
181 
182 #ifdef RSS
183 	if (sc->evq_max < rss_getnumbuckets())
184 		device_printf(sc->dev, "The number of allocated queues (%u) "
185 			      "is less than the number of RSS buckets (%u); "
186 			      "performance degradation might be observed",
187 			      sc->evq_max, rss_getnumbuckets());
188 #endif
189 
190 	/*
191 	 * NIC is kept initialized in the case of success to be able to
192 	 * initialize port to find out media types.
193 	 */
194 	return (0);
195 }
196 
197 static int
198 sfxge_set_drv_limits(struct sfxge_softc *sc)
199 {
200 	efx_drv_limits_t limits;
201 
202 	memset(&limits, 0, sizeof(limits));
203 
204 	/* Limits are strict since take into account initial estimation */
205 	limits.edl_min_evq_count = limits.edl_max_evq_count =
206 	    sc->intr.n_alloc;
207 	limits.edl_min_txq_count = limits.edl_max_txq_count =
208 	    sc->intr.n_alloc + SFXGE_TXQ_NTYPES - 1;
209 	limits.edl_min_rxq_count = limits.edl_max_rxq_count =
210 	    sc->intr.n_alloc;
211 
212 	return (efx_nic_set_drv_limits(sc->enp, &limits));
213 }
214 
215 static int
216 sfxge_start(struct sfxge_softc *sc)
217 {
218 	int rc;
219 
220 	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
221 
222 	if (sc->init_state == SFXGE_STARTED)
223 		return (0);
224 
225 	if (sc->init_state != SFXGE_REGISTERED) {
226 		rc = EINVAL;
227 		goto fail;
228 	}
229 
230 	/* Set required resource limits */
231 	if ((rc = sfxge_set_drv_limits(sc)) != 0)
232 		goto fail;
233 
234 	if ((rc = efx_nic_init(sc->enp)) != 0)
235 		goto fail;
236 
237 	/* Start processing interrupts. */
238 	if ((rc = sfxge_intr_start(sc)) != 0)
239 		goto fail2;
240 
241 	/* Start processing events. */
242 	if ((rc = sfxge_ev_start(sc)) != 0)
243 		goto fail3;
244 
245 	/* Fire up the port. */
246 	if ((rc = sfxge_port_start(sc)) != 0)
247 		goto fail4;
248 
249 	/* Start the receiver side. */
250 	if ((rc = sfxge_rx_start(sc)) != 0)
251 		goto fail5;
252 
253 	/* Start the transmitter side. */
254 	if ((rc = sfxge_tx_start(sc)) != 0)
255 		goto fail6;
256 
257 	sc->init_state = SFXGE_STARTED;
258 
259 	/* Tell the stack we're running. */
260 	sc->ifnet->if_drv_flags |= IFF_DRV_RUNNING;
261 	sc->ifnet->if_drv_flags &= ~IFF_DRV_OACTIVE;
262 
263 	return (0);
264 
265 fail6:
266 	sfxge_rx_stop(sc);
267 
268 fail5:
269 	sfxge_port_stop(sc);
270 
271 fail4:
272 	sfxge_ev_stop(sc);
273 
274 fail3:
275 	sfxge_intr_stop(sc);
276 
277 fail2:
278 	efx_nic_fini(sc->enp);
279 
280 fail:
281 	device_printf(sc->dev, "sfxge_start: %d\n", rc);
282 
283 	return (rc);
284 }
285 
/*
 * if_init callback: start the adapter under the adapter lock.
 * The result of sfxge_start() is deliberately discarded because the
 * callback has no way to report it.
 */
static void
sfxge_if_init(void *arg)
{
	struct sfxge_softc *sc = arg;

	SFXGE_ADAPTER_LOCK(sc);
	(void)sfxge_start(sc);
	SFXGE_ADAPTER_UNLOCK(sc);
}
297 
298 static void
299 sfxge_stop(struct sfxge_softc *sc)
300 {
301 	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
302 
303 	if (sc->init_state != SFXGE_STARTED)
304 		return;
305 
306 	sc->init_state = SFXGE_REGISTERED;
307 
308 	/* Stop the transmitter. */
309 	sfxge_tx_stop(sc);
310 
311 	/* Stop the receiver. */
312 	sfxge_rx_stop(sc);
313 
314 	/* Stop the port. */
315 	sfxge_port_stop(sc);
316 
317 	/* Stop processing events. */
318 	sfxge_ev_stop(sc);
319 
320 	/* Stop processing interrupts. */
321 	sfxge_intr_stop(sc);
322 
323 	efx_nic_fini(sc->enp);
324 
325 	sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING;
326 }
327 
328 
329 static int
330 sfxge_vpd_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
331 {
332 	efx_vpd_value_t value;
333 	int rc = 0;
334 
335 	switch (ioc->u.vpd.op) {
336 	case SFXGE_VPD_OP_GET_KEYWORD:
337 		value.evv_tag = ioc->u.vpd.tag;
338 		value.evv_keyword = ioc->u.vpd.keyword;
339 		rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value);
340 		if (rc != 0)
341 			break;
342 		ioc->u.vpd.len = MIN(ioc->u.vpd.len, value.evv_length);
343 		if (ioc->u.vpd.payload != 0) {
344 			rc = copyout(value.evv_value, ioc->u.vpd.payload,
345 				     ioc->u.vpd.len);
346 		}
347 		break;
348 	case SFXGE_VPD_OP_SET_KEYWORD:
349 		if (ioc->u.vpd.len > sizeof(value.evv_value))
350 			return (EINVAL);
351 		value.evv_tag = ioc->u.vpd.tag;
352 		value.evv_keyword = ioc->u.vpd.keyword;
353 		value.evv_length = ioc->u.vpd.len;
354 		rc = copyin(ioc->u.vpd.payload, value.evv_value, value.evv_length);
355 		if (rc != 0)
356 			break;
357 		rc = efx_vpd_set(sc->enp, sc->vpd_data, sc->vpd_size, &value);
358 		if (rc != 0)
359 			break;
360 		rc = efx_vpd_verify(sc->enp, sc->vpd_data, sc->vpd_size);
361 		if (rc != 0)
362 			break;
363 		rc = efx_vpd_write(sc->enp, sc->vpd_data, sc->vpd_size);
364 		break;
365 	default:
366 		rc = EOPNOTSUPP;
367 		break;
368 	}
369 
370 	return (rc);
371 }
372 
373 static int
374 sfxge_private_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
375 {
376 	switch (ioc->op) {
377 	case SFXGE_MCDI_IOC:
378 		return (sfxge_mcdi_ioctl(sc, ioc));
379 	case SFXGE_NVRAM_IOC:
380 		return (sfxge_nvram_ioctl(sc, ioc));
381 	case SFXGE_VPD_IOC:
382 		return (sfxge_vpd_ioctl(sc, ioc));
383 	default:
384 		return (EOPNOTSUPP);
385 	}
386 }
387 
388 
389 static int
390 sfxge_if_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
391 {
392 	struct sfxge_softc *sc;
393 	struct ifreq *ifr;
394 	sfxge_ioc_t ioc;
395 	int error;
396 
397 	ifr = (struct ifreq *)data;
398 	sc = ifp->if_softc;
399 	error = 0;
400 
401 	switch (command) {
402 	case SIOCSIFFLAGS:
403 		SFXGE_ADAPTER_LOCK(sc);
404 		if (ifp->if_flags & IFF_UP) {
405 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
406 				if ((ifp->if_flags ^ sc->if_flags) &
407 				    (IFF_PROMISC | IFF_ALLMULTI)) {
408 					sfxge_mac_filter_set(sc);
409 				}
410 			} else
411 				sfxge_start(sc);
412 		} else
413 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
414 				sfxge_stop(sc);
415 		sc->if_flags = ifp->if_flags;
416 		SFXGE_ADAPTER_UNLOCK(sc);
417 		break;
418 	case SIOCSIFMTU:
419 		if (ifr->ifr_mtu == ifp->if_mtu) {
420 			/* Nothing to do */
421 			error = 0;
422 		} else if (ifr->ifr_mtu > SFXGE_MAX_MTU) {
423 			error = EINVAL;
424 		} else if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
425 			ifp->if_mtu = ifr->ifr_mtu;
426 			error = 0;
427 		} else {
428 			/* Restart required */
429 			SFXGE_ADAPTER_LOCK(sc);
430 			sfxge_stop(sc);
431 			ifp->if_mtu = ifr->ifr_mtu;
432 			error = sfxge_start(sc);
433 			SFXGE_ADAPTER_UNLOCK(sc);
434 			if (error != 0) {
435 				ifp->if_flags &= ~IFF_UP;
436 				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
437 				if_down(ifp);
438 			}
439 		}
440 		break;
441 	case SIOCADDMULTI:
442 	case SIOCDELMULTI:
443 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
444 			sfxge_mac_filter_set(sc);
445 		break;
446 	case SIOCSIFCAP:
447 	{
448 		int reqcap = ifr->ifr_reqcap;
449 		int capchg_mask;
450 
451 		SFXGE_ADAPTER_LOCK(sc);
452 
453 		/* Capabilities to be changed in accordance with request */
454 		capchg_mask = ifp->if_capenable ^ reqcap;
455 
456 		/*
457 		 * The networking core already rejects attempts to
458 		 * enable capabilities we don't have.  We still have
459 		 * to reject attempts to disable capabilities that we
460 		 * can't (yet) disable.
461 		 */
462 		KASSERT((reqcap & ~ifp->if_capabilities) == 0,
463 		    ("Unsupported capabilities 0x%x requested 0x%x vs "
464 		     "supported 0x%x",
465 		     reqcap & ~ifp->if_capabilities,
466 		     reqcap , ifp->if_capabilities));
467 		if (capchg_mask & SFXGE_CAP_FIXED) {
468 			error = EINVAL;
469 			SFXGE_ADAPTER_UNLOCK(sc);
470 			break;
471 		}
472 
473 		/* Check request before any changes */
474 		if ((capchg_mask & IFCAP_TSO4) &&
475 		    (reqcap & (IFCAP_TSO4 | IFCAP_TXCSUM)) == IFCAP_TSO4) {
476 			error = EAGAIN;
477 			SFXGE_ADAPTER_UNLOCK(sc);
478 			if_printf(ifp, "enable txcsum before tso4\n");
479 			break;
480 		}
481 		if ((capchg_mask & IFCAP_TSO6) &&
482 		    (reqcap & (IFCAP_TSO6 | IFCAP_TXCSUM_IPV6)) == IFCAP_TSO6) {
483 			error = EAGAIN;
484 			SFXGE_ADAPTER_UNLOCK(sc);
485 			if_printf(ifp, "enable txcsum6 before tso6\n");
486 			break;
487 		}
488 
489 		if (reqcap & IFCAP_TXCSUM) {
490 			ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP);
491 		} else {
492 			ifp->if_hwassist &= ~(CSUM_IP | CSUM_TCP | CSUM_UDP);
493 			if (reqcap & IFCAP_TSO4) {
494 				reqcap &= ~IFCAP_TSO4;
495 				if_printf(ifp,
496 				    "tso4 disabled due to -txcsum\n");
497 			}
498 		}
499 		if (reqcap & IFCAP_TXCSUM_IPV6) {
500 			ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
501 		} else {
502 			ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
503 			if (reqcap & IFCAP_TSO6) {
504 				reqcap &= ~IFCAP_TSO6;
505 				if_printf(ifp,
506 				    "tso6 disabled due to -txcsum6\n");
507 			}
508 		}
509 
510 		/*
511 		 * The kernel takes both IFCAP_TSOx and CSUM_TSO into
512 		 * account before using TSO. So, we do not touch
513 		 * checksum flags when IFCAP_TSOx is modified.
514 		 * Note that CSUM_TSO is (CSUM_IP_TSO|CSUM_IP6_TSO),
515 		 * but both bits are set in IPv4 and IPv6 mbufs.
516 		 */
517 
518 		ifp->if_capenable = reqcap;
519 
520 		SFXGE_ADAPTER_UNLOCK(sc);
521 		break;
522 	}
523 	case SIOCSIFMEDIA:
524 	case SIOCGIFMEDIA:
525 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
526 		break;
527 #ifdef SIOCGI2C
528 	case SIOCGI2C:
529 	{
530 		struct ifi2creq i2c;
531 
532 		error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
533 		if (error != 0)
534 			break;
535 
536 		if (i2c.len > sizeof(i2c.data)) {
537 			error = EINVAL;
538 			break;
539 		}
540 
541 		SFXGE_ADAPTER_LOCK(sc);
542 		error = efx_phy_module_get_info(sc->enp, i2c.dev_addr,
543 						i2c.offset, i2c.len,
544 						&i2c.data[0]);
545 		SFXGE_ADAPTER_UNLOCK(sc);
546 		if (error == 0)
547 			error = copyout(&i2c, ifr_data_get_ptr(ifr),
548 			    sizeof(i2c));
549 		break;
550 	}
551 #endif
552 	case SIOCGPRIVATE_0:
553 		error = priv_check(curthread, PRIV_DRIVER);
554 		if (error != 0)
555 			break;
556 		error = copyin(ifr_data_get_ptr(ifr), &ioc, sizeof(ioc));
557 		if (error != 0)
558 			return (error);
559 		error = sfxge_private_ioctl(sc, &ioc);
560 		if (error == 0) {
561 			error = copyout(&ioc, ifr_data_get_ptr(ifr),
562 			    sizeof(ioc));
563 		}
564 		break;
565 	default:
566 		error = ether_ioctl(ifp, command, data);
567 	}
568 
569 	return (error);
570 }
571 
572 static void
573 sfxge_ifnet_fini(struct ifnet *ifp)
574 {
575 	struct sfxge_softc *sc = ifp->if_softc;
576 
577 	SFXGE_ADAPTER_LOCK(sc);
578 	sfxge_stop(sc);
579 	SFXGE_ADAPTER_UNLOCK(sc);
580 
581 	ifmedia_removeall(&sc->media);
582 	ether_ifdetach(ifp);
583 	if_free(ifp);
584 }
585 
586 static int
587 sfxge_ifnet_init(struct ifnet *ifp, struct sfxge_softc *sc)
588 {
589 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
590 	device_t dev;
591 	int rc;
592 
593 	dev = sc->dev;
594 	sc->ifnet = ifp;
595 
596 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
597 	ifp->if_init = sfxge_if_init;
598 	ifp->if_softc = sc;
599 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
600 	ifp->if_ioctl = sfxge_if_ioctl;
601 
602 	ifp->if_capabilities = SFXGE_CAP;
603 	ifp->if_capenable = SFXGE_CAP_ENABLE;
604 	ifp->if_hw_tsomax = SFXGE_TSO_MAX_SIZE;
605 	ifp->if_hw_tsomaxsegcount = SFXGE_TX_MAPPING_MAX_SEG;
606 	ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
607 
608 #ifdef SFXGE_LRO
609 	ifp->if_capabilities |= IFCAP_LRO;
610 	ifp->if_capenable |= IFCAP_LRO;
611 #endif
612 
613 	if (encp->enc_hw_tx_insert_vlan_enabled) {
614 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
615 		ifp->if_capenable |= IFCAP_VLAN_HWTAGGING;
616 	}
617 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
618 			   CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
619 
620 	ether_ifattach(ifp, encp->enc_mac_addr);
621 
622 	ifp->if_transmit = sfxge_if_transmit;
623 	ifp->if_qflush = sfxge_if_qflush;
624 
625 	ifp->if_get_counter = sfxge_get_counter;
626 
627 	DBGPRINT(sc->dev, "ifmedia_init");
628 	if ((rc = sfxge_port_ifmedia_init(sc)) != 0)
629 		goto fail;
630 
631 	return (0);
632 
633 fail:
634 	ether_ifdetach(sc->ifnet);
635 	return (rc);
636 }
637 
638 void
639 sfxge_sram_buf_tbl_alloc(struct sfxge_softc *sc, size_t n, uint32_t *idp)
640 {
641 	KASSERT(sc->buffer_table_next + n <=
642 		efx_nic_cfg_get(sc->enp)->enc_buftbl_limit,
643 		("buffer table full"));
644 
645 	*idp = sc->buffer_table_next;
646 	sc->buffer_table_next += n;
647 }
648 
649 static int
650 sfxge_bar_init(struct sfxge_softc *sc)
651 {
652 	efsys_bar_t *esbp = &sc->bar;
653 
654 	esbp->esb_rid = PCIR_BAR(EFX_MEM_BAR);
655 	if ((esbp->esb_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
656 	    &esbp->esb_rid, RF_ACTIVE)) == NULL) {
657 		device_printf(sc->dev, "Cannot allocate BAR region %d\n",
658 		    EFX_MEM_BAR);
659 		return (ENXIO);
660 	}
661 	esbp->esb_tag = rman_get_bustag(esbp->esb_res);
662 	esbp->esb_handle = rman_get_bushandle(esbp->esb_res);
663 
664 	SFXGE_BAR_LOCK_INIT(esbp, device_get_nameunit(sc->dev));
665 
666 	return (0);
667 }
668 
669 static void
670 sfxge_bar_fini(struct sfxge_softc *sc)
671 {
672 	efsys_bar_t *esbp = &sc->bar;
673 
674 	bus_release_resource(sc->dev, SYS_RES_MEMORY, esbp->esb_rid,
675 	    esbp->esb_res);
676 	SFXGE_BAR_LOCK_DESTROY(esbp);
677 }
678 
679 static int
680 sfxge_create(struct sfxge_softc *sc)
681 {
682 	device_t dev;
683 	efx_nic_t *enp;
684 	int error;
685 	char rss_param_name[sizeof(SFXGE_PARAM(%d.max_rss_channels))];
686 #if EFSYS_OPT_MCDI_LOGGING
687 	char mcdi_log_param_name[sizeof(SFXGE_PARAM(%d.mcdi_logging))];
688 #endif
689 
690 	dev = sc->dev;
691 
692 	SFXGE_ADAPTER_LOCK_INIT(sc, device_get_nameunit(sc->dev));
693 
694 	sc->max_rss_channels = 0;
695 	snprintf(rss_param_name, sizeof(rss_param_name),
696 		 SFXGE_PARAM(%d.max_rss_channels),
697 		 (int)device_get_unit(dev));
698 	TUNABLE_INT_FETCH(rss_param_name, &sc->max_rss_channels);
699 #if EFSYS_OPT_MCDI_LOGGING
700 	sc->mcdi_logging = sfxge_mcdi_logging;
701 	snprintf(mcdi_log_param_name, sizeof(mcdi_log_param_name),
702 		 SFXGE_PARAM(%d.mcdi_logging),
703 		 (int)device_get_unit(dev));
704 	TUNABLE_INT_FETCH(mcdi_log_param_name, &sc->mcdi_logging);
705 #endif
706 
707 	sc->stats_node = SYSCTL_ADD_NODE(
708 		device_get_sysctl_ctx(dev),
709 		SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
710 		OID_AUTO, "stats", CTLFLAG_RD, NULL, "Statistics");
711 	if (sc->stats_node == NULL) {
712 		error = ENOMEM;
713 		goto fail;
714 	}
715 
716 	TASK_INIT(&sc->task_reset, 0, sfxge_reset, sc);
717 
718 	(void) pci_enable_busmaster(dev);
719 
720 	/* Initialize DMA mappings. */
721 	DBGPRINT(sc->dev, "dma_init...");
722 	if ((error = sfxge_dma_init(sc)) != 0)
723 		goto fail;
724 
725 	/* Map the device registers. */
726 	DBGPRINT(sc->dev, "bar_init...");
727 	if ((error = sfxge_bar_init(sc)) != 0)
728 		goto fail;
729 
730 	error = efx_family(pci_get_vendor(dev), pci_get_device(dev),
731 	    &sc->family);
732 	KASSERT(error == 0, ("Family should be filtered by sfxge_probe()"));
733 
734 	DBGPRINT(sc->dev, "nic_create...");
735 
736 	/* Create the common code nic object. */
737 	SFXGE_EFSYS_LOCK_INIT(&sc->enp_lock,
738 			      device_get_nameunit(sc->dev), "nic");
739 	if ((error = efx_nic_create(sc->family, (efsys_identifier_t *)sc,
740 	    &sc->bar, &sc->enp_lock, &enp)) != 0)
741 		goto fail3;
742 	sc->enp = enp;
743 
744 	/* Initialize MCDI to talk to the microcontroller. */
745 	DBGPRINT(sc->dev, "mcdi_init...");
746 	if ((error = sfxge_mcdi_init(sc)) != 0)
747 		goto fail4;
748 
749 	/* Probe the NIC and build the configuration data area. */
750 	DBGPRINT(sc->dev, "nic_probe...");
751 	if ((error = efx_nic_probe(enp)) != 0)
752 		goto fail5;
753 
754 	if (!ISP2(sfxge_rx_ring_entries) ||
755 	    (sfxge_rx_ring_entries < EFX_RXQ_MINNDESCS) ||
756 	    (sfxge_rx_ring_entries > EFX_RXQ_MAXNDESCS)) {
757 		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
758 		    SFXGE_PARAM_RX_RING, sfxge_rx_ring_entries,
759 		    EFX_RXQ_MINNDESCS, EFX_RXQ_MAXNDESCS);
760 		error = EINVAL;
761 		goto fail_rx_ring_entries;
762 	}
763 	sc->rxq_entries = sfxge_rx_ring_entries;
764 
765 	if (!ISP2(sfxge_tx_ring_entries) ||
766 	    (sfxge_tx_ring_entries < EFX_TXQ_MINNDESCS) ||
767 	    (sfxge_tx_ring_entries > EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)))) {
768 		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
769 		    SFXGE_PARAM_TX_RING, sfxge_tx_ring_entries,
770 		    EFX_TXQ_MINNDESCS, EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)));
771 		error = EINVAL;
772 		goto fail_tx_ring_entries;
773 	}
774 	sc->txq_entries = sfxge_tx_ring_entries;
775 
776 	SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev),
777 			  SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
778 			  OID_AUTO, "version", CTLFLAG_RD,
779 			  SFXGE_VERSION_STRING, 0,
780 			  "Driver version");
781 
782 	SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
783 			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
784 			OID_AUTO, "phy_type", CTLFLAG_RD,
785 			NULL, efx_nic_cfg_get(enp)->enc_phy_type,
786 			"PHY type");
787 
788 	/* Initialize the NVRAM. */
789 	DBGPRINT(sc->dev, "nvram_init...");
790 	if ((error = efx_nvram_init(enp)) != 0)
791 		goto fail6;
792 
793 	/* Initialize the VPD. */
794 	DBGPRINT(sc->dev, "vpd_init...");
795 	if ((error = efx_vpd_init(enp)) != 0)
796 		goto fail7;
797 
798 	efx_mcdi_new_epoch(enp);
799 
800 	/* Reset the NIC. */
801 	DBGPRINT(sc->dev, "nic_reset...");
802 	if ((error = efx_nic_reset(enp)) != 0)
803 		goto fail8;
804 
805 	/* Initialize buffer table allocation. */
806 	sc->buffer_table_next = 0;
807 
808 	/*
809 	 * Guarantee minimum and estimate maximum number of event queues
810 	 * to take it into account when MSI-X interrupts are allocated.
811 	 * It initializes NIC and keeps it initialized on success.
812 	 */
813 	if ((error = sfxge_estimate_rsrc_limits(sc)) != 0)
814 		goto fail8;
815 
816 	/* Set up interrupts. */
817 	DBGPRINT(sc->dev, "intr_init...");
818 	if ((error = sfxge_intr_init(sc)) != 0)
819 		goto fail9;
820 
821 	/* Initialize event processing state. */
822 	DBGPRINT(sc->dev, "ev_init...");
823 	if ((error = sfxge_ev_init(sc)) != 0)
824 		goto fail11;
825 
826 	/* Initialize port state. */
827 	DBGPRINT(sc->dev, "port_init...");
828 	if ((error = sfxge_port_init(sc)) != 0)
829 		goto fail12;
830 
831 	/* Initialize receive state. */
832 	DBGPRINT(sc->dev, "rx_init...");
833 	if ((error = sfxge_rx_init(sc)) != 0)
834 		goto fail13;
835 
836 	/* Initialize transmit state. */
837 	DBGPRINT(sc->dev, "tx_init...");
838 	if ((error = sfxge_tx_init(sc)) != 0)
839 		goto fail14;
840 
841 	sc->init_state = SFXGE_INITIALIZED;
842 
843 	DBGPRINT(sc->dev, "success");
844 	return (0);
845 
846 fail14:
847 	sfxge_rx_fini(sc);
848 
849 fail13:
850 	sfxge_port_fini(sc);
851 
852 fail12:
853 	sfxge_ev_fini(sc);
854 
855 fail11:
856 	sfxge_intr_fini(sc);
857 
858 fail9:
859 	efx_nic_fini(sc->enp);
860 
861 fail8:
862 	efx_vpd_fini(enp);
863 
864 fail7:
865 	efx_nvram_fini(enp);
866 
867 fail6:
868 fail_tx_ring_entries:
869 fail_rx_ring_entries:
870 	efx_nic_unprobe(enp);
871 
872 fail5:
873 	sfxge_mcdi_fini(sc);
874 
875 fail4:
876 	sc->enp = NULL;
877 	efx_nic_destroy(enp);
878 	SFXGE_EFSYS_LOCK_DESTROY(&sc->enp_lock);
879 
880 fail3:
881 	sfxge_bar_fini(sc);
882 	(void) pci_disable_busmaster(sc->dev);
883 
884 fail:
885 	DBGPRINT(sc->dev, "failed %d", error);
886 	sc->dev = NULL;
887 	SFXGE_ADAPTER_LOCK_DESTROY(sc);
888 	return (error);
889 }
890 
891 static void
892 sfxge_destroy(struct sfxge_softc *sc)
893 {
894 	efx_nic_t *enp;
895 
896 	/* Clean up transmit state. */
897 	sfxge_tx_fini(sc);
898 
899 	/* Clean up receive state. */
900 	sfxge_rx_fini(sc);
901 
902 	/* Clean up port state. */
903 	sfxge_port_fini(sc);
904 
905 	/* Clean up event processing state. */
906 	sfxge_ev_fini(sc);
907 
908 	/* Clean up interrupts. */
909 	sfxge_intr_fini(sc);
910 
911 	/* Tear down common code subsystems. */
912 	efx_nic_reset(sc->enp);
913 	efx_vpd_fini(sc->enp);
914 	efx_nvram_fini(sc->enp);
915 	efx_nic_unprobe(sc->enp);
916 
917 	/* Tear down MCDI. */
918 	sfxge_mcdi_fini(sc);
919 
920 	/* Destroy common code context. */
921 	enp = sc->enp;
922 	sc->enp = NULL;
923 	efx_nic_destroy(enp);
924 
925 	/* Free DMA memory. */
926 	sfxge_dma_fini(sc);
927 
928 	/* Free mapped BARs. */
929 	sfxge_bar_fini(sc);
930 
931 	(void) pci_disable_busmaster(sc->dev);
932 
933 	taskqueue_drain(taskqueue_thread, &sc->task_reset);
934 
935 	/* Destroy the softc lock. */
936 	SFXGE_ADAPTER_LOCK_DESTROY(sc);
937 }
938 
939 static int
940 sfxge_vpd_handler(SYSCTL_HANDLER_ARGS)
941 {
942 	struct sfxge_softc *sc = arg1;
943 	efx_vpd_value_t value;
944 	int rc;
945 
946 	value.evv_tag = arg2 >> 16;
947 	value.evv_keyword = arg2 & 0xffff;
948 	if ((rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value))
949 	    != 0)
950 		return (rc);
951 
952 	return (SYSCTL_OUT(req, value.evv_value, value.evv_length));
953 }
954 
955 static void
956 sfxge_vpd_try_add(struct sfxge_softc *sc, struct sysctl_oid_list *list,
957 		  efx_vpd_tag_t tag, const char *keyword)
958 {
959 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
960 	efx_vpd_value_t value;
961 
962 	/* Check whether VPD tag/keyword is present */
963 	value.evv_tag = tag;
964 	value.evv_keyword = EFX_VPD_KEYWORD(keyword[0], keyword[1]);
965 	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) != 0)
966 		return;
967 
968 	SYSCTL_ADD_PROC(
969 		ctx, list, OID_AUTO, keyword, CTLTYPE_STRING|CTLFLAG_RD,
970 		sc, tag << 16 | EFX_VPD_KEYWORD(keyword[0], keyword[1]),
971 		sfxge_vpd_handler, "A", "");
972 }
973 
974 static int
975 sfxge_vpd_init(struct sfxge_softc *sc)
976 {
977 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
978 	struct sysctl_oid *vpd_node;
979 	struct sysctl_oid_list *vpd_list;
980 	char keyword[3];
981 	efx_vpd_value_t value;
982 	int rc;
983 
984 	if ((rc = efx_vpd_size(sc->enp, &sc->vpd_size)) != 0) {
985 		/*
986 		 * Unpriviledged functions deny VPD access.
987 		 * Simply skip VPD in this case.
988 		 */
989 		if (rc == EACCES)
990 			goto done;
991 		goto fail;
992 	}
993 	sc->vpd_data = malloc(sc->vpd_size, M_SFXGE, M_WAITOK);
994 	if ((rc = efx_vpd_read(sc->enp, sc->vpd_data, sc->vpd_size)) != 0)
995 		goto fail2;
996 
997 	/* Copy ID (product name) into device description, and log it. */
998 	value.evv_tag = EFX_VPD_ID;
999 	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) == 0) {
1000 		value.evv_value[value.evv_length] = 0;
1001 		device_set_desc_copy(sc->dev, value.evv_value);
1002 		device_printf(sc->dev, "%s\n", value.evv_value);
1003 	}
1004 
1005 	vpd_node = SYSCTL_ADD_NODE(
1006 		ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
1007 		OID_AUTO, "vpd", CTLFLAG_RD, NULL, "Vital Product Data");
1008 	vpd_list = SYSCTL_CHILDREN(vpd_node);
1009 
1010 	/* Add sysctls for all expected and any vendor-defined keywords. */
1011 	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "PN");
1012 	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "EC");
1013 	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "SN");
1014 	keyword[0] = 'V';
1015 	keyword[2] = 0;
1016 	for (keyword[1] = '0'; keyword[1] <= '9'; keyword[1]++)
1017 		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1018 	for (keyword[1] = 'A'; keyword[1] <= 'Z'; keyword[1]++)
1019 		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1020 
1021 done:
1022 	return (0);
1023 
1024 fail2:
1025 	free(sc->vpd_data, M_SFXGE);
1026 fail:
1027 	return (rc);
1028 }
1029 
1030 static void
1031 sfxge_vpd_fini(struct sfxge_softc *sc)
1032 {
1033 	free(sc->vpd_data, M_SFXGE);
1034 }
1035 
1036 static void
1037 sfxge_reset(void *arg, int npending)
1038 {
1039 	struct sfxge_softc *sc;
1040 	int rc;
1041 	unsigned attempt;
1042 
1043 	(void)npending;
1044 
1045 	sc = (struct sfxge_softc *)arg;
1046 
1047 	SFXGE_ADAPTER_LOCK(sc);
1048 
1049 	if (sc->init_state != SFXGE_STARTED)
1050 		goto done;
1051 
1052 	sfxge_stop(sc);
1053 	efx_nic_reset(sc->enp);
1054 	for (attempt = 0; attempt < sfxge_restart_attempts; ++attempt) {
1055 		if ((rc = sfxge_start(sc)) == 0)
1056 			goto done;
1057 
1058 		device_printf(sc->dev, "start on reset failed (%d)\n", rc);
1059 		DELAY(100000);
1060 	}
1061 
1062 	device_printf(sc->dev, "reset failed; interface is now stopped\n");
1063 
1064 done:
1065 	SFXGE_ADAPTER_UNLOCK(sc);
1066 }
1067 
1068 void
1069 sfxge_schedule_reset(struct sfxge_softc *sc)
1070 {
1071 	taskqueue_enqueue(taskqueue_thread, &sc->task_reset);
1072 }
1073 
1074 static int
1075 sfxge_attach(device_t dev)
1076 {
1077 	struct sfxge_softc *sc;
1078 	struct ifnet *ifp;
1079 	int error;
1080 
1081 	sc = device_get_softc(dev);
1082 	sc->dev = dev;
1083 
1084 	/* Allocate ifnet. */
1085 	ifp = if_alloc(IFT_ETHER);
1086 	if (ifp == NULL) {
1087 		device_printf(dev, "Couldn't allocate ifnet\n");
1088 		error = ENOMEM;
1089 		goto fail;
1090 	}
1091 	sc->ifnet = ifp;
1092 
1093 	/* Initialize hardware. */
1094 	DBGPRINT(sc->dev, "create nic");
1095 	if ((error = sfxge_create(sc)) != 0)
1096 		goto fail2;
1097 
1098 	/* Create the ifnet for the port. */
1099 	DBGPRINT(sc->dev, "init ifnet");
1100 	if ((error = sfxge_ifnet_init(ifp, sc)) != 0)
1101 		goto fail3;
1102 
1103 	DBGPRINT(sc->dev, "init vpd");
1104 	if ((error = sfxge_vpd_init(sc)) != 0)
1105 		goto fail4;
1106 
1107 	/*
1108 	 * NIC is initialized inside sfxge_create() and kept inialized
1109 	 * to be able to initialize port to discover media types in
1110 	 * sfxge_ifnet_init().
1111 	 */
1112 	efx_nic_fini(sc->enp);
1113 
1114 	sc->init_state = SFXGE_REGISTERED;
1115 
1116 	DBGPRINT(sc->dev, "success");
1117 	return (0);
1118 
1119 fail4:
1120 	sfxge_ifnet_fini(ifp);
1121 fail3:
1122 	efx_nic_fini(sc->enp);
1123 	sfxge_destroy(sc);
1124 
1125 fail2:
1126 	if_free(sc->ifnet);
1127 
1128 fail:
1129 	DBGPRINT(sc->dev, "failed %d", error);
1130 	return (error);
1131 }
1132 
1133 static int
1134 sfxge_detach(device_t dev)
1135 {
1136 	struct sfxge_softc *sc;
1137 
1138 	sc = device_get_softc(dev);
1139 
1140 	sfxge_vpd_fini(sc);
1141 
1142 	/* Destroy the ifnet. */
1143 	sfxge_ifnet_fini(sc->ifnet);
1144 
1145 	/* Tear down hardware. */
1146 	sfxge_destroy(sc);
1147 
1148 	return (0);
1149 }
1150 
1151 static int
1152 sfxge_probe(device_t dev)
1153 {
1154 	uint16_t pci_vendor_id;
1155 	uint16_t pci_device_id;
1156 	efx_family_t family;
1157 	int rc;
1158 
1159 	pci_vendor_id = pci_get_vendor(dev);
1160 	pci_device_id = pci_get_device(dev);
1161 
1162 	DBGPRINT(dev, "PCI ID %04x:%04x", pci_vendor_id, pci_device_id);
1163 	rc = efx_family(pci_vendor_id, pci_device_id, &family);
1164 	if (rc != 0) {
1165 		DBGPRINT(dev, "efx_family fail %d", rc);
1166 		return (ENXIO);
1167 	}
1168 
1169 	if (family == EFX_FAMILY_SIENA) {
1170 		device_set_desc(dev, "Solarflare SFC9000 family");
1171 		return (0);
1172 	}
1173 
1174 	if (family == EFX_FAMILY_HUNTINGTON) {
1175 		device_set_desc(dev, "Solarflare SFC9100 family");
1176 		return (0);
1177 	}
1178 
1179 	if (family == EFX_FAMILY_MEDFORD) {
1180 		device_set_desc(dev, "Solarflare SFC9200 family");
1181 		return (0);
1182 	}
1183 
1184 	DBGPRINT(dev, "impossible controller family %d", family);
1185 	return (ENXIO);
1186 }
1187 
1188 static device_method_t sfxge_methods[] = {
1189 	DEVMETHOD(device_probe,		sfxge_probe),
1190 	DEVMETHOD(device_attach,	sfxge_attach),
1191 	DEVMETHOD(device_detach,	sfxge_detach),
1192 
1193 	DEVMETHOD_END
1194 };
1195 
1196 static devclass_t sfxge_devclass;
1197 
1198 static driver_t sfxge_driver = {
1199 	"sfxge",
1200 	sfxge_methods,
1201 	sizeof(struct sfxge_softc)
1202 };
1203 
1204 DRIVER_MODULE(sfxge, pci, sfxge_driver, sfxge_devclass, 0, 0);
1205