xref: /freebsd/sys/dev/sfxge/sfxge.c (revision 792bbaba989533a1fc93823df1720c8c4aaf0442)
1 /*-
2  * Copyright (c) 2010-2016 Solarflare Communications Inc.
3  * All rights reserved.
4  *
5  * This software was developed in part by Philip Paeps under contract for
6  * Solarflare Communications, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright notice,
12  *    this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  *    this list of conditions and the following disclaimer in the documentation
15  *    and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * The views and conclusions contained in the software and documentation are
30  * those of the authors and should not be interpreted as representing official
31  * policies, either expressed or implied, of the FreeBSD Project.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_rss.h"
38 
39 #include <sys/param.h>
40 #include <sys/kernel.h>
41 #include <sys/bus.h>
42 #include <sys/rman.h>
43 #include <sys/lock.h>
44 #include <sys/module.h>
45 #include <sys/mutex.h>
46 #include <sys/smp.h>
47 #include <sys/socket.h>
48 #include <sys/taskqueue.h>
49 #include <sys/sockio.h>
50 #include <sys/sysctl.h>
51 #include <sys/priv.h>
52 #include <sys/syslog.h>
53 
54 #include <dev/pci/pcireg.h>
55 #include <dev/pci/pcivar.h>
56 
57 #include <net/ethernet.h>
58 #include <net/if.h>
59 #include <net/if_var.h>
60 #include <net/if_media.h>
61 #include <net/if_types.h>
62 
63 #ifdef RSS
64 #include <net/rss_config.h>
65 #endif
66 
67 #include "common/efx.h"
68 
69 #include "sfxge.h"
70 #include "sfxge_rx.h"
71 #include "sfxge_ioc.h"
72 #include "sfxge_version.h"
73 
/*
 * Interface capabilities advertised by the driver; sfxge_ifnet_init()
 * stores this mask in if_capabilities.
 */
#define	SFXGE_CAP (IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM |			\
		   IFCAP_RXCSUM | IFCAP_TXCSUM |			\
		   IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6 |		\
		   IFCAP_TSO4 | IFCAP_TSO6 |				\
		   IFCAP_JUMBO_MTU |					\
		   IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWSTATS)
/* Capabilities enabled by default: everything that is advertised. */
#define	SFXGE_CAP_ENABLE SFXGE_CAP
/*
 * Capabilities that cannot be toggled; the SIOCSIFCAP handler in
 * sfxge_if_ioctl() rejects any request that would change one of them.
 */
#define	SFXGE_CAP_FIXED (IFCAP_VLAN_MTU |				\
			 IFCAP_JUMBO_MTU | IFCAP_LINKSTATE | IFCAP_HWSTATS)
83 
/* Malloc type used by the driver (e.g. for the VPD buffer). */
MALLOC_DEFINE(M_SFXGE, "sfxge", "Solarflare 10GigE driver");


/* Root of the hw.sfxge sysctl tree holding the driver-wide parameters. */
SYSCTL_NODE(_hw, OID_AUTO, sfxge, CTLFLAG_RD, 0,
	    "SFXGE driver parameters");

/*
 * Receive ring size.  Settable as a tunable only (CTLFLAG_RDTUN) and
 * validated in sfxge_create(): it must be a power of 2 between
 * EFX_RXQ_MINNDESCS and EFX_RXQ_MAXNDESCS.
 */
#define	SFXGE_PARAM_RX_RING	SFXGE_PARAM(rx_ring)
static int sfxge_rx_ring_entries = SFXGE_NDESCS;
TUNABLE_INT(SFXGE_PARAM_RX_RING, &sfxge_rx_ring_entries);
SYSCTL_INT(_hw_sfxge, OID_AUTO, rx_ring, CTLFLAG_RDTUN,
	   &sfxge_rx_ring_entries, 0,
	   "Maximum number of descriptors in a receive ring");

/*
 * Transmit ring size.  Settable as a tunable only (CTLFLAG_RDTUN) and
 * validated in sfxge_create(): it must be a power of 2 between
 * EFX_TXQ_MINNDESCS and the NIC-specific EFX_TXQ_MAXNDESCS().
 */
#define	SFXGE_PARAM_TX_RING	SFXGE_PARAM(tx_ring)
static int sfxge_tx_ring_entries = SFXGE_NDESCS;
TUNABLE_INT(SFXGE_PARAM_TX_RING, &sfxge_tx_ring_entries);
SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_ring, CTLFLAG_RDTUN,
	   &sfxge_tx_ring_entries, 0,
	   "Maximum number of descriptors in a transmit ring");

/*
 * Number of times sfxge_reset() tries to restart the interface after a
 * NIC reset before giving up and leaving it stopped.
 */
#define	SFXGE_PARAM_RESTART_ATTEMPTS	SFXGE_PARAM(restart_attempts)
static int sfxge_restart_attempts = 3;
TUNABLE_INT(SFXGE_PARAM_RESTART_ATTEMPTS, &sfxge_restart_attempts);
SYSCTL_INT(_hw_sfxge, OID_AUTO, restart_attempts, CTLFLAG_RDTUN,
	   &sfxge_restart_attempts, 0,
	   "Maximum number of attempts to bring interface up after reset");

#if EFSYS_OPT_MCDI_LOGGING
/*
 * Driver-wide default for MCDI logging; sfxge_create() copies it into the
 * softc and then lets a per-unit tunable override it.
 */
#define	SFXGE_PARAM_MCDI_LOGGING	SFXGE_PARAM(mcdi_logging)
static int sfxge_mcdi_logging = 0;
TUNABLE_INT(SFXGE_PARAM_MCDI_LOGGING, &sfxge_mcdi_logging);
#endif

/* Reset task handler; declared early because sfxge_create() registers it. */
static void
sfxge_reset(void *arg, int npending);
119 
120 static int
121 sfxge_estimate_rsrc_limits(struct sfxge_softc *sc)
122 {
123 	efx_drv_limits_t limits;
124 	int rc;
125 	unsigned int evq_max;
126 	uint32_t evq_allocated;
127 	uint32_t rxq_allocated;
128 	uint32_t txq_allocated;
129 
130 	/*
131 	 * Limit the number of event queues to:
132 	 *  - number of CPUs
133 	 *  - hardwire maximum RSS channels
134 	 *  - administratively specified maximum RSS channels
135 	 */
136 #ifdef RSS
137 	/*
138 	 * Avoid extra limitations so that the number of queues
139 	 * may be configured at administrator's will
140 	 */
141 	evq_max = MIN(MAX(rss_getnumbuckets(), 1), EFX_MAXRSS);
142 #else
143 	evq_max = MIN(mp_ncpus, EFX_MAXRSS);
144 #endif
145 	if (sc->max_rss_channels > 0)
146 		evq_max = MIN(evq_max, sc->max_rss_channels);
147 
148 	memset(&limits, 0, sizeof(limits));
149 
150 	limits.edl_min_evq_count = 1;
151 	limits.edl_max_evq_count = evq_max;
152 	limits.edl_min_txq_count = SFXGE_TXQ_NTYPES;
153 	limits.edl_max_txq_count = evq_max + SFXGE_TXQ_NTYPES - 1;
154 	limits.edl_min_rxq_count = 1;
155 	limits.edl_max_rxq_count = evq_max;
156 
157 	efx_nic_set_drv_limits(sc->enp, &limits);
158 
159 	if ((rc = efx_nic_init(sc->enp)) != 0)
160 		return (rc);
161 
162 	rc = efx_nic_get_vi_pool(sc->enp, &evq_allocated, &rxq_allocated,
163 				 &txq_allocated);
164 	if (rc != 0) {
165 		efx_nic_fini(sc->enp);
166 		return (rc);
167 	}
168 
169 	KASSERT(txq_allocated >= SFXGE_TXQ_NTYPES,
170 		("txq_allocated < SFXGE_TXQ_NTYPES"));
171 
172 	sc->evq_max = MIN(evq_allocated, evq_max);
173 	sc->evq_max = MIN(rxq_allocated, sc->evq_max);
174 	sc->evq_max = MIN(txq_allocated - (SFXGE_TXQ_NTYPES - 1),
175 			  sc->evq_max);
176 
177 	KASSERT(sc->evq_max <= evq_max,
178 		("allocated more than maximum requested"));
179 
180 #ifdef RSS
181 	if (sc->evq_max < rss_getnumbuckets())
182 		device_printf(sc->dev, "The number of allocated queues (%u) "
183 			      "is less than the number of RSS buckets (%u); "
184 			      "performance degradation might be observed",
185 			      sc->evq_max, rss_getnumbuckets());
186 #endif
187 
188 	/*
189 	 * NIC is kept initialized in the case of success to be able to
190 	 * initialize port to find out media types.
191 	 */
192 	return (0);
193 }
194 
195 static int
196 sfxge_set_drv_limits(struct sfxge_softc *sc)
197 {
198 	efx_drv_limits_t limits;
199 
200 	memset(&limits, 0, sizeof(limits));
201 
202 	/* Limits are strict since take into account initial estimation */
203 	limits.edl_min_evq_count = limits.edl_max_evq_count =
204 	    sc->intr.n_alloc;
205 	limits.edl_min_txq_count = limits.edl_max_txq_count =
206 	    sc->intr.n_alloc + SFXGE_TXQ_NTYPES - 1;
207 	limits.edl_min_rxq_count = limits.edl_max_rxq_count =
208 	    sc->intr.n_alloc;
209 
210 	return (efx_nic_set_drv_limits(sc->enp, &limits));
211 }
212 
213 static int
214 sfxge_start(struct sfxge_softc *sc)
215 {
216 	int rc;
217 
218 	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
219 
220 	if (sc->init_state == SFXGE_STARTED)
221 		return (0);
222 
223 	if (sc->init_state != SFXGE_REGISTERED) {
224 		rc = EINVAL;
225 		goto fail;
226 	}
227 
228 	/* Set required resource limits */
229 	if ((rc = sfxge_set_drv_limits(sc)) != 0)
230 		goto fail;
231 
232 	if ((rc = efx_nic_init(sc->enp)) != 0)
233 		goto fail;
234 
235 	/* Start processing interrupts. */
236 	if ((rc = sfxge_intr_start(sc)) != 0)
237 		goto fail2;
238 
239 	/* Start processing events. */
240 	if ((rc = sfxge_ev_start(sc)) != 0)
241 		goto fail3;
242 
243 	/* Fire up the port. */
244 	if ((rc = sfxge_port_start(sc)) != 0)
245 		goto fail4;
246 
247 	/* Start the receiver side. */
248 	if ((rc = sfxge_rx_start(sc)) != 0)
249 		goto fail5;
250 
251 	/* Start the transmitter side. */
252 	if ((rc = sfxge_tx_start(sc)) != 0)
253 		goto fail6;
254 
255 	sc->init_state = SFXGE_STARTED;
256 
257 	/* Tell the stack we're running. */
258 	sc->ifnet->if_drv_flags |= IFF_DRV_RUNNING;
259 	sc->ifnet->if_drv_flags &= ~IFF_DRV_OACTIVE;
260 
261 	return (0);
262 
263 fail6:
264 	sfxge_rx_stop(sc);
265 
266 fail5:
267 	sfxge_port_stop(sc);
268 
269 fail4:
270 	sfxge_ev_stop(sc);
271 
272 fail3:
273 	sfxge_intr_stop(sc);
274 
275 fail2:
276 	efx_nic_fini(sc->enp);
277 
278 fail:
279 	device_printf(sc->dev, "sfxge_start: %d\n", rc);
280 
281 	return (rc);
282 }
283 
/*
 * if_init callback: start the adapter under the adapter lock.
 * The callback returns nothing, so the result of sfxge_start() is dropped.
 */
static void
sfxge_if_init(void *arg)
{
	struct sfxge_softc *sc = arg;

	SFXGE_ADAPTER_LOCK(sc);
	(void)sfxge_start(sc);
	SFXGE_ADAPTER_UNLOCK(sc);
}
295 
296 static void
297 sfxge_stop(struct sfxge_softc *sc)
298 {
299 	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
300 
301 	if (sc->init_state != SFXGE_STARTED)
302 		return;
303 
304 	sc->init_state = SFXGE_REGISTERED;
305 
306 	/* Stop the transmitter. */
307 	sfxge_tx_stop(sc);
308 
309 	/* Stop the receiver. */
310 	sfxge_rx_stop(sc);
311 
312 	/* Stop the port. */
313 	sfxge_port_stop(sc);
314 
315 	/* Stop processing events. */
316 	sfxge_ev_stop(sc);
317 
318 	/* Stop processing interrupts. */
319 	sfxge_intr_stop(sc);
320 
321 	efx_nic_fini(sc->enp);
322 
323 	sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING;
324 }
325 
326 
327 static int
328 sfxge_vpd_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
329 {
330 	efx_vpd_value_t value;
331 	int rc = 0;
332 
333 	switch (ioc->u.vpd.op) {
334 	case SFXGE_VPD_OP_GET_KEYWORD:
335 		value.evv_tag = ioc->u.vpd.tag;
336 		value.evv_keyword = ioc->u.vpd.keyword;
337 		rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value);
338 		if (rc != 0)
339 			break;
340 		ioc->u.vpd.len = MIN(ioc->u.vpd.len, value.evv_length);
341 		if (ioc->u.vpd.payload != 0) {
342 			rc = copyout(value.evv_value, ioc->u.vpd.payload,
343 				     ioc->u.vpd.len);
344 		}
345 		break;
346 	case SFXGE_VPD_OP_SET_KEYWORD:
347 		if (ioc->u.vpd.len > sizeof(value.evv_value))
348 			return (EINVAL);
349 		value.evv_tag = ioc->u.vpd.tag;
350 		value.evv_keyword = ioc->u.vpd.keyword;
351 		value.evv_length = ioc->u.vpd.len;
352 		rc = copyin(ioc->u.vpd.payload, value.evv_value, value.evv_length);
353 		if (rc != 0)
354 			break;
355 		rc = efx_vpd_set(sc->enp, sc->vpd_data, sc->vpd_size, &value);
356 		if (rc != 0)
357 			break;
358 		rc = efx_vpd_verify(sc->enp, sc->vpd_data, sc->vpd_size);
359 		if (rc != 0)
360 			break;
361 		rc = efx_vpd_write(sc->enp, sc->vpd_data, sc->vpd_size);
362 		break;
363 	default:
364 		rc = EOPNOTSUPP;
365 		break;
366 	}
367 
368 	return (rc);
369 }
370 
371 static int
372 sfxge_private_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
373 {
374 	switch (ioc->op) {
375 	case SFXGE_MCDI_IOC:
376 		return (sfxge_mcdi_ioctl(sc, ioc));
377 	case SFXGE_NVRAM_IOC:
378 		return (sfxge_nvram_ioctl(sc, ioc));
379 	case SFXGE_VPD_IOC:
380 		return (sfxge_vpd_ioctl(sc, ioc));
381 	default:
382 		return (EOPNOTSUPP);
383 	}
384 }
385 
386 
387 static int
388 sfxge_if_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
389 {
390 	struct sfxge_softc *sc;
391 	struct ifreq *ifr;
392 	sfxge_ioc_t ioc;
393 	int error;
394 
395 	ifr = (struct ifreq *)data;
396 	sc = ifp->if_softc;
397 	error = 0;
398 
399 	switch (command) {
400 	case SIOCSIFFLAGS:
401 		SFXGE_ADAPTER_LOCK(sc);
402 		if (ifp->if_flags & IFF_UP) {
403 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
404 				if ((ifp->if_flags ^ sc->if_flags) &
405 				    (IFF_PROMISC | IFF_ALLMULTI)) {
406 					sfxge_mac_filter_set(sc);
407 				}
408 			} else
409 				sfxge_start(sc);
410 		} else
411 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
412 				sfxge_stop(sc);
413 		sc->if_flags = ifp->if_flags;
414 		SFXGE_ADAPTER_UNLOCK(sc);
415 		break;
416 	case SIOCSIFMTU:
417 		if (ifr->ifr_mtu == ifp->if_mtu) {
418 			/* Nothing to do */
419 			error = 0;
420 		} else if (ifr->ifr_mtu > SFXGE_MAX_MTU) {
421 			error = EINVAL;
422 		} else if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
423 			ifp->if_mtu = ifr->ifr_mtu;
424 			error = 0;
425 		} else {
426 			/* Restart required */
427 			SFXGE_ADAPTER_LOCK(sc);
428 			sfxge_stop(sc);
429 			ifp->if_mtu = ifr->ifr_mtu;
430 			error = sfxge_start(sc);
431 			SFXGE_ADAPTER_UNLOCK(sc);
432 			if (error != 0) {
433 				ifp->if_flags &= ~IFF_UP;
434 				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
435 				if_down(ifp);
436 			}
437 		}
438 		break;
439 	case SIOCADDMULTI:
440 	case SIOCDELMULTI:
441 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
442 			sfxge_mac_filter_set(sc);
443 		break;
444 	case SIOCSIFCAP:
445 	{
446 		int reqcap = ifr->ifr_reqcap;
447 		int capchg_mask;
448 
449 		SFXGE_ADAPTER_LOCK(sc);
450 
451 		/* Capabilities to be changed in accordance with request */
452 		capchg_mask = ifp->if_capenable ^ reqcap;
453 
454 		/*
455 		 * The networking core already rejects attempts to
456 		 * enable capabilities we don't have.  We still have
457 		 * to reject attempts to disable capabilities that we
458 		 * can't (yet) disable.
459 		 */
460 		KASSERT((reqcap & ~ifp->if_capabilities) == 0,
461 		    ("Unsupported capabilities 0x%x requested 0x%x vs "
462 		     "supported 0x%x",
463 		     reqcap & ~ifp->if_capabilities,
464 		     reqcap , ifp->if_capabilities));
465 		if (capchg_mask & SFXGE_CAP_FIXED) {
466 			error = EINVAL;
467 			SFXGE_ADAPTER_UNLOCK(sc);
468 			break;
469 		}
470 
471 		/* Check request before any changes */
472 		if ((capchg_mask & IFCAP_TSO4) &&
473 		    (reqcap & (IFCAP_TSO4 | IFCAP_TXCSUM)) == IFCAP_TSO4) {
474 			error = EAGAIN;
475 			SFXGE_ADAPTER_UNLOCK(sc);
476 			if_printf(ifp, "enable txcsum before tso4\n");
477 			break;
478 		}
479 		if ((capchg_mask & IFCAP_TSO6) &&
480 		    (reqcap & (IFCAP_TSO6 | IFCAP_TXCSUM_IPV6)) == IFCAP_TSO6) {
481 			error = EAGAIN;
482 			SFXGE_ADAPTER_UNLOCK(sc);
483 			if_printf(ifp, "enable txcsum6 before tso6\n");
484 			break;
485 		}
486 
487 		if (reqcap & IFCAP_TXCSUM) {
488 			ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP);
489 		} else {
490 			ifp->if_hwassist &= ~(CSUM_IP | CSUM_TCP | CSUM_UDP);
491 			if (reqcap & IFCAP_TSO4) {
492 				reqcap &= ~IFCAP_TSO4;
493 				if_printf(ifp,
494 				    "tso4 disabled due to -txcsum\n");
495 			}
496 		}
497 		if (reqcap & IFCAP_TXCSUM_IPV6) {
498 			ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
499 		} else {
500 			ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
501 			if (reqcap & IFCAP_TSO6) {
502 				reqcap &= ~IFCAP_TSO6;
503 				if_printf(ifp,
504 				    "tso6 disabled due to -txcsum6\n");
505 			}
506 		}
507 
508 		/*
509 		 * The kernel takes both IFCAP_TSOx and CSUM_TSO into
510 		 * account before using TSO. So, we do not touch
511 		 * checksum flags when IFCAP_TSOx is modified.
512 		 * Note that CSUM_TSO is (CSUM_IP_TSO|CSUM_IP6_TSO),
513 		 * but both bits are set in IPv4 and IPv6 mbufs.
514 		 */
515 
516 		ifp->if_capenable = reqcap;
517 
518 		SFXGE_ADAPTER_UNLOCK(sc);
519 		break;
520 	}
521 	case SIOCSIFMEDIA:
522 	case SIOCGIFMEDIA:
523 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
524 		break;
525 #ifdef SIOCGI2C
526 	case SIOCGI2C:
527 	{
528 		struct ifi2creq i2c;
529 
530 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
531 		if (error != 0)
532 			break;
533 
534 		if (i2c.len > sizeof(i2c.data)) {
535 			error = EINVAL;
536 			break;
537 		}
538 
539 		SFXGE_ADAPTER_LOCK(sc);
540 		error = efx_phy_module_get_info(sc->enp, i2c.dev_addr,
541 						i2c.offset, i2c.len,
542 						&i2c.data[0]);
543 		SFXGE_ADAPTER_UNLOCK(sc);
544 		if (error == 0)
545 			error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
546 		break;
547 	}
548 #endif
549 	case SIOCGPRIVATE_0:
550 		error = priv_check(curthread, PRIV_DRIVER);
551 		if (error != 0)
552 			break;
553 		error = copyin(ifr->ifr_data, &ioc, sizeof(ioc));
554 		if (error != 0)
555 			return (error);
556 		error = sfxge_private_ioctl(sc, &ioc);
557 		if (error == 0) {
558 			error = copyout(&ioc, ifr->ifr_data, sizeof(ioc));
559 		}
560 		break;
561 	default:
562 		error = ether_ioctl(ifp, command, data);
563 	}
564 
565 	return (error);
566 }
567 
568 static void
569 sfxge_ifnet_fini(struct ifnet *ifp)
570 {
571 	struct sfxge_softc *sc = ifp->if_softc;
572 
573 	SFXGE_ADAPTER_LOCK(sc);
574 	sfxge_stop(sc);
575 	SFXGE_ADAPTER_UNLOCK(sc);
576 
577 	ifmedia_removeall(&sc->media);
578 	ether_ifdetach(ifp);
579 	if_free(ifp);
580 }
581 
582 static int
583 sfxge_ifnet_init(struct ifnet *ifp, struct sfxge_softc *sc)
584 {
585 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
586 	device_t dev;
587 	int rc;
588 
589 	dev = sc->dev;
590 	sc->ifnet = ifp;
591 
592 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
593 	ifp->if_init = sfxge_if_init;
594 	ifp->if_softc = sc;
595 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
596 	ifp->if_ioctl = sfxge_if_ioctl;
597 
598 	ifp->if_capabilities = SFXGE_CAP;
599 	ifp->if_capenable = SFXGE_CAP_ENABLE;
600 	ifp->if_hw_tsomax = SFXGE_TSO_MAX_SIZE;
601 	ifp->if_hw_tsomaxsegcount = SFXGE_TX_MAPPING_MAX_SEG;
602 	ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
603 
604 #ifdef SFXGE_LRO
605 	ifp->if_capabilities |= IFCAP_LRO;
606 	ifp->if_capenable |= IFCAP_LRO;
607 #endif
608 
609 	if (encp->enc_hw_tx_insert_vlan_enabled) {
610 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
611 		ifp->if_capenable |= IFCAP_VLAN_HWTAGGING;
612 	}
613 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
614 			   CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
615 
616 	ether_ifattach(ifp, encp->enc_mac_addr);
617 
618 	ifp->if_transmit = sfxge_if_transmit;
619 	ifp->if_qflush = sfxge_if_qflush;
620 
621 	ifp->if_get_counter = sfxge_get_counter;
622 
623 	DBGPRINT(sc->dev, "ifmedia_init");
624 	if ((rc = sfxge_port_ifmedia_init(sc)) != 0)
625 		goto fail;
626 
627 	return (0);
628 
629 fail:
630 	ether_ifdetach(sc->ifnet);
631 	return (rc);
632 }
633 
634 void
635 sfxge_sram_buf_tbl_alloc(struct sfxge_softc *sc, size_t n, uint32_t *idp)
636 {
637 	KASSERT(sc->buffer_table_next + n <=
638 		efx_nic_cfg_get(sc->enp)->enc_buftbl_limit,
639 		("buffer table full"));
640 
641 	*idp = sc->buffer_table_next;
642 	sc->buffer_table_next += n;
643 }
644 
645 static int
646 sfxge_bar_init(struct sfxge_softc *sc)
647 {
648 	efsys_bar_t *esbp = &sc->bar;
649 
650 	esbp->esb_rid = PCIR_BAR(EFX_MEM_BAR);
651 	if ((esbp->esb_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
652 	    &esbp->esb_rid, RF_ACTIVE)) == NULL) {
653 		device_printf(sc->dev, "Cannot allocate BAR region %d\n",
654 		    EFX_MEM_BAR);
655 		return (ENXIO);
656 	}
657 	esbp->esb_tag = rman_get_bustag(esbp->esb_res);
658 	esbp->esb_handle = rman_get_bushandle(esbp->esb_res);
659 
660 	SFXGE_BAR_LOCK_INIT(esbp, device_get_nameunit(sc->dev));
661 
662 	return (0);
663 }
664 
665 static void
666 sfxge_bar_fini(struct sfxge_softc *sc)
667 {
668 	efsys_bar_t *esbp = &sc->bar;
669 
670 	bus_release_resource(sc->dev, SYS_RES_MEMORY, esbp->esb_rid,
671 	    esbp->esb_res);
672 	SFXGE_BAR_LOCK_DESTROY(esbp);
673 }
674 
675 static int
676 sfxge_create(struct sfxge_softc *sc)
677 {
678 	device_t dev;
679 	efx_nic_t *enp;
680 	int error;
681 	char rss_param_name[sizeof(SFXGE_PARAM(%d.max_rss_channels))];
682 #if EFSYS_OPT_MCDI_LOGGING
683 	char mcdi_log_param_name[sizeof(SFXGE_PARAM(%d.mcdi_logging))];
684 #endif
685 
686 	dev = sc->dev;
687 
688 	SFXGE_ADAPTER_LOCK_INIT(sc, device_get_nameunit(sc->dev));
689 
690 	sc->max_rss_channels = 0;
691 	snprintf(rss_param_name, sizeof(rss_param_name),
692 		 SFXGE_PARAM(%d.max_rss_channels),
693 		 (int)device_get_unit(dev));
694 	TUNABLE_INT_FETCH(rss_param_name, &sc->max_rss_channels);
695 #if EFSYS_OPT_MCDI_LOGGING
696 	sc->mcdi_logging = sfxge_mcdi_logging;
697 	snprintf(mcdi_log_param_name, sizeof(mcdi_log_param_name),
698 		 SFXGE_PARAM(%d.mcdi_logging),
699 		 (int)device_get_unit(dev));
700 	TUNABLE_INT_FETCH(mcdi_log_param_name, &sc->mcdi_logging);
701 #endif
702 
703 	sc->stats_node = SYSCTL_ADD_NODE(
704 		device_get_sysctl_ctx(dev),
705 		SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
706 		OID_AUTO, "stats", CTLFLAG_RD, NULL, "Statistics");
707 	if (sc->stats_node == NULL) {
708 		error = ENOMEM;
709 		goto fail;
710 	}
711 
712 	TASK_INIT(&sc->task_reset, 0, sfxge_reset, sc);
713 
714 	(void) pci_enable_busmaster(dev);
715 
716 	/* Initialize DMA mappings. */
717 	DBGPRINT(sc->dev, "dma_init...");
718 	if ((error = sfxge_dma_init(sc)) != 0)
719 		goto fail;
720 
721 	/* Map the device registers. */
722 	DBGPRINT(sc->dev, "bar_init...");
723 	if ((error = sfxge_bar_init(sc)) != 0)
724 		goto fail;
725 
726 	error = efx_family(pci_get_vendor(dev), pci_get_device(dev),
727 	    &sc->family);
728 	KASSERT(error == 0, ("Family should be filtered by sfxge_probe()"));
729 
730 	DBGPRINT(sc->dev, "nic_create...");
731 
732 	/* Create the common code nic object. */
733 	SFXGE_EFSYS_LOCK_INIT(&sc->enp_lock,
734 			      device_get_nameunit(sc->dev), "nic");
735 	if ((error = efx_nic_create(sc->family, (efsys_identifier_t *)sc,
736 	    &sc->bar, &sc->enp_lock, &enp)) != 0)
737 		goto fail3;
738 	sc->enp = enp;
739 
740 	/* Initialize MCDI to talk to the microcontroller. */
741 	DBGPRINT(sc->dev, "mcdi_init...");
742 	if ((error = sfxge_mcdi_init(sc)) != 0)
743 		goto fail4;
744 
745 	/* Probe the NIC and build the configuration data area. */
746 	DBGPRINT(sc->dev, "nic_probe...");
747 	if ((error = efx_nic_probe(enp)) != 0)
748 		goto fail5;
749 
750 	if (!ISP2(sfxge_rx_ring_entries) ||
751 	    (sfxge_rx_ring_entries < EFX_RXQ_MINNDESCS) ||
752 	    (sfxge_rx_ring_entries > EFX_RXQ_MAXNDESCS)) {
753 		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
754 		    SFXGE_PARAM_RX_RING, sfxge_rx_ring_entries,
755 		    EFX_RXQ_MINNDESCS, EFX_RXQ_MAXNDESCS);
756 		error = EINVAL;
757 		goto fail_rx_ring_entries;
758 	}
759 	sc->rxq_entries = sfxge_rx_ring_entries;
760 
761 	if (!ISP2(sfxge_tx_ring_entries) ||
762 	    (sfxge_tx_ring_entries < EFX_TXQ_MINNDESCS) ||
763 	    (sfxge_tx_ring_entries > EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)))) {
764 		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
765 		    SFXGE_PARAM_TX_RING, sfxge_tx_ring_entries,
766 		    EFX_TXQ_MINNDESCS, EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)));
767 		error = EINVAL;
768 		goto fail_tx_ring_entries;
769 	}
770 	sc->txq_entries = sfxge_tx_ring_entries;
771 
772 	SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev),
773 			  SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
774 			  OID_AUTO, "version", CTLFLAG_RD,
775 			  SFXGE_VERSION_STRING, 0,
776 			  "Driver version");
777 
778 	SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
779 			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
780 			OID_AUTO, "phy_type", CTLFLAG_RD,
781 			NULL, efx_nic_cfg_get(enp)->enc_phy_type,
782 			"PHY type");
783 
784 	/* Initialize the NVRAM. */
785 	DBGPRINT(sc->dev, "nvram_init...");
786 	if ((error = efx_nvram_init(enp)) != 0)
787 		goto fail6;
788 
789 	/* Initialize the VPD. */
790 	DBGPRINT(sc->dev, "vpd_init...");
791 	if ((error = efx_vpd_init(enp)) != 0)
792 		goto fail7;
793 
794 	efx_mcdi_new_epoch(enp);
795 
796 	/* Reset the NIC. */
797 	DBGPRINT(sc->dev, "nic_reset...");
798 	if ((error = efx_nic_reset(enp)) != 0)
799 		goto fail8;
800 
801 	/* Initialize buffer table allocation. */
802 	sc->buffer_table_next = 0;
803 
804 	/*
805 	 * Guarantee minimum and estimate maximum number of event queues
806 	 * to take it into account when MSI-X interrupts are allocated.
807 	 * It initializes NIC and keeps it initialized on success.
808 	 */
809 	if ((error = sfxge_estimate_rsrc_limits(sc)) != 0)
810 		goto fail8;
811 
812 	/* Set up interrupts. */
813 	DBGPRINT(sc->dev, "intr_init...");
814 	if ((error = sfxge_intr_init(sc)) != 0)
815 		goto fail9;
816 
817 	/* Initialize event processing state. */
818 	DBGPRINT(sc->dev, "ev_init...");
819 	if ((error = sfxge_ev_init(sc)) != 0)
820 		goto fail11;
821 
822 	/* Initialize port state. */
823 	DBGPRINT(sc->dev, "port_init...");
824 	if ((error = sfxge_port_init(sc)) != 0)
825 		goto fail12;
826 
827 	/* Initialize receive state. */
828 	DBGPRINT(sc->dev, "rx_init...");
829 	if ((error = sfxge_rx_init(sc)) != 0)
830 		goto fail13;
831 
832 	/* Initialize transmit state. */
833 	DBGPRINT(sc->dev, "tx_init...");
834 	if ((error = sfxge_tx_init(sc)) != 0)
835 		goto fail14;
836 
837 	sc->init_state = SFXGE_INITIALIZED;
838 
839 	DBGPRINT(sc->dev, "success");
840 	return (0);
841 
842 fail14:
843 	sfxge_rx_fini(sc);
844 
845 fail13:
846 	sfxge_port_fini(sc);
847 
848 fail12:
849 	sfxge_ev_fini(sc);
850 
851 fail11:
852 	sfxge_intr_fini(sc);
853 
854 fail9:
855 	efx_nic_fini(sc->enp);
856 
857 fail8:
858 	efx_vpd_fini(enp);
859 
860 fail7:
861 	efx_nvram_fini(enp);
862 
863 fail6:
864 fail_tx_ring_entries:
865 fail_rx_ring_entries:
866 	efx_nic_unprobe(enp);
867 
868 fail5:
869 	sfxge_mcdi_fini(sc);
870 
871 fail4:
872 	sc->enp = NULL;
873 	efx_nic_destroy(enp);
874 	SFXGE_EFSYS_LOCK_DESTROY(&sc->enp_lock);
875 
876 fail3:
877 	sfxge_bar_fini(sc);
878 	(void) pci_disable_busmaster(sc->dev);
879 
880 fail:
881 	DBGPRINT(sc->dev, "failed %d", error);
882 	sc->dev = NULL;
883 	SFXGE_ADAPTER_LOCK_DESTROY(sc);
884 	return (error);
885 }
886 
887 static void
888 sfxge_destroy(struct sfxge_softc *sc)
889 {
890 	efx_nic_t *enp;
891 
892 	/* Clean up transmit state. */
893 	sfxge_tx_fini(sc);
894 
895 	/* Clean up receive state. */
896 	sfxge_rx_fini(sc);
897 
898 	/* Clean up port state. */
899 	sfxge_port_fini(sc);
900 
901 	/* Clean up event processing state. */
902 	sfxge_ev_fini(sc);
903 
904 	/* Clean up interrupts. */
905 	sfxge_intr_fini(sc);
906 
907 	/* Tear down common code subsystems. */
908 	efx_nic_reset(sc->enp);
909 	efx_vpd_fini(sc->enp);
910 	efx_nvram_fini(sc->enp);
911 	efx_nic_unprobe(sc->enp);
912 
913 	/* Tear down MCDI. */
914 	sfxge_mcdi_fini(sc);
915 
916 	/* Destroy common code context. */
917 	enp = sc->enp;
918 	sc->enp = NULL;
919 	efx_nic_destroy(enp);
920 
921 	/* Free DMA memory. */
922 	sfxge_dma_fini(sc);
923 
924 	/* Free mapped BARs. */
925 	sfxge_bar_fini(sc);
926 
927 	(void) pci_disable_busmaster(sc->dev);
928 
929 	taskqueue_drain(taskqueue_thread, &sc->task_reset);
930 
931 	/* Destroy the softc lock. */
932 	SFXGE_ADAPTER_LOCK_DESTROY(sc);
933 }
934 
935 static int
936 sfxge_vpd_handler(SYSCTL_HANDLER_ARGS)
937 {
938 	struct sfxge_softc *sc = arg1;
939 	efx_vpd_value_t value;
940 	int rc;
941 
942 	value.evv_tag = arg2 >> 16;
943 	value.evv_keyword = arg2 & 0xffff;
944 	if ((rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value))
945 	    != 0)
946 		return (rc);
947 
948 	return (SYSCTL_OUT(req, value.evv_value, value.evv_length));
949 }
950 
951 static void
952 sfxge_vpd_try_add(struct sfxge_softc *sc, struct sysctl_oid_list *list,
953 		  efx_vpd_tag_t tag, const char *keyword)
954 {
955 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
956 	efx_vpd_value_t value;
957 
958 	/* Check whether VPD tag/keyword is present */
959 	value.evv_tag = tag;
960 	value.evv_keyword = EFX_VPD_KEYWORD(keyword[0], keyword[1]);
961 	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) != 0)
962 		return;
963 
964 	SYSCTL_ADD_PROC(
965 		ctx, list, OID_AUTO, keyword, CTLTYPE_STRING|CTLFLAG_RD,
966 		sc, tag << 16 | EFX_VPD_KEYWORD(keyword[0], keyword[1]),
967 		sfxge_vpd_handler, "A", "");
968 }
969 
970 static int
971 sfxge_vpd_init(struct sfxge_softc *sc)
972 {
973 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
974 	struct sysctl_oid *vpd_node;
975 	struct sysctl_oid_list *vpd_list;
976 	char keyword[3];
977 	efx_vpd_value_t value;
978 	int rc;
979 
980 	if ((rc = efx_vpd_size(sc->enp, &sc->vpd_size)) != 0) {
981 		/*
982 		 * Unpriviledged functions deny VPD access.
983 		 * Simply skip VPD in this case.
984 		 */
985 		if (rc == EACCES)
986 			goto done;
987 		goto fail;
988 	}
989 	sc->vpd_data = malloc(sc->vpd_size, M_SFXGE, M_WAITOK);
990 	if ((rc = efx_vpd_read(sc->enp, sc->vpd_data, sc->vpd_size)) != 0)
991 		goto fail2;
992 
993 	/* Copy ID (product name) into device description, and log it. */
994 	value.evv_tag = EFX_VPD_ID;
995 	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) == 0) {
996 		value.evv_value[value.evv_length] = 0;
997 		device_set_desc_copy(sc->dev, value.evv_value);
998 		device_printf(sc->dev, "%s\n", value.evv_value);
999 	}
1000 
1001 	vpd_node = SYSCTL_ADD_NODE(
1002 		ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
1003 		OID_AUTO, "vpd", CTLFLAG_RD, NULL, "Vital Product Data");
1004 	vpd_list = SYSCTL_CHILDREN(vpd_node);
1005 
1006 	/* Add sysctls for all expected and any vendor-defined keywords. */
1007 	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "PN");
1008 	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "EC");
1009 	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "SN");
1010 	keyword[0] = 'V';
1011 	keyword[2] = 0;
1012 	for (keyword[1] = '0'; keyword[1] <= '9'; keyword[1]++)
1013 		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1014 	for (keyword[1] = 'A'; keyword[1] <= 'Z'; keyword[1]++)
1015 		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1016 
1017 done:
1018 	return (0);
1019 
1020 fail2:
1021 	free(sc->vpd_data, M_SFXGE);
1022 fail:
1023 	return (rc);
1024 }
1025 
1026 static void
1027 sfxge_vpd_fini(struct sfxge_softc *sc)
1028 {
1029 	free(sc->vpd_data, M_SFXGE);
1030 }
1031 
1032 static void
1033 sfxge_reset(void *arg, int npending)
1034 {
1035 	struct sfxge_softc *sc;
1036 	int rc;
1037 	unsigned attempt;
1038 
1039 	(void)npending;
1040 
1041 	sc = (struct sfxge_softc *)arg;
1042 
1043 	SFXGE_ADAPTER_LOCK(sc);
1044 
1045 	if (sc->init_state != SFXGE_STARTED)
1046 		goto done;
1047 
1048 	sfxge_stop(sc);
1049 	efx_nic_reset(sc->enp);
1050 	for (attempt = 0; attempt < sfxge_restart_attempts; ++attempt) {
1051 		if ((rc = sfxge_start(sc)) == 0)
1052 			goto done;
1053 
1054 		device_printf(sc->dev, "start on reset failed (%d)\n", rc);
1055 		DELAY(100000);
1056 	}
1057 
1058 	device_printf(sc->dev, "reset failed; interface is now stopped\n");
1059 
1060 done:
1061 	SFXGE_ADAPTER_UNLOCK(sc);
1062 }
1063 
1064 void
1065 sfxge_schedule_reset(struct sfxge_softc *sc)
1066 {
1067 	taskqueue_enqueue(taskqueue_thread, &sc->task_reset);
1068 }
1069 
1070 static int
1071 sfxge_attach(device_t dev)
1072 {
1073 	struct sfxge_softc *sc;
1074 	struct ifnet *ifp;
1075 	int error;
1076 
1077 	sc = device_get_softc(dev);
1078 	sc->dev = dev;
1079 
1080 	/* Allocate ifnet. */
1081 	ifp = if_alloc(IFT_ETHER);
1082 	if (ifp == NULL) {
1083 		device_printf(dev, "Couldn't allocate ifnet\n");
1084 		error = ENOMEM;
1085 		goto fail;
1086 	}
1087 	sc->ifnet = ifp;
1088 
1089 	/* Initialize hardware. */
1090 	DBGPRINT(sc->dev, "create nic");
1091 	if ((error = sfxge_create(sc)) != 0)
1092 		goto fail2;
1093 
1094 	/* Create the ifnet for the port. */
1095 	DBGPRINT(sc->dev, "init ifnet");
1096 	if ((error = sfxge_ifnet_init(ifp, sc)) != 0)
1097 		goto fail3;
1098 
1099 	DBGPRINT(sc->dev, "init vpd");
1100 	if ((error = sfxge_vpd_init(sc)) != 0)
1101 		goto fail4;
1102 
1103 	/*
1104 	 * NIC is initialized inside sfxge_create() and kept inialized
1105 	 * to be able to initialize port to discover media types in
1106 	 * sfxge_ifnet_init().
1107 	 */
1108 	efx_nic_fini(sc->enp);
1109 
1110 	sc->init_state = SFXGE_REGISTERED;
1111 
1112 	DBGPRINT(sc->dev, "success");
1113 	return (0);
1114 
1115 fail4:
1116 	sfxge_ifnet_fini(ifp);
1117 fail3:
1118 	efx_nic_fini(sc->enp);
1119 	sfxge_destroy(sc);
1120 
1121 fail2:
1122 	if_free(sc->ifnet);
1123 
1124 fail:
1125 	DBGPRINT(sc->dev, "failed %d", error);
1126 	return (error);
1127 }
1128 
1129 static int
1130 sfxge_detach(device_t dev)
1131 {
1132 	struct sfxge_softc *sc;
1133 
1134 	sc = device_get_softc(dev);
1135 
1136 	sfxge_vpd_fini(sc);
1137 
1138 	/* Destroy the ifnet. */
1139 	sfxge_ifnet_fini(sc->ifnet);
1140 
1141 	/* Tear down hardware. */
1142 	sfxge_destroy(sc);
1143 
1144 	return (0);
1145 }
1146 
1147 static int
1148 sfxge_probe(device_t dev)
1149 {
1150 	uint16_t pci_vendor_id;
1151 	uint16_t pci_device_id;
1152 	efx_family_t family;
1153 	int rc;
1154 
1155 	pci_vendor_id = pci_get_vendor(dev);
1156 	pci_device_id = pci_get_device(dev);
1157 
1158 	DBGPRINT(dev, "PCI ID %04x:%04x", pci_vendor_id, pci_device_id);
1159 	rc = efx_family(pci_vendor_id, pci_device_id, &family);
1160 	if (rc != 0) {
1161 		DBGPRINT(dev, "efx_family fail %d", rc);
1162 		return (ENXIO);
1163 	}
1164 
1165 	if (family == EFX_FAMILY_SIENA) {
1166 		device_set_desc(dev, "Solarflare SFC9000 family");
1167 		return (0);
1168 	}
1169 
1170 	if (family == EFX_FAMILY_HUNTINGTON) {
1171 		device_set_desc(dev, "Solarflare SFC9100 family");
1172 		return (0);
1173 	}
1174 
1175 	if (family == EFX_FAMILY_MEDFORD) {
1176 		device_set_desc(dev, "Solarflare SFC9200 family");
1177 		return (0);
1178 	}
1179 
1180 	DBGPRINT(dev, "impossible controller family %d", family);
1181 	return (ENXIO);
1182 }
1183 
/* Device interface methods implemented by the driver. */
static device_method_t sfxge_methods[] = {
	DEVMETHOD(device_probe,		sfxge_probe),
	DEVMETHOD(device_attach,	sfxge_attach),
	DEVMETHOD(device_detach,	sfxge_detach),

	DEVMETHOD_END
};

static devclass_t sfxge_devclass;

/* Driver description: name, method table and size of the per-device softc. */
static driver_t sfxge_driver = {
	"sfxge",
	sfxge_methods,
	sizeof(struct sfxge_softc)
};

/* Register the driver on the pci bus. */
DRIVER_MODULE(sfxge, pci, sfxge_driver, sfxge_devclass, 0, 0);
1201