xref: /freebsd/sys/dev/sfxge/sfxge.c (revision 39ee7a7a6bdd1557b1c3532abf60d139798ac88b)
1 /*-
2  * Copyright (c) 2010-2015 Solarflare Communications Inc.
3  * All rights reserved.
4  *
5  * This software was developed in part by Philip Paeps under contract for
6  * Solarflare Communications, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright notice,
12  *    this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  *    this list of conditions and the following disclaimer in the documentation
15  *    and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * The views and conclusions contained in the software and documentation are
30  * those of the authors and should not be interpreted as representing official
31  * policies, either expressed or implied, of the FreeBSD Project.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/param.h>
38 #include <sys/kernel.h>
39 #include <sys/bus.h>
40 #include <sys/rman.h>
41 #include <sys/lock.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/smp.h>
45 #include <sys/socket.h>
46 #include <sys/taskqueue.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/priv.h>
50 #include <sys/syslog.h>
51 
52 #include <dev/pci/pcireg.h>
53 #include <dev/pci/pcivar.h>
54 
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_var.h>
58 #include <net/if_media.h>
59 #include <net/if_types.h>
60 
61 #include "common/efx.h"
62 
63 #include "sfxge.h"
64 #include "sfxge_rx.h"
65 #include "sfxge_ioc.h"
66 #include "sfxge_version.h"
67 
/* Every interface capability the driver advertises to the stack. */
#define	SFXGE_CAP							\
	(IFCAP_RXCSUM | IFCAP_TXCSUM |					\
	 IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6 |			\
	 IFCAP_TSO4 | IFCAP_TSO6 |					\
	 IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |	\
	 IFCAP_JUMBO_MTU | IFCAP_LINKSTATE | IFCAP_HWSTATS)

/* Capabilities enabled by default: everything that is supported. */
#define	SFXGE_CAP_ENABLE	SFXGE_CAP

/* Capabilities that SIOCSIFCAP refuses to toggle. */
#define	SFXGE_CAP_FIXED							\
	(IFCAP_VLAN_MTU | IFCAP_JUMBO_MTU |				\
	 IFCAP_LINKSTATE | IFCAP_HWSTATS)
77 
78 MALLOC_DEFINE(M_SFXGE, "sfxge", "Solarflare 10GigE driver");
79 
80 
81 SYSCTL_NODE(_hw, OID_AUTO, sfxge, CTLFLAG_RD, 0,
82 	    "SFXGE driver parameters");
83 
84 #define	SFXGE_PARAM_RX_RING	SFXGE_PARAM(rx_ring)
85 static int sfxge_rx_ring_entries = SFXGE_NDESCS;
86 TUNABLE_INT(SFXGE_PARAM_RX_RING, &sfxge_rx_ring_entries);
87 SYSCTL_INT(_hw_sfxge, OID_AUTO, rx_ring, CTLFLAG_RDTUN,
88 	   &sfxge_rx_ring_entries, 0,
89 	   "Maximum number of descriptors in a receive ring");
90 
91 #define	SFXGE_PARAM_TX_RING	SFXGE_PARAM(tx_ring)
92 static int sfxge_tx_ring_entries = SFXGE_NDESCS;
93 TUNABLE_INT(SFXGE_PARAM_TX_RING, &sfxge_tx_ring_entries);
94 SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_ring, CTLFLAG_RDTUN,
95 	   &sfxge_tx_ring_entries, 0,
96 	   "Maximum number of descriptors in a transmit ring");
97 
98 static void
99 sfxge_reset(void *arg, int npending);
100 
101 static int
102 sfxge_estimate_rsrc_limits(struct sfxge_softc *sc)
103 {
104 	efx_drv_limits_t limits;
105 	int rc;
106 	unsigned int evq_max;
107 	uint32_t evq_allocated;
108 	uint32_t rxq_allocated;
109 	uint32_t txq_allocated;
110 
111 	/*
112 	 * Limit the number of event queues to:
113 	 *  - number of CPUs
114 	 *  - hardwire maximum RSS channels
115 	 *  - administratively specified maximum RSS channels
116 	 */
117 	evq_max = MIN(mp_ncpus, EFX_MAXRSS);
118 	if (sc->max_rss_channels > 0)
119 		evq_max = MIN(evq_max, sc->max_rss_channels);
120 
121 	memset(&limits, 0, sizeof(limits));
122 
123 	limits.edl_min_evq_count = 1;
124 	limits.edl_max_evq_count = evq_max;
125 	limits.edl_min_txq_count = SFXGE_TXQ_NTYPES;
126 	limits.edl_max_txq_count = evq_max + SFXGE_TXQ_NTYPES - 1;
127 	limits.edl_min_rxq_count = 1;
128 	limits.edl_max_rxq_count = evq_max;
129 
130 	efx_nic_set_drv_limits(sc->enp, &limits);
131 
132 	if ((rc = efx_nic_init(sc->enp)) != 0)
133 		return (rc);
134 
135 	rc = efx_nic_get_vi_pool(sc->enp, &evq_allocated, &rxq_allocated,
136 				 &txq_allocated);
137 	if (rc != 0) {
138 		efx_nic_fini(sc->enp);
139 		return (rc);
140 	}
141 
142 	KASSERT(txq_allocated >= SFXGE_TXQ_NTYPES,
143 		("txq_allocated < SFXGE_TXQ_NTYPES"));
144 
145 	sc->evq_max = MIN(evq_allocated, evq_max);
146 	sc->evq_max = MIN(rxq_allocated, sc->evq_max);
147 	sc->evq_max = MIN(txq_allocated - (SFXGE_TXQ_NTYPES - 1),
148 			  sc->evq_max);
149 
150 	KASSERT(sc->evq_max <= evq_max,
151 		("allocated more than maximum requested"));
152 
153 	/*
154 	 * NIC is kept initialized in the case of success to be able to
155 	 * initialize port to find out media types.
156 	 */
157 	return (0);
158 }
159 
160 static int
161 sfxge_set_drv_limits(struct sfxge_softc *sc)
162 {
163 	efx_drv_limits_t limits;
164 
165 	memset(&limits, 0, sizeof(limits));
166 
167 	/* Limits are strict since take into account initial estimation */
168 	limits.edl_min_evq_count = limits.edl_max_evq_count =
169 	    sc->intr.n_alloc;
170 	limits.edl_min_txq_count = limits.edl_max_txq_count =
171 	    sc->intr.n_alloc + SFXGE_TXQ_NTYPES - 1;
172 	limits.edl_min_rxq_count = limits.edl_max_rxq_count =
173 	    sc->intr.n_alloc;
174 
175 	return (efx_nic_set_drv_limits(sc->enp, &limits));
176 }
177 
178 static int
179 sfxge_start(struct sfxge_softc *sc)
180 {
181 	int rc;
182 
183 	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
184 
185 	if (sc->init_state == SFXGE_STARTED)
186 		return (0);
187 
188 	if (sc->init_state != SFXGE_REGISTERED) {
189 		rc = EINVAL;
190 		goto fail;
191 	}
192 
193 	/* Set required resource limits */
194 	if ((rc = sfxge_set_drv_limits(sc)) != 0)
195 		goto fail;
196 
197 	if ((rc = efx_nic_init(sc->enp)) != 0)
198 		goto fail;
199 
200 	/* Start processing interrupts. */
201 	if ((rc = sfxge_intr_start(sc)) != 0)
202 		goto fail2;
203 
204 	/* Start processing events. */
205 	if ((rc = sfxge_ev_start(sc)) != 0)
206 		goto fail3;
207 
208 	/* Fire up the port. */
209 	if ((rc = sfxge_port_start(sc)) != 0)
210 		goto fail4;
211 
212 	/* Start the receiver side. */
213 	if ((rc = sfxge_rx_start(sc)) != 0)
214 		goto fail5;
215 
216 	/* Start the transmitter side. */
217 	if ((rc = sfxge_tx_start(sc)) != 0)
218 		goto fail6;
219 
220 	sc->init_state = SFXGE_STARTED;
221 
222 	/* Tell the stack we're running. */
223 	sc->ifnet->if_drv_flags |= IFF_DRV_RUNNING;
224 	sc->ifnet->if_drv_flags &= ~IFF_DRV_OACTIVE;
225 
226 	return (0);
227 
228 fail6:
229 	sfxge_rx_stop(sc);
230 
231 fail5:
232 	sfxge_port_stop(sc);
233 
234 fail4:
235 	sfxge_ev_stop(sc);
236 
237 fail3:
238 	sfxge_intr_stop(sc);
239 
240 fail2:
241 	efx_nic_fini(sc->enp);
242 
243 fail:
244 	device_printf(sc->dev, "sfxge_start: %d\n", rc);
245 
246 	return (rc);
247 }
248 
/*
 * if_init callback: start the adapter under the adapter lock.
 * The callback has no way to report failure, so the result of
 * sfxge_start() is deliberately discarded.
 */
static void
sfxge_if_init(void *arg)
{
	struct sfxge_softc *sc = arg;

	SFXGE_ADAPTER_LOCK(sc);
	(void)sfxge_start(sc);
	SFXGE_ADAPTER_UNLOCK(sc);
}
260 
261 static void
262 sfxge_stop(struct sfxge_softc *sc)
263 {
264 	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
265 
266 	if (sc->init_state != SFXGE_STARTED)
267 		return;
268 
269 	sc->init_state = SFXGE_REGISTERED;
270 
271 	/* Stop the transmitter. */
272 	sfxge_tx_stop(sc);
273 
274 	/* Stop the receiver. */
275 	sfxge_rx_stop(sc);
276 
277 	/* Stop the port. */
278 	sfxge_port_stop(sc);
279 
280 	/* Stop processing events. */
281 	sfxge_ev_stop(sc);
282 
283 	/* Stop processing interrupts. */
284 	sfxge_intr_stop(sc);
285 
286 	efx_nic_fini(sc->enp);
287 
288 	sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING;
289 }
290 
291 
292 static int
293 sfxge_vpd_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
294 {
295 	efx_vpd_value_t value;
296 	int rc = 0;
297 
298 	switch (ioc->u.vpd.op) {
299 	case SFXGE_VPD_OP_GET_KEYWORD:
300 		value.evv_tag = ioc->u.vpd.tag;
301 		value.evv_keyword = ioc->u.vpd.keyword;
302 		rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value);
303 		if (rc != 0)
304 			break;
305 		ioc->u.vpd.len = MIN(ioc->u.vpd.len, value.evv_length);
306 		if (ioc->u.vpd.payload != 0) {
307 			rc = copyout(value.evv_value, ioc->u.vpd.payload,
308 				     ioc->u.vpd.len);
309 		}
310 		break;
311 	case SFXGE_VPD_OP_SET_KEYWORD:
312 		if (ioc->u.vpd.len > sizeof(value.evv_value))
313 			return (EINVAL);
314 		value.evv_tag = ioc->u.vpd.tag;
315 		value.evv_keyword = ioc->u.vpd.keyword;
316 		value.evv_length = ioc->u.vpd.len;
317 		rc = copyin(ioc->u.vpd.payload, value.evv_value, value.evv_length);
318 		if (rc != 0)
319 			break;
320 		rc = efx_vpd_set(sc->enp, sc->vpd_data, sc->vpd_size, &value);
321 		if (rc != 0)
322 			break;
323 		rc = efx_vpd_verify(sc->enp, sc->vpd_data, sc->vpd_size);
324 		if (rc != 0)
325 			break;
326 		rc = efx_vpd_write(sc->enp, sc->vpd_data, sc->vpd_size);
327 		break;
328 	default:
329 		rc = EOPNOTSUPP;
330 		break;
331 	}
332 
333 	return (rc);
334 }
335 
336 static int
337 sfxge_private_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
338 {
339 	switch (ioc->op) {
340 	case SFXGE_MCDI_IOC:
341 		return (sfxge_mcdi_ioctl(sc, ioc));
342 	case SFXGE_NVRAM_IOC:
343 		return (sfxge_nvram_ioctl(sc, ioc));
344 	case SFXGE_VPD_IOC:
345 		return (sfxge_vpd_ioctl(sc, ioc));
346 	default:
347 		return (EOPNOTSUPP);
348 	}
349 }
350 
351 
352 static int
353 sfxge_if_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
354 {
355 	struct sfxge_softc *sc;
356 	struct ifreq *ifr;
357 	sfxge_ioc_t ioc;
358 	int error;
359 
360 	ifr = (struct ifreq *)data;
361 	sc = ifp->if_softc;
362 	error = 0;
363 
364 	switch (command) {
365 	case SIOCSIFFLAGS:
366 		SFXGE_ADAPTER_LOCK(sc);
367 		if (ifp->if_flags & IFF_UP) {
368 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
369 				if ((ifp->if_flags ^ sc->if_flags) &
370 				    (IFF_PROMISC | IFF_ALLMULTI)) {
371 					sfxge_mac_filter_set(sc);
372 				}
373 			} else
374 				sfxge_start(sc);
375 		} else
376 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
377 				sfxge_stop(sc);
378 		sc->if_flags = ifp->if_flags;
379 		SFXGE_ADAPTER_UNLOCK(sc);
380 		break;
381 	case SIOCSIFMTU:
382 		if (ifr->ifr_mtu == ifp->if_mtu) {
383 			/* Nothing to do */
384 			error = 0;
385 		} else if (ifr->ifr_mtu > SFXGE_MAX_MTU) {
386 			error = EINVAL;
387 		} else if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
388 			ifp->if_mtu = ifr->ifr_mtu;
389 			error = 0;
390 		} else {
391 			/* Restart required */
392 			SFXGE_ADAPTER_LOCK(sc);
393 			sfxge_stop(sc);
394 			ifp->if_mtu = ifr->ifr_mtu;
395 			error = sfxge_start(sc);
396 			SFXGE_ADAPTER_UNLOCK(sc);
397 			if (error != 0) {
398 				ifp->if_flags &= ~IFF_UP;
399 				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
400 				if_down(ifp);
401 			}
402 		}
403 		break;
404 	case SIOCADDMULTI:
405 	case SIOCDELMULTI:
406 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
407 			sfxge_mac_filter_set(sc);
408 		break;
409 	case SIOCSIFCAP:
410 	{
411 		int reqcap = ifr->ifr_reqcap;
412 		int capchg_mask;
413 
414 		SFXGE_ADAPTER_LOCK(sc);
415 
416 		/* Capabilities to be changed in accordance with request */
417 		capchg_mask = ifp->if_capenable ^ reqcap;
418 
419 		/*
420 		 * The networking core already rejects attempts to
421 		 * enable capabilities we don't have.  We still have
422 		 * to reject attempts to disable capabilities that we
423 		 * can't (yet) disable.
424 		 */
425 		KASSERT((reqcap & ~ifp->if_capabilities) == 0,
426 		    ("Unsupported capabilities 0x%x requested 0x%x vs "
427 		     "supported 0x%x",
428 		     reqcap & ~ifp->if_capabilities,
429 		     reqcap , ifp->if_capabilities));
430 		if (capchg_mask & SFXGE_CAP_FIXED) {
431 			error = EINVAL;
432 			SFXGE_ADAPTER_UNLOCK(sc);
433 			break;
434 		}
435 
436 		/* Check request before any changes */
437 		if ((capchg_mask & IFCAP_TSO4) &&
438 		    (reqcap & (IFCAP_TSO4 | IFCAP_TXCSUM)) == IFCAP_TSO4) {
439 			error = EAGAIN;
440 			SFXGE_ADAPTER_UNLOCK(sc);
441 			if_printf(ifp, "enable txcsum before tso4\n");
442 			break;
443 		}
444 		if ((capchg_mask & IFCAP_TSO6) &&
445 		    (reqcap & (IFCAP_TSO6 | IFCAP_TXCSUM_IPV6)) == IFCAP_TSO6) {
446 			error = EAGAIN;
447 			SFXGE_ADAPTER_UNLOCK(sc);
448 			if_printf(ifp, "enable txcsum6 before tso6\n");
449 			break;
450 		}
451 
452 		if (reqcap & IFCAP_TXCSUM) {
453 			ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP);
454 		} else {
455 			ifp->if_hwassist &= ~(CSUM_IP | CSUM_TCP | CSUM_UDP);
456 			if (reqcap & IFCAP_TSO4) {
457 				reqcap &= ~IFCAP_TSO4;
458 				if_printf(ifp,
459 				    "tso4 disabled due to -txcsum\n");
460 			}
461 		}
462 		if (reqcap & IFCAP_TXCSUM_IPV6) {
463 			ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
464 		} else {
465 			ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
466 			if (reqcap & IFCAP_TSO6) {
467 				reqcap &= ~IFCAP_TSO6;
468 				if_printf(ifp,
469 				    "tso6 disabled due to -txcsum6\n");
470 			}
471 		}
472 
473 		/*
474 		 * The kernel takes both IFCAP_TSOx and CSUM_TSO into
475 		 * account before using TSO. So, we do not touch
476 		 * checksum flags when IFCAP_TSOx is modified.
477 		 * Note that CSUM_TSO is (CSUM_IP_TSO|CSUM_IP6_TSO),
478 		 * but both bits are set in IPv4 and IPv6 mbufs.
479 		 */
480 
481 		ifp->if_capenable = reqcap;
482 
483 		SFXGE_ADAPTER_UNLOCK(sc);
484 		break;
485 	}
486 	case SIOCSIFMEDIA:
487 	case SIOCGIFMEDIA:
488 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
489 		break;
490 	case SIOCGPRIVATE_0:
491 		error = priv_check(curthread, PRIV_DRIVER);
492 		if (error != 0)
493 			break;
494 		error = copyin(ifr->ifr_data, &ioc, sizeof(ioc));
495 		if (error != 0)
496 			return (error);
497 		error = sfxge_private_ioctl(sc, &ioc);
498 		if (error == 0) {
499 			error = copyout(&ioc, ifr->ifr_data, sizeof(ioc));
500 		}
501 		break;
502 	default:
503 		error = ether_ioctl(ifp, command, data);
504 	}
505 
506 	return (error);
507 }
508 
509 static void
510 sfxge_ifnet_fini(struct ifnet *ifp)
511 {
512 	struct sfxge_softc *sc = ifp->if_softc;
513 
514 	SFXGE_ADAPTER_LOCK(sc);
515 	sfxge_stop(sc);
516 	SFXGE_ADAPTER_UNLOCK(sc);
517 
518 	ifmedia_removeall(&sc->media);
519 	ether_ifdetach(ifp);
520 	if_free(ifp);
521 }
522 
523 static int
524 sfxge_ifnet_init(struct ifnet *ifp, struct sfxge_softc *sc)
525 {
526 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
527 	device_t dev;
528 	int rc;
529 
530 	dev = sc->dev;
531 	sc->ifnet = ifp;
532 
533 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
534 	ifp->if_init = sfxge_if_init;
535 	ifp->if_softc = sc;
536 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
537 	ifp->if_ioctl = sfxge_if_ioctl;
538 
539 	ifp->if_capabilities = SFXGE_CAP;
540 	ifp->if_capenable = SFXGE_CAP_ENABLE;
541 
542 #ifdef SFXGE_LRO
543 	ifp->if_capabilities |= IFCAP_LRO;
544 	ifp->if_capenable |= IFCAP_LRO;
545 #endif
546 
547 	if (encp->enc_hw_tx_insert_vlan_enabled) {
548 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
549 		ifp->if_capenable |= IFCAP_VLAN_HWTAGGING;
550 	}
551 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
552 			   CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
553 
554 	ether_ifattach(ifp, encp->enc_mac_addr);
555 
556 	ifp->if_transmit = sfxge_if_transmit;
557 	ifp->if_qflush = sfxge_if_qflush;
558 
559 	ifp->if_get_counter = sfxge_get_counter;
560 
561 	DBGPRINT(sc->dev, "ifmedia_init");
562 	if ((rc = sfxge_port_ifmedia_init(sc)) != 0)
563 		goto fail;
564 
565 	return (0);
566 
567 fail:
568 	ether_ifdetach(sc->ifnet);
569 	return (rc);
570 }
571 
572 void
573 sfxge_sram_buf_tbl_alloc(struct sfxge_softc *sc, size_t n, uint32_t *idp)
574 {
575 	KASSERT(sc->buffer_table_next + n <=
576 		efx_nic_cfg_get(sc->enp)->enc_buftbl_limit,
577 		("buffer table full"));
578 
579 	*idp = sc->buffer_table_next;
580 	sc->buffer_table_next += n;
581 }
582 
583 static int
584 sfxge_bar_init(struct sfxge_softc *sc)
585 {
586 	efsys_bar_t *esbp = &sc->bar;
587 
588 	esbp->esb_rid = PCIR_BAR(EFX_MEM_BAR);
589 	if ((esbp->esb_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
590 	    &esbp->esb_rid, RF_ACTIVE)) == NULL) {
591 		device_printf(sc->dev, "Cannot allocate BAR region %d\n",
592 		    EFX_MEM_BAR);
593 		return (ENXIO);
594 	}
595 	esbp->esb_tag = rman_get_bustag(esbp->esb_res);
596 	esbp->esb_handle = rman_get_bushandle(esbp->esb_res);
597 
598 	SFXGE_BAR_LOCK_INIT(esbp, device_get_nameunit(sc->dev));
599 
600 	return (0);
601 }
602 
603 static void
604 sfxge_bar_fini(struct sfxge_softc *sc)
605 {
606 	efsys_bar_t *esbp = &sc->bar;
607 
608 	bus_release_resource(sc->dev, SYS_RES_MEMORY, esbp->esb_rid,
609 	    esbp->esb_res);
610 	SFXGE_BAR_LOCK_DESTROY(esbp);
611 }
612 
613 static int
614 sfxge_create(struct sfxge_softc *sc)
615 {
616 	device_t dev;
617 	efx_nic_t *enp;
618 	int error;
619 	char rss_param_name[sizeof(SFXGE_PARAM(%d.max_rss_channels))];
620 
621 	dev = sc->dev;
622 
623 	SFXGE_ADAPTER_LOCK_INIT(sc, device_get_nameunit(sc->dev));
624 
625 	sc->max_rss_channels = 0;
626 	snprintf(rss_param_name, sizeof(rss_param_name),
627 		 SFXGE_PARAM(%d.max_rss_channels),
628 		 (int)device_get_unit(dev));
629 	TUNABLE_INT_FETCH(rss_param_name, &sc->max_rss_channels);
630 
631 	sc->stats_node = SYSCTL_ADD_NODE(
632 		device_get_sysctl_ctx(dev),
633 		SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
634 		OID_AUTO, "stats", CTLFLAG_RD, NULL, "Statistics");
635 	if (sc->stats_node == NULL) {
636 		error = ENOMEM;
637 		goto fail;
638 	}
639 
640 	TASK_INIT(&sc->task_reset, 0, sfxge_reset, sc);
641 
642 	(void) pci_enable_busmaster(dev);
643 
644 	/* Initialize DMA mappings. */
645 	DBGPRINT(sc->dev, "dma_init...");
646 	if ((error = sfxge_dma_init(sc)) != 0)
647 		goto fail;
648 
649 	/* Map the device registers. */
650 	DBGPRINT(sc->dev, "bar_init...");
651 	if ((error = sfxge_bar_init(sc)) != 0)
652 		goto fail;
653 
654 	error = efx_family(pci_get_vendor(dev), pci_get_device(dev),
655 	    &sc->family);
656 	KASSERT(error == 0, ("Family should be filtered by sfxge_probe()"));
657 
658 	DBGPRINT(sc->dev, "nic_create...");
659 
660 	/* Create the common code nic object. */
661 	SFXGE_EFSYS_LOCK_INIT(&sc->enp_lock,
662 			      device_get_nameunit(sc->dev), "nic");
663 	if ((error = efx_nic_create(sc->family, (efsys_identifier_t *)sc,
664 	    &sc->bar, &sc->enp_lock, &enp)) != 0)
665 		goto fail3;
666 	sc->enp = enp;
667 
668 	if (!ISP2(sfxge_rx_ring_entries) ||
669 	    (sfxge_rx_ring_entries < EFX_RXQ_MINNDESCS) ||
670 	    (sfxge_rx_ring_entries > EFX_RXQ_MAXNDESCS)) {
671 		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
672 		    SFXGE_PARAM_RX_RING, sfxge_rx_ring_entries,
673 		    EFX_RXQ_MINNDESCS, EFX_RXQ_MAXNDESCS);
674 		error = EINVAL;
675 		goto fail_rx_ring_entries;
676 	}
677 	sc->rxq_entries = sfxge_rx_ring_entries;
678 
679 	if (!ISP2(sfxge_tx_ring_entries) ||
680 	    (sfxge_tx_ring_entries < EFX_TXQ_MINNDESCS) ||
681 	    (sfxge_tx_ring_entries > EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)))) {
682 		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
683 		    SFXGE_PARAM_TX_RING, sfxge_tx_ring_entries,
684 		    EFX_TXQ_MINNDESCS, EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)));
685 		error = EINVAL;
686 		goto fail_tx_ring_entries;
687 	}
688 	sc->txq_entries = sfxge_tx_ring_entries;
689 
690 	/* Initialize MCDI to talk to the microcontroller. */
691 	DBGPRINT(sc->dev, "mcdi_init...");
692 	if ((error = sfxge_mcdi_init(sc)) != 0)
693 		goto fail4;
694 
695 	/* Probe the NIC and build the configuration data area. */
696 	DBGPRINT(sc->dev, "nic_probe...");
697 	if ((error = efx_nic_probe(enp)) != 0)
698 		goto fail5;
699 
700 	SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev),
701 			  SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
702 			  OID_AUTO, "version", CTLFLAG_RD,
703 			  SFXGE_VERSION_STRING, 0,
704 			  "Driver version");
705 
706 	SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
707 			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
708 			OID_AUTO, "phy_type", CTLFLAG_RD,
709 			NULL, efx_nic_cfg_get(enp)->enc_phy_type,
710 			"PHY type");
711 
712 	/* Initialize the NVRAM. */
713 	DBGPRINT(sc->dev, "nvram_init...");
714 	if ((error = efx_nvram_init(enp)) != 0)
715 		goto fail6;
716 
717 	/* Initialize the VPD. */
718 	DBGPRINT(sc->dev, "vpd_init...");
719 	if ((error = efx_vpd_init(enp)) != 0)
720 		goto fail7;
721 
722 	efx_mcdi_new_epoch(enp);
723 
724 	/* Reset the NIC. */
725 	DBGPRINT(sc->dev, "nic_reset...");
726 	if ((error = efx_nic_reset(enp)) != 0)
727 		goto fail8;
728 
729 	/* Initialize buffer table allocation. */
730 	sc->buffer_table_next = 0;
731 
732 	/*
733 	 * Guarantee minimum and estimate maximum number of event queues
734 	 * to take it into account when MSI-X interrupts are allocated.
735 	 * It initializes NIC and keeps it initialized on success.
736 	 */
737 	if ((error = sfxge_estimate_rsrc_limits(sc)) != 0)
738 		goto fail8;
739 
740 	/* Set up interrupts. */
741 	DBGPRINT(sc->dev, "intr_init...");
742 	if ((error = sfxge_intr_init(sc)) != 0)
743 		goto fail9;
744 
745 	/* Initialize event processing state. */
746 	DBGPRINT(sc->dev, "ev_init...");
747 	if ((error = sfxge_ev_init(sc)) != 0)
748 		goto fail11;
749 
750 	/* Initialize port state. */
751 	DBGPRINT(sc->dev, "port_init...");
752 	if ((error = sfxge_port_init(sc)) != 0)
753 		goto fail12;
754 
755 	/* Initialize receive state. */
756 	DBGPRINT(sc->dev, "rx_init...");
757 	if ((error = sfxge_rx_init(sc)) != 0)
758 		goto fail13;
759 
760 	/* Initialize transmit state. */
761 	DBGPRINT(sc->dev, "tx_init...");
762 	if ((error = sfxge_tx_init(sc)) != 0)
763 		goto fail14;
764 
765 	sc->init_state = SFXGE_INITIALIZED;
766 
767 	DBGPRINT(sc->dev, "success");
768 	return (0);
769 
770 fail14:
771 	sfxge_rx_fini(sc);
772 
773 fail13:
774 	sfxge_port_fini(sc);
775 
776 fail12:
777 	sfxge_ev_fini(sc);
778 
779 fail11:
780 	sfxge_intr_fini(sc);
781 
782 fail9:
783 	efx_nic_fini(sc->enp);
784 
785 fail8:
786 	efx_vpd_fini(enp);
787 
788 fail7:
789 	efx_nvram_fini(enp);
790 
791 fail6:
792 	efx_nic_unprobe(enp);
793 
794 fail5:
795 	sfxge_mcdi_fini(sc);
796 
797 fail4:
798 fail_tx_ring_entries:
799 fail_rx_ring_entries:
800 	sc->enp = NULL;
801 	efx_nic_destroy(enp);
802 	SFXGE_EFSYS_LOCK_DESTROY(&sc->enp_lock);
803 
804 fail3:
805 	sfxge_bar_fini(sc);
806 	(void) pci_disable_busmaster(sc->dev);
807 
808 fail:
809 	DBGPRINT(sc->dev, "failed %d", error);
810 	sc->dev = NULL;
811 	SFXGE_ADAPTER_LOCK_DESTROY(sc);
812 	return (error);
813 }
814 
815 static void
816 sfxge_destroy(struct sfxge_softc *sc)
817 {
818 	efx_nic_t *enp;
819 
820 	/* Clean up transmit state. */
821 	sfxge_tx_fini(sc);
822 
823 	/* Clean up receive state. */
824 	sfxge_rx_fini(sc);
825 
826 	/* Clean up port state. */
827 	sfxge_port_fini(sc);
828 
829 	/* Clean up event processing state. */
830 	sfxge_ev_fini(sc);
831 
832 	/* Clean up interrupts. */
833 	sfxge_intr_fini(sc);
834 
835 	/* Tear down common code subsystems. */
836 	efx_nic_reset(sc->enp);
837 	efx_vpd_fini(sc->enp);
838 	efx_nvram_fini(sc->enp);
839 	efx_nic_unprobe(sc->enp);
840 
841 	/* Tear down MCDI. */
842 	sfxge_mcdi_fini(sc);
843 
844 	/* Destroy common code context. */
845 	enp = sc->enp;
846 	sc->enp = NULL;
847 	efx_nic_destroy(enp);
848 
849 	/* Free DMA memory. */
850 	sfxge_dma_fini(sc);
851 
852 	/* Free mapped BARs. */
853 	sfxge_bar_fini(sc);
854 
855 	(void) pci_disable_busmaster(sc->dev);
856 
857 	taskqueue_drain(taskqueue_thread, &sc->task_reset);
858 
859 	/* Destroy the softc lock. */
860 	SFXGE_ADAPTER_LOCK_DESTROY(sc);
861 }
862 
863 static int
864 sfxge_vpd_handler(SYSCTL_HANDLER_ARGS)
865 {
866 	struct sfxge_softc *sc = arg1;
867 	efx_vpd_value_t value;
868 	int rc;
869 
870 	value.evv_tag = arg2 >> 16;
871 	value.evv_keyword = arg2 & 0xffff;
872 	if ((rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value))
873 	    != 0)
874 		return (rc);
875 
876 	return (SYSCTL_OUT(req, value.evv_value, value.evv_length));
877 }
878 
879 static void
880 sfxge_vpd_try_add(struct sfxge_softc *sc, struct sysctl_oid_list *list,
881 		  efx_vpd_tag_t tag, const char *keyword)
882 {
883 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
884 	efx_vpd_value_t value;
885 
886 	/* Check whether VPD tag/keyword is present */
887 	value.evv_tag = tag;
888 	value.evv_keyword = EFX_VPD_KEYWORD(keyword[0], keyword[1]);
889 	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) != 0)
890 		return;
891 
892 	SYSCTL_ADD_PROC(
893 		ctx, list, OID_AUTO, keyword, CTLTYPE_STRING|CTLFLAG_RD,
894 		sc, tag << 16 | EFX_VPD_KEYWORD(keyword[0], keyword[1]),
895 		sfxge_vpd_handler, "A", "");
896 }
897 
898 static int
899 sfxge_vpd_init(struct sfxge_softc *sc)
900 {
901 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
902 	struct sysctl_oid *vpd_node;
903 	struct sysctl_oid_list *vpd_list;
904 	char keyword[3];
905 	efx_vpd_value_t value;
906 	int rc;
907 
908 	if ((rc = efx_vpd_size(sc->enp, &sc->vpd_size)) != 0) {
909 		/*
910 		 * Unpriviledged functions deny VPD access.
911 		 * Simply skip VPD in this case.
912 		 */
913 		if (rc == EACCES)
914 			goto done;
915 		goto fail;
916 	}
917 	sc->vpd_data = malloc(sc->vpd_size, M_SFXGE, M_WAITOK);
918 	if ((rc = efx_vpd_read(sc->enp, sc->vpd_data, sc->vpd_size)) != 0)
919 		goto fail2;
920 
921 	/* Copy ID (product name) into device description, and log it. */
922 	value.evv_tag = EFX_VPD_ID;
923 	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) == 0) {
924 		value.evv_value[value.evv_length] = 0;
925 		device_set_desc_copy(sc->dev, value.evv_value);
926 		device_printf(sc->dev, "%s\n", value.evv_value);
927 	}
928 
929 	vpd_node = SYSCTL_ADD_NODE(
930 		ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
931 		OID_AUTO, "vpd", CTLFLAG_RD, NULL, "Vital Product Data");
932 	vpd_list = SYSCTL_CHILDREN(vpd_node);
933 
934 	/* Add sysctls for all expected and any vendor-defined keywords. */
935 	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "PN");
936 	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "EC");
937 	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "SN");
938 	keyword[0] = 'V';
939 	keyword[2] = 0;
940 	for (keyword[1] = '0'; keyword[1] <= '9'; keyword[1]++)
941 		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
942 	for (keyword[1] = 'A'; keyword[1] <= 'Z'; keyword[1]++)
943 		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
944 
945 done:
946 	return (0);
947 
948 fail2:
949 	free(sc->vpd_data, M_SFXGE);
950 fail:
951 	return (rc);
952 }
953 
954 static void
955 sfxge_vpd_fini(struct sfxge_softc *sc)
956 {
957 	free(sc->vpd_data, M_SFXGE);
958 }
959 
960 static void
961 sfxge_reset(void *arg, int npending)
962 {
963 	struct sfxge_softc *sc;
964 	int rc;
965 	unsigned attempt;
966 
967 	(void)npending;
968 
969 	sc = (struct sfxge_softc *)arg;
970 
971 	SFXGE_ADAPTER_LOCK(sc);
972 
973 	if (sc->init_state != SFXGE_STARTED)
974 		goto done;
975 
976 	sfxge_stop(sc);
977 	efx_nic_reset(sc->enp);
978 	for (attempt = 0; attempt < 3; ++attempt) {
979 		if ((rc = sfxge_start(sc)) == 0)
980 			goto done;
981 
982 		device_printf(sc->dev, "start on reset failed (%d)\n", rc);
983 		DELAY(100000);
984 	}
985 
986 	device_printf(sc->dev, "reset failed; interface is now stopped\n");
987 
988 done:
989 	SFXGE_ADAPTER_UNLOCK(sc);
990 }
991 
992 void
993 sfxge_schedule_reset(struct sfxge_softc *sc)
994 {
995 	taskqueue_enqueue(taskqueue_thread, &sc->task_reset);
996 }
997 
998 static int
999 sfxge_attach(device_t dev)
1000 {
1001 	struct sfxge_softc *sc;
1002 	struct ifnet *ifp;
1003 	int error;
1004 
1005 	sc = device_get_softc(dev);
1006 	sc->dev = dev;
1007 
1008 	/* Allocate ifnet. */
1009 	ifp = if_alloc(IFT_ETHER);
1010 	if (ifp == NULL) {
1011 		device_printf(dev, "Couldn't allocate ifnet\n");
1012 		error = ENOMEM;
1013 		goto fail;
1014 	}
1015 	sc->ifnet = ifp;
1016 
1017 	/* Initialize hardware. */
1018 	DBGPRINT(sc->dev, "create nic");
1019 	if ((error = sfxge_create(sc)) != 0)
1020 		goto fail2;
1021 
1022 	/* Create the ifnet for the port. */
1023 	DBGPRINT(sc->dev, "init ifnet");
1024 	if ((error = sfxge_ifnet_init(ifp, sc)) != 0)
1025 		goto fail3;
1026 
1027 	DBGPRINT(sc->dev, "init vpd");
1028 	if ((error = sfxge_vpd_init(sc)) != 0)
1029 		goto fail4;
1030 
1031 	/*
1032 	 * NIC is initialized inside sfxge_create() and kept inialized
1033 	 * to be able to initialize port to discover media types in
1034 	 * sfxge_ifnet_init().
1035 	 */
1036 	efx_nic_fini(sc->enp);
1037 
1038 	sc->init_state = SFXGE_REGISTERED;
1039 
1040 	DBGPRINT(sc->dev, "success");
1041 	return (0);
1042 
1043 fail4:
1044 	sfxge_ifnet_fini(ifp);
1045 fail3:
1046 	efx_nic_fini(sc->enp);
1047 	sfxge_destroy(sc);
1048 
1049 fail2:
1050 	if_free(sc->ifnet);
1051 
1052 fail:
1053 	DBGPRINT(sc->dev, "failed %d", error);
1054 	return (error);
1055 }
1056 
1057 static int
1058 sfxge_detach(device_t dev)
1059 {
1060 	struct sfxge_softc *sc;
1061 
1062 	sc = device_get_softc(dev);
1063 
1064 	sfxge_vpd_fini(sc);
1065 
1066 	/* Destroy the ifnet. */
1067 	sfxge_ifnet_fini(sc->ifnet);
1068 
1069 	/* Tear down hardware. */
1070 	sfxge_destroy(sc);
1071 
1072 	return (0);
1073 }
1074 
1075 static int
1076 sfxge_probe(device_t dev)
1077 {
1078 	uint16_t pci_vendor_id;
1079 	uint16_t pci_device_id;
1080 	efx_family_t family;
1081 	int rc;
1082 
1083 	pci_vendor_id = pci_get_vendor(dev);
1084 	pci_device_id = pci_get_device(dev);
1085 
1086 	DBGPRINT(dev, "PCI ID %04x:%04x", pci_vendor_id, pci_device_id);
1087 	rc = efx_family(pci_vendor_id, pci_device_id, &family);
1088 	if (rc != 0) {
1089 		DBGPRINT(dev, "efx_family fail %d", rc);
1090 		return (ENXIO);
1091 	}
1092 
1093 	if (family == EFX_FAMILY_SIENA) {
1094 		device_set_desc(dev, "Solarflare SFC9000 family");
1095 		return (0);
1096 	}
1097 
1098 	if (family == EFX_FAMILY_HUNTINGTON) {
1099 		device_set_desc(dev, "Solarflare SFC9100 family");
1100 		return (0);
1101 	}
1102 
1103 	DBGPRINT(dev, "impossible controller family %d", family);
1104 	return (ENXIO);
1105 }
1106 
1107 static device_method_t sfxge_methods[] = {
1108 	DEVMETHOD(device_probe,		sfxge_probe),
1109 	DEVMETHOD(device_attach,	sfxge_attach),
1110 	DEVMETHOD(device_detach,	sfxge_detach),
1111 
1112 	DEVMETHOD_END
1113 };
1114 
1115 static devclass_t sfxge_devclass;
1116 
1117 static driver_t sfxge_driver = {
1118 	"sfxge",
1119 	sfxge_methods,
1120 	sizeof(struct sfxge_softc)
1121 };
1122 
1123 DRIVER_MODULE(sfxge, pci, sfxge_driver, sfxge_devclass, 0, 0);
1124