xref: /freebsd/sys/dev/sfxge/sfxge_port.c (revision ab00ac327a66a53edaac95b536b209db3ae2cd9f)
1 /*-
2  * Copyright (c) 2010-2016 Solarflare Communications Inc.
3  * All rights reserved.
4  *
5  * This software was developed in part by Philip Paeps under contract for
6  * Solarflare Communications, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright notice,
12  *    this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  *    this list of conditions and the following disclaimer in the documentation
15  *    and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * The views and conclusions contained in the software and documentation are
30  * those of the authors and should not be interpreted as representing official
31  * policies, either expressed or implied, of the FreeBSD Project.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/types.h>
38 #include <sys/limits.h>
39 #include <net/ethernet.h>
40 #include <net/if_dl.h>
41 
42 #include "common/efx.h"
43 
44 #include "sfxge.h"
45 
46 #define	SFXGE_PARAM_STATS_UPDATE_PERIOD_MS \
47 	SFXGE_PARAM(stats_update_period_ms)
48 static int sfxge_stats_update_period_ms = SFXGE_STATS_UPDATE_PERIOD_MS;
49 TUNABLE_INT(SFXGE_PARAM_STATS_UPDATE_PERIOD_MS,
50 	    &sfxge_stats_update_period_ms);
51 SYSCTL_INT(_hw_sfxge, OID_AUTO, stats_update_period_ms, CTLFLAG_RDTUN,
52 	   &sfxge_stats_update_period_ms, 0,
53 	   "netstat interface statistics update period in milliseconds");
54 
55 static int sfxge_phy_cap_mask(struct sfxge_softc *, int, uint32_t *);
56 
57 static int
58 sfxge_mac_stat_update(struct sfxge_softc *sc)
59 {
60 	struct sfxge_port *port = &sc->port;
61 	efsys_mem_t *esmp = &(port->mac_stats.dma_buf);
62 	clock_t now;
63 	unsigned int min_ticks;
64 	unsigned int count;
65 	int rc;
66 
67 	SFXGE_PORT_LOCK_ASSERT_OWNED(port);
68 
69 	if (__predict_false(port->init_state != SFXGE_PORT_STARTED)) {
70 		rc = 0;
71 		goto out;
72 	}
73 
74 	min_ticks = (unsigned int)hz * port->stats_update_period_ms / 1000;
75 
76 	now = ticks;
77 	if ((unsigned int)(now - port->mac_stats.update_time) < min_ticks) {
78 		rc = 0;
79 		goto out;
80 	}
81 
82 	port->mac_stats.update_time = now;
83 
84 	/* If we're unlucky enough to read statistics wduring the DMA, wait
85 	 * up to 10ms for it to finish (typically takes <500us) */
86 	for (count = 0; count < 100; ++count) {
87 		EFSYS_PROBE1(wait, unsigned int, count);
88 
89 		/* Try to update the cached counters */
90 		if ((rc = efx_mac_stats_update(sc->enp, esmp,
91 		    port->mac_stats.decode_buf, NULL)) != EAGAIN)
92 			goto out;
93 
94 		DELAY(100);
95 	}
96 
97 	rc = ETIMEDOUT;
98 out:
99 	return (rc);
100 }
101 
102 uint64_t
103 sfxge_get_counter(struct ifnet *ifp, ift_counter c)
104 {
105 	struct sfxge_softc *sc = ifp->if_softc;
106 	uint64_t *mac_stats;
107 	uint64_t val;
108 
109 	SFXGE_PORT_LOCK(&sc->port);
110 
111 	/* Ignore error and use old values */
112 	(void)sfxge_mac_stat_update(sc);
113 
114 	mac_stats = (uint64_t *)sc->port.mac_stats.decode_buf;
115 
116 	switch (c) {
117 	case IFCOUNTER_IPACKETS:
118 		val = mac_stats[EFX_MAC_RX_PKTS];
119 		break;
120 	case IFCOUNTER_IERRORS:
121 		val = mac_stats[EFX_MAC_RX_ERRORS];
122 		break;
123 	case IFCOUNTER_OPACKETS:
124 		val = mac_stats[EFX_MAC_TX_PKTS];
125 		break;
126 	case IFCOUNTER_OERRORS:
127 		val = mac_stats[EFX_MAC_TX_ERRORS];
128 		break;
129 	case IFCOUNTER_COLLISIONS:
130 		val = mac_stats[EFX_MAC_TX_SGL_COL_PKTS] +
131 		      mac_stats[EFX_MAC_TX_MULT_COL_PKTS] +
132 		      mac_stats[EFX_MAC_TX_EX_COL_PKTS] +
133 		      mac_stats[EFX_MAC_TX_LATE_COL_PKTS];
134 		break;
135 	case IFCOUNTER_IBYTES:
136 		val = mac_stats[EFX_MAC_RX_OCTETS];
137 		break;
138 	case IFCOUNTER_OBYTES:
139 		val = mac_stats[EFX_MAC_TX_OCTETS];
140 		break;
141 	case IFCOUNTER_OMCASTS:
142 		val = mac_stats[EFX_MAC_TX_MULTICST_PKTS] +
143 		      mac_stats[EFX_MAC_TX_BRDCST_PKTS];
144 		break;
145 	case IFCOUNTER_OQDROPS:
146 		SFXGE_PORT_UNLOCK(&sc->port);
147 		return (sfxge_tx_get_drops(sc));
148 	case IFCOUNTER_IMCASTS:
149 		/* if_imcasts is maintained in net/if_ethersubr.c */
150 	case IFCOUNTER_IQDROPS:
151 		/* if_iqdrops is maintained in net/if_ethersubr.c */
152 	case IFCOUNTER_NOPROTO:
153 		/* if_noproto is maintained in net/if_ethersubr.c */
154 	default:
155 		SFXGE_PORT_UNLOCK(&sc->port);
156 		return (if_get_counter_default(ifp, c));
157 	}
158 
159 	SFXGE_PORT_UNLOCK(&sc->port);
160 
161 	return (val);
162 }
163 
164 static int
165 sfxge_mac_stat_handler(SYSCTL_HANDLER_ARGS)
166 {
167 	struct sfxge_softc *sc = arg1;
168 	unsigned int id = arg2;
169 	int rc;
170 	uint64_t val;
171 
172 	SFXGE_PORT_LOCK(&sc->port);
173 	if ((rc = sfxge_mac_stat_update(sc)) == 0)
174 		val = ((uint64_t *)sc->port.mac_stats.decode_buf)[id];
175 	SFXGE_PORT_UNLOCK(&sc->port);
176 
177 	if (rc == 0)
178 		rc = SYSCTL_OUT(req, &val, sizeof(val));
179 	return (rc);
180 }
181 
182 static void
183 sfxge_mac_stat_init(struct sfxge_softc *sc)
184 {
185 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
186 	struct sysctl_oid_list *stat_list;
187 	unsigned int id;
188 	const char *name;
189 
190 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
191 
192 	/* Initialise the named stats */
193 	for (id = 0; id < EFX_MAC_NSTATS; id++) {
194 		name = efx_mac_stat_name(sc->enp, id);
195 		SYSCTL_ADD_PROC(
196 			ctx, stat_list,
197 			OID_AUTO, name, CTLTYPE_U64|CTLFLAG_RD,
198 			sc, id, sfxge_mac_stat_handler, "Q",
199 			"");
200 	}
201 }
202 
203 #ifdef SFXGE_HAVE_PAUSE_MEDIAOPTS
204 
205 static unsigned int
206 sfxge_port_wanted_fc(struct sfxge_softc *sc)
207 {
208 	struct ifmedia_entry *ifm = sc->media.ifm_cur;
209 
210 	if (ifm->ifm_media == (IFM_ETHER | IFM_AUTO))
211 		return (EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE);
212 	return (((ifm->ifm_media & IFM_ETH_RXPAUSE) ? EFX_FCNTL_RESPOND : 0) |
213 		((ifm->ifm_media & IFM_ETH_TXPAUSE) ? EFX_FCNTL_GENERATE : 0));
214 }
215 
216 static unsigned int
217 sfxge_port_link_fc_ifm(struct sfxge_softc *sc)
218 {
219 	unsigned int wanted_fc, link_fc;
220 
221 	efx_mac_fcntl_get(sc->enp, &wanted_fc, &link_fc);
222 	return ((link_fc & EFX_FCNTL_RESPOND) ? IFM_ETH_RXPAUSE : 0) |
223 		((link_fc & EFX_FCNTL_GENERATE) ? IFM_ETH_TXPAUSE : 0);
224 }
225 
226 #else /* !SFXGE_HAVE_PAUSE_MEDIAOPTS */
227 
228 static unsigned int
229 sfxge_port_wanted_fc(struct sfxge_softc *sc)
230 {
231 	return (sc->port.wanted_fc);
232 }
233 
/*
 * Without pause media options there is nothing to add to the active
 * media word, so no option bits are ever reported.
 */
static unsigned int
sfxge_port_link_fc_ifm(struct sfxge_softc *sc)
{
	const unsigned int no_pause_opts = 0;

	return (no_pause_opts);
}
239 
240 static int
241 sfxge_port_wanted_fc_handler(SYSCTL_HANDLER_ARGS)
242 {
243 	struct sfxge_softc *sc;
244 	struct sfxge_port *port;
245 	unsigned int fcntl;
246 	int error;
247 
248 	sc = arg1;
249 	port = &sc->port;
250 
251 	if (req->newptr != NULL) {
252 		if ((error = SYSCTL_IN(req, &fcntl, sizeof(fcntl))) != 0)
253 			return (error);
254 
255 		SFXGE_PORT_LOCK(port);
256 
257 		if (port->wanted_fc != fcntl) {
258 			if (port->init_state == SFXGE_PORT_STARTED)
259 				error = efx_mac_fcntl_set(sc->enp,
260 							  port->wanted_fc,
261 							  B_TRUE);
262 			if (error == 0)
263 				port->wanted_fc = fcntl;
264 		}
265 
266 		SFXGE_PORT_UNLOCK(port);
267 	} else {
268 		SFXGE_PORT_LOCK(port);
269 		fcntl = port->wanted_fc;
270 		SFXGE_PORT_UNLOCK(port);
271 
272 		error = SYSCTL_OUT(req, &fcntl, sizeof(fcntl));
273 	}
274 
275 	return (error);
276 }
277 
278 static int
279 sfxge_port_link_fc_handler(SYSCTL_HANDLER_ARGS)
280 {
281 	struct sfxge_softc *sc;
282 	struct sfxge_port *port;
283 	unsigned int wanted_fc, link_fc;
284 
285 	sc = arg1;
286 	port = &sc->port;
287 
288 	SFXGE_PORT_LOCK(port);
289 	if (__predict_true(port->init_state == SFXGE_PORT_STARTED) &&
290 	    SFXGE_LINK_UP(sc))
291 		efx_mac_fcntl_get(sc->enp, &wanted_fc, &link_fc);
292 	else
293 		link_fc = 0;
294 	SFXGE_PORT_UNLOCK(port);
295 
296 	return (SYSCTL_OUT(req, &link_fc, sizeof(link_fc)));
297 }
298 
299 #endif /* SFXGE_HAVE_PAUSE_MEDIAOPTS */
300 
301 static const uint64_t sfxge_link_baudrate[EFX_LINK_NMODES] = {
302 	[EFX_LINK_10HDX]	= IF_Mbps(10),
303 	[EFX_LINK_10FDX]	= IF_Mbps(10),
304 	[EFX_LINK_100HDX]	= IF_Mbps(100),
305 	[EFX_LINK_100FDX]	= IF_Mbps(100),
306 	[EFX_LINK_1000HDX]	= IF_Gbps(1),
307 	[EFX_LINK_1000FDX]	= IF_Gbps(1),
308 	[EFX_LINK_10000FDX]	= IF_Gbps(10),
309 	[EFX_LINK_40000FDX]	= IF_Gbps(40),
310 };
311 
312 void
313 sfxge_mac_link_update(struct sfxge_softc *sc, efx_link_mode_t mode)
314 {
315 	struct sfxge_port *port;
316 	int link_state;
317 
318 	port = &sc->port;
319 
320 	if (port->link_mode == mode)
321 		return;
322 
323 	port->link_mode = mode;
324 
325 	/* Push link state update to the OS */
326 	link_state = (SFXGE_LINK_UP(sc) ? LINK_STATE_UP : LINK_STATE_DOWN);
327 	sc->ifnet->if_baudrate = sfxge_link_baudrate[port->link_mode];
328 	if_link_state_change(sc->ifnet, link_state);
329 }
330 
331 static void
332 sfxge_mac_poll_work(void *arg, int npending)
333 {
334 	struct sfxge_softc *sc;
335 	efx_nic_t *enp;
336 	struct sfxge_port *port;
337 	efx_link_mode_t mode;
338 
339 	sc = (struct sfxge_softc *)arg;
340 	enp = sc->enp;
341 	port = &sc->port;
342 
343 	SFXGE_PORT_LOCK(port);
344 
345 	if (__predict_false(port->init_state != SFXGE_PORT_STARTED))
346 		goto done;
347 
348 	/* This may sleep waiting for MCDI completion */
349 	(void)efx_port_poll(enp, &mode);
350 	sfxge_mac_link_update(sc, mode);
351 
352 done:
353 	SFXGE_PORT_UNLOCK(port);
354 }
355 
356 static int
357 sfxge_mac_multicast_list_set(struct sfxge_softc *sc)
358 {
359 	struct ifnet *ifp = sc->ifnet;
360 	struct sfxge_port *port = &sc->port;
361 	uint8_t *mcast_addr = port->mcast_addrs;
362 	struct ifmultiaddr *ifma;
363 	struct sockaddr_dl *sa;
364 	int rc = 0;
365 
366 	mtx_assert(&port->lock, MA_OWNED);
367 
368 	port->mcast_count = 0;
369 	if_maddr_rlock(ifp);
370 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
371 		if (ifma->ifma_addr->sa_family == AF_LINK) {
372 			if (port->mcast_count == EFX_MAC_MULTICAST_LIST_MAX) {
373 				device_printf(sc->dev,
374 				    "Too many multicast addresses\n");
375 				rc = EINVAL;
376 				break;
377 			}
378 
379 			sa = (struct sockaddr_dl *)ifma->ifma_addr;
380 			memcpy(mcast_addr, LLADDR(sa), EFX_MAC_ADDR_LEN);
381 			mcast_addr += EFX_MAC_ADDR_LEN;
382 			++port->mcast_count;
383 		}
384 	}
385 	if_maddr_runlock(ifp);
386 
387 	if (rc == 0) {
388 		rc = efx_mac_multicast_list_set(sc->enp, port->mcast_addrs,
389 						port->mcast_count);
390 		if (rc != 0)
391 			device_printf(sc->dev,
392 			    "Cannot set multicast address list\n");
393 	}
394 
395 	return (rc);
396 }
397 
398 static int
399 sfxge_mac_filter_set_locked(struct sfxge_softc *sc)
400 {
401 	struct ifnet *ifp = sc->ifnet;
402 	struct sfxge_port *port = &sc->port;
403 	boolean_t all_mulcst;
404 	int rc;
405 
406 	mtx_assert(&port->lock, MA_OWNED);
407 
408 	all_mulcst = !!(ifp->if_flags & (IFF_PROMISC | IFF_ALLMULTI));
409 
410 	rc = sfxge_mac_multicast_list_set(sc);
411 	/* Fallback to all multicast if cannot set multicast list */
412 	if (rc != 0)
413 		all_mulcst = B_TRUE;
414 
415 	rc = efx_mac_filter_set(sc->enp, !!(ifp->if_flags & IFF_PROMISC),
416 				(port->mcast_count > 0), all_mulcst, B_TRUE);
417 
418 	return (rc);
419 }
420 
421 int
422 sfxge_mac_filter_set(struct sfxge_softc *sc)
423 {
424 	struct sfxge_port *port = &sc->port;
425 	int rc;
426 
427 	SFXGE_PORT_LOCK(port);
428 	/*
429 	 * The function may be called without softc_lock held in the
430 	 * case of SIOCADDMULTI and SIOCDELMULTI ioctls. ioctl handler
431 	 * checks IFF_DRV_RUNNING flag which implies port started, but
432 	 * it is not guaranteed to remain. softc_lock shared lock can't
433 	 * be held in the case of these ioctls processing, since it
434 	 * results in failure where kernel complains that non-sleepable
435 	 * lock is held in sleeping thread. Both problems are repeatable
436 	 * on LAG with LACP proto bring up.
437 	 */
438 	if (__predict_true(port->init_state == SFXGE_PORT_STARTED))
439 		rc = sfxge_mac_filter_set_locked(sc);
440 	else
441 		rc = 0;
442 	SFXGE_PORT_UNLOCK(port);
443 	return (rc);
444 }
445 
446 void
447 sfxge_port_stop(struct sfxge_softc *sc)
448 {
449 	struct sfxge_port *port;
450 	efx_nic_t *enp;
451 
452 	port = &sc->port;
453 	enp = sc->enp;
454 
455 	SFXGE_PORT_LOCK(port);
456 
457 	KASSERT(port->init_state == SFXGE_PORT_STARTED,
458 	    ("port not started"));
459 
460 	port->init_state = SFXGE_PORT_INITIALIZED;
461 
462 	port->mac_stats.update_time = 0;
463 
464 	/* This may call MCDI */
465 	(void)efx_mac_drain(enp, B_TRUE);
466 
467 	(void)efx_mac_stats_periodic(enp, &port->mac_stats.dma_buf, 0, B_FALSE);
468 
469 	port->link_mode = EFX_LINK_UNKNOWN;
470 
471 	/* Destroy the common code port object. */
472 	efx_port_fini(enp);
473 
474 	efx_filter_fini(enp);
475 
476 	SFXGE_PORT_UNLOCK(port);
477 }
478 
479 int
480 sfxge_port_start(struct sfxge_softc *sc)
481 {
482 	uint8_t mac_addr[ETHER_ADDR_LEN];
483 	struct ifnet *ifp = sc->ifnet;
484 	struct sfxge_port *port;
485 	efx_nic_t *enp;
486 	size_t pdu;
487 	int rc;
488 	uint32_t phy_cap_mask;
489 
490 	port = &sc->port;
491 	enp = sc->enp;
492 
493 	SFXGE_PORT_LOCK(port);
494 
495 	KASSERT(port->init_state == SFXGE_PORT_INITIALIZED,
496 	    ("port not initialized"));
497 
498 	/* Initialise the required filtering */
499 	if ((rc = efx_filter_init(enp)) != 0)
500 		goto fail_filter_init;
501 
502 	/* Initialize the port object in the common code. */
503 	if ((rc = efx_port_init(sc->enp)) != 0)
504 		goto fail;
505 
506 	/* Set the SDU */
507 	pdu = EFX_MAC_PDU(ifp->if_mtu);
508 	if ((rc = efx_mac_pdu_set(enp, pdu)) != 0)
509 		goto fail2;
510 
511 	if ((rc = efx_mac_fcntl_set(enp, sfxge_port_wanted_fc(sc), B_TRUE))
512 	    != 0)
513 		goto fail3;
514 
515 	/* Set the unicast address */
516 	if_addr_rlock(ifp);
517 	bcopy(LLADDR((struct sockaddr_dl *)ifp->if_addr->ifa_addr),
518 	      mac_addr, sizeof(mac_addr));
519 	if_addr_runlock(ifp);
520 	if ((rc = efx_mac_addr_set(enp, mac_addr)) != 0)
521 		goto fail4;
522 
523 	sfxge_mac_filter_set_locked(sc);
524 
525 	/* Update MAC stats by DMA every period */
526 	if ((rc = efx_mac_stats_periodic(enp, &port->mac_stats.dma_buf,
527 					 port->stats_update_period_ms,
528 					 B_FALSE)) != 0)
529 		goto fail6;
530 
531 	if ((rc = efx_mac_drain(enp, B_FALSE)) != 0)
532 		goto fail8;
533 
534 	if ((rc = sfxge_phy_cap_mask(sc, sc->media.ifm_cur->ifm_media,
535 				     &phy_cap_mask)) != 0)
536 		goto fail9;
537 
538 	if ((rc = efx_phy_adv_cap_set(sc->enp, phy_cap_mask)) != 0)
539 		goto fail10;
540 
541 	port->init_state = SFXGE_PORT_STARTED;
542 
543 	/* Single poll in case there were missing initial events */
544 	SFXGE_PORT_UNLOCK(port);
545 	sfxge_mac_poll_work(sc, 0);
546 
547 	return (0);
548 
549 fail10:
550 fail9:
551 	(void)efx_mac_drain(enp, B_TRUE);
552 fail8:
553 	(void)efx_mac_stats_periodic(enp, &port->mac_stats.dma_buf, 0, B_FALSE);
554 fail6:
555 fail4:
556 fail3:
557 
558 fail2:
559 	efx_port_fini(enp);
560 fail:
561 	efx_filter_fini(enp);
562 fail_filter_init:
563 	SFXGE_PORT_UNLOCK(port);
564 
565 	return (rc);
566 }
567 
568 static int
569 sfxge_phy_stat_update(struct sfxge_softc *sc)
570 {
571 	struct sfxge_port *port = &sc->port;
572 	efsys_mem_t *esmp = &port->phy_stats.dma_buf;
573 	clock_t now;
574 	unsigned int count;
575 	int rc;
576 
577 	SFXGE_PORT_LOCK_ASSERT_OWNED(port);
578 
579 	if (__predict_false(port->init_state != SFXGE_PORT_STARTED)) {
580 		rc = 0;
581 		goto out;
582 	}
583 
584 	now = ticks;
585 	if ((unsigned int)(now - port->phy_stats.update_time) < (unsigned int)hz) {
586 		rc = 0;
587 		goto out;
588 	}
589 
590 	port->phy_stats.update_time = now;
591 
592 	/* If we're unlucky enough to read statistics wduring the DMA, wait
593 	 * up to 10ms for it to finish (typically takes <500us) */
594 	for (count = 0; count < 100; ++count) {
595 		EFSYS_PROBE1(wait, unsigned int, count);
596 
597 		/* Synchronize the DMA memory for reading */
598 		bus_dmamap_sync(esmp->esm_tag, esmp->esm_map,
599 		    BUS_DMASYNC_POSTREAD);
600 
601 		/* Try to update the cached counters */
602 		if ((rc = efx_phy_stats_update(sc->enp, esmp,
603 		    port->phy_stats.decode_buf)) != EAGAIN)
604 			goto out;
605 
606 		DELAY(100);
607 	}
608 
609 	rc = ETIMEDOUT;
610 out:
611 	return (rc);
612 }
613 
614 static int
615 sfxge_phy_stat_handler(SYSCTL_HANDLER_ARGS)
616 {
617 	struct sfxge_softc *sc = arg1;
618 	unsigned int id = arg2;
619 	int rc;
620 	uint32_t val;
621 
622 	SFXGE_PORT_LOCK(&sc->port);
623 	if ((rc = sfxge_phy_stat_update(sc)) == 0)
624 		val = ((uint32_t *)sc->port.phy_stats.decode_buf)[id];
625 	SFXGE_PORT_UNLOCK(&sc->port);
626 
627 	if (rc == 0)
628 		rc = SYSCTL_OUT(req, &val, sizeof(val));
629 	return (rc);
630 }
631 
632 static void
633 sfxge_phy_stat_init(struct sfxge_softc *sc)
634 {
635 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
636 	struct sysctl_oid_list *stat_list;
637 	unsigned int id;
638 	const char *name;
639 	uint64_t stat_mask = efx_nic_cfg_get(sc->enp)->enc_phy_stat_mask;
640 
641 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
642 
643 	/* Initialise the named stats */
644 	for (id = 0; id < EFX_PHY_NSTATS; id++) {
645 		if (!(stat_mask & ((uint64_t)1 << id)))
646 			continue;
647 		name = efx_phy_stat_name(sc->enp, id);
648 		SYSCTL_ADD_PROC(
649 			ctx, stat_list,
650 			OID_AUTO, name, CTLTYPE_UINT|CTLFLAG_RD,
651 			sc, id, sfxge_phy_stat_handler,
652 			id == EFX_PHY_STAT_OUI ? "IX" : "IU",
653 			"");
654 	}
655 }
656 
657 void
658 sfxge_port_fini(struct sfxge_softc *sc)
659 {
660 	struct sfxge_port *port;
661 	efsys_mem_t *esmp;
662 
663 	port = &sc->port;
664 	esmp = &port->mac_stats.dma_buf;
665 
666 	KASSERT(port->init_state == SFXGE_PORT_INITIALIZED,
667 	    ("Port not initialized"));
668 
669 	port->init_state = SFXGE_PORT_UNINITIALIZED;
670 
671 	port->link_mode = EFX_LINK_UNKNOWN;
672 
673 	/* Finish with PHY DMA memory */
674 	sfxge_dma_free(&port->phy_stats.dma_buf);
675 	free(port->phy_stats.decode_buf, M_SFXGE);
676 
677 	sfxge_dma_free(esmp);
678 	free(port->mac_stats.decode_buf, M_SFXGE);
679 
680 	SFXGE_PORT_LOCK_DESTROY(port);
681 
682 	port->sc = NULL;
683 }
684 
685 static uint16_t
686 sfxge_port_stats_update_period_ms(struct sfxge_softc *sc)
687 {
688 	int period_ms = sfxge_stats_update_period_ms;
689 
690 	if (period_ms < 0) {
691 		device_printf(sc->dev,
692 			"treat negative stats update period %d as 0 (disable)\n",
693 			 period_ms);
694 		period_ms = 0;
695 	} else if (period_ms > UINT16_MAX) {
696 		device_printf(sc->dev,
697 			"treat too big stats update period %d as %u\n",
698 			period_ms, UINT16_MAX);
699 		period_ms = UINT16_MAX;
700 	}
701 
702 	return period_ms;
703 }
704 
705 static int
706 sfxge_port_stats_update_period_ms_handler(SYSCTL_HANDLER_ARGS)
707 {
708 	struct sfxge_softc *sc;
709 	struct sfxge_port *port;
710 	unsigned int period_ms;
711 	int error;
712 
713 	sc = arg1;
714 	port = &sc->port;
715 
716 	if (req->newptr != NULL) {
717 		error = SYSCTL_IN(req, &period_ms, sizeof(period_ms));
718 		if (error != 0)
719 			return (error);
720 
721 		if (period_ms > UINT16_MAX)
722 			return (EINVAL);
723 
724 		SFXGE_PORT_LOCK(port);
725 
726 		if (port->stats_update_period_ms != period_ms) {
727 			if (port->init_state == SFXGE_PORT_STARTED)
728 				error = efx_mac_stats_periodic(sc->enp,
729 						&port->mac_stats.dma_buf,
730 						period_ms, B_FALSE);
731 			if (error == 0)
732 				port->stats_update_period_ms = period_ms;
733 		}
734 
735 		SFXGE_PORT_UNLOCK(port);
736 	} else {
737 		SFXGE_PORT_LOCK(port);
738 		period_ms = port->stats_update_period_ms;
739 		SFXGE_PORT_UNLOCK(port);
740 
741 		error = SYSCTL_OUT(req, &period_ms, sizeof(period_ms));
742 	}
743 
744 	return (error);
745 }
746 
747 int
748 sfxge_port_init(struct sfxge_softc *sc)
749 {
750 	struct sfxge_port *port;
751 	struct sysctl_ctx_list *sysctl_ctx;
752 	struct sysctl_oid *sysctl_tree;
753 	efsys_mem_t *mac_stats_buf, *phy_stats_buf;
754 	int rc;
755 
756 	port = &sc->port;
757 	mac_stats_buf = &port->mac_stats.dma_buf;
758 	phy_stats_buf = &port->phy_stats.dma_buf;
759 
760 	KASSERT(port->init_state == SFXGE_PORT_UNINITIALIZED,
761 	    ("Port already initialized"));
762 
763 	port->sc = sc;
764 
765 	SFXGE_PORT_LOCK_INIT(port, device_get_nameunit(sc->dev));
766 
767 	DBGPRINT(sc->dev, "alloc PHY stats");
768 	port->phy_stats.decode_buf = malloc(EFX_PHY_NSTATS * sizeof(uint32_t),
769 					    M_SFXGE, M_WAITOK | M_ZERO);
770 	if ((rc = sfxge_dma_alloc(sc, EFX_PHY_STATS_SIZE, phy_stats_buf)) != 0)
771 		goto fail;
772 	sfxge_phy_stat_init(sc);
773 
774 	DBGPRINT(sc->dev, "init sysctl");
775 	sysctl_ctx = device_get_sysctl_ctx(sc->dev);
776 	sysctl_tree = device_get_sysctl_tree(sc->dev);
777 
778 #ifndef SFXGE_HAVE_PAUSE_MEDIAOPTS
779 	/* If flow control cannot be configured or reported through
780 	 * ifmedia, provide sysctls for it. */
781 	port->wanted_fc = EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE;
782 	SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
783 	    "wanted_fc", CTLTYPE_UINT|CTLFLAG_RW, sc, 0,
784 	    sfxge_port_wanted_fc_handler, "IU", "wanted flow control mode");
785 	SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
786 	    "link_fc", CTLTYPE_UINT|CTLFLAG_RD, sc, 0,
787 	    sfxge_port_link_fc_handler, "IU", "link flow control mode");
788 #endif
789 
790 	DBGPRINT(sc->dev, "alloc MAC stats");
791 	port->mac_stats.decode_buf = malloc(EFX_MAC_NSTATS * sizeof(uint64_t),
792 					    M_SFXGE, M_WAITOK | M_ZERO);
793 	if ((rc = sfxge_dma_alloc(sc, EFX_MAC_STATS_SIZE, mac_stats_buf)) != 0)
794 		goto fail2;
795 	port->stats_update_period_ms = sfxge_port_stats_update_period_ms(sc);
796 	sfxge_mac_stat_init(sc);
797 
798 	SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
799 	    "stats_update_period_ms", CTLTYPE_UINT|CTLFLAG_RW, sc, 0,
800 	    sfxge_port_stats_update_period_ms_handler, "IU",
801 	    "interface statistics refresh period");
802 
803 	port->init_state = SFXGE_PORT_INITIALIZED;
804 
805 	DBGPRINT(sc->dev, "success");
806 	return (0);
807 
808 fail2:
809 	free(port->mac_stats.decode_buf, M_SFXGE);
810 	sfxge_dma_free(phy_stats_buf);
811 fail:
812 	free(port->phy_stats.decode_buf, M_SFXGE);
813 	SFXGE_PORT_LOCK_DESTROY(port);
814 	port->sc = NULL;
815 	DBGPRINT(sc->dev, "failed %d", rc);
816 	return (rc);
817 }
818 
819 static const int sfxge_link_mode[EFX_PHY_MEDIA_NTYPES][EFX_LINK_NMODES] = {
820 	[EFX_PHY_MEDIA_CX4] = {
821 		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_CX4,
822 	},
823 	[EFX_PHY_MEDIA_KX4] = {
824 		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_KX4,
825 	},
826 	[EFX_PHY_MEDIA_XFP] = {
827 		/* Don't know the module type, but assume SR for now. */
828 		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_SR,
829 	},
830 	[EFX_PHY_MEDIA_QSFP_PLUS] = {
831 		/* Don't know the module type, but assume SR for now. */
832 		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_SR,
833 		[EFX_LINK_40000FDX]	= IFM_ETHER | IFM_FDX | IFM_40G_CR4,
834 	},
835 	[EFX_PHY_MEDIA_SFP_PLUS] = {
836 		/* Don't know the module type, but assume SX/SR for now. */
837 		[EFX_LINK_1000FDX]	= IFM_ETHER | IFM_FDX | IFM_1000_SX,
838 		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_SR,
839 	},
840 	[EFX_PHY_MEDIA_BASE_T] = {
841 		[EFX_LINK_10HDX]	= IFM_ETHER | IFM_HDX | IFM_10_T,
842 		[EFX_LINK_10FDX]	= IFM_ETHER | IFM_FDX | IFM_10_T,
843 		[EFX_LINK_100HDX]	= IFM_ETHER | IFM_HDX | IFM_100_TX,
844 		[EFX_LINK_100FDX]	= IFM_ETHER | IFM_FDX | IFM_100_TX,
845 		[EFX_LINK_1000HDX]	= IFM_ETHER | IFM_HDX | IFM_1000_T,
846 		[EFX_LINK_1000FDX]	= IFM_ETHER | IFM_FDX | IFM_1000_T,
847 		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_T,
848 	},
849 };
850 
851 static void
852 sfxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
853 {
854 	struct sfxge_softc *sc;
855 	efx_phy_media_type_t medium_type;
856 	efx_link_mode_t mode;
857 
858 	sc = ifp->if_softc;
859 	SFXGE_ADAPTER_LOCK(sc);
860 
861 	ifmr->ifm_status = IFM_AVALID;
862 	ifmr->ifm_active = IFM_ETHER;
863 
864 	if (SFXGE_RUNNING(sc) && SFXGE_LINK_UP(sc)) {
865 		ifmr->ifm_status |= IFM_ACTIVE;
866 
867 		efx_phy_media_type_get(sc->enp, &medium_type);
868 		mode = sc->port.link_mode;
869 		ifmr->ifm_active |= sfxge_link_mode[medium_type][mode];
870 		ifmr->ifm_active |= sfxge_port_link_fc_ifm(sc);
871 	}
872 
873 	SFXGE_ADAPTER_UNLOCK(sc);
874 }
875 
876 static efx_phy_cap_type_t
877 sfxge_link_mode_to_phy_cap(efx_link_mode_t mode)
878 {
879 	switch (mode) {
880 	case EFX_LINK_10HDX:
881 		return (EFX_PHY_CAP_10HDX);
882 	case EFX_LINK_10FDX:
883 		return (EFX_PHY_CAP_10FDX);
884 	case EFX_LINK_100HDX:
885 		return (EFX_PHY_CAP_100HDX);
886 	case EFX_LINK_100FDX:
887 		return (EFX_PHY_CAP_100FDX);
888 	case EFX_LINK_1000HDX:
889 		return (EFX_PHY_CAP_1000HDX);
890 	case EFX_LINK_1000FDX:
891 		return (EFX_PHY_CAP_1000FDX);
892 	case EFX_LINK_10000FDX:
893 		return (EFX_PHY_CAP_10000FDX);
894 	case EFX_LINK_40000FDX:
895 		return (EFX_PHY_CAP_40000FDX);
896 	default:
897 		EFSYS_ASSERT(B_FALSE);
898 		return (EFX_PHY_CAP_INVALID);
899 	}
900 }
901 
902 static int
903 sfxge_phy_cap_mask(struct sfxge_softc *sc, int ifmedia, uint32_t *phy_cap_mask)
904 {
905 	/* Get global options (duplex), type and subtype bits */
906 	int ifmedia_masked = ifmedia & (IFM_GMASK | IFM_NMASK | IFM_TMASK);
907 	efx_phy_media_type_t medium_type;
908 	boolean_t mode_found = B_FALSE;
909 	uint32_t cap_mask, mode_cap_mask;
910 	efx_link_mode_t mode;
911 	efx_phy_cap_type_t phy_cap;
912 
913 	efx_phy_media_type_get(sc->enp, &medium_type);
914 	if (medium_type >= nitems(sfxge_link_mode)) {
915 		if_printf(sc->ifnet, "unexpected media type %d\n", medium_type);
916 		return (EINVAL);
917 	}
918 
919 	efx_phy_adv_cap_get(sc->enp, EFX_PHY_CAP_PERM, &cap_mask);
920 
921 	for (mode = EFX_LINK_10HDX; mode < EFX_LINK_NMODES; mode++) {
922 		if (ifmedia_masked == sfxge_link_mode[medium_type][mode]) {
923 			mode_found = B_TRUE;
924 			break;
925 		}
926 	}
927 
928 	if (!mode_found) {
929 		/*
930 		 * If media is not in the table, it must be IFM_AUTO.
931 		 */
932 		KASSERT((cap_mask & (1 << EFX_PHY_CAP_AN)) &&
933 		    ifmedia_masked == (IFM_ETHER | IFM_AUTO),
934 		    ("%s: no mode for media %#x", __func__, ifmedia));
935 		*phy_cap_mask = (cap_mask & ~(1 << EFX_PHY_CAP_ASYM));
936 		return (0);
937 	}
938 
939 	phy_cap = sfxge_link_mode_to_phy_cap(mode);
940 	if (phy_cap == EFX_PHY_CAP_INVALID) {
941 		if_printf(sc->ifnet,
942 			  "cannot map link mode %d to phy capability\n",
943 			  mode);
944 		return (EINVAL);
945 	}
946 
947 	mode_cap_mask = (1 << phy_cap);
948 	mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_AN);
949 #ifdef SFXGE_HAVE_PAUSE_MEDIAOPTS
950 	if (ifmedia & IFM_ETH_RXPAUSE)
951 		mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_PAUSE);
952 	if (!(ifmedia & IFM_ETH_TXPAUSE))
953 		mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_ASYM);
954 #else
955 	mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_PAUSE);
956 #endif
957 
958 	*phy_cap_mask = mode_cap_mask;
959 	return (0);
960 }
961 
962 static int
963 sfxge_media_change(struct ifnet *ifp)
964 {
965 	struct sfxge_softc *sc;
966 	struct ifmedia_entry *ifm;
967 	int rc;
968 	uint32_t phy_cap_mask;
969 
970 	sc = ifp->if_softc;
971 	ifm = sc->media.ifm_cur;
972 
973 	SFXGE_ADAPTER_LOCK(sc);
974 
975 	if (!SFXGE_RUNNING(sc)) {
976 		rc = 0;
977 		goto out;
978 	}
979 
980 	rc = efx_mac_fcntl_set(sc->enp, sfxge_port_wanted_fc(sc), B_TRUE);
981 	if (rc != 0)
982 		goto out;
983 
984 	if ((rc = sfxge_phy_cap_mask(sc, ifm->ifm_media, &phy_cap_mask)) != 0)
985 		goto out;
986 
987 	rc = efx_phy_adv_cap_set(sc->enp, phy_cap_mask);
988 out:
989 	SFXGE_ADAPTER_UNLOCK(sc);
990 
991 	return (rc);
992 }
993 
994 int sfxge_port_ifmedia_init(struct sfxge_softc *sc)
995 {
996 	efx_phy_media_type_t medium_type;
997 	uint32_t cap_mask, mode_cap_mask;
998 	efx_link_mode_t mode;
999 	efx_phy_cap_type_t phy_cap;
1000 	int mode_ifm, best_mode_ifm = 0;
1001 	int rc;
1002 
1003 	/*
1004 	 * We need port state to initialise the ifmedia list.
1005 	 * It requires initialized NIC what is already done in
1006 	 * sfxge_create() when resources are estimated.
1007 	 */
1008 	if ((rc = efx_filter_init(sc->enp)) != 0)
1009 		goto out1;
1010 	if ((rc = efx_port_init(sc->enp)) != 0)
1011 		goto out2;
1012 
1013 	/*
1014 	 * Register ifconfig callbacks for querying and setting the
1015 	 * link mode and link status.
1016 	 */
1017 	ifmedia_init(&sc->media, IFM_IMASK, sfxge_media_change,
1018 	    sfxge_media_status);
1019 
1020 	/*
1021 	 * Map firmware medium type and capabilities to ifmedia types.
1022 	 * ifmedia does not distinguish between forcing the link mode
1023 	 * and disabling auto-negotiation.  1000BASE-T and 10GBASE-T
1024 	 * require AN even if only one link mode is enabled, and for
1025 	 * 100BASE-TX it is useful even if the link mode is forced.
1026 	 * Therefore we never disable auto-negotiation.
1027 	 *
1028 	 * Also enable and advertise flow control by default.
1029 	 */
1030 
1031 	efx_phy_media_type_get(sc->enp, &medium_type);
1032 	efx_phy_adv_cap_get(sc->enp, EFX_PHY_CAP_PERM, &cap_mask);
1033 
1034 	for (mode = EFX_LINK_10HDX; mode < EFX_LINK_NMODES; mode++) {
1035 		phy_cap = sfxge_link_mode_to_phy_cap(mode);
1036 		if (phy_cap == EFX_PHY_CAP_INVALID)
1037 			continue;
1038 
1039 		mode_cap_mask = (1 << phy_cap);
1040 		mode_ifm = sfxge_link_mode[medium_type][mode];
1041 
1042 		if ((cap_mask & mode_cap_mask) && mode_ifm) {
1043 			/* No flow-control */
1044 			ifmedia_add(&sc->media, mode_ifm, 0, NULL);
1045 
1046 #ifdef SFXGE_HAVE_PAUSE_MEDIAOPTS
1047 			/* Respond-only.  If using AN, we implicitly
1048 			 * offer symmetric as well, but that doesn't
1049 			 * mean we *have* to generate pause frames.
1050 			 */
1051 			mode_ifm |= IFM_ETH_RXPAUSE;
1052 			ifmedia_add(&sc->media, mode_ifm, 0, NULL);
1053 
1054 			/* Symmetric */
1055 			mode_ifm |= IFM_ETH_TXPAUSE;
1056 			ifmedia_add(&sc->media, mode_ifm, 0, NULL);
1057 #endif
1058 
1059 			/* Link modes are numbered in order of speed,
1060 			 * so assume the last one available is the best.
1061 			 */
1062 			best_mode_ifm = mode_ifm;
1063 		}
1064 	}
1065 
1066 	if (cap_mask & (1 << EFX_PHY_CAP_AN)) {
1067 		/* Add autoselect mode. */
1068 		mode_ifm = IFM_ETHER | IFM_AUTO;
1069 		ifmedia_add(&sc->media, mode_ifm, 0, NULL);
1070 		best_mode_ifm = mode_ifm;
1071 	}
1072 
1073 	if (best_mode_ifm != 0)
1074 		ifmedia_set(&sc->media, best_mode_ifm);
1075 
1076 	/* Now discard port state until interface is started. */
1077 	efx_port_fini(sc->enp);
1078 out2:
1079 	efx_filter_fini(sc->enp);
1080 out1:
1081 	return (rc);
1082 }
1083