xref: /freebsd/sys/dev/sfxge/sfxge_port.c (revision 51015e6d0f570239b0c2088dc6cf2b018928375d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2010-2016 Solarflare Communications Inc.
5  * All rights reserved.
6  *
7  * This software was developed in part by Philip Paeps under contract for
8  * Solarflare Communications, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright notice,
14  *    this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright notice,
16  *    this list of conditions and the following disclaimer in the documentation
17  *    and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * The views and conclusions contained in the software and documentation are
32  * those of the authors and should not be interpreted as representing official
33  * policies, either expressed or implied, of the FreeBSD Project.
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 
39 #include <sys/types.h>
40 #include <sys/limits.h>
41 #include <net/ethernet.h>
42 #include <net/if_dl.h>
43 
44 #include "common/efx.h"
45 
46 #include "sfxge.h"
47 
48 #define	SFXGE_PARAM_STATS_UPDATE_PERIOD_MS \
49 	SFXGE_PARAM(stats_update_period_ms)
50 static int sfxge_stats_update_period_ms = SFXGE_STATS_UPDATE_PERIOD_MS;
51 TUNABLE_INT(SFXGE_PARAM_STATS_UPDATE_PERIOD_MS,
52 	    &sfxge_stats_update_period_ms);
53 SYSCTL_INT(_hw_sfxge, OID_AUTO, stats_update_period_ms, CTLFLAG_RDTUN,
54 	   &sfxge_stats_update_period_ms, 0,
55 	   "netstat interface statistics update period in milliseconds");
56 
57 static int sfxge_phy_cap_mask(struct sfxge_softc *, int, uint32_t *);
58 
59 static int
60 sfxge_mac_stat_update(struct sfxge_softc *sc)
61 {
62 	struct sfxge_port *port = &sc->port;
63 	efsys_mem_t *esmp = &(port->mac_stats.dma_buf);
64 	clock_t now;
65 	unsigned int min_ticks;
66 	unsigned int count;
67 	int rc;
68 
69 	SFXGE_PORT_LOCK_ASSERT_OWNED(port);
70 
71 	if (__predict_false(port->init_state != SFXGE_PORT_STARTED)) {
72 		rc = 0;
73 		goto out;
74 	}
75 
76 	min_ticks = (unsigned int)hz * port->stats_update_period_ms / 1000;
77 
78 	now = ticks;
79 	if ((unsigned int)(now - port->mac_stats.update_time) < min_ticks) {
80 		rc = 0;
81 		goto out;
82 	}
83 
84 	port->mac_stats.update_time = now;
85 
86 	/* If we're unlucky enough to read statistics wduring the DMA, wait
87 	 * up to 10ms for it to finish (typically takes <500us) */
88 	for (count = 0; count < 100; ++count) {
89 		EFSYS_PROBE1(wait, unsigned int, count);
90 
91 		/* Try to update the cached counters */
92 		if ((rc = efx_mac_stats_update(sc->enp, esmp,
93 		    port->mac_stats.decode_buf, NULL)) != EAGAIN)
94 			goto out;
95 
96 		DELAY(100);
97 	}
98 
99 	rc = ETIMEDOUT;
100 out:
101 	return (rc);
102 }
103 
104 uint64_t
105 sfxge_get_counter(if_t ifp, ift_counter c)
106 {
107 	struct sfxge_softc *sc = if_getsoftc(ifp);
108 	uint64_t *mac_stats;
109 	uint64_t val;
110 
111 	SFXGE_PORT_LOCK(&sc->port);
112 
113 	/* Ignore error and use old values */
114 	(void)sfxge_mac_stat_update(sc);
115 
116 	mac_stats = (uint64_t *)sc->port.mac_stats.decode_buf;
117 
118 	switch (c) {
119 	case IFCOUNTER_IPACKETS:
120 		val = mac_stats[EFX_MAC_RX_PKTS];
121 		break;
122 	case IFCOUNTER_IERRORS:
123 		val = mac_stats[EFX_MAC_RX_ERRORS];
124 		break;
125 	case IFCOUNTER_OPACKETS:
126 		val = mac_stats[EFX_MAC_TX_PKTS];
127 		break;
128 	case IFCOUNTER_OERRORS:
129 		val = mac_stats[EFX_MAC_TX_ERRORS];
130 		break;
131 	case IFCOUNTER_COLLISIONS:
132 		val = mac_stats[EFX_MAC_TX_SGL_COL_PKTS] +
133 		      mac_stats[EFX_MAC_TX_MULT_COL_PKTS] +
134 		      mac_stats[EFX_MAC_TX_EX_COL_PKTS] +
135 		      mac_stats[EFX_MAC_TX_LATE_COL_PKTS];
136 		break;
137 	case IFCOUNTER_IBYTES:
138 		val = mac_stats[EFX_MAC_RX_OCTETS];
139 		break;
140 	case IFCOUNTER_OBYTES:
141 		val = mac_stats[EFX_MAC_TX_OCTETS];
142 		break;
143 	case IFCOUNTER_OMCASTS:
144 		val = mac_stats[EFX_MAC_TX_MULTICST_PKTS] +
145 		      mac_stats[EFX_MAC_TX_BRDCST_PKTS];
146 		break;
147 	case IFCOUNTER_OQDROPS:
148 		SFXGE_PORT_UNLOCK(&sc->port);
149 		return (sfxge_tx_get_drops(sc));
150 	case IFCOUNTER_IMCASTS:
151 		/* if_imcasts is maintained in net/if_ethersubr.c */
152 	case IFCOUNTER_IQDROPS:
153 		/* if_iqdrops is maintained in net/if_ethersubr.c */
154 	case IFCOUNTER_NOPROTO:
155 		/* if_noproto is maintained in net/if_ethersubr.c */
156 	default:
157 		SFXGE_PORT_UNLOCK(&sc->port);
158 		return (if_get_counter_default(ifp, c));
159 	}
160 
161 	SFXGE_PORT_UNLOCK(&sc->port);
162 
163 	return (val);
164 }
165 
166 static int
167 sfxge_mac_stat_handler(SYSCTL_HANDLER_ARGS)
168 {
169 	struct sfxge_softc *sc = arg1;
170 	unsigned int id = arg2;
171 	int rc;
172 	uint64_t val;
173 
174 	SFXGE_PORT_LOCK(&sc->port);
175 	if ((rc = sfxge_mac_stat_update(sc)) == 0)
176 		val = ((uint64_t *)sc->port.mac_stats.decode_buf)[id];
177 	SFXGE_PORT_UNLOCK(&sc->port);
178 
179 	if (rc == 0)
180 		rc = SYSCTL_OUT(req, &val, sizeof(val));
181 	return (rc);
182 }
183 
184 static void
185 sfxge_mac_stat_init(struct sfxge_softc *sc)
186 {
187 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
188 	struct sysctl_oid_list *stat_list;
189 	unsigned int id;
190 	const char *name;
191 
192 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
193 
194 	/* Initialise the named stats */
195 	for (id = 0; id < EFX_MAC_NSTATS; id++) {
196 		name = efx_mac_stat_name(sc->enp, id);
197 		SYSCTL_ADD_PROC(ctx, stat_list, OID_AUTO, name,
198 		    CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
199 		    sc, id, sfxge_mac_stat_handler, "Q", "");
200 	}
201 }
202 
203 #ifdef SFXGE_HAVE_PAUSE_MEDIAOPTS
204 
205 static unsigned int
206 sfxge_port_wanted_fc(struct sfxge_softc *sc)
207 {
208 	struct ifmedia_entry *ifm = sc->media.ifm_cur;
209 
210 	if (ifm->ifm_media == (IFM_ETHER | IFM_AUTO))
211 		return (EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE);
212 	return (((ifm->ifm_media & IFM_ETH_RXPAUSE) ? EFX_FCNTL_RESPOND : 0) |
213 		((ifm->ifm_media & IFM_ETH_TXPAUSE) ? EFX_FCNTL_GENERATE : 0));
214 }
215 
216 static unsigned int
217 sfxge_port_link_fc_ifm(struct sfxge_softc *sc)
218 {
219 	unsigned int wanted_fc, link_fc;
220 
221 	efx_mac_fcntl_get(sc->enp, &wanted_fc, &link_fc);
222 	return ((link_fc & EFX_FCNTL_RESPOND) ? IFM_ETH_RXPAUSE : 0) |
223 		((link_fc & EFX_FCNTL_GENERATE) ? IFM_ETH_TXPAUSE : 0);
224 }
225 
226 #else /* !SFXGE_HAVE_PAUSE_MEDIAOPTS */
227 
228 static unsigned int
229 sfxge_port_wanted_fc(struct sfxge_softc *sc)
230 {
231 	return (sc->port.wanted_fc);
232 }
233 
/*
 * Without pause media options no flow control bits are reported through
 * ifmedia, so this always contributes nothing to the active media word.
 */
static unsigned int
sfxge_port_link_fc_ifm(struct sfxge_softc *sc)
{
	(void)sc;

	return (0);
}
239 
240 static int
241 sfxge_port_wanted_fc_handler(SYSCTL_HANDLER_ARGS)
242 {
243 	struct sfxge_softc *sc;
244 	struct sfxge_port *port;
245 	unsigned int fcntl;
246 	int error;
247 
248 	sc = arg1;
249 	port = &sc->port;
250 
251 	if (req->newptr != NULL) {
252 		if ((error = SYSCTL_IN(req, &fcntl, sizeof(fcntl))) != 0)
253 			return (error);
254 
255 		SFXGE_PORT_LOCK(port);
256 
257 		if (port->wanted_fc != fcntl) {
258 			if (port->init_state == SFXGE_PORT_STARTED)
259 				error = efx_mac_fcntl_set(sc->enp,
260 							  port->wanted_fc,
261 							  B_TRUE);
262 			if (error == 0)
263 				port->wanted_fc = fcntl;
264 		}
265 
266 		SFXGE_PORT_UNLOCK(port);
267 	} else {
268 		SFXGE_PORT_LOCK(port);
269 		fcntl = port->wanted_fc;
270 		SFXGE_PORT_UNLOCK(port);
271 
272 		error = SYSCTL_OUT(req, &fcntl, sizeof(fcntl));
273 	}
274 
275 	return (error);
276 }
277 
278 static int
279 sfxge_port_link_fc_handler(SYSCTL_HANDLER_ARGS)
280 {
281 	struct sfxge_softc *sc;
282 	struct sfxge_port *port;
283 	unsigned int wanted_fc, link_fc;
284 
285 	sc = arg1;
286 	port = &sc->port;
287 
288 	SFXGE_PORT_LOCK(port);
289 	if (__predict_true(port->init_state == SFXGE_PORT_STARTED) &&
290 	    SFXGE_LINK_UP(sc))
291 		efx_mac_fcntl_get(sc->enp, &wanted_fc, &link_fc);
292 	else
293 		link_fc = 0;
294 	SFXGE_PORT_UNLOCK(port);
295 
296 	return (SYSCTL_OUT(req, &link_fc, sizeof(link_fc)));
297 }
298 
299 #endif /* SFXGE_HAVE_PAUSE_MEDIAOPTS */
300 
301 static const uint64_t sfxge_link_baudrate[EFX_LINK_NMODES] = {
302 	[EFX_LINK_10HDX]	= IF_Mbps(10),
303 	[EFX_LINK_10FDX]	= IF_Mbps(10),
304 	[EFX_LINK_100HDX]	= IF_Mbps(100),
305 	[EFX_LINK_100FDX]	= IF_Mbps(100),
306 	[EFX_LINK_1000HDX]	= IF_Gbps(1),
307 	[EFX_LINK_1000FDX]	= IF_Gbps(1),
308 	[EFX_LINK_10000FDX]	= IF_Gbps(10),
309 	[EFX_LINK_25000FDX]	= IF_Gbps(25),
310 	[EFX_LINK_40000FDX]	= IF_Gbps(40),
311 	[EFX_LINK_50000FDX]	= IF_Gbps(50),
312 	[EFX_LINK_100000FDX]	= IF_Gbps(100),
313 };
314 
315 void
316 sfxge_mac_link_update(struct sfxge_softc *sc, efx_link_mode_t mode)
317 {
318 	struct sfxge_port *port;
319 	int link_state;
320 
321 	port = &sc->port;
322 
323 	if (port->link_mode == mode)
324 		return;
325 
326 	port->link_mode = mode;
327 
328 	/* Push link state update to the OS */
329 	link_state = (SFXGE_LINK_UP(sc) ? LINK_STATE_UP : LINK_STATE_DOWN);
330 	if_setbaudrate(sc->ifnet, sfxge_link_baudrate[port->link_mode]);
331 	if_link_state_change(sc->ifnet, link_state);
332 }
333 
334 static void
335 sfxge_mac_poll_work(void *arg, int npending)
336 {
337 	struct sfxge_softc *sc;
338 	efx_nic_t *enp;
339 	struct sfxge_port *port;
340 	efx_link_mode_t mode;
341 
342 	sc = (struct sfxge_softc *)arg;
343 	enp = sc->enp;
344 	port = &sc->port;
345 
346 	SFXGE_PORT_LOCK(port);
347 
348 	if (__predict_false(port->init_state != SFXGE_PORT_STARTED))
349 		goto done;
350 
351 	/* This may sleep waiting for MCDI completion */
352 	(void)efx_port_poll(enp, &mode);
353 	sfxge_mac_link_update(sc, mode);
354 
355 done:
356 	SFXGE_PORT_UNLOCK(port);
357 }
358 
359 static u_int
360 sfxge_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
361 {
362 	uint8_t *mcast_addr = arg;
363 
364 	if (cnt == EFX_MAC_MULTICAST_LIST_MAX)
365 		return (0);
366 
367 	memcpy(mcast_addr + (cnt * EFX_MAC_ADDR_LEN), LLADDR(sdl),
368 	    EFX_MAC_ADDR_LEN);
369 
370 	return (1);
371 }
372 
373 static int
374 sfxge_mac_multicast_list_set(struct sfxge_softc *sc)
375 {
376 	if_t ifp = sc->ifnet;
377 	struct sfxge_port *port = &sc->port;
378 	int rc = 0;
379 
380 	mtx_assert(&port->lock, MA_OWNED);
381 
382 	port->mcast_count = if_foreach_llmaddr(ifp, sfxge_copy_maddr,
383 	    port->mcast_addrs);
384 	if (port->mcast_count == EFX_MAC_MULTICAST_LIST_MAX) {
385 		device_printf(sc->dev, "Too many multicast addresses\n");
386 		rc = EINVAL;
387 	}
388 
389 	if (rc == 0) {
390 		rc = efx_mac_multicast_list_set(sc->enp, port->mcast_addrs,
391 						port->mcast_count);
392 		if (rc != 0)
393 			device_printf(sc->dev,
394 			    "Cannot set multicast address list\n");
395 	}
396 
397 	return (rc);
398 }
399 
400 static int
401 sfxge_mac_filter_set_locked(struct sfxge_softc *sc)
402 {
403 	if_t ifp = sc->ifnet;
404 	struct sfxge_port *port = &sc->port;
405 	boolean_t all_mulcst;
406 	int rc;
407 
408 	mtx_assert(&port->lock, MA_OWNED);
409 
410 	all_mulcst = !!(if_getflags(ifp) & (IFF_PROMISC | IFF_ALLMULTI));
411 
412 	rc = sfxge_mac_multicast_list_set(sc);
413 	/* Fallback to all multicast if cannot set multicast list */
414 	if (rc != 0)
415 		all_mulcst = B_TRUE;
416 
417 	rc = efx_mac_filter_set(sc->enp, !!(if_getflags(ifp) & IFF_PROMISC),
418 				(port->mcast_count > 0), all_mulcst, B_TRUE);
419 
420 	return (rc);
421 }
422 
423 int
424 sfxge_mac_filter_set(struct sfxge_softc *sc)
425 {
426 	struct sfxge_port *port = &sc->port;
427 	int rc;
428 
429 	SFXGE_PORT_LOCK(port);
430 	/*
431 	 * The function may be called without softc_lock held in the
432 	 * case of SIOCADDMULTI and SIOCDELMULTI ioctls. ioctl handler
433 	 * checks IFF_DRV_RUNNING flag which implies port started, but
434 	 * it is not guaranteed to remain. softc_lock shared lock can't
435 	 * be held in the case of these ioctls processing, since it
436 	 * results in failure where kernel complains that non-sleepable
437 	 * lock is held in sleeping thread. Both problems are repeatable
438 	 * on LAG with LACP proto bring up.
439 	 */
440 	if (__predict_true(port->init_state == SFXGE_PORT_STARTED))
441 		rc = sfxge_mac_filter_set_locked(sc);
442 	else
443 		rc = 0;
444 	SFXGE_PORT_UNLOCK(port);
445 	return (rc);
446 }
447 
448 void
449 sfxge_port_stop(struct sfxge_softc *sc)
450 {
451 	struct sfxge_port *port;
452 	efx_nic_t *enp;
453 
454 	port = &sc->port;
455 	enp = sc->enp;
456 
457 	SFXGE_PORT_LOCK(port);
458 
459 	KASSERT(port->init_state == SFXGE_PORT_STARTED,
460 	    ("port not started"));
461 
462 	port->init_state = SFXGE_PORT_INITIALIZED;
463 
464 	port->mac_stats.update_time = 0;
465 
466 	/* This may call MCDI */
467 	(void)efx_mac_drain(enp, B_TRUE);
468 
469 	(void)efx_mac_stats_periodic(enp, &port->mac_stats.dma_buf, 0, B_FALSE);
470 
471 	port->link_mode = EFX_LINK_UNKNOWN;
472 
473 	/* Destroy the common code port object. */
474 	efx_port_fini(enp);
475 
476 	efx_filter_fini(enp);
477 
478 	SFXGE_PORT_UNLOCK(port);
479 }
480 
481 int
482 sfxge_port_start(struct sfxge_softc *sc)
483 {
484 	uint8_t mac_addr[ETHER_ADDR_LEN];
485 	struct epoch_tracker et;
486 	if_t ifp = sc->ifnet;
487 	struct sfxge_port *port;
488 	efx_nic_t *enp;
489 	size_t pdu;
490 	int rc;
491 	uint32_t phy_cap_mask;
492 
493 	port = &sc->port;
494 	enp = sc->enp;
495 
496 	SFXGE_PORT_LOCK(port);
497 
498 	KASSERT(port->init_state == SFXGE_PORT_INITIALIZED,
499 	    ("port not initialized"));
500 
501 	/* Initialise the required filtering */
502 	if ((rc = efx_filter_init(enp)) != 0)
503 		goto fail_filter_init;
504 
505 	/* Initialize the port object in the common code. */
506 	if ((rc = efx_port_init(sc->enp)) != 0)
507 		goto fail;
508 
509 	/* Set the SDU */
510 	pdu = EFX_MAC_PDU(if_getmtu(ifp));
511 	if ((rc = efx_mac_pdu_set(enp, pdu)) != 0)
512 		goto fail2;
513 
514 	if ((rc = efx_mac_fcntl_set(enp, sfxge_port_wanted_fc(sc), B_TRUE))
515 	    != 0)
516 		goto fail3;
517 
518 	/* Set the unicast address */
519 	NET_EPOCH_ENTER(et);
520 	bcopy(if_getlladdr(ifp), mac_addr, sizeof(mac_addr));
521 	NET_EPOCH_EXIT(et);
522 	if ((rc = efx_mac_addr_set(enp, mac_addr)) != 0)
523 		goto fail4;
524 
525 	sfxge_mac_filter_set_locked(sc);
526 
527 	/* Update MAC stats by DMA every period */
528 	if ((rc = efx_mac_stats_periodic(enp, &port->mac_stats.dma_buf,
529 					 port->stats_update_period_ms,
530 					 B_FALSE)) != 0)
531 		goto fail6;
532 
533 	if ((rc = efx_mac_drain(enp, B_FALSE)) != 0)
534 		goto fail8;
535 
536 	if ((rc = sfxge_phy_cap_mask(sc, sc->media.ifm_cur->ifm_media,
537 				     &phy_cap_mask)) != 0)
538 		goto fail9;
539 
540 	if ((rc = efx_phy_adv_cap_set(sc->enp, phy_cap_mask)) != 0)
541 		goto fail10;
542 
543 	port->init_state = SFXGE_PORT_STARTED;
544 
545 	/* Single poll in case there were missing initial events */
546 	SFXGE_PORT_UNLOCK(port);
547 	sfxge_mac_poll_work(sc, 0);
548 
549 	return (0);
550 
551 fail10:
552 fail9:
553 	(void)efx_mac_drain(enp, B_TRUE);
554 fail8:
555 	(void)efx_mac_stats_periodic(enp, &port->mac_stats.dma_buf, 0, B_FALSE);
556 fail6:
557 fail4:
558 fail3:
559 
560 fail2:
561 	efx_port_fini(enp);
562 fail:
563 	efx_filter_fini(enp);
564 fail_filter_init:
565 	SFXGE_PORT_UNLOCK(port);
566 
567 	return (rc);
568 }
569 
570 static int
571 sfxge_phy_stat_update(struct sfxge_softc *sc)
572 {
573 	struct sfxge_port *port = &sc->port;
574 	efsys_mem_t *esmp = &port->phy_stats.dma_buf;
575 	clock_t now;
576 	unsigned int count;
577 	int rc;
578 
579 	SFXGE_PORT_LOCK_ASSERT_OWNED(port);
580 
581 	if (__predict_false(port->init_state != SFXGE_PORT_STARTED)) {
582 		rc = 0;
583 		goto out;
584 	}
585 
586 	now = ticks;
587 	if ((unsigned int)(now - port->phy_stats.update_time) < (unsigned int)hz) {
588 		rc = 0;
589 		goto out;
590 	}
591 
592 	port->phy_stats.update_time = now;
593 
594 	/* If we're unlucky enough to read statistics wduring the DMA, wait
595 	 * up to 10ms for it to finish (typically takes <500us) */
596 	for (count = 0; count < 100; ++count) {
597 		EFSYS_PROBE1(wait, unsigned int, count);
598 
599 		/* Synchronize the DMA memory for reading */
600 		bus_dmamap_sync(esmp->esm_tag, esmp->esm_map,
601 		    BUS_DMASYNC_POSTREAD);
602 
603 		/* Try to update the cached counters */
604 		if ((rc = efx_phy_stats_update(sc->enp, esmp,
605 		    port->phy_stats.decode_buf)) != EAGAIN)
606 			goto out;
607 
608 		DELAY(100);
609 	}
610 
611 	rc = ETIMEDOUT;
612 out:
613 	return (rc);
614 }
615 
616 static int
617 sfxge_phy_stat_handler(SYSCTL_HANDLER_ARGS)
618 {
619 	struct sfxge_softc *sc = arg1;
620 	unsigned int id = arg2;
621 	int rc;
622 	uint32_t val;
623 
624 	SFXGE_PORT_LOCK(&sc->port);
625 	if ((rc = sfxge_phy_stat_update(sc)) == 0)
626 		val = ((uint32_t *)sc->port.phy_stats.decode_buf)[id];
627 	SFXGE_PORT_UNLOCK(&sc->port);
628 
629 	if (rc == 0)
630 		rc = SYSCTL_OUT(req, &val, sizeof(val));
631 	return (rc);
632 }
633 
634 static void
635 sfxge_phy_stat_init(struct sfxge_softc *sc)
636 {
637 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
638 	struct sysctl_oid_list *stat_list;
639 	unsigned int id;
640 	const char *name;
641 	uint64_t stat_mask = efx_nic_cfg_get(sc->enp)->enc_phy_stat_mask;
642 
643 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
644 
645 	/* Initialise the named stats */
646 	for (id = 0; id < EFX_PHY_NSTATS; id++) {
647 		if (!(stat_mask & ((uint64_t)1 << id)))
648 			continue;
649 		name = efx_phy_stat_name(sc->enp, id);
650 		SYSCTL_ADD_PROC(ctx, stat_list, OID_AUTO, name,
651 		    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE,
652 		    sc, id, sfxge_phy_stat_handler,
653 		    id == EFX_PHY_STAT_OUI ? "IX" : "IU", "");
654 	}
655 }
656 
657 void
658 sfxge_port_fini(struct sfxge_softc *sc)
659 {
660 	struct sfxge_port *port;
661 	efsys_mem_t *esmp;
662 
663 	port = &sc->port;
664 	esmp = &port->mac_stats.dma_buf;
665 
666 	KASSERT(port->init_state == SFXGE_PORT_INITIALIZED,
667 	    ("Port not initialized"));
668 
669 	port->init_state = SFXGE_PORT_UNINITIALIZED;
670 
671 	port->link_mode = EFX_LINK_UNKNOWN;
672 
673 	/* Finish with PHY DMA memory */
674 	sfxge_dma_free(&port->phy_stats.dma_buf);
675 	free(port->phy_stats.decode_buf, M_SFXGE);
676 
677 	sfxge_dma_free(esmp);
678 	free(port->mac_stats.decode_buf, M_SFXGE);
679 
680 	SFXGE_PORT_LOCK_DESTROY(port);
681 
682 	port->sc = NULL;
683 }
684 
685 static uint16_t
686 sfxge_port_stats_update_period_ms(struct sfxge_softc *sc)
687 {
688 	int period_ms = sfxge_stats_update_period_ms;
689 
690 	if (period_ms < 0) {
691 		device_printf(sc->dev,
692 			"treat negative stats update period %d as 0 (disable)\n",
693 			 period_ms);
694 		period_ms = 0;
695 	} else if (period_ms > UINT16_MAX) {
696 		device_printf(sc->dev,
697 			"treat too big stats update period %d as %u\n",
698 			period_ms, UINT16_MAX);
699 		period_ms = UINT16_MAX;
700 	}
701 
702 	return period_ms;
703 }
704 
705 static int
706 sfxge_port_stats_update_period_ms_handler(SYSCTL_HANDLER_ARGS)
707 {
708 	struct sfxge_softc *sc;
709 	struct sfxge_port *port;
710 	unsigned int period_ms;
711 	int error;
712 
713 	sc = arg1;
714 	port = &sc->port;
715 
716 	if (req->newptr != NULL) {
717 		error = SYSCTL_IN(req, &period_ms, sizeof(period_ms));
718 		if (error != 0)
719 			return (error);
720 
721 		if (period_ms > UINT16_MAX)
722 			return (EINVAL);
723 
724 		SFXGE_PORT_LOCK(port);
725 
726 		if (port->stats_update_period_ms != period_ms) {
727 			if (port->init_state == SFXGE_PORT_STARTED)
728 				error = efx_mac_stats_periodic(sc->enp,
729 						&port->mac_stats.dma_buf,
730 						period_ms, B_FALSE);
731 			if (error == 0)
732 				port->stats_update_period_ms = period_ms;
733 		}
734 
735 		SFXGE_PORT_UNLOCK(port);
736 	} else {
737 		SFXGE_PORT_LOCK(port);
738 		period_ms = port->stats_update_period_ms;
739 		SFXGE_PORT_UNLOCK(port);
740 
741 		error = SYSCTL_OUT(req, &period_ms, sizeof(period_ms));
742 	}
743 
744 	return (error);
745 }
746 
747 int
748 sfxge_port_init(struct sfxge_softc *sc)
749 {
750 	struct sfxge_port *port;
751 	struct sysctl_ctx_list *sysctl_ctx;
752 	struct sysctl_oid *sysctl_tree;
753 	efsys_mem_t *mac_stats_buf, *phy_stats_buf;
754 	uint32_t mac_nstats;
755 	size_t mac_stats_size;
756 	int rc;
757 
758 	port = &sc->port;
759 	mac_stats_buf = &port->mac_stats.dma_buf;
760 	phy_stats_buf = &port->phy_stats.dma_buf;
761 
762 	KASSERT(port->init_state == SFXGE_PORT_UNINITIALIZED,
763 	    ("Port already initialized"));
764 
765 	port->sc = sc;
766 
767 	SFXGE_PORT_LOCK_INIT(port, device_get_nameunit(sc->dev));
768 
769 	DBGPRINT(sc->dev, "alloc PHY stats");
770 	port->phy_stats.decode_buf = malloc(EFX_PHY_NSTATS * sizeof(uint32_t),
771 					    M_SFXGE, M_WAITOK | M_ZERO);
772 	if ((rc = sfxge_dma_alloc(sc, EFX_PHY_STATS_SIZE, phy_stats_buf)) != 0)
773 		goto fail;
774 	sfxge_phy_stat_init(sc);
775 
776 	DBGPRINT(sc->dev, "init sysctl");
777 	sysctl_ctx = device_get_sysctl_ctx(sc->dev);
778 	sysctl_tree = device_get_sysctl_tree(sc->dev);
779 
780 #ifndef SFXGE_HAVE_PAUSE_MEDIAOPTS
781 	/* If flow control cannot be configured or reported through
782 	 * ifmedia, provide sysctls for it. */
783 	port->wanted_fc = EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE;
784 	SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
785 	    "wanted_fc", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
786 	    sfxge_port_wanted_fc_handler, "IU", "wanted flow control mode");
787 	SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
788 	    "link_fc", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
789 	    sfxge_port_link_fc_handler, "IU", "link flow control mode");
790 #endif
791 
792 	DBGPRINT(sc->dev, "alloc MAC stats");
793 	port->mac_stats.decode_buf = malloc(EFX_MAC_NSTATS * sizeof(uint64_t),
794 					    M_SFXGE, M_WAITOK | M_ZERO);
795 	mac_nstats = efx_nic_cfg_get(sc->enp)->enc_mac_stats_nstats;
796 	mac_stats_size = EFX_P2ROUNDUP(size_t, mac_nstats * sizeof(uint64_t),
797 				       EFX_BUF_SIZE);
798 	if ((rc = sfxge_dma_alloc(sc, mac_stats_size, mac_stats_buf)) != 0)
799 		goto fail2;
800 	port->stats_update_period_ms = sfxge_port_stats_update_period_ms(sc);
801 	sfxge_mac_stat_init(sc);
802 
803 	SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
804 	    "stats_update_period_ms",
805 	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
806 	    sfxge_port_stats_update_period_ms_handler, "IU",
807 	    "interface statistics refresh period");
808 
809 	port->init_state = SFXGE_PORT_INITIALIZED;
810 
811 	DBGPRINT(sc->dev, "success");
812 	return (0);
813 
814 fail2:
815 	free(port->mac_stats.decode_buf, M_SFXGE);
816 	sfxge_dma_free(phy_stats_buf);
817 fail:
818 	free(port->phy_stats.decode_buf, M_SFXGE);
819 	SFXGE_PORT_LOCK_DESTROY(port);
820 	port->sc = NULL;
821 	DBGPRINT(sc->dev, "failed %d", rc);
822 	return (rc);
823 }
824 
825 static const int sfxge_link_mode[EFX_PHY_MEDIA_NTYPES][EFX_LINK_NMODES] = {
826 	[EFX_PHY_MEDIA_CX4] = {
827 		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_CX4,
828 	},
829 	[EFX_PHY_MEDIA_KX4] = {
830 		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_KX4,
831 	},
832 	[EFX_PHY_MEDIA_XFP] = {
833 		/* Don't know the module type, but assume SR for now. */
834 		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_SR,
835 	},
836 	[EFX_PHY_MEDIA_QSFP_PLUS] = {
837 		/* Don't know the module type, but assume SR for now. */
838 		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_SR,
839 		[EFX_LINK_25000FDX]	= IFM_ETHER | IFM_FDX | IFM_25G_SR,
840 		[EFX_LINK_40000FDX]	= IFM_ETHER | IFM_FDX | IFM_40G_CR4,
841 		[EFX_LINK_50000FDX]	= IFM_ETHER | IFM_FDX | IFM_50G_SR,
842 		[EFX_LINK_100000FDX]	= IFM_ETHER | IFM_FDX | IFM_100G_SR2,
843 	},
844 	[EFX_PHY_MEDIA_SFP_PLUS] = {
845 		/* Don't know the module type, but assume SX/SR for now. */
846 		[EFX_LINK_1000FDX]	= IFM_ETHER | IFM_FDX | IFM_1000_SX,
847 		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_SR,
848 		[EFX_LINK_25000FDX]	= IFM_ETHER | IFM_FDX | IFM_25G_SR,
849 	},
850 	[EFX_PHY_MEDIA_BASE_T] = {
851 		[EFX_LINK_10HDX]	= IFM_ETHER | IFM_HDX | IFM_10_T,
852 		[EFX_LINK_10FDX]	= IFM_ETHER | IFM_FDX | IFM_10_T,
853 		[EFX_LINK_100HDX]	= IFM_ETHER | IFM_HDX | IFM_100_TX,
854 		[EFX_LINK_100FDX]	= IFM_ETHER | IFM_FDX | IFM_100_TX,
855 		[EFX_LINK_1000HDX]	= IFM_ETHER | IFM_HDX | IFM_1000_T,
856 		[EFX_LINK_1000FDX]	= IFM_ETHER | IFM_FDX | IFM_1000_T,
857 		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_T,
858 	},
859 };
860 
861 static void
862 sfxge_media_status(if_t ifp, struct ifmediareq *ifmr)
863 {
864 	struct sfxge_softc *sc;
865 	efx_phy_media_type_t medium_type;
866 	efx_link_mode_t mode;
867 
868 	sc = if_getsoftc(ifp);
869 	SFXGE_ADAPTER_LOCK(sc);
870 
871 	ifmr->ifm_status = IFM_AVALID;
872 	ifmr->ifm_active = IFM_ETHER;
873 
874 	if (SFXGE_RUNNING(sc) && SFXGE_LINK_UP(sc)) {
875 		ifmr->ifm_status |= IFM_ACTIVE;
876 
877 		efx_phy_media_type_get(sc->enp, &medium_type);
878 		mode = sc->port.link_mode;
879 		ifmr->ifm_active |= sfxge_link_mode[medium_type][mode];
880 		ifmr->ifm_active |= sfxge_port_link_fc_ifm(sc);
881 	}
882 
883 	SFXGE_ADAPTER_UNLOCK(sc);
884 }
885 
886 static efx_phy_cap_type_t
887 sfxge_link_mode_to_phy_cap(efx_link_mode_t mode)
888 {
889 	switch (mode) {
890 	case EFX_LINK_10HDX:
891 		return (EFX_PHY_CAP_10HDX);
892 	case EFX_LINK_10FDX:
893 		return (EFX_PHY_CAP_10FDX);
894 	case EFX_LINK_100HDX:
895 		return (EFX_PHY_CAP_100HDX);
896 	case EFX_LINK_100FDX:
897 		return (EFX_PHY_CAP_100FDX);
898 	case EFX_LINK_1000HDX:
899 		return (EFX_PHY_CAP_1000HDX);
900 	case EFX_LINK_1000FDX:
901 		return (EFX_PHY_CAP_1000FDX);
902 	case EFX_LINK_10000FDX:
903 		return (EFX_PHY_CAP_10000FDX);
904 	case EFX_LINK_25000FDX:
905 		return (EFX_PHY_CAP_25000FDX);
906 	case EFX_LINK_40000FDX:
907 		return (EFX_PHY_CAP_40000FDX);
908 	case EFX_LINK_50000FDX:
909 		return (EFX_PHY_CAP_50000FDX);
910 	case EFX_LINK_100000FDX:
911 		return (EFX_PHY_CAP_100000FDX);
912 	default:
913 		return (EFX_PHY_CAP_INVALID);
914 	}
915 }
916 
917 static int
918 sfxge_phy_cap_mask(struct sfxge_softc *sc, int ifmedia, uint32_t *phy_cap_mask)
919 {
920 	/* Get global options (duplex), type and subtype bits */
921 	int ifmedia_masked = ifmedia & (IFM_GMASK | IFM_NMASK | IFM_TMASK);
922 	efx_phy_media_type_t medium_type;
923 	boolean_t mode_found = B_FALSE;
924 	uint32_t cap_mask, mode_cap_mask;
925 	efx_link_mode_t mode;
926 	efx_phy_cap_type_t phy_cap;
927 
928 	efx_phy_media_type_get(sc->enp, &medium_type);
929 	if (medium_type >= nitems(sfxge_link_mode)) {
930 		if_printf(sc->ifnet, "unexpected media type %d\n", medium_type);
931 		return (EINVAL);
932 	}
933 
934 	efx_phy_adv_cap_get(sc->enp, EFX_PHY_CAP_PERM, &cap_mask);
935 
936 	for (mode = EFX_LINK_10HDX; mode < EFX_LINK_NMODES; mode++) {
937 		if (ifmedia_masked == sfxge_link_mode[medium_type][mode]) {
938 			mode_found = B_TRUE;
939 			break;
940 		}
941 	}
942 
943 	if (!mode_found) {
944 		/*
945 		 * If media is not in the table, it must be IFM_AUTO.
946 		 */
947 		KASSERT((cap_mask & (1 << EFX_PHY_CAP_AN)) &&
948 		    ifmedia_masked == (IFM_ETHER | IFM_AUTO),
949 		    ("%s: no mode for media %#x", __func__, ifmedia));
950 		*phy_cap_mask = (cap_mask & ~(1 << EFX_PHY_CAP_ASYM));
951 		return (0);
952 	}
953 
954 	phy_cap = sfxge_link_mode_to_phy_cap(mode);
955 	if (phy_cap == EFX_PHY_CAP_INVALID) {
956 		if_printf(sc->ifnet,
957 			  "cannot map link mode %d to phy capability\n",
958 			  mode);
959 		return (EINVAL);
960 	}
961 
962 	mode_cap_mask = (1 << phy_cap);
963 	mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_AN);
964 #ifdef SFXGE_HAVE_PAUSE_MEDIAOPTS
965 	if (ifmedia & IFM_ETH_RXPAUSE)
966 		mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_PAUSE);
967 	if (!(ifmedia & IFM_ETH_TXPAUSE))
968 		mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_ASYM);
969 #else
970 	mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_PAUSE);
971 #endif
972 
973 	*phy_cap_mask = mode_cap_mask;
974 	return (0);
975 }
976 
977 static int
978 sfxge_media_change(if_t ifp)
979 {
980 	struct sfxge_softc *sc;
981 	struct ifmedia_entry *ifm;
982 	int rc;
983 	uint32_t phy_cap_mask;
984 
985 	sc = if_getsoftc(ifp);
986 	ifm = sc->media.ifm_cur;
987 
988 	SFXGE_ADAPTER_LOCK(sc);
989 
990 	if (!SFXGE_RUNNING(sc)) {
991 		rc = 0;
992 		goto out;
993 	}
994 
995 	rc = efx_mac_fcntl_set(sc->enp, sfxge_port_wanted_fc(sc), B_TRUE);
996 	if (rc != 0)
997 		goto out;
998 
999 	if ((rc = sfxge_phy_cap_mask(sc, ifm->ifm_media, &phy_cap_mask)) != 0)
1000 		goto out;
1001 
1002 	rc = efx_phy_adv_cap_set(sc->enp, phy_cap_mask);
1003 out:
1004 	SFXGE_ADAPTER_UNLOCK(sc);
1005 
1006 	return (rc);
1007 }
1008 
1009 int sfxge_port_ifmedia_init(struct sfxge_softc *sc)
1010 {
1011 	efx_phy_media_type_t medium_type;
1012 	uint32_t cap_mask, mode_cap_mask;
1013 	efx_link_mode_t mode;
1014 	efx_phy_cap_type_t phy_cap;
1015 	int mode_ifm, best_mode_ifm = 0;
1016 	int rc;
1017 
1018 	/*
1019 	 * We need port state to initialise the ifmedia list.
1020 	 * It requires initialized NIC what is already done in
1021 	 * sfxge_create() when resources are estimated.
1022 	 */
1023 	if ((rc = efx_filter_init(sc->enp)) != 0)
1024 		goto out1;
1025 	if ((rc = efx_port_init(sc->enp)) != 0)
1026 		goto out2;
1027 
1028 	/*
1029 	 * Register ifconfig callbacks for querying and setting the
1030 	 * link mode and link status.
1031 	 */
1032 	ifmedia_init(&sc->media, IFM_IMASK, sfxge_media_change,
1033 	    sfxge_media_status);
1034 
1035 	/*
1036 	 * Map firmware medium type and capabilities to ifmedia types.
1037 	 * ifmedia does not distinguish between forcing the link mode
1038 	 * and disabling auto-negotiation.  1000BASE-T and 10GBASE-T
1039 	 * require AN even if only one link mode is enabled, and for
1040 	 * 100BASE-TX it is useful even if the link mode is forced.
1041 	 * Therefore we never disable auto-negotiation.
1042 	 *
1043 	 * Also enable and advertise flow control by default.
1044 	 */
1045 
1046 	efx_phy_media_type_get(sc->enp, &medium_type);
1047 	efx_phy_adv_cap_get(sc->enp, EFX_PHY_CAP_PERM, &cap_mask);
1048 
1049 	for (mode = EFX_LINK_10HDX; mode < EFX_LINK_NMODES; mode++) {
1050 		phy_cap = sfxge_link_mode_to_phy_cap(mode);
1051 		if (phy_cap == EFX_PHY_CAP_INVALID)
1052 			continue;
1053 
1054 		mode_cap_mask = (1 << phy_cap);
1055 		mode_ifm = sfxge_link_mode[medium_type][mode];
1056 
1057 		if ((cap_mask & mode_cap_mask) && mode_ifm) {
1058 			/* No flow-control */
1059 			ifmedia_add(&sc->media, mode_ifm, 0, NULL);
1060 
1061 #ifdef SFXGE_HAVE_PAUSE_MEDIAOPTS
1062 			/* Respond-only.  If using AN, we implicitly
1063 			 * offer symmetric as well, but that doesn't
1064 			 * mean we *have* to generate pause frames.
1065 			 */
1066 			mode_ifm |= IFM_ETH_RXPAUSE;
1067 			ifmedia_add(&sc->media, mode_ifm, 0, NULL);
1068 
1069 			/* Symmetric */
1070 			mode_ifm |= IFM_ETH_TXPAUSE;
1071 			ifmedia_add(&sc->media, mode_ifm, 0, NULL);
1072 #endif
1073 
1074 			/* Link modes are numbered in order of speed,
1075 			 * so assume the last one available is the best.
1076 			 */
1077 			best_mode_ifm = mode_ifm;
1078 		}
1079 	}
1080 
1081 	if (cap_mask & (1 << EFX_PHY_CAP_AN)) {
1082 		/* Add autoselect mode. */
1083 		mode_ifm = IFM_ETHER | IFM_AUTO;
1084 		ifmedia_add(&sc->media, mode_ifm, 0, NULL);
1085 		best_mode_ifm = mode_ifm;
1086 	}
1087 
1088 	if (best_mode_ifm != 0)
1089 		ifmedia_set(&sc->media, best_mode_ifm);
1090 
1091 	/* Now discard port state until interface is started. */
1092 	efx_port_fini(sc->enp);
1093 out2:
1094 	efx_filter_fini(sc->enp);
1095 out1:
1096 	return (rc);
1097 }
1098