xref: /freebsd/sys/dev/sfxge/sfxge_port.c (revision 5ab1c5846ff41be24b1f6beb0317bf8258cd4409)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2010-2016 Solarflare Communications Inc.
5  * All rights reserved.
6  *
7  * This software was developed in part by Philip Paeps under contract for
8  * Solarflare Communications, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright notice,
14  *    this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright notice,
16  *    this list of conditions and the following disclaimer in the documentation
17  *    and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * The views and conclusions contained in the software and documentation are
32  * those of the authors and should not be interpreted as representing official
33  * policies, either expressed or implied, of the FreeBSD Project.
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 
39 #include <sys/types.h>
40 #include <sys/limits.h>
41 #include <net/ethernet.h>
42 #include <net/if_dl.h>
43 
44 #include "common/efx.h"
45 
46 #include "sfxge.h"
47 
/*
 * Driver-wide default for the statistics update period, in milliseconds.
 * It is registered both as a tunable and as a CTLFLAG_RDTUN sysctl under
 * _hw_sfxge.  Each port range-checks and copies the value in
 * sfxge_port_stats_update_period_ms() during sfxge_port_init().
 */
#define	SFXGE_PARAM_STATS_UPDATE_PERIOD_MS \
	SFXGE_PARAM(stats_update_period_ms)
static int sfxge_stats_update_period_ms = SFXGE_STATS_UPDATE_PERIOD_MS;
TUNABLE_INT(SFXGE_PARAM_STATS_UPDATE_PERIOD_MS,
	    &sfxge_stats_update_period_ms);
SYSCTL_INT(_hw_sfxge, OID_AUTO, stats_update_period_ms, CTLFLAG_RDTUN,
	   &sfxge_stats_update_period_ms, 0,
	   "netstat interface statistics update period in milliseconds");
56 
/*
 * Forward declaration: sfxge_port_start() calls this helper before its
 * definition appears later in the file.
 */
static int sfxge_phy_cap_mask(struct sfxge_softc *, int, uint32_t *);
58 
59 static int
60 sfxge_mac_stat_update(struct sfxge_softc *sc)
61 {
62 	struct sfxge_port *port = &sc->port;
63 	efsys_mem_t *esmp = &(port->mac_stats.dma_buf);
64 	clock_t now;
65 	unsigned int min_ticks;
66 	unsigned int count;
67 	int rc;
68 
69 	SFXGE_PORT_LOCK_ASSERT_OWNED(port);
70 
71 	if (__predict_false(port->init_state != SFXGE_PORT_STARTED)) {
72 		rc = 0;
73 		goto out;
74 	}
75 
76 	min_ticks = (unsigned int)hz * port->stats_update_period_ms / 1000;
77 
78 	now = ticks;
79 	if ((unsigned int)(now - port->mac_stats.update_time) < min_ticks) {
80 		rc = 0;
81 		goto out;
82 	}
83 
84 	port->mac_stats.update_time = now;
85 
86 	/* If we're unlucky enough to read statistics wduring the DMA, wait
87 	 * up to 10ms for it to finish (typically takes <500us) */
88 	for (count = 0; count < 100; ++count) {
89 		EFSYS_PROBE1(wait, unsigned int, count);
90 
91 		/* Try to update the cached counters */
92 		if ((rc = efx_mac_stats_update(sc->enp, esmp,
93 		    port->mac_stats.decode_buf, NULL)) != EAGAIN)
94 			goto out;
95 
96 		DELAY(100);
97 	}
98 
99 	rc = ETIMEDOUT;
100 out:
101 	return (rc);
102 }
103 
104 uint64_t
105 sfxge_get_counter(struct ifnet *ifp, ift_counter c)
106 {
107 	struct sfxge_softc *sc = ifp->if_softc;
108 	uint64_t *mac_stats;
109 	uint64_t val;
110 
111 	SFXGE_PORT_LOCK(&sc->port);
112 
113 	/* Ignore error and use old values */
114 	(void)sfxge_mac_stat_update(sc);
115 
116 	mac_stats = (uint64_t *)sc->port.mac_stats.decode_buf;
117 
118 	switch (c) {
119 	case IFCOUNTER_IPACKETS:
120 		val = mac_stats[EFX_MAC_RX_PKTS];
121 		break;
122 	case IFCOUNTER_IERRORS:
123 		val = mac_stats[EFX_MAC_RX_ERRORS];
124 		break;
125 	case IFCOUNTER_OPACKETS:
126 		val = mac_stats[EFX_MAC_TX_PKTS];
127 		break;
128 	case IFCOUNTER_OERRORS:
129 		val = mac_stats[EFX_MAC_TX_ERRORS];
130 		break;
131 	case IFCOUNTER_COLLISIONS:
132 		val = mac_stats[EFX_MAC_TX_SGL_COL_PKTS] +
133 		      mac_stats[EFX_MAC_TX_MULT_COL_PKTS] +
134 		      mac_stats[EFX_MAC_TX_EX_COL_PKTS] +
135 		      mac_stats[EFX_MAC_TX_LATE_COL_PKTS];
136 		break;
137 	case IFCOUNTER_IBYTES:
138 		val = mac_stats[EFX_MAC_RX_OCTETS];
139 		break;
140 	case IFCOUNTER_OBYTES:
141 		val = mac_stats[EFX_MAC_TX_OCTETS];
142 		break;
143 	case IFCOUNTER_OMCASTS:
144 		val = mac_stats[EFX_MAC_TX_MULTICST_PKTS] +
145 		      mac_stats[EFX_MAC_TX_BRDCST_PKTS];
146 		break;
147 	case IFCOUNTER_OQDROPS:
148 		SFXGE_PORT_UNLOCK(&sc->port);
149 		return (sfxge_tx_get_drops(sc));
150 	case IFCOUNTER_IMCASTS:
151 		/* if_imcasts is maintained in net/if_ethersubr.c */
152 	case IFCOUNTER_IQDROPS:
153 		/* if_iqdrops is maintained in net/if_ethersubr.c */
154 	case IFCOUNTER_NOPROTO:
155 		/* if_noproto is maintained in net/if_ethersubr.c */
156 	default:
157 		SFXGE_PORT_UNLOCK(&sc->port);
158 		return (if_get_counter_default(ifp, c));
159 	}
160 
161 	SFXGE_PORT_UNLOCK(&sc->port);
162 
163 	return (val);
164 }
165 
166 static int
167 sfxge_mac_stat_handler(SYSCTL_HANDLER_ARGS)
168 {
169 	struct sfxge_softc *sc = arg1;
170 	unsigned int id = arg2;
171 	int rc;
172 	uint64_t val;
173 
174 	SFXGE_PORT_LOCK(&sc->port);
175 	if ((rc = sfxge_mac_stat_update(sc)) == 0)
176 		val = ((uint64_t *)sc->port.mac_stats.decode_buf)[id];
177 	SFXGE_PORT_UNLOCK(&sc->port);
178 
179 	if (rc == 0)
180 		rc = SYSCTL_OUT(req, &val, sizeof(val));
181 	return (rc);
182 }
183 
184 static void
185 sfxge_mac_stat_init(struct sfxge_softc *sc)
186 {
187 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
188 	struct sysctl_oid_list *stat_list;
189 	unsigned int id;
190 	const char *name;
191 
192 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
193 
194 	/* Initialise the named stats */
195 	for (id = 0; id < EFX_MAC_NSTATS; id++) {
196 		name = efx_mac_stat_name(sc->enp, id);
197 		SYSCTL_ADD_PROC(
198 			ctx, stat_list,
199 			OID_AUTO, name, CTLTYPE_U64|CTLFLAG_RD,
200 			sc, id, sfxge_mac_stat_handler, "Q",
201 			"");
202 	}
203 }
204 
205 #ifdef SFXGE_HAVE_PAUSE_MEDIAOPTS
206 
207 static unsigned int
208 sfxge_port_wanted_fc(struct sfxge_softc *sc)
209 {
210 	struct ifmedia_entry *ifm = sc->media.ifm_cur;
211 
212 	if (ifm->ifm_media == (IFM_ETHER | IFM_AUTO))
213 		return (EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE);
214 	return (((ifm->ifm_media & IFM_ETH_RXPAUSE) ? EFX_FCNTL_RESPOND : 0) |
215 		((ifm->ifm_media & IFM_ETH_TXPAUSE) ? EFX_FCNTL_GENERATE : 0));
216 }
217 
218 static unsigned int
219 sfxge_port_link_fc_ifm(struct sfxge_softc *sc)
220 {
221 	unsigned int wanted_fc, link_fc;
222 
223 	efx_mac_fcntl_get(sc->enp, &wanted_fc, &link_fc);
224 	return ((link_fc & EFX_FCNTL_RESPOND) ? IFM_ETH_RXPAUSE : 0) |
225 		((link_fc & EFX_FCNTL_GENERATE) ? IFM_ETH_TXPAUSE : 0);
226 }
227 
228 #else /* !SFXGE_HAVE_PAUSE_MEDIAOPTS */
229 
230 static unsigned int
231 sfxge_port_wanted_fc(struct sfxge_softc *sc)
232 {
233 	return (sc->port.wanted_fc);
234 }
235 
/*
 * Without pause media options there is nothing to report through
 * ifmedia, so no option bits are ever returned.
 */
static unsigned int
sfxge_port_link_fc_ifm(struct sfxge_softc *sc)
{
	(void)sc;

	return (0);
}
241 
242 static int
243 sfxge_port_wanted_fc_handler(SYSCTL_HANDLER_ARGS)
244 {
245 	struct sfxge_softc *sc;
246 	struct sfxge_port *port;
247 	unsigned int fcntl;
248 	int error;
249 
250 	sc = arg1;
251 	port = &sc->port;
252 
253 	if (req->newptr != NULL) {
254 		if ((error = SYSCTL_IN(req, &fcntl, sizeof(fcntl))) != 0)
255 			return (error);
256 
257 		SFXGE_PORT_LOCK(port);
258 
259 		if (port->wanted_fc != fcntl) {
260 			if (port->init_state == SFXGE_PORT_STARTED)
261 				error = efx_mac_fcntl_set(sc->enp,
262 							  port->wanted_fc,
263 							  B_TRUE);
264 			if (error == 0)
265 				port->wanted_fc = fcntl;
266 		}
267 
268 		SFXGE_PORT_UNLOCK(port);
269 	} else {
270 		SFXGE_PORT_LOCK(port);
271 		fcntl = port->wanted_fc;
272 		SFXGE_PORT_UNLOCK(port);
273 
274 		error = SYSCTL_OUT(req, &fcntl, sizeof(fcntl));
275 	}
276 
277 	return (error);
278 }
279 
280 static int
281 sfxge_port_link_fc_handler(SYSCTL_HANDLER_ARGS)
282 {
283 	struct sfxge_softc *sc;
284 	struct sfxge_port *port;
285 	unsigned int wanted_fc, link_fc;
286 
287 	sc = arg1;
288 	port = &sc->port;
289 
290 	SFXGE_PORT_LOCK(port);
291 	if (__predict_true(port->init_state == SFXGE_PORT_STARTED) &&
292 	    SFXGE_LINK_UP(sc))
293 		efx_mac_fcntl_get(sc->enp, &wanted_fc, &link_fc);
294 	else
295 		link_fc = 0;
296 	SFXGE_PORT_UNLOCK(port);
297 
298 	return (SYSCTL_OUT(req, &link_fc, sizeof(link_fc)));
299 }
300 
301 #endif /* SFXGE_HAVE_PAUSE_MEDIAOPTS */
302 
/*
 * Interface baud rate for each link mode, indexed by efx_link_mode_t.
 * Link modes without an explicit initialiser are left as zero.
 * sfxge_mac_link_update() copies the selected entry into if_baudrate.
 */
static const uint64_t sfxge_link_baudrate[EFX_LINK_NMODES] = {
	[EFX_LINK_10HDX]	= IF_Mbps(10),
	[EFX_LINK_10FDX]	= IF_Mbps(10),
	[EFX_LINK_100HDX]	= IF_Mbps(100),
	[EFX_LINK_100FDX]	= IF_Mbps(100),
	[EFX_LINK_1000HDX]	= IF_Gbps(1),
	[EFX_LINK_1000FDX]	= IF_Gbps(1),
	[EFX_LINK_10000FDX]	= IF_Gbps(10),
	[EFX_LINK_25000FDX]	= IF_Gbps(25),
	[EFX_LINK_40000FDX]	= IF_Gbps(40),
	[EFX_LINK_50000FDX]	= IF_Gbps(50),
	[EFX_LINK_100000FDX]	= IF_Gbps(100),
};
316 
317 void
318 sfxge_mac_link_update(struct sfxge_softc *sc, efx_link_mode_t mode)
319 {
320 	struct sfxge_port *port;
321 	int link_state;
322 
323 	port = &sc->port;
324 
325 	if (port->link_mode == mode)
326 		return;
327 
328 	port->link_mode = mode;
329 
330 	/* Push link state update to the OS */
331 	link_state = (SFXGE_LINK_UP(sc) ? LINK_STATE_UP : LINK_STATE_DOWN);
332 	sc->ifnet->if_baudrate = sfxge_link_baudrate[port->link_mode];
333 	if_link_state_change(sc->ifnet, link_state);
334 }
335 
336 static void
337 sfxge_mac_poll_work(void *arg, int npending)
338 {
339 	struct sfxge_softc *sc;
340 	efx_nic_t *enp;
341 	struct sfxge_port *port;
342 	efx_link_mode_t mode;
343 
344 	sc = (struct sfxge_softc *)arg;
345 	enp = sc->enp;
346 	port = &sc->port;
347 
348 	SFXGE_PORT_LOCK(port);
349 
350 	if (__predict_false(port->init_state != SFXGE_PORT_STARTED))
351 		goto done;
352 
353 	/* This may sleep waiting for MCDI completion */
354 	(void)efx_port_poll(enp, &mode);
355 	sfxge_mac_link_update(sc, mode);
356 
357 done:
358 	SFXGE_PORT_UNLOCK(port);
359 }
360 
361 static u_int
362 sfxge_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
363 {
364 	uint8_t *mcast_addr = arg;
365 
366 	if (cnt == EFX_MAC_MULTICAST_LIST_MAX)
367 		return (0);
368 
369 	memcpy(mcast_addr + (cnt * EFX_MAC_ADDR_LEN), LLADDR(sdl),
370 	    EFX_MAC_ADDR_LEN);
371 
372 	return (1);
373 }
374 
375 static int
376 sfxge_mac_multicast_list_set(struct sfxge_softc *sc)
377 {
378 	struct ifnet *ifp = sc->ifnet;
379 	struct sfxge_port *port = &sc->port;
380 	int rc = 0;
381 
382 	mtx_assert(&port->lock, MA_OWNED);
383 
384 	port->mcast_count = if_foreach_llmaddr(ifp, sfxge_copy_maddr,
385 	    port->mcast_addrs);
386 	if (port->mcast_count == EFX_MAC_MULTICAST_LIST_MAX) {
387 		device_printf(sc->dev, "Too many multicast addresses\n");
388 		rc = EINVAL;
389 	}
390 
391 	if (rc == 0) {
392 		rc = efx_mac_multicast_list_set(sc->enp, port->mcast_addrs,
393 						port->mcast_count);
394 		if (rc != 0)
395 			device_printf(sc->dev,
396 			    "Cannot set multicast address list\n");
397 	}
398 
399 	return (rc);
400 }
401 
402 static int
403 sfxge_mac_filter_set_locked(struct sfxge_softc *sc)
404 {
405 	struct ifnet *ifp = sc->ifnet;
406 	struct sfxge_port *port = &sc->port;
407 	boolean_t all_mulcst;
408 	int rc;
409 
410 	mtx_assert(&port->lock, MA_OWNED);
411 
412 	all_mulcst = !!(ifp->if_flags & (IFF_PROMISC | IFF_ALLMULTI));
413 
414 	rc = sfxge_mac_multicast_list_set(sc);
415 	/* Fallback to all multicast if cannot set multicast list */
416 	if (rc != 0)
417 		all_mulcst = B_TRUE;
418 
419 	rc = efx_mac_filter_set(sc->enp, !!(ifp->if_flags & IFF_PROMISC),
420 				(port->mcast_count > 0), all_mulcst, B_TRUE);
421 
422 	return (rc);
423 }
424 
425 int
426 sfxge_mac_filter_set(struct sfxge_softc *sc)
427 {
428 	struct sfxge_port *port = &sc->port;
429 	int rc;
430 
431 	SFXGE_PORT_LOCK(port);
432 	/*
433 	 * The function may be called without softc_lock held in the
434 	 * case of SIOCADDMULTI and SIOCDELMULTI ioctls. ioctl handler
435 	 * checks IFF_DRV_RUNNING flag which implies port started, but
436 	 * it is not guaranteed to remain. softc_lock shared lock can't
437 	 * be held in the case of these ioctls processing, since it
438 	 * results in failure where kernel complains that non-sleepable
439 	 * lock is held in sleeping thread. Both problems are repeatable
440 	 * on LAG with LACP proto bring up.
441 	 */
442 	if (__predict_true(port->init_state == SFXGE_PORT_STARTED))
443 		rc = sfxge_mac_filter_set_locked(sc);
444 	else
445 		rc = 0;
446 	SFXGE_PORT_UNLOCK(port);
447 	return (rc);
448 }
449 
450 void
451 sfxge_port_stop(struct sfxge_softc *sc)
452 {
453 	struct sfxge_port *port;
454 	efx_nic_t *enp;
455 
456 	port = &sc->port;
457 	enp = sc->enp;
458 
459 	SFXGE_PORT_LOCK(port);
460 
461 	KASSERT(port->init_state == SFXGE_PORT_STARTED,
462 	    ("port not started"));
463 
464 	port->init_state = SFXGE_PORT_INITIALIZED;
465 
466 	port->mac_stats.update_time = 0;
467 
468 	/* This may call MCDI */
469 	(void)efx_mac_drain(enp, B_TRUE);
470 
471 	(void)efx_mac_stats_periodic(enp, &port->mac_stats.dma_buf, 0, B_FALSE);
472 
473 	port->link_mode = EFX_LINK_UNKNOWN;
474 
475 	/* Destroy the common code port object. */
476 	efx_port_fini(enp);
477 
478 	efx_filter_fini(enp);
479 
480 	SFXGE_PORT_UNLOCK(port);
481 }
482 
483 int
484 sfxge_port_start(struct sfxge_softc *sc)
485 {
486 	uint8_t mac_addr[ETHER_ADDR_LEN];
487 	struct epoch_tracker et;
488 	struct ifnet *ifp = sc->ifnet;
489 	struct sfxge_port *port;
490 	efx_nic_t *enp;
491 	size_t pdu;
492 	int rc;
493 	uint32_t phy_cap_mask;
494 
495 	port = &sc->port;
496 	enp = sc->enp;
497 
498 	SFXGE_PORT_LOCK(port);
499 
500 	KASSERT(port->init_state == SFXGE_PORT_INITIALIZED,
501 	    ("port not initialized"));
502 
503 	/* Initialise the required filtering */
504 	if ((rc = efx_filter_init(enp)) != 0)
505 		goto fail_filter_init;
506 
507 	/* Initialize the port object in the common code. */
508 	if ((rc = efx_port_init(sc->enp)) != 0)
509 		goto fail;
510 
511 	/* Set the SDU */
512 	pdu = EFX_MAC_PDU(ifp->if_mtu);
513 	if ((rc = efx_mac_pdu_set(enp, pdu)) != 0)
514 		goto fail2;
515 
516 	if ((rc = efx_mac_fcntl_set(enp, sfxge_port_wanted_fc(sc), B_TRUE))
517 	    != 0)
518 		goto fail3;
519 
520 	/* Set the unicast address */
521 	NET_EPOCH_ENTER(et);
522 	bcopy(LLADDR((struct sockaddr_dl *)ifp->if_addr->ifa_addr),
523 	      mac_addr, sizeof(mac_addr));
524 	NET_EPOCH_EXIT(et);
525 	if ((rc = efx_mac_addr_set(enp, mac_addr)) != 0)
526 		goto fail4;
527 
528 	sfxge_mac_filter_set_locked(sc);
529 
530 	/* Update MAC stats by DMA every period */
531 	if ((rc = efx_mac_stats_periodic(enp, &port->mac_stats.dma_buf,
532 					 port->stats_update_period_ms,
533 					 B_FALSE)) != 0)
534 		goto fail6;
535 
536 	if ((rc = efx_mac_drain(enp, B_FALSE)) != 0)
537 		goto fail8;
538 
539 	if ((rc = sfxge_phy_cap_mask(sc, sc->media.ifm_cur->ifm_media,
540 				     &phy_cap_mask)) != 0)
541 		goto fail9;
542 
543 	if ((rc = efx_phy_adv_cap_set(sc->enp, phy_cap_mask)) != 0)
544 		goto fail10;
545 
546 	port->init_state = SFXGE_PORT_STARTED;
547 
548 	/* Single poll in case there were missing initial events */
549 	SFXGE_PORT_UNLOCK(port);
550 	sfxge_mac_poll_work(sc, 0);
551 
552 	return (0);
553 
554 fail10:
555 fail9:
556 	(void)efx_mac_drain(enp, B_TRUE);
557 fail8:
558 	(void)efx_mac_stats_periodic(enp, &port->mac_stats.dma_buf, 0, B_FALSE);
559 fail6:
560 fail4:
561 fail3:
562 
563 fail2:
564 	efx_port_fini(enp);
565 fail:
566 	efx_filter_fini(enp);
567 fail_filter_init:
568 	SFXGE_PORT_UNLOCK(port);
569 
570 	return (rc);
571 }
572 
573 static int
574 sfxge_phy_stat_update(struct sfxge_softc *sc)
575 {
576 	struct sfxge_port *port = &sc->port;
577 	efsys_mem_t *esmp = &port->phy_stats.dma_buf;
578 	clock_t now;
579 	unsigned int count;
580 	int rc;
581 
582 	SFXGE_PORT_LOCK_ASSERT_OWNED(port);
583 
584 	if (__predict_false(port->init_state != SFXGE_PORT_STARTED)) {
585 		rc = 0;
586 		goto out;
587 	}
588 
589 	now = ticks;
590 	if ((unsigned int)(now - port->phy_stats.update_time) < (unsigned int)hz) {
591 		rc = 0;
592 		goto out;
593 	}
594 
595 	port->phy_stats.update_time = now;
596 
597 	/* If we're unlucky enough to read statistics wduring the DMA, wait
598 	 * up to 10ms for it to finish (typically takes <500us) */
599 	for (count = 0; count < 100; ++count) {
600 		EFSYS_PROBE1(wait, unsigned int, count);
601 
602 		/* Synchronize the DMA memory for reading */
603 		bus_dmamap_sync(esmp->esm_tag, esmp->esm_map,
604 		    BUS_DMASYNC_POSTREAD);
605 
606 		/* Try to update the cached counters */
607 		if ((rc = efx_phy_stats_update(sc->enp, esmp,
608 		    port->phy_stats.decode_buf)) != EAGAIN)
609 			goto out;
610 
611 		DELAY(100);
612 	}
613 
614 	rc = ETIMEDOUT;
615 out:
616 	return (rc);
617 }
618 
619 static int
620 sfxge_phy_stat_handler(SYSCTL_HANDLER_ARGS)
621 {
622 	struct sfxge_softc *sc = arg1;
623 	unsigned int id = arg2;
624 	int rc;
625 	uint32_t val;
626 
627 	SFXGE_PORT_LOCK(&sc->port);
628 	if ((rc = sfxge_phy_stat_update(sc)) == 0)
629 		val = ((uint32_t *)sc->port.phy_stats.decode_buf)[id];
630 	SFXGE_PORT_UNLOCK(&sc->port);
631 
632 	if (rc == 0)
633 		rc = SYSCTL_OUT(req, &val, sizeof(val));
634 	return (rc);
635 }
636 
637 static void
638 sfxge_phy_stat_init(struct sfxge_softc *sc)
639 {
640 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
641 	struct sysctl_oid_list *stat_list;
642 	unsigned int id;
643 	const char *name;
644 	uint64_t stat_mask = efx_nic_cfg_get(sc->enp)->enc_phy_stat_mask;
645 
646 	stat_list = SYSCTL_CHILDREN(sc->stats_node);
647 
648 	/* Initialise the named stats */
649 	for (id = 0; id < EFX_PHY_NSTATS; id++) {
650 		if (!(stat_mask & ((uint64_t)1 << id)))
651 			continue;
652 		name = efx_phy_stat_name(sc->enp, id);
653 		SYSCTL_ADD_PROC(
654 			ctx, stat_list,
655 			OID_AUTO, name, CTLTYPE_UINT|CTLFLAG_RD,
656 			sc, id, sfxge_phy_stat_handler,
657 			id == EFX_PHY_STAT_OUI ? "IX" : "IU",
658 			"");
659 	}
660 }
661 
662 void
663 sfxge_port_fini(struct sfxge_softc *sc)
664 {
665 	struct sfxge_port *port;
666 	efsys_mem_t *esmp;
667 
668 	port = &sc->port;
669 	esmp = &port->mac_stats.dma_buf;
670 
671 	KASSERT(port->init_state == SFXGE_PORT_INITIALIZED,
672 	    ("Port not initialized"));
673 
674 	port->init_state = SFXGE_PORT_UNINITIALIZED;
675 
676 	port->link_mode = EFX_LINK_UNKNOWN;
677 
678 	/* Finish with PHY DMA memory */
679 	sfxge_dma_free(&port->phy_stats.dma_buf);
680 	free(port->phy_stats.decode_buf, M_SFXGE);
681 
682 	sfxge_dma_free(esmp);
683 	free(port->mac_stats.decode_buf, M_SFXGE);
684 
685 	SFXGE_PORT_LOCK_DESTROY(port);
686 
687 	port->sc = NULL;
688 }
689 
690 static uint16_t
691 sfxge_port_stats_update_period_ms(struct sfxge_softc *sc)
692 {
693 	int period_ms = sfxge_stats_update_period_ms;
694 
695 	if (period_ms < 0) {
696 		device_printf(sc->dev,
697 			"treat negative stats update period %d as 0 (disable)\n",
698 			 period_ms);
699 		period_ms = 0;
700 	} else if (period_ms > UINT16_MAX) {
701 		device_printf(sc->dev,
702 			"treat too big stats update period %d as %u\n",
703 			period_ms, UINT16_MAX);
704 		period_ms = UINT16_MAX;
705 	}
706 
707 	return period_ms;
708 }
709 
710 static int
711 sfxge_port_stats_update_period_ms_handler(SYSCTL_HANDLER_ARGS)
712 {
713 	struct sfxge_softc *sc;
714 	struct sfxge_port *port;
715 	unsigned int period_ms;
716 	int error;
717 
718 	sc = arg1;
719 	port = &sc->port;
720 
721 	if (req->newptr != NULL) {
722 		error = SYSCTL_IN(req, &period_ms, sizeof(period_ms));
723 		if (error != 0)
724 			return (error);
725 
726 		if (period_ms > UINT16_MAX)
727 			return (EINVAL);
728 
729 		SFXGE_PORT_LOCK(port);
730 
731 		if (port->stats_update_period_ms != period_ms) {
732 			if (port->init_state == SFXGE_PORT_STARTED)
733 				error = efx_mac_stats_periodic(sc->enp,
734 						&port->mac_stats.dma_buf,
735 						period_ms, B_FALSE);
736 			if (error == 0)
737 				port->stats_update_period_ms = period_ms;
738 		}
739 
740 		SFXGE_PORT_UNLOCK(port);
741 	} else {
742 		SFXGE_PORT_LOCK(port);
743 		period_ms = port->stats_update_period_ms;
744 		SFXGE_PORT_UNLOCK(port);
745 
746 		error = SYSCTL_OUT(req, &period_ms, sizeof(period_ms));
747 	}
748 
749 	return (error);
750 }
751 
752 int
753 sfxge_port_init(struct sfxge_softc *sc)
754 {
755 	struct sfxge_port *port;
756 	struct sysctl_ctx_list *sysctl_ctx;
757 	struct sysctl_oid *sysctl_tree;
758 	efsys_mem_t *mac_stats_buf, *phy_stats_buf;
759 	uint32_t mac_nstats;
760 	size_t mac_stats_size;
761 	int rc;
762 
763 	port = &sc->port;
764 	mac_stats_buf = &port->mac_stats.dma_buf;
765 	phy_stats_buf = &port->phy_stats.dma_buf;
766 
767 	KASSERT(port->init_state == SFXGE_PORT_UNINITIALIZED,
768 	    ("Port already initialized"));
769 
770 	port->sc = sc;
771 
772 	SFXGE_PORT_LOCK_INIT(port, device_get_nameunit(sc->dev));
773 
774 	DBGPRINT(sc->dev, "alloc PHY stats");
775 	port->phy_stats.decode_buf = malloc(EFX_PHY_NSTATS * sizeof(uint32_t),
776 					    M_SFXGE, M_WAITOK | M_ZERO);
777 	if ((rc = sfxge_dma_alloc(sc, EFX_PHY_STATS_SIZE, phy_stats_buf)) != 0)
778 		goto fail;
779 	sfxge_phy_stat_init(sc);
780 
781 	DBGPRINT(sc->dev, "init sysctl");
782 	sysctl_ctx = device_get_sysctl_ctx(sc->dev);
783 	sysctl_tree = device_get_sysctl_tree(sc->dev);
784 
785 #ifndef SFXGE_HAVE_PAUSE_MEDIAOPTS
786 	/* If flow control cannot be configured or reported through
787 	 * ifmedia, provide sysctls for it. */
788 	port->wanted_fc = EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE;
789 	SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
790 	    "wanted_fc", CTLTYPE_UINT|CTLFLAG_RW, sc, 0,
791 	    sfxge_port_wanted_fc_handler, "IU", "wanted flow control mode");
792 	SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
793 	    "link_fc", CTLTYPE_UINT|CTLFLAG_RD, sc, 0,
794 	    sfxge_port_link_fc_handler, "IU", "link flow control mode");
795 #endif
796 
797 	DBGPRINT(sc->dev, "alloc MAC stats");
798 	port->mac_stats.decode_buf = malloc(EFX_MAC_NSTATS * sizeof(uint64_t),
799 					    M_SFXGE, M_WAITOK | M_ZERO);
800 	mac_nstats = efx_nic_cfg_get(sc->enp)->enc_mac_stats_nstats;
801 	mac_stats_size = EFX_P2ROUNDUP(size_t, mac_nstats * sizeof(uint64_t),
802 				       EFX_BUF_SIZE);
803 	if ((rc = sfxge_dma_alloc(sc, mac_stats_size, mac_stats_buf)) != 0)
804 		goto fail2;
805 	port->stats_update_period_ms = sfxge_port_stats_update_period_ms(sc);
806 	sfxge_mac_stat_init(sc);
807 
808 	SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
809 	    "stats_update_period_ms", CTLTYPE_UINT|CTLFLAG_RW, sc, 0,
810 	    sfxge_port_stats_update_period_ms_handler, "IU",
811 	    "interface statistics refresh period");
812 
813 	port->init_state = SFXGE_PORT_INITIALIZED;
814 
815 	DBGPRINT(sc->dev, "success");
816 	return (0);
817 
818 fail2:
819 	free(port->mac_stats.decode_buf, M_SFXGE);
820 	sfxge_dma_free(phy_stats_buf);
821 fail:
822 	free(port->phy_stats.decode_buf, M_SFXGE);
823 	SFXGE_PORT_LOCK_DESTROY(port);
824 	port->sc = NULL;
825 	DBGPRINT(sc->dev, "failed %d", rc);
826 	return (rc);
827 }
828 
/*
 * ifmedia word for each (PHY media type, link mode) pair.  Pairs without
 * an explicit initialiser are zero; sfxge_port_ifmedia_init() skips zero
 * entries when building the media list, and sfxge_phy_cap_mask() searches
 * a row of this table to map a requested media word back to a link mode.
 */
static const int sfxge_link_mode[EFX_PHY_MEDIA_NTYPES][EFX_LINK_NMODES] = {
	[EFX_PHY_MEDIA_CX4] = {
		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_CX4,
	},
	[EFX_PHY_MEDIA_KX4] = {
		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_KX4,
	},
	[EFX_PHY_MEDIA_XFP] = {
		/* Don't know the module type, but assume SR for now. */
		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_SR,
	},
	[EFX_PHY_MEDIA_QSFP_PLUS] = {
		/* Don't know the module type, but assume SR for now. */
		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_SR,
		[EFX_LINK_25000FDX]	= IFM_ETHER | IFM_FDX | IFM_25G_SR,
		[EFX_LINK_40000FDX]	= IFM_ETHER | IFM_FDX | IFM_40G_CR4,
		[EFX_LINK_50000FDX]	= IFM_ETHER | IFM_FDX | IFM_50G_SR,
		[EFX_LINK_100000FDX]	= IFM_ETHER | IFM_FDX | IFM_100G_SR2,
	},
	[EFX_PHY_MEDIA_SFP_PLUS] = {
		/* Don't know the module type, but assume SX/SR for now. */
		[EFX_LINK_1000FDX]	= IFM_ETHER | IFM_FDX | IFM_1000_SX,
		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_SR,
		[EFX_LINK_25000FDX]	= IFM_ETHER | IFM_FDX | IFM_25G_SR,
	},
	[EFX_PHY_MEDIA_BASE_T] = {
		[EFX_LINK_10HDX]	= IFM_ETHER | IFM_HDX | IFM_10_T,
		[EFX_LINK_10FDX]	= IFM_ETHER | IFM_FDX | IFM_10_T,
		[EFX_LINK_100HDX]	= IFM_ETHER | IFM_HDX | IFM_100_TX,
		[EFX_LINK_100FDX]	= IFM_ETHER | IFM_FDX | IFM_100_TX,
		[EFX_LINK_1000HDX]	= IFM_ETHER | IFM_HDX | IFM_1000_T,
		[EFX_LINK_1000FDX]	= IFM_ETHER | IFM_FDX | IFM_1000_T,
		[EFX_LINK_10000FDX]	= IFM_ETHER | IFM_FDX | IFM_10G_T,
	},
};
864 
865 static void
866 sfxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
867 {
868 	struct sfxge_softc *sc;
869 	efx_phy_media_type_t medium_type;
870 	efx_link_mode_t mode;
871 
872 	sc = ifp->if_softc;
873 	SFXGE_ADAPTER_LOCK(sc);
874 
875 	ifmr->ifm_status = IFM_AVALID;
876 	ifmr->ifm_active = IFM_ETHER;
877 
878 	if (SFXGE_RUNNING(sc) && SFXGE_LINK_UP(sc)) {
879 		ifmr->ifm_status |= IFM_ACTIVE;
880 
881 		efx_phy_media_type_get(sc->enp, &medium_type);
882 		mode = sc->port.link_mode;
883 		ifmr->ifm_active |= sfxge_link_mode[medium_type][mode];
884 		ifmr->ifm_active |= sfxge_port_link_fc_ifm(sc);
885 	}
886 
887 	SFXGE_ADAPTER_UNLOCK(sc);
888 }
889 
890 static efx_phy_cap_type_t
891 sfxge_link_mode_to_phy_cap(efx_link_mode_t mode)
892 {
893 	switch (mode) {
894 	case EFX_LINK_10HDX:
895 		return (EFX_PHY_CAP_10HDX);
896 	case EFX_LINK_10FDX:
897 		return (EFX_PHY_CAP_10FDX);
898 	case EFX_LINK_100HDX:
899 		return (EFX_PHY_CAP_100HDX);
900 	case EFX_LINK_100FDX:
901 		return (EFX_PHY_CAP_100FDX);
902 	case EFX_LINK_1000HDX:
903 		return (EFX_PHY_CAP_1000HDX);
904 	case EFX_LINK_1000FDX:
905 		return (EFX_PHY_CAP_1000FDX);
906 	case EFX_LINK_10000FDX:
907 		return (EFX_PHY_CAP_10000FDX);
908 	case EFX_LINK_25000FDX:
909 		return (EFX_PHY_CAP_25000FDX);
910 	case EFX_LINK_40000FDX:
911 		return (EFX_PHY_CAP_40000FDX);
912 	case EFX_LINK_50000FDX:
913 		return (EFX_PHY_CAP_50000FDX);
914 	case EFX_LINK_100000FDX:
915 		return (EFX_PHY_CAP_100000FDX);
916 	default:
917 		return (EFX_PHY_CAP_INVALID);
918 	}
919 }
920 
921 static int
922 sfxge_phy_cap_mask(struct sfxge_softc *sc, int ifmedia, uint32_t *phy_cap_mask)
923 {
924 	/* Get global options (duplex), type and subtype bits */
925 	int ifmedia_masked = ifmedia & (IFM_GMASK | IFM_NMASK | IFM_TMASK);
926 	efx_phy_media_type_t medium_type;
927 	boolean_t mode_found = B_FALSE;
928 	uint32_t cap_mask, mode_cap_mask;
929 	efx_link_mode_t mode;
930 	efx_phy_cap_type_t phy_cap;
931 
932 	efx_phy_media_type_get(sc->enp, &medium_type);
933 	if (medium_type >= nitems(sfxge_link_mode)) {
934 		if_printf(sc->ifnet, "unexpected media type %d\n", medium_type);
935 		return (EINVAL);
936 	}
937 
938 	efx_phy_adv_cap_get(sc->enp, EFX_PHY_CAP_PERM, &cap_mask);
939 
940 	for (mode = EFX_LINK_10HDX; mode < EFX_LINK_NMODES; mode++) {
941 		if (ifmedia_masked == sfxge_link_mode[medium_type][mode]) {
942 			mode_found = B_TRUE;
943 			break;
944 		}
945 	}
946 
947 	if (!mode_found) {
948 		/*
949 		 * If media is not in the table, it must be IFM_AUTO.
950 		 */
951 		KASSERT((cap_mask & (1 << EFX_PHY_CAP_AN)) &&
952 		    ifmedia_masked == (IFM_ETHER | IFM_AUTO),
953 		    ("%s: no mode for media %#x", __func__, ifmedia));
954 		*phy_cap_mask = (cap_mask & ~(1 << EFX_PHY_CAP_ASYM));
955 		return (0);
956 	}
957 
958 	phy_cap = sfxge_link_mode_to_phy_cap(mode);
959 	if (phy_cap == EFX_PHY_CAP_INVALID) {
960 		if_printf(sc->ifnet,
961 			  "cannot map link mode %d to phy capability\n",
962 			  mode);
963 		return (EINVAL);
964 	}
965 
966 	mode_cap_mask = (1 << phy_cap);
967 	mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_AN);
968 #ifdef SFXGE_HAVE_PAUSE_MEDIAOPTS
969 	if (ifmedia & IFM_ETH_RXPAUSE)
970 		mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_PAUSE);
971 	if (!(ifmedia & IFM_ETH_TXPAUSE))
972 		mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_ASYM);
973 #else
974 	mode_cap_mask |= cap_mask & (1 << EFX_PHY_CAP_PAUSE);
975 #endif
976 
977 	*phy_cap_mask = mode_cap_mask;
978 	return (0);
979 }
980 
981 static int
982 sfxge_media_change(struct ifnet *ifp)
983 {
984 	struct sfxge_softc *sc;
985 	struct ifmedia_entry *ifm;
986 	int rc;
987 	uint32_t phy_cap_mask;
988 
989 	sc = ifp->if_softc;
990 	ifm = sc->media.ifm_cur;
991 
992 	SFXGE_ADAPTER_LOCK(sc);
993 
994 	if (!SFXGE_RUNNING(sc)) {
995 		rc = 0;
996 		goto out;
997 	}
998 
999 	rc = efx_mac_fcntl_set(sc->enp, sfxge_port_wanted_fc(sc), B_TRUE);
1000 	if (rc != 0)
1001 		goto out;
1002 
1003 	if ((rc = sfxge_phy_cap_mask(sc, ifm->ifm_media, &phy_cap_mask)) != 0)
1004 		goto out;
1005 
1006 	rc = efx_phy_adv_cap_set(sc->enp, phy_cap_mask);
1007 out:
1008 	SFXGE_ADAPTER_UNLOCK(sc);
1009 
1010 	return (rc);
1011 }
1012 
1013 int sfxge_port_ifmedia_init(struct sfxge_softc *sc)
1014 {
1015 	efx_phy_media_type_t medium_type;
1016 	uint32_t cap_mask, mode_cap_mask;
1017 	efx_link_mode_t mode;
1018 	efx_phy_cap_type_t phy_cap;
1019 	int mode_ifm, best_mode_ifm = 0;
1020 	int rc;
1021 
1022 	/*
1023 	 * We need port state to initialise the ifmedia list.
1024 	 * It requires initialized NIC what is already done in
1025 	 * sfxge_create() when resources are estimated.
1026 	 */
1027 	if ((rc = efx_filter_init(sc->enp)) != 0)
1028 		goto out1;
1029 	if ((rc = efx_port_init(sc->enp)) != 0)
1030 		goto out2;
1031 
1032 	/*
1033 	 * Register ifconfig callbacks for querying and setting the
1034 	 * link mode and link status.
1035 	 */
1036 	ifmedia_init(&sc->media, IFM_IMASK, sfxge_media_change,
1037 	    sfxge_media_status);
1038 
1039 	/*
1040 	 * Map firmware medium type and capabilities to ifmedia types.
1041 	 * ifmedia does not distinguish between forcing the link mode
1042 	 * and disabling auto-negotiation.  1000BASE-T and 10GBASE-T
1043 	 * require AN even if only one link mode is enabled, and for
1044 	 * 100BASE-TX it is useful even if the link mode is forced.
1045 	 * Therefore we never disable auto-negotiation.
1046 	 *
1047 	 * Also enable and advertise flow control by default.
1048 	 */
1049 
1050 	efx_phy_media_type_get(sc->enp, &medium_type);
1051 	efx_phy_adv_cap_get(sc->enp, EFX_PHY_CAP_PERM, &cap_mask);
1052 
1053 	for (mode = EFX_LINK_10HDX; mode < EFX_LINK_NMODES; mode++) {
1054 		phy_cap = sfxge_link_mode_to_phy_cap(mode);
1055 		if (phy_cap == EFX_PHY_CAP_INVALID)
1056 			continue;
1057 
1058 		mode_cap_mask = (1 << phy_cap);
1059 		mode_ifm = sfxge_link_mode[medium_type][mode];
1060 
1061 		if ((cap_mask & mode_cap_mask) && mode_ifm) {
1062 			/* No flow-control */
1063 			ifmedia_add(&sc->media, mode_ifm, 0, NULL);
1064 
1065 #ifdef SFXGE_HAVE_PAUSE_MEDIAOPTS
1066 			/* Respond-only.  If using AN, we implicitly
1067 			 * offer symmetric as well, but that doesn't
1068 			 * mean we *have* to generate pause frames.
1069 			 */
1070 			mode_ifm |= IFM_ETH_RXPAUSE;
1071 			ifmedia_add(&sc->media, mode_ifm, 0, NULL);
1072 
1073 			/* Symmetric */
1074 			mode_ifm |= IFM_ETH_TXPAUSE;
1075 			ifmedia_add(&sc->media, mode_ifm, 0, NULL);
1076 #endif
1077 
1078 			/* Link modes are numbered in order of speed,
1079 			 * so assume the last one available is the best.
1080 			 */
1081 			best_mode_ifm = mode_ifm;
1082 		}
1083 	}
1084 
1085 	if (cap_mask & (1 << EFX_PHY_CAP_AN)) {
1086 		/* Add autoselect mode. */
1087 		mode_ifm = IFM_ETHER | IFM_AUTO;
1088 		ifmedia_add(&sc->media, mode_ifm, 0, NULL);
1089 		best_mode_ifm = mode_ifm;
1090 	}
1091 
1092 	if (best_mode_ifm != 0)
1093 		ifmedia_set(&sc->media, best_mode_ifm);
1094 
1095 	/* Now discard port state until interface is started. */
1096 	efx_port_fini(sc->enp);
1097 out2:
1098 	efx_filter_fini(sc->enp);
1099 out1:
1100 	return (rc);
1101 }
1102