xref: /illumos-gate/usr/src/uts/common/io/aggr/aggr_port.c (revision 1b83305cfc332b1e19ad6a194b73b2975e6bf79a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * IEEE 802.3ad Link Aggregation - Link Aggregation MAC ports.
30  *
31  * Implements the functions needed to manage the MAC ports that are
32  * part of Link Aggregation groups.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/sysmacros.h>
37 #include <sys/conf.h>
38 #include <sys/cmn_err.h>
39 #include <sys/id_space.h>
40 #include <sys/list.h>
41 #include <sys/ksynch.h>
42 #include <sys/kmem.h>
43 #include <sys/stream.h>
44 #include <sys/modctl.h>
45 #include <sys/ddi.h>
46 #include <sys/sunddi.h>
47 #include <sys/atomic.h>
48 #include <sys/stat.h>
49 #include <sys/sdt.h>
50 #include <sys/dlpi.h>
51 #include <sys/aggr.h>
52 #include <sys/aggr_impl.h>
53 
54 static kmem_cache_t *aggr_port_cache;
55 static id_space_t *aggr_portids;
56 
57 static void aggr_port_notify_cb(void *, mac_notify_type_t);
58 
59 /*ARGSUSED*/
60 static int
61 aggr_port_constructor(void *buf, void *arg, int kmflag)
62 {
63 	aggr_port_t *port = buf;
64 
65 	bzero(buf, sizeof (aggr_port_t));
66 	rw_init(&port->lp_lock, NULL, RW_DRIVER, NULL);
67 
68 	return (0);
69 }
70 
71 /*ARGSUSED*/
72 static void
73 aggr_port_destructor(void *buf, void *arg)
74 {
75 	aggr_port_t *port = buf;
76 
77 	rw_destroy(&port->lp_lock);
78 }
79 
80 void
81 aggr_port_init(void)
82 {
83 	aggr_port_cache = kmem_cache_create("aggr_port_cache",
84 	    sizeof (aggr_port_t), 0, aggr_port_constructor,
85 	    aggr_port_destructor, NULL, NULL, NULL, 0);
86 
87 	/*
88 	 * Allocate a id space to manage port identification. The range of
89 	 * the arena will be from 1 to UINT16_MAX, because the LACP protocol
90 	 * specifies 16-bit unique identification.
91 	 */
92 	aggr_portids = id_space_create("aggr_portids", 1, UINT16_MAX);
93 	ASSERT(aggr_portids != NULL);
94 }
95 
96 void
97 aggr_port_fini(void)
98 {
99 	/*
100 	 * This function is called only after all groups have been
101 	 * freed. This ensures that there are no remaining allocated
102 	 * ports when this function is invoked.
103 	 */
104 	kmem_cache_destroy(aggr_port_cache);
105 	id_space_destroy(aggr_portids);
106 }
107 
108 mac_resource_handle_t
109 aggr_port_resource_add(void *arg, mac_resource_t *mrp)
110 {
111 	aggr_port_t *port = (aggr_port_t *)arg;
112 	aggr_grp_t *grp = port->lp_grp;
113 
114 	return (mac_resource_add(grp->lg_mh, mrp));
115 }
116 
117 void
118 aggr_port_init_callbacks(aggr_port_t *port)
119 {
120 	/* add the port's receive callback */
121 	port->lp_mnh = mac_notify_add(port->lp_mh, aggr_port_notify_cb,
122 	    (void *)port);
123 
124 	/* set port's resource_add callback */
125 	mac_resource_set(port->lp_mh, aggr_port_resource_add, (void *)port);
126 }
127 
128 int
129 aggr_port_create(const datalink_id_t linkid, boolean_t force, aggr_port_t **pp)
130 {
131 	int err;
132 	mac_handle_t mh;
133 	aggr_port_t *port;
134 	uint16_t portid;
135 	uint_t i;
136 	boolean_t no_link_update = B_FALSE;
137 	const mac_info_t *mip;
138 	uint32_t note;
139 	uint32_t margin;
140 
141 	*pp = NULL;
142 
143 	if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
144 		return (err);
145 
146 	mip = mac_info(mh);
147 	if (mip->mi_media != DL_ETHER || mip->mi_nativemedia != DL_ETHER) {
148 		err = EINVAL;
149 		goto fail;
150 	}
151 
152 	/*
153 	 * If the underlying MAC does not support link update notification, it
154 	 * can only be aggregated if `force' is set.  This is because aggr
155 	 * depends on link notifications to attach ports whose link is up.
156 	 */
157 	note = mac_no_notification(mh);
158 	if ((note & (DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN)) != 0) {
159 		no_link_update = B_TRUE;
160 		if (!force) {
161 			/*
162 			 * We borrow this error code to indicate that link
163 			 * notification is not supported.
164 			 */
165 			err = ENETDOWN;
166 			goto fail;
167 		}
168 	}
169 
170 	if ((portid = (uint16_t)id_alloc(aggr_portids)) == 0) {
171 		err = ENOMEM;
172 		goto fail;
173 	}
174 
175 	/*
176 	 * As the underlying mac's current margin size is used to determine
177 	 * the margin size of the aggregation itself, request the underlying
178 	 * mac not to change to a smaller size.
179 	 */
180 	if ((err = mac_margin_add(mh, &margin, B_TRUE)) != 0) {
181 		id_free(aggr_portids, portid);
182 		goto fail;
183 	}
184 
185 	if (!mac_active_set(mh)) {
186 		VERIFY(mac_margin_remove(mh, margin) == 0);
187 		id_free(aggr_portids, portid);
188 		err = EBUSY;
189 		goto fail;
190 	}
191 
192 	port = kmem_cache_alloc(aggr_port_cache, KM_SLEEP);
193 
194 	port->lp_refs = 1;
195 	port->lp_next = NULL;
196 	port->lp_mh = mh;
197 	port->lp_mip = mip;
198 	port->lp_linkid = linkid;
199 	port->lp_closing = 0;
200 
201 	/* get the port's original MAC address */
202 	mac_unicst_get(port->lp_mh, port->lp_addr);
203 
204 	/* set port's transmit information */
205 	port->lp_txinfo = mac_tx_get(port->lp_mh);
206 
207 	/* initialize state */
208 	port->lp_state = AGGR_PORT_STATE_STANDBY;
209 	port->lp_link_state = LINK_STATE_UNKNOWN;
210 	port->lp_ifspeed = 0;
211 	port->lp_link_duplex = LINK_DUPLEX_UNKNOWN;
212 	port->lp_started = B_FALSE;
213 	port->lp_tx_enabled = B_FALSE;
214 	port->lp_promisc_on = B_FALSE;
215 	port->lp_no_link_update = no_link_update;
216 	port->lp_portid = portid;
217 	port->lp_margin = margin;
218 
219 	/*
220 	 * Save the current statistics of the port. They will be used
221 	 * later by aggr_m_stats() when aggregating the statistics of
222 	 * the constituent ports.
223 	 */
224 	for (i = 0; i < MAC_NSTAT; i++) {
225 		port->lp_stat[i] =
226 		    aggr_port_stat(port, i + MAC_STAT_MIN);
227 	}
228 	for (i = 0; i < ETHER_NSTAT; i++) {
229 		port->lp_ether_stat[i] =
230 		    aggr_port_stat(port, i + MACTYPE_STAT_MIN);
231 	}
232 
233 	/* LACP related state */
234 	port->lp_collector_enabled = B_FALSE;
235 
236 	*pp = port;
237 	return (0);
238 
239 fail:
240 	mac_close(mh);
241 	return (err);
242 }
243 
244 void
245 aggr_port_delete(aggr_port_t *port)
246 {
247 	VERIFY(mac_margin_remove(port->lp_mh, port->lp_margin) == 0);
248 	mac_rx_remove_wait(port->lp_mh);
249 	mac_resource_set(port->lp_mh, NULL, NULL);
250 	mac_notify_remove(port->lp_mh, port->lp_mnh);
251 	mac_active_clear(port->lp_mh);
252 
253 	/*
254 	 * Restore the port MAC address. Note it is called after the
255 	 * port's notification callback being removed. This prevent
256 	 * port's MAC_NOTE_UNICST notify callback function being called.
257 	 */
258 	(void) mac_unicst_set(port->lp_mh, port->lp_addr);
259 
260 	mac_close(port->lp_mh);
261 	AGGR_PORT_REFRELE(port);
262 }
263 
264 void
265 aggr_port_free(aggr_port_t *port)
266 {
267 	ASSERT(port->lp_refs == 0);
268 	if (port->lp_grp != NULL)
269 		AGGR_GRP_REFRELE(port->lp_grp);
270 	port->lp_grp = NULL;
271 	id_free(aggr_portids, port->lp_portid);
272 	port->lp_portid = 0;
273 	kmem_cache_free(aggr_port_cache, port);
274 }
275 
276 /*
277  * Invoked upon receiving a MAC_NOTE_LINK notification for
278  * one of the constituent ports.
279  */
280 boolean_t
281 aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port, boolean_t dolock)
282 {
283 	boolean_t do_attach = B_FALSE;
284 	boolean_t do_detach = B_FALSE;
285 	boolean_t link_state_changed = B_TRUE;
286 	uint64_t ifspeed;
287 	link_state_t link_state;
288 	link_duplex_t link_duplex;
289 
290 	if (dolock) {
291 		AGGR_LACP_LOCK(grp);
292 		rw_enter(&grp->lg_lock, RW_WRITER);
293 	} else {
294 		ASSERT(AGGR_LACP_LOCK_HELD(grp));
295 		ASSERT(RW_WRITE_HELD(&grp->lg_lock));
296 	}
297 
298 	rw_enter(&port->lp_lock, RW_WRITER);
299 
300 	/*
301 	 * link state change?  For links that do not support link state
302 	 * notification, always assume the link is up.
303 	 */
304 	link_state = port->lp_no_link_update ? LINK_STATE_UP :
305 	    mac_link_get(port->lp_mh);
306 	if (port->lp_link_state != link_state) {
307 		if (link_state == LINK_STATE_UP)
308 			do_attach = (port->lp_link_state != LINK_STATE_UP);
309 		else
310 			do_detach = (port->lp_link_state == LINK_STATE_UP);
311 	}
312 	port->lp_link_state = link_state;
313 
314 	/* link duplex change? */
315 	link_duplex = aggr_port_stat(port, ETHER_STAT_LINK_DUPLEX);
316 	if (port->lp_link_duplex != link_duplex) {
317 		if (link_duplex == LINK_DUPLEX_FULL)
318 			do_attach |= (port->lp_link_duplex != LINK_DUPLEX_FULL);
319 		else
320 			do_detach |= (port->lp_link_duplex == LINK_DUPLEX_FULL);
321 	}
322 	port->lp_link_duplex = link_duplex;
323 
324 	/* link speed changes? */
325 	ifspeed = aggr_port_stat(port, MAC_STAT_IFSPEED);
326 	if (port->lp_ifspeed != ifspeed) {
327 		if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
328 			do_detach |= (ifspeed != grp->lg_ifspeed);
329 		else
330 			do_attach |= (ifspeed == grp->lg_ifspeed);
331 	}
332 	port->lp_ifspeed = ifspeed;
333 
334 	if (do_attach) {
335 		/* attempt to attach the port to the aggregation */
336 		link_state_changed = aggr_grp_attach_port(grp, port);
337 	} else if (do_detach) {
338 		/* detach the port from the aggregation */
339 		link_state_changed = aggr_grp_detach_port(grp, port);
340 	}
341 
342 	rw_exit(&port->lp_lock);
343 
344 	if (dolock) {
345 		rw_exit(&grp->lg_lock);
346 		AGGR_LACP_UNLOCK(grp);
347 	}
348 	return (link_state_changed);
349 }
350 
351 /*
352  * Invoked upon receiving a MAC_NOTE_UNICST for one of the constituent
353  * ports of a group.
354  */
355 static void
356 aggr_port_notify_unicst(aggr_grp_t *grp, aggr_port_t *port,
357     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
358 {
359 	boolean_t mac_addr_changed = B_FALSE;
360 	boolean_t link_state_changed = B_FALSE;
361 	uint8_t mac_addr[ETHERADDRL];
362 
363 	ASSERT(mac_addr_changedp != NULL);
364 	ASSERT(link_state_changedp != NULL);
365 	AGGR_LACP_LOCK(grp);
366 	rw_enter(&grp->lg_lock, RW_WRITER);
367 
368 	rw_enter(&port->lp_lock, RW_WRITER);
369 
370 	/*
371 	 * If it is called when setting the MAC address to the
372 	 * aggregation group MAC address, do nothing.
373 	 */
374 	mac_unicst_get(port->lp_mh, mac_addr);
375 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
376 		rw_exit(&port->lp_lock);
377 		goto done;
378 	}
379 
380 	/* save the new port MAC address */
381 	bcopy(mac_addr, port->lp_addr, ETHERADDRL);
382 
383 	aggr_grp_port_mac_changed(grp, port, &mac_addr_changed,
384 	    &link_state_changed);
385 
386 	rw_exit(&port->lp_lock);
387 
388 	if (grp->lg_closing)
389 		goto done;
390 
391 	/*
392 	 * If this port was used to determine the MAC address of
393 	 * the group, update the MAC address of the constituent
394 	 * ports.
395 	 */
396 	if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
397 		link_state_changed = B_TRUE;
398 
399 done:
400 	*mac_addr_changedp = mac_addr_changed;
401 	*link_state_changedp = link_state_changed;
402 	rw_exit(&grp->lg_lock);
403 	AGGR_LACP_UNLOCK(grp);
404 }
405 
406 /*
407  * Notification callback invoked by the MAC service module for
408  * a particular MAC port.
409  */
410 static void
411 aggr_port_notify_cb(void *arg, mac_notify_type_t type)
412 {
413 	aggr_port_t *port = arg;
414 	aggr_grp_t *grp = port->lp_grp;
415 	boolean_t mac_addr_changed, link_state_changed;
416 
417 	/*
418 	 * Do nothing if the aggregation or the port is in the deletion
419 	 * process. Note that this is necessary to avoid deadlock.
420 	 */
421 	if ((grp->lg_closing) || (port->lp_closing))
422 		return;
423 
424 	AGGR_PORT_REFHOLD(port);
425 
426 	switch (type) {
427 	case MAC_NOTE_TX:
428 		mac_tx_update(grp->lg_mh);
429 		break;
430 	case MAC_NOTE_LINK:
431 		if (aggr_port_notify_link(grp, port, B_TRUE))
432 			mac_link_update(grp->lg_mh, grp->lg_link_state);
433 		break;
434 	case MAC_NOTE_UNICST:
435 		aggr_port_notify_unicst(grp, port, &mac_addr_changed,
436 		    &link_state_changed);
437 		if (mac_addr_changed)
438 			mac_unicst_update(grp->lg_mh, grp->lg_addr);
439 		if (link_state_changed)
440 			mac_link_update(grp->lg_mh, grp->lg_link_state);
441 		break;
442 	case MAC_NOTE_PROMISC:
443 		port->lp_txinfo = mac_tx_get(port->lp_mh);
444 		break;
445 	default:
446 		break;
447 	}
448 
449 	AGGR_PORT_REFRELE(port);
450 }
451 
452 int
453 aggr_port_start(aggr_port_t *port)
454 {
455 	int rc;
456 
457 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
458 
459 	if (port->lp_started)
460 		return (0);
461 
462 	if ((rc = mac_start(port->lp_mh)) != 0)
463 		return (rc);
464 
465 	/* update the port state */
466 	port->lp_started = B_TRUE;
467 
468 	return (rc);
469 }
470 
471 void
472 aggr_port_stop(aggr_port_t *port)
473 {
474 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
475 
476 	if (!port->lp_started)
477 		return;
478 
479 	aggr_grp_multicst_port(port, B_FALSE);
480 
481 	mac_stop(port->lp_mh);
482 
483 	/* update the port state */
484 	port->lp_started = B_FALSE;
485 }
486 
487 int
488 aggr_port_promisc(aggr_port_t *port, boolean_t on)
489 {
490 	int rc;
491 
492 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
493 
494 	if (on == port->lp_promisc_on)
495 		/* already in desired promiscous mode */
496 		return (0);
497 
498 	rc = mac_promisc_set(port->lp_mh, on, MAC_DEVPROMISC);
499 
500 	if (rc == 0)
501 		port->lp_promisc_on = on;
502 
503 	return (rc);
504 }
505 
506 /*
507  * Set the MAC address of a port.
508  */
509 int
510 aggr_port_unicst(aggr_port_t *port, uint8_t *macaddr)
511 {
512 	int rc;
513 
514 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
515 
516 	rc = mac_unicst_set(port->lp_mh, macaddr);
517 
518 	return (rc);
519 }
520 
521 /*
522  * Add or remove a multicast address to/from a port.
523  */
524 int
525 aggr_port_multicst(void *arg, boolean_t add, const uint8_t *addrp)
526 {
527 	aggr_port_t *port = arg;
528 
529 	return (add ? mac_multicst_add(port->lp_mh, addrp) :
530 	    mac_multicst_remove(port->lp_mh, addrp));
531 }
532 
533 uint64_t
534 aggr_port_stat(aggr_port_t *port, uint_t stat)
535 {
536 	return (mac_stat_get(port->lp_mh, stat));
537 }
538