xref: /illumos-gate/usr/src/uts/common/io/aggr/aggr_port.c (revision 3a109ad9413b360a5bfa6fa5ddfacef5fd64fe5b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * IEEE 802.3ad Link Aggregation - Link Aggregation MAC ports.
28  *
29  * Implements the functions needed to manage the MAC ports that are
30  * part of Link Aggregation groups.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/sysmacros.h>
35 #include <sys/conf.h>
36 #include <sys/cmn_err.h>
37 #include <sys/id_space.h>
38 #include <sys/list.h>
39 #include <sys/ksynch.h>
40 #include <sys/kmem.h>
41 #include <sys/stream.h>
42 #include <sys/modctl.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 #include <sys/atomic.h>
46 #include <sys/stat.h>
47 #include <sys/sdt.h>
48 #include <sys/dlpi.h>
49 #include <sys/aggr.h>
50 #include <sys/aggr_impl.h>
51 
/* kmem cache from which every aggr_port_t is allocated (see aggr_port_init) */
static kmem_cache_t *aggr_port_cache;
/* id space that hands out the per-port identifiers stored in lp_portid */
static id_space_t *aggr_portids;

/*
 * Forward declaration: aggr_port_init_callbacks() registers this handler
 * before its definition appears further down in the file.
 */
static void aggr_port_notify_cb(void *, mac_notify_type_t);
56 
57 /*ARGSUSED*/
58 static int
59 aggr_port_constructor(void *buf, void *arg, int kmflag)
60 {
61 	aggr_port_t *port = buf;
62 
63 	bzero(buf, sizeof (aggr_port_t));
64 	rw_init(&port->lp_lock, NULL, RW_DRIVER, NULL);
65 
66 	return (0);
67 }
68 
69 /*ARGSUSED*/
70 static void
71 aggr_port_destructor(void *buf, void *arg)
72 {
73 	aggr_port_t *port = buf;
74 
75 	rw_destroy(&port->lp_lock);
76 }
77 
78 void
79 aggr_port_init(void)
80 {
81 	aggr_port_cache = kmem_cache_create("aggr_port_cache",
82 	    sizeof (aggr_port_t), 0, aggr_port_constructor,
83 	    aggr_port_destructor, NULL, NULL, NULL, 0);
84 
85 	/*
86 	 * Allocate a id space to manage port identification. The range of
87 	 * the arena will be from 1 to UINT16_MAX, because the LACP protocol
88 	 * specifies 16-bit unique identification.
89 	 */
90 	aggr_portids = id_space_create("aggr_portids", 1, UINT16_MAX);
91 	ASSERT(aggr_portids != NULL);
92 }
93 
94 void
95 aggr_port_fini(void)
96 {
97 	/*
98 	 * This function is called only after all groups have been
99 	 * freed. This ensures that there are no remaining allocated
100 	 * ports when this function is invoked.
101 	 */
102 	kmem_cache_destroy(aggr_port_cache);
103 	id_space_destroy(aggr_portids);
104 }
105 
106 mac_resource_handle_t
107 aggr_port_resource_add(void *arg, mac_resource_t *mrp)
108 {
109 	aggr_port_t *port = (aggr_port_t *)arg;
110 	aggr_grp_t *grp = port->lp_grp;
111 
112 	return (mac_resource_add(grp->lg_mh, mrp));
113 }
114 
115 void
116 aggr_port_init_callbacks(aggr_port_t *port)
117 {
118 	/* add the port's receive callback */
119 	port->lp_mnh = mac_notify_add(port->lp_mh, aggr_port_notify_cb,
120 	    (void *)port);
121 
122 	/* set port's resource_add callback */
123 	mac_resource_set(port->lp_mh, aggr_port_resource_add, (void *)port);
124 }
125 
126 int
127 aggr_port_create(const datalink_id_t linkid, boolean_t force, aggr_port_t **pp)
128 {
129 	int err;
130 	mac_handle_t mh;
131 	aggr_port_t *port;
132 	uint16_t portid;
133 	uint_t i;
134 	boolean_t no_link_update = B_FALSE;
135 	const mac_info_t *mip;
136 	uint32_t note;
137 	uint32_t margin;
138 
139 	*pp = NULL;
140 
141 	if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
142 		return (err);
143 
144 	mip = mac_info(mh);
145 	if (mip->mi_media != DL_ETHER || mip->mi_nativemedia != DL_ETHER) {
146 		err = EINVAL;
147 		goto fail;
148 	}
149 
150 	/*
151 	 * If the underlying MAC does not support link update notification, it
152 	 * can only be aggregated if `force' is set.  This is because aggr
153 	 * depends on link notifications to attach ports whose link is up.
154 	 */
155 	note = mac_no_notification(mh);
156 	if ((note & (DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN)) != 0) {
157 		no_link_update = B_TRUE;
158 		if (!force) {
159 			/*
160 			 * We borrow this error code to indicate that link
161 			 * notification is not supported.
162 			 */
163 			err = ENETDOWN;
164 			goto fail;
165 		}
166 	}
167 
168 	if ((portid = (uint16_t)id_alloc(aggr_portids)) == 0) {
169 		err = ENOMEM;
170 		goto fail;
171 	}
172 
173 	/*
174 	 * As the underlying mac's current margin size is used to determine
175 	 * the margin size of the aggregation itself, request the underlying
176 	 * mac not to change to a smaller size.
177 	 */
178 	if ((err = mac_margin_add(mh, &margin, B_TRUE)) != 0) {
179 		id_free(aggr_portids, portid);
180 		goto fail;
181 	}
182 
183 	if (!mac_active_set(mh)) {
184 		VERIFY(mac_margin_remove(mh, margin) == 0);
185 		id_free(aggr_portids, portid);
186 		err = EBUSY;
187 		goto fail;
188 	}
189 
190 	port = kmem_cache_alloc(aggr_port_cache, KM_SLEEP);
191 
192 	port->lp_refs = 1;
193 	port->lp_next = NULL;
194 	port->lp_mh = mh;
195 	port->lp_mip = mip;
196 	port->lp_linkid = linkid;
197 	port->lp_closing = 0;
198 
199 	/* get the port's original MAC address */
200 	mac_unicst_get(port->lp_mh, port->lp_addr);
201 
202 	/* set port's transmit information */
203 	port->lp_txinfo = mac_tx_get(port->lp_mh);
204 
205 	/* initialize state */
206 	port->lp_state = AGGR_PORT_STATE_STANDBY;
207 	port->lp_link_state = LINK_STATE_UNKNOWN;
208 	port->lp_ifspeed = 0;
209 	port->lp_link_duplex = LINK_DUPLEX_UNKNOWN;
210 	port->lp_started = B_FALSE;
211 	port->lp_tx_enabled = B_FALSE;
212 	port->lp_promisc_on = B_FALSE;
213 	port->lp_no_link_update = no_link_update;
214 	port->lp_portid = portid;
215 	port->lp_margin = margin;
216 
217 	/*
218 	 * Save the current statistics of the port. They will be used
219 	 * later by aggr_m_stats() when aggregating the statistics of
220 	 * the constituent ports.
221 	 */
222 	for (i = 0; i < MAC_NSTAT; i++) {
223 		port->lp_stat[i] =
224 		    aggr_port_stat(port, i + MAC_STAT_MIN);
225 	}
226 	for (i = 0; i < ETHER_NSTAT; i++) {
227 		port->lp_ether_stat[i] =
228 		    aggr_port_stat(port, i + MACTYPE_STAT_MIN);
229 	}
230 
231 	/* LACP related state */
232 	port->lp_collector_enabled = B_FALSE;
233 
234 	*pp = port;
235 	return (0);
236 
237 fail:
238 	mac_close(mh);
239 	return (err);
240 }
241 
242 void
243 aggr_port_delete(aggr_port_t *port)
244 {
245 	VERIFY(mac_margin_remove(port->lp_mh, port->lp_margin) == 0);
246 	mac_rx_remove_wait(port->lp_mh);
247 	mac_resource_set(port->lp_mh, NULL, NULL);
248 	mac_notify_remove(port->lp_mh, port->lp_mnh);
249 	mac_active_clear(port->lp_mh);
250 
251 	/*
252 	 * Restore the port MAC address. Note it is called after the
253 	 * port's notification callback being removed. This prevent
254 	 * port's MAC_NOTE_UNICST notify callback function being called.
255 	 */
256 	(void) mac_unicst_set(port->lp_mh, port->lp_addr);
257 
258 	mac_close(port->lp_mh);
259 	AGGR_PORT_REFRELE(port);
260 }
261 
262 void
263 aggr_port_free(aggr_port_t *port)
264 {
265 	ASSERT(port->lp_refs == 0);
266 	if (port->lp_grp != NULL)
267 		AGGR_GRP_REFRELE(port->lp_grp);
268 	port->lp_grp = NULL;
269 	id_free(aggr_portids, port->lp_portid);
270 	port->lp_portid = 0;
271 	kmem_cache_free(aggr_port_cache, port);
272 }
273 
274 /*
275  * Invoked upon receiving a MAC_NOTE_LINK notification for
276  * one of the constituent ports.
277  */
278 boolean_t
279 aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port, boolean_t dolock)
280 {
281 	boolean_t do_attach = B_FALSE;
282 	boolean_t do_detach = B_FALSE;
283 	boolean_t link_state_changed = B_TRUE;
284 	uint64_t ifspeed;
285 	link_state_t link_state;
286 	link_duplex_t link_duplex;
287 
288 	if (dolock) {
289 		AGGR_LACP_LOCK_WRITER(grp);
290 		rw_enter(&grp->lg_lock, RW_WRITER);
291 	} else {
292 		ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp));
293 		ASSERT(RW_WRITE_HELD(&grp->lg_lock));
294 	}
295 
296 	rw_enter(&port->lp_lock, RW_WRITER);
297 
298 	/*
299 	 * link state change?  For links that do not support link state
300 	 * notification, always assume the link is up.
301 	 */
302 	link_state = port->lp_no_link_update ? LINK_STATE_UP :
303 	    mac_link_get(port->lp_mh);
304 	if (port->lp_link_state != link_state) {
305 		if (link_state == LINK_STATE_UP)
306 			do_attach = (port->lp_link_state != LINK_STATE_UP);
307 		else
308 			do_detach = (port->lp_link_state == LINK_STATE_UP);
309 	}
310 	port->lp_link_state = link_state;
311 
312 	/* link duplex change? */
313 	link_duplex = aggr_port_stat(port, ETHER_STAT_LINK_DUPLEX);
314 	if (port->lp_link_duplex != link_duplex) {
315 		if (link_duplex == LINK_DUPLEX_FULL)
316 			do_attach |= (port->lp_link_duplex != LINK_DUPLEX_FULL);
317 		else
318 			do_detach |= (port->lp_link_duplex == LINK_DUPLEX_FULL);
319 	}
320 	port->lp_link_duplex = link_duplex;
321 
322 	/* link speed changes? */
323 	ifspeed = aggr_port_stat(port, MAC_STAT_IFSPEED);
324 	if (port->lp_ifspeed != ifspeed) {
325 		if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
326 			do_detach |= (ifspeed != grp->lg_ifspeed);
327 		else
328 			do_attach |= (ifspeed == grp->lg_ifspeed);
329 	}
330 	port->lp_ifspeed = ifspeed;
331 
332 	if (do_attach) {
333 		/* attempt to attach the port to the aggregation */
334 		link_state_changed = aggr_grp_attach_port(grp, port);
335 	} else if (do_detach) {
336 		/* detach the port from the aggregation */
337 		link_state_changed = aggr_grp_detach_port(grp, port, B_TRUE);
338 	}
339 
340 	rw_exit(&port->lp_lock);
341 
342 	if (dolock) {
343 		rw_exit(&grp->lg_lock);
344 		AGGR_LACP_UNLOCK(grp);
345 	}
346 	return (link_state_changed);
347 }
348 
349 /*
350  * Invoked upon receiving a MAC_NOTE_UNICST for one of the constituent
351  * ports of a group.
352  */
353 static void
354 aggr_port_notify_unicst(aggr_grp_t *grp, aggr_port_t *port,
355     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
356 {
357 	boolean_t mac_addr_changed = B_FALSE;
358 	boolean_t link_state_changed = B_FALSE;
359 	uint8_t mac_addr[ETHERADDRL];
360 
361 	ASSERT(mac_addr_changedp != NULL);
362 	ASSERT(link_state_changedp != NULL);
363 	AGGR_LACP_LOCK_WRITER(grp);
364 	rw_enter(&grp->lg_lock, RW_WRITER);
365 
366 	rw_enter(&port->lp_lock, RW_WRITER);
367 
368 	/*
369 	 * If it is called when setting the MAC address to the
370 	 * aggregation group MAC address, do nothing.
371 	 */
372 	mac_unicst_get(port->lp_mh, mac_addr);
373 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
374 		rw_exit(&port->lp_lock);
375 		goto done;
376 	}
377 
378 	/* save the new port MAC address */
379 	bcopy(mac_addr, port->lp_addr, ETHERADDRL);
380 
381 	aggr_grp_port_mac_changed(grp, port, &mac_addr_changed,
382 	    &link_state_changed);
383 
384 	rw_exit(&port->lp_lock);
385 
386 	if (grp->lg_closing)
387 		goto done;
388 
389 	/*
390 	 * If this port was used to determine the MAC address of
391 	 * the group, update the MAC address of the constituent
392 	 * ports.
393 	 */
394 	if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
395 		link_state_changed = B_TRUE;
396 
397 done:
398 	*mac_addr_changedp = mac_addr_changed;
399 	*link_state_changedp = link_state_changed;
400 	rw_exit(&grp->lg_lock);
401 	AGGR_LACP_UNLOCK(grp);
402 }
403 
404 /*
405  * Notification callback invoked by the MAC service module for
406  * a particular MAC port.
407  */
408 static void
409 aggr_port_notify_cb(void *arg, mac_notify_type_t type)
410 {
411 	aggr_port_t *port = arg;
412 	aggr_grp_t *grp = port->lp_grp;
413 	boolean_t mac_addr_changed, link_state_changed;
414 
415 	/*
416 	 * Do nothing if the aggregation or the port is in the deletion
417 	 * process. Note that this is necessary to avoid deadlock.
418 	 */
419 	if ((grp->lg_closing) || (port->lp_closing))
420 		return;
421 
422 	AGGR_PORT_REFHOLD(port);
423 
424 	switch (type) {
425 	case MAC_NOTE_TX:
426 		mac_tx_update(grp->lg_mh);
427 		break;
428 	case MAC_NOTE_LINK:
429 		if (aggr_port_notify_link(grp, port, B_TRUE))
430 			mac_link_update(grp->lg_mh, grp->lg_link_state);
431 		break;
432 	case MAC_NOTE_UNICST:
433 		aggr_port_notify_unicst(grp, port, &mac_addr_changed,
434 		    &link_state_changed);
435 		if (mac_addr_changed)
436 			mac_unicst_update(grp->lg_mh, grp->lg_addr);
437 		if (link_state_changed)
438 			mac_link_update(grp->lg_mh, grp->lg_link_state);
439 		break;
440 	case MAC_NOTE_PROMISC:
441 		port->lp_txinfo = mac_tx_get(port->lp_mh);
442 		break;
443 	default:
444 		break;
445 	}
446 
447 	AGGR_PORT_REFRELE(port);
448 }
449 
450 int
451 aggr_port_start(aggr_port_t *port)
452 {
453 	int rc;
454 
455 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
456 
457 	if (port->lp_started)
458 		return (0);
459 
460 	if ((rc = mac_start(port->lp_mh)) != 0)
461 		return (rc);
462 
463 	/* update the port state */
464 	port->lp_started = B_TRUE;
465 
466 	return (rc);
467 }
468 
469 void
470 aggr_port_stop(aggr_port_t *port)
471 {
472 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
473 
474 	if (!port->lp_started)
475 		return;
476 
477 	aggr_grp_multicst_port(port, B_FALSE);
478 
479 	mac_stop(port->lp_mh);
480 
481 	/* update the port state */
482 	port->lp_started = B_FALSE;
483 }
484 
485 int
486 aggr_port_promisc(aggr_port_t *port, boolean_t on)
487 {
488 	int rc;
489 
490 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
491 
492 	if (on == port->lp_promisc_on)
493 		/* already in desired promiscous mode */
494 		return (0);
495 
496 	rc = mac_promisc_set(port->lp_mh, on, MAC_DEVPROMISC);
497 
498 	if (rc == 0)
499 		port->lp_promisc_on = on;
500 
501 	return (rc);
502 }
503 
504 /*
505  * Set the MAC address of a port.
506  */
507 int
508 aggr_port_unicst(aggr_port_t *port, uint8_t *macaddr)
509 {
510 	int rc;
511 
512 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
513 
514 	rc = mac_unicst_set(port->lp_mh, macaddr);
515 
516 	return (rc);
517 }
518 
519 /*
520  * Add or remove a multicast address to/from a port.
521  */
522 int
523 aggr_port_multicst(void *arg, boolean_t add, const uint8_t *addrp)
524 {
525 	aggr_port_t *port = arg;
526 
527 	return (add ? mac_multicst_add(port->lp_mh, addrp) :
528 	    mac_multicst_remove(port->lp_mh, addrp));
529 }
530 
531 uint64_t
532 aggr_port_stat(aggr_port_t *port, uint_t stat)
533 {
534 	return (mac_stat_get(port->lp_mh, stat));
535 }
536