xref: /titanic_51/usr/src/uts/common/io/dls/dls_link.c (revision 0890ae4ef424a732c8f453aac6765c617daf8a24)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Data-Link Services Module
31  */
32 
33 #include	<sys/types.h>
34 #include	<sys/stream.h>
35 #include	<sys/strsun.h>
36 #include	<sys/strsubr.h>
37 #include	<sys/sysmacros.h>
38 #include	<sys/atomic.h>
39 #include	<sys/modhash.h>
40 #include	<sys/dlpi.h>
41 #include	<sys/ethernet.h>
42 #include	<sys/byteorder.h>
43 #include	<sys/vlan.h>
44 #include	<sys/mac.h>
45 #include	<sys/sdt.h>
46 
47 #include	<sys/dls.h>
48 #include	<sys/dld_impl.h>
49 #include	<sys/dls_impl.h>
50 
/* kmem cache from which dls_link_t structures are allocated. */
static kmem_cache_t	*i_dls_link_cachep;
/* Hash of existing dls_link_t structures, keyed by link name string. */
static mod_hash_t	*i_dls_link_hash;
/* Number of dls_link_t structures currently inserted in i_dls_link_hash. */
static uint_t		i_dls_link_count;
/* Held as writer while the hash, the count or a dl_ref is examined/updated. */
static krwlock_t	i_dls_link_lock;

/* Bucket counts for the hash tables created in this file. */
#define		LINK_HASHSZ	67	/* prime */
#define		IMPL_HASHSZ	67	/* prime */
58 
/*
 * Construct a hash key encompassing both DLSAP value and VLAN identifier.
 * The SAP is shifted left by VLAN_ID_SIZE bits and the VLAN identifier,
 * masked with VLAN_ID_MASK, is OR-ed in.  The mask is parenthesized
 * explicitly so the result does not rely on '&' binding tighter than '|'.
 */
#define	MAKE_KEY(_sap, _vid)						\
	((mod_hash_key_t)(uintptr_t)					\
	(((_sap) << VLAN_ID_SIZE) | ((_vid) & VLAN_ID_MASK)))
65 
/*
 * Recover the DLSAP value from a hash key built by MAKE_KEY(): the key is
 * narrowed to 32 bits and the VLAN_ID_SIZE low-order bits are shifted out.
 */
#define	KEY_SAP(_key)							\
	((uint32_t)(uintptr_t)(_key) >> VLAN_ID_SIZE)
71 
72 /*
73  * Private functions.
74  */
75 
76 /*ARGSUSED*/
77 static int
78 i_dls_link_constructor(void *buf, void *arg, int kmflag)
79 {
80 	dls_link_t	*dlp = buf;
81 	char		name[MAXNAMELEN];
82 
83 	bzero(buf, sizeof (dls_link_t));
84 
85 	(void) sprintf(name, "dls_link_t_%p_hash", buf);
86 	dlp->dl_impl_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
87 	    mod_hash_null_valdtor);
88 
89 	mutex_init(&dlp->dl_lock, NULL, MUTEX_DEFAULT, NULL);
90 	mutex_init(&dlp->dl_promisc_lock, NULL, MUTEX_DEFAULT, NULL);
91 	rw_init(&dlp->dl_impl_lock, NULL, RW_DEFAULT, NULL);
92 	return (0);
93 }
94 
95 /*ARGSUSED*/
96 static void
97 i_dls_link_destructor(void *buf, void *arg)
98 {
99 	dls_link_t	*dlp = buf;
100 
101 	ASSERT(dlp->dl_ref == 0);
102 	ASSERT(dlp->dl_mh == NULL);
103 	ASSERT(dlp->dl_unknowns == 0);
104 
105 	mod_hash_destroy_idhash(dlp->dl_impl_hash);
106 	dlp->dl_impl_hash = NULL;
107 
108 	mutex_destroy(&dlp->dl_lock);
109 	mutex_destroy(&dlp->dl_promisc_lock);
110 	rw_destroy(&dlp->dl_impl_lock);
111 }
112 
/*
 * Compare the 16-bit word at index _i of two packet headers.
 */
#define	ETHER_WORD_EQ(_pkt_a, _pkt_b, _i)				\
	(((const uint16_t *)(_pkt_a))[(_i)] ==				\
	((const uint16_t *)(_pkt_b))[(_i)])

/*
 * True when two headers agree on 16-bit words 0-2 and on word 6.
 */
#define	ETHER_MATCH(_pkt_a, _pkt_b)					\
	(ETHER_WORD_EQ(_pkt_a, _pkt_b, 0) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 1) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 2) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 6))

/*
 * As ETHER_MATCH, but additionally requires words 7 and 8 to agree.
 */
#define	ETHER_VLAN_MATCH(_pkt_a, _pkt_b)				\
	(ETHER_WORD_EQ(_pkt_a, _pkt_b, 0) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 1) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 2) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 6) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 7) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 8))
126 
127 static mblk_t *
128 i_dls_link_ether_subchain(mblk_t *mp, uint_t *header_lengthp,
129     uint8_t **daddrp, uint16_t *type_lengthp, uint16_t *vidp,
130     uint_t *countp)
131 {
132 	struct ether_header		*ehp;
133 	struct ether_vlan_header	*evhp;
134 	mblk_t				**pp;
135 	mblk_t				*p;
136 	uint_t				npacket;
137 
138 	/*
139 	 * Packets should always be at least 16 bit aligned.
140 	 */
141 	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
142 
143 	/*
144 	 * Determine whether this is a VLAN or non-VLAN packet.
145 	 */
146 	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
147 	ehp = (struct ether_header *)mp->b_rptr;
148 	if ((*type_lengthp = ntohs(ehp->ether_type)) == VLAN_TPID)
149 		goto vlan;
150 
151 	/*
152 	 * It is a non-VLAN header.
153 	 */
154 	*header_lengthp = sizeof (struct ether_header);
155 
156 	/*
157 	 * Parse the rest of the header information that we need.
158 	 */
159 	*daddrp = (uint8_t *)&(ehp->ether_dhost);
160 	*vidp = VLAN_ID_NONE;
161 
162 	/*
163 	 * Compare with subsequent headers until we find one that has
164 	 * differing header information. After checking each packet skip over
165 	 * the header.
166 	 */
167 	npacket = 1;
168 	for (pp = &(mp->b_next); (p = *pp) != NULL; pp = &(p->b_next)) {
169 		if (!ETHER_MATCH(p->b_rptr, mp->b_rptr) != 0)
170 			break;
171 		p->b_rptr += sizeof (struct ether_header);
172 		npacket++;
173 	}
174 
175 	/*
176 	 * Skip over the initial packet's header.
177 	 */
178 	mp->b_rptr += sizeof (struct ether_header);
179 	goto done;
180 
181 vlan:
182 	/*
183 	 * It is a VLAN header.
184 	 */
185 	evhp = (struct ether_vlan_header *)mp->b_rptr;
186 	*header_lengthp = sizeof (struct ether_vlan_header);
187 
188 	/*
189 	 * Parse the header information.
190 	 */
191 	*daddrp = (uint8_t *)&(evhp->ether_dhost);
192 	*vidp = VLAN_ID(ntohs(evhp->ether_tci));
193 	*type_lengthp = ntohs(evhp->ether_type);
194 
195 	/*
196 	 * Compare with subsequent headers until we find one that has
197 	 * differing header information. After checking each packet skip over
198 	 * the header.
199 	 */
200 	npacket = 1;
201 	for (pp = &(mp->b_next); (p = *pp) != NULL; pp = &(p->b_next)) {
202 		if (!ETHER_VLAN_MATCH(p->b_rptr, mp->b_rptr) != 0)
203 			break;
204 		p->b_rptr += sizeof (struct ether_vlan_header);
205 		npacket++;
206 	}
207 
208 	/*
209 	 * Skip over the initial packet's header.
210 	 */
211 	mp->b_rptr += sizeof (struct ether_vlan_header);
212 
213 done:
214 	/*
215 	 * Break the chain at this point and return a pointer to the next
216 	 * sub-chain.
217 	 */
218 	*pp = NULL;
219 	*countp = npacket;
220 	return (p);
221 }
222 
223 static void
224 i_dls_head_hold(dls_head_t *dhp)
225 {
226 	atomic_inc_32(&dhp->dh_ref);
227 }
228 
229 static void
230 i_dls_head_rele(dls_head_t *dhp)
231 {
232 	atomic_dec_32(&dhp->dh_ref);
233 }
234 
235 static dls_head_t *
236 i_dls_head_alloc(mod_hash_key_t key)
237 {
238 	dls_head_t	*dhp;
239 
240 	dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
241 	dhp->dh_key = key;
242 	return (dhp);
243 }
244 
245 static void
246 i_dls_head_free(dls_head_t *dhp)
247 {
248 	ASSERT(dhp->dh_ref == 0);
249 	kmem_free(dhp, sizeof (dls_head_t));
250 }
251 
252 static void
253 i_dls_link_ether_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
254 {
255 	dls_link_t			*dlp = arg;
256 	mod_hash_t			*hash = dlp->dl_impl_hash;
257 	mblk_t				*nextp;
258 	uint_t				header_length;
259 	uint8_t				*daddr;
260 	uint16_t			type_length;
261 	uint16_t			vid;
262 	uint16_t			sap;
263 	dls_head_t			*dhp;
264 	dls_impl_t			*dip;
265 	dls_impl_t			*ndip;
266 	mblk_t				*nmp;
267 	mod_hash_key_t			key;
268 	uint_t				npacket;
269 	boolean_t			accepted;
270 
271 	/*
272 	 * Walk the packet chain.
273 	 */
274 	while (mp != NULL) {
275 		/*
276 		 * Wipe the accepted state.
277 		 */
278 		accepted = B_FALSE;
279 
280 		/*
281 		 * Grab the longest sub-chain we can process as a single
282 		 * unit.
283 		 */
284 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
285 		    &type_length, &vid, &npacket);
286 
287 		/*
288 		 * Calculate the DLSAP: LLC (0) if the type/length field is
289 		 * interpreted as a length, otherwise it is the value of the
290 		 * type/length field.
291 		 */
292 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
293 
294 		/*
295 		 * Construct a hash key from the VLAN identifier and the
296 		 * DLSAP.
297 		 */
298 		key = MAKE_KEY(sap, vid);
299 
300 		/*
301 		 * Search the has table for dls_impl_t eligible to receive
302 		 * a packet chain for this DLSAP/VLAN combination.
303 		 */
304 		rw_enter(&dlp->dl_impl_lock, RW_READER);
305 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
306 			rw_exit(&dlp->dl_impl_lock);
307 			freemsgchain(mp);
308 			goto loop;
309 		}
310 		i_dls_head_hold(dhp);
311 		rw_exit(&dlp->dl_impl_lock);
312 
313 		/*
314 		 * Find the first dls_impl_t that will accept the sub-chain.
315 		 */
316 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
317 			if (dls_accept(dip, daddr))
318 				break;
319 
320 		/*
321 		 * If we did not find any dls_impl_t willing to accept the
322 		 * sub-chain then throw it away.
323 		 */
324 		if (dip == NULL) {
325 			i_dls_head_rele(dhp);
326 			freemsgchain(mp);
327 			goto loop;
328 		}
329 
330 		/*
331 		 * We have at least one acceptor.
332 		 */
333 		accepted = B_TRUE;
334 		for (;;) {
335 			/*
336 			 * Find the next dls_impl_t that will accept the
337 			 * sub-chain.
338 			 */
339 			for (ndip = dip->di_nextp; ndip != NULL;
340 			    ndip = ndip->di_nextp)
341 				if (dls_accept(ndip, daddr))
342 					break;
343 
344 			/*
345 			 * If there are no more dls_impl_t that are willing
346 			 * to accept the sub-chain then we don't need to dup
347 			 * it before handing it to the current one.
348 			 */
349 			if (ndip == NULL) {
350 				dip->di_rx(dip->di_rx_arg, mrh, mp,
351 				    header_length);
352 
353 				/*
354 				 * Since there are no more dls_impl_t, we're
355 				 * done.
356 				 */
357 				break;
358 			}
359 
360 			/*
361 			 * There are more dls_impl_t so dup the sub-chain.
362 			 */
363 			if ((nmp = copymsgchain(mp)) != NULL)
364 				dip->di_rx(dip->di_rx_arg, mrh, nmp,
365 				    header_length);
366 
367 			dip = ndip;
368 		}
369 
370 		/*
371 		 * Release the hold on the dls_impl_t chain now that we have
372 		 * finished walking it.
373 		 */
374 		i_dls_head_rele(dhp);
375 
376 loop:
377 		/*
378 		 * If there were no acceptors then add the packet count to the
379 		 * 'unknown' count.
380 		 */
381 		if (!accepted)
382 			atomic_add_32(&(dlp->dl_unknowns), npacket);
383 
384 		/*
385 		 * Move onto the next sub-chain.
386 		 */
387 		mp = nextp;
388 	}
389 }
390 
391 static void
392 i_dls_link_ether_rx_promisc(void *arg, mac_resource_handle_t mrh,
393     mblk_t *mp)
394 {
395 	dls_link_t			*dlp = arg;
396 	mod_hash_t			*hash = dlp->dl_impl_hash;
397 	mblk_t				*nextp;
398 	uint_t				header_length;
399 	uint8_t				*daddr;
400 	uint16_t			type_length;
401 	uint16_t			vid;
402 	uint16_t			sap;
403 	dls_head_t			*dhp;
404 	dls_impl_t			*dip;
405 	dls_impl_t			*ndip;
406 	mblk_t				*nmp;
407 	mod_hash_key_t			key;
408 	uint_t				npacket;
409 	boolean_t			accepted;
410 
411 	/*
412 	 * Walk the packet chain.
413 	 */
414 	while (mp != NULL) {
415 		/*
416 		 * Wipe the accepted state.
417 		 */
418 		accepted = B_FALSE;
419 
420 		/*
421 		 * Grab the longest sub-chain we can process as a single
422 		 * unit.
423 		 */
424 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
425 		    &type_length, &vid, &npacket);
426 
427 		/*
428 		 * Construct a hash key from the VLAN identifier and the
429 		 * DLSAP that represents dls_impl_t in promiscuous mode.
430 		 */
431 		key = MAKE_KEY(DLS_SAP_PROMISC, vid);
432 
433 		/*
434 		 * Search the has table for dls_impl_t eligible to receive
435 		 * a packet chain for this DLSAP/VLAN combination.
436 		 */
437 		rw_enter(&dlp->dl_impl_lock, RW_READER);
438 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
439 			rw_exit(&dlp->dl_impl_lock);
440 			goto non_promisc;
441 		}
442 		i_dls_head_hold(dhp);
443 		rw_exit(&dlp->dl_impl_lock);
444 
445 		/*
446 		 * Find dls_impl_t that will accept the sub-chain.
447 		 */
448 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
449 			if (!dls_accept(dip, daddr))
450 				continue;
451 
452 			/*
453 			 * We have at least one acceptor.
454 			 */
455 			accepted = B_TRUE;
456 
457 			/*
458 			 * There will normally be at least more dls_impl_t
459 			 * (since we've yet to check for non-promiscuous
460 			 * dls_impl_t) so dup the sub-chain.
461 			 */
462 			if ((nmp = copymsgchain(mp)) != NULL)
463 				dip->di_rx(dip->di_rx_arg, mrh, nmp,
464 				    header_length);
465 		}
466 
467 		/*
468 		 * Release the hold on the dls_impl_t chain now that we have
469 		 * finished walking it.
470 		 */
471 		i_dls_head_rele(dhp);
472 
473 non_promisc:
474 		/*
475 		 * Calculate the DLSAP: LLC (0) if the type/length field is
476 		 * interpreted as a length, otherwise it is the value of the
477 		 * type/length field.
478 		 */
479 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
480 
481 		/*
482 		 * Construct a hash key from the VLAN identifier and the
483 		 * DLSAP.
484 		 */
485 		key = MAKE_KEY(sap, vid);
486 
487 		/*
488 		 * Search the has table for dls_impl_t eligible to receive
489 		 * a packet chain for this DLSAP/VLAN combination.
490 		 */
491 		rw_enter(&dlp->dl_impl_lock, RW_READER);
492 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
493 			rw_exit(&dlp->dl_impl_lock);
494 			freemsgchain(mp);
495 			goto loop;
496 		}
497 		i_dls_head_hold(dhp);
498 		rw_exit(&dlp->dl_impl_lock);
499 
500 		/*
501 		 * Find the first dls_impl_t that will accept the sub-chain.
502 		 */
503 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
504 			if (dls_accept(dip, daddr))
505 				break;
506 
507 		/*
508 		 * If we did not find any dls_impl_t willing to accept the
509 		 * sub-chain then throw it away.
510 		 */
511 		if (dip == NULL) {
512 			i_dls_head_rele(dhp);
513 			freemsgchain(mp);
514 			goto loop;
515 		}
516 
517 		/*
518 		 * We have at least one acceptor.
519 		 */
520 		accepted = B_TRUE;
521 		for (;;) {
522 			/*
523 			 * Find the next dls_impl_t that will accept the
524 			 * sub-chain.
525 			 */
526 			for (ndip = dip->di_nextp; ndip != NULL;
527 			    ndip = ndip->di_nextp)
528 				if (dls_accept(ndip, daddr))
529 					break;
530 
531 			/*
532 			 * If there are no more dls_impl_t that are willing
533 			 * to accept the sub-chain then we don't need to dup
534 			 * it before handing it to the current one.
535 			 */
536 			if (ndip == NULL) {
537 				dip->di_rx(dip->di_rx_arg, mrh, mp,
538 				    header_length);
539 
540 				/*
541 				 * Since there are no more dls_impl_t, we're
542 				 * done.
543 				 */
544 				break;
545 			}
546 
547 			/*
548 			 * There are more dls_impl_t so dup the sub-chain.
549 			 */
550 			if ((nmp = copymsgchain(mp)) != NULL)
551 				dip->di_rx(dip->di_rx_arg, mrh, nmp,
552 				    header_length);
553 
554 			dip = ndip;
555 		}
556 
557 		/*
558 		 * Release the hold on the dls_impl_t chain now that we have
559 		 * finished walking it.
560 		 */
561 		i_dls_head_rele(dhp);
562 
563 loop:
564 		/*
565 		 * If there were no acceptors then add the packet count to the
566 		 * 'unknown' count.
567 		 */
568 		if (!accepted)
569 			atomic_add_32(&(dlp->dl_unknowns), npacket);
570 
571 		/*
572 		 * Move onto the next sub-chain.
573 		 */
574 		mp = nextp;
575 	}
576 }
577 
578 static void
579 i_dls_link_ether_loopback(void *arg, mblk_t *mp)
580 {
581 	dls_link_t			*dlp = arg;
582 	mod_hash_t			*hash = dlp->dl_impl_hash;
583 	mblk_t				*nextp;
584 	uint_t				header_length;
585 	uint8_t				*daddr;
586 	uint16_t			type_length;
587 	uint16_t			vid;
588 	uint16_t			sap;
589 	dls_head_t			*dhp;
590 	dls_impl_t			*dip;
591 	dls_impl_t			*ndip;
592 	mblk_t				*nmp;
593 	mod_hash_key_t			key;
594 	uint_t				npacket;
595 
596 	/*
597 	 * Walk the packet chain.
598 	 */
599 	while (mp != NULL) {
600 		/*
601 		 * Grab the longest sub-chain we can process as a single
602 		 * unit.
603 		 */
604 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
605 		    &type_length, &vid, &npacket);
606 
607 		/*
608 		 * Calculate the DLSAP: LLC (0) if the type/length field is
609 		 * interpreted as a length, otherwise it is the value of the
610 		 * type/length field.
611 		 */
612 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
613 
614 		/*
615 		 * Construct a hash key from the VLAN identifier and the
616 		 * DLSAP.
617 		 */
618 		key = MAKE_KEY(sap, vid);
619 
620 		/*
621 		 * Search the has table for dls_impl_t eligible to receive
622 		 * a packet chain for this DLSAP/VLAN combination.
623 		 */
624 		rw_enter(&dlp->dl_impl_lock, RW_READER);
625 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
626 			rw_exit(&dlp->dl_impl_lock);
627 			goto promisc;
628 		}
629 		i_dls_head_hold(dhp);
630 		rw_exit(&dlp->dl_impl_lock);
631 
632 		/*
633 		 * Find dls_impl_t that will accept the sub-chain.
634 		 */
635 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
636 			if (!dls_accept_loopback(dip, daddr))
637 				continue;
638 
639 			/*
640 			 * There should be at least more dls_impl_t (since
641 			 * we've yet to check for dls_impl_t in promiscuous
642 			 * mode) so dup the sub-chain.
643 			 */
644 			if ((nmp = copymsgchain(mp)) != NULL)
645 				dip->di_rx(dip->di_rx_arg, NULL, nmp,
646 				    header_length);
647 		}
648 
649 		/*
650 		 * Release the hold on the dls_impl_t chain now that we have
651 		 * finished walking it.
652 		 */
653 		i_dls_head_rele(dhp);
654 
655 promisc:
656 		/*
657 		 * Construct a hash key from the VLAN identifier and the
658 		 * DLSAP that represents dls_impl_t in promiscuous mode.
659 		 */
660 		key = MAKE_KEY(DLS_SAP_PROMISC, vid);
661 
662 		/*
663 		 * Search the has table for dls_impl_t eligible to receive
664 		 * a packet chain for this DLSAP/VLAN combination.
665 		 */
666 		rw_enter(&dlp->dl_impl_lock, RW_READER);
667 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
668 			rw_exit(&dlp->dl_impl_lock);
669 			freemsgchain(mp);
670 			goto loop;
671 		}
672 		i_dls_head_hold(dhp);
673 		rw_exit(&dlp->dl_impl_lock);
674 
675 		/*
676 		 * Find the first dls_impl_t that will accept the sub-chain.
677 		 */
678 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
679 			if (dls_accept_loopback(dip, daddr))
680 				break;
681 
682 		/*
683 		 * If we did not find any dls_impl_t willing to accept the
684 		 * sub-chain then throw it away.
685 		 */
686 		if (dip == NULL) {
687 			i_dls_head_rele(dhp);
688 			freemsgchain(mp);
689 			goto loop;
690 		}
691 
692 		for (;;) {
693 			/*
694 			 * Find the next dls_impl_t that will accept the
695 			 * sub-chain.
696 			 */
697 			for (ndip = dip->di_nextp; ndip != NULL;
698 			    ndip = ndip->di_nextp)
699 				if (dls_accept_loopback(ndip, daddr))
700 					break;
701 
702 			/*
703 			 * If there are no more dls_impl_t that are willing
704 			 * to accept the sub-chain then we don't need to dup
705 			 * it before handing it to the current one.
706 			 */
707 			if (ndip == NULL) {
708 				dip->di_rx(dip->di_rx_arg, NULL, mp,
709 				    header_length);
710 
711 				/*
712 				 * Since there are no more dls_impl_t, we're
713 				 * done.
714 				 */
715 				break;
716 			}
717 
718 			/*
719 			 * There are more dls_impl_t so dup the sub-chain.
720 			 */
721 			if ((nmp = copymsgchain(mp)) != NULL)
722 				dip->di_rx(dip->di_rx_arg, NULL, nmp,
723 				    header_length);
724 
725 			dip = ndip;
726 		}
727 
728 		/*
729 		 * Release the hold on the dls_impl_t chain now that we have
730 		 * finished walking it.
731 		 */
732 		i_dls_head_rele(dhp);
733 
734 loop:
735 		/*
736 		 * Move onto the next sub-chain.
737 		 */
738 		mp = nextp;
739 	}
740 }
741 
742 /*ARGSUSED*/
743 static uint_t
744 i_dls_link_walk(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
745 {
746 	boolean_t	*promiscp = arg;
747 	uint32_t	sap = KEY_SAP(key);
748 
749 	if (sap == DLS_SAP_PROMISC) {
750 		*promiscp = B_TRUE;
751 		return (MH_WALK_TERMINATE);
752 	}
753 
754 	return (MH_WALK_CONTINUE);
755 }
756 
757 static int
758 i_dls_link_create(const char *dev, uint_t port, dls_link_t **dlpp)
759 {
760 	dls_link_t		*dlp;
761 
762 	/*
763 	 * Allocate a new dls_link_t structure.
764 	 */
765 	dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
766 
767 	/*
768 	 * Name the dls_link_t after the MAC interface it represents.
769 	 */
770 	MAC_NAME(dlp->dl_name, dev, port);
771 	(void) strlcpy(dlp->dl_dev, dev, MAXNAMELEN);
772 	dlp->dl_port = port;
773 
774 	/*
775 	 * Set the packet loopback function for use when the MAC is in
776 	 * promiscuous mode, and initialize promiscuous bookeeping fields.
777 	 */
778 	dlp->dl_loopback = i_dls_link_ether_loopback;
779 	dlp->dl_npromisc = 0;
780 	dlp->dl_mth = NULL;
781 
782 	*dlpp = dlp;
783 	return (0);
784 }
785 
786 static void
787 i_dls_link_destroy(dls_link_t *dlp)
788 {
789 	ASSERT(dlp->dl_npromisc == 0);
790 	ASSERT(dlp->dl_nactive == 0);
791 	ASSERT(dlp->dl_mth == NULL);
792 	ASSERT(dlp->dl_macref == 0);
793 	ASSERT(dlp->dl_mh == NULL);
794 	ASSERT(dlp->dl_mip == NULL);
795 	ASSERT(dlp->dl_impl_count == 0);
796 	ASSERT(dlp->dl_mrh == NULL);
797 
798 	/*
799 	 * Free the structure back to the cache.
800 	 */
801 	dlp->dl_unknowns = 0;
802 	kmem_cache_free(i_dls_link_cachep, dlp);
803 }
804 
805 /*
806  * Module initialization functions.
807  */
808 
809 void
810 dls_link_init(void)
811 {
812 	/*
813 	 * Create a kmem_cache of dls_link_t structures.
814 	 */
815 	i_dls_link_cachep = kmem_cache_create("dls_link_cache",
816 	    sizeof (dls_link_t), 0, i_dls_link_constructor,
817 	    i_dls_link_destructor, NULL, NULL, NULL, 0);
818 	ASSERT(i_dls_link_cachep != NULL);
819 
820 	/*
821 	 * Create a dls_link_t hash table and associated lock.
822 	 */
823 	i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
824 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
825 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
826 	rw_init(&i_dls_link_lock, NULL, RW_DEFAULT, NULL);
827 	i_dls_link_count = 0;
828 }
829 
830 int
831 dls_link_fini(void)
832 {
833 	if (i_dls_link_count > 0)
834 		return (EBUSY);
835 
836 	/*
837 	 * Destroy the kmem_cache.
838 	 */
839 	kmem_cache_destroy(i_dls_link_cachep);
840 
841 	/*
842 	 * Destroy the hash table and associated lock.
843 	 */
844 	mod_hash_destroy_hash(i_dls_link_hash);
845 	rw_destroy(&i_dls_link_lock);
846 	return (0);
847 }
848 
849 /*
850  * Exported functions.
851  */
852 
853 int
854 dls_link_hold(const char *dev, uint_t port, dls_link_t **dlpp)
855 {
856 	char			name[MAXNAMELEN];
857 	dls_link_t		*dlp;
858 	int			err;
859 
860 	/*
861 	 * Construct a copy of the name used to identify any existing
862 	 * dls_link_t.
863 	 */
864 	MAC_NAME(name, dev, port);
865 
866 	/*
867 	 * Look up a dls_link_t corresponding to the given mac_handle_t
868 	 * in the global hash table. We need to hold i_dls_link_lock in
869 	 * order to atomically find and insert a dls_link_t into the
870 	 * hash table.
871 	 */
872 	rw_enter(&i_dls_link_lock, RW_WRITER);
873 	if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
874 	    (mod_hash_val_t *)&dlp)) == 0)
875 		goto done;
876 
877 	ASSERT(err == MH_ERR_NOTFOUND);
878 
879 	/*
880 	 * We didn't find anything so we need to create one.
881 	 */
882 	if ((err = i_dls_link_create(dev, port, &dlp)) != 0) {
883 		rw_exit(&i_dls_link_lock);
884 		return (err);
885 	}
886 
887 	/*
888 	 * Insert the dls_link_t.
889 	 */
890 	err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name,
891 	    (mod_hash_val_t)dlp);
892 	ASSERT(err == 0);
893 
894 	i_dls_link_count++;
895 	ASSERT(i_dls_link_count != 0);
896 
897 done:
898 	/*
899 	 * Bump the reference count and hand back the reference.
900 	 */
901 	dlp->dl_ref++;
902 	*dlpp = dlp;
903 	rw_exit(&i_dls_link_lock);
904 	return (0);
905 }
906 
907 void
908 dls_link_rele(dls_link_t *dlp)
909 {
910 	mod_hash_val_t	val;
911 
912 	rw_enter(&i_dls_link_lock, RW_WRITER);
913 
914 	/*
915 	 * Check if there are any more references.
916 	 */
917 	if (--dlp->dl_ref != 0) {
918 		/*
919 		 * There are more references so there's nothing more to do.
920 		 */
921 		goto done;
922 	}
923 
924 	(void) mod_hash_remove(i_dls_link_hash,
925 	    (mod_hash_key_t)dlp->dl_name, &val);
926 	ASSERT(dlp == (dls_link_t *)val);
927 
928 	/*
929 	 * Destroy the dls_link_t.
930 	 */
931 	i_dls_link_destroy(dlp);
932 	ASSERT(i_dls_link_count > 0);
933 	i_dls_link_count--;
934 done:
935 	rw_exit(&i_dls_link_lock);
936 }
937 
938 int
939 dls_mac_hold(dls_link_t *dlp)
940 {
941 	int err = 0;
942 
943 	mutex_enter(&dlp->dl_lock);
944 
945 	ASSERT(IMPLY(dlp->dl_macref != 0, dlp->dl_mh != NULL));
946 	ASSERT(IMPLY(dlp->dl_macref == 0, dlp->dl_mh == NULL));
947 
948 	if (dlp->dl_macref == 0) {
949 		/*
950 		 * First reference; hold open the MAC interface.
951 		 */
952 		err = mac_open(dlp->dl_dev, dlp->dl_port, &dlp->dl_mh);
953 		if (err != 0)
954 			goto done;
955 
956 		dlp->dl_mip = mac_info(dlp->dl_mh);
957 	}
958 
959 	dlp->dl_macref++;
960 done:
961 	mutex_exit(&dlp->dl_lock);
962 	return (err);
963 }
964 
965 void
966 dls_mac_rele(dls_link_t *dlp)
967 {
968 	mutex_enter(&dlp->dl_lock);
969 	ASSERT(dlp->dl_mh != NULL);
970 
971 	if (--dlp->dl_macref == 0) {
972 		mac_close(dlp->dl_mh);
973 		dlp->dl_mh = NULL;
974 		dlp->dl_mip = NULL;
975 	}
976 	mutex_exit(&dlp->dl_lock);
977 }
978 
979 void
980 dls_link_add(dls_link_t *dlp, uint32_t sap, dls_impl_t *dip)
981 {
982 	dls_vlan_t	*dvp = dip->di_dvp;
983 	mod_hash_t	*hash = dlp->dl_impl_hash;
984 	mod_hash_key_t	key;
985 	dls_head_t	*dhp;
986 	dls_impl_t	*p;
987 	mac_rx_t	rx;
988 	int		err;
989 	boolean_t	promisc = B_FALSE;
990 
991 	/*
992 	 * For ethernet media, sap values less than or equal to
993 	 * ETHERMTU (1500) represent LLC channels. (See PSARC 2003/150).
994 	 * We strictly use 0 to represent LLC channels.
995 	 */
996 	sap = (sap <= ETHERMTU) ? 0 : sap;
997 
998 	/*
999 	 * Make the appropriate key value depending on whether the
1000 	 * dls_impl_t is in promiscuous mode or not.
1001 	 */
1002 	key = MAKE_KEY(sap, dvp->dv_id);
1003 
1004 	/*
1005 	 * We need dl_lock here because we want to be able to walk
1006 	 * the hash table *and* set the mac rx func atomically. if
1007 	 * these two operations are separate, someone else could
1008 	 * insert/remove dls_impl_t from the hash table after we
1009 	 * drop the hash lock and this could cause our chosen rx
1010 	 * func to be incorrect. note that we cannot call mac_rx_add
1011 	 * when holding the hash lock because this can cause deadlock.
1012 	 */
1013 	mutex_enter(&dlp->dl_lock);
1014 
1015 	/*
1016 	 * Search the table for a list head with this key.
1017 	 */
1018 	rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1019 
1020 	if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
1021 		ASSERT(err == MH_ERR_NOTFOUND);
1022 
1023 		dhp = i_dls_head_alloc(key);
1024 		err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
1025 		ASSERT(err == 0);
1026 	}
1027 
1028 	/*
1029 	 * Add the dls_impl_t to the head of the list.
1030 	 */
1031 	ASSERT(dip->di_nextp == NULL);
1032 	p = dhp->dh_list;
1033 	dip->di_nextp = p;
1034 	dhp->dh_list = dip;
1035 
1036 	/*
1037 	 * Save a pointer to the list head.
1038 	 */
1039 	dip->di_headp = dhp;
1040 	dlp->dl_impl_count++;
1041 
1042 	/*
1043 	 * Walk the bound dls_impl_t to see if there are any
1044 	 * in promiscuous 'all sap' mode.
1045 	 */
1046 	mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
1047 	rw_exit(&dlp->dl_impl_lock);
1048 
1049 	/*
1050 	 * If there are then we need to use a receive routine
1051 	 * which will route packets to those dls_impl_t as well
1052 	 * as ones bound to the  DLSAP of the packet.
1053 	 */
1054 	if (promisc)
1055 		rx = i_dls_link_ether_rx_promisc;
1056 	else
1057 		rx = i_dls_link_ether_rx;
1058 
1059 	/* Replace the existing receive function if there is one. */
1060 	if (dlp->dl_mrh != NULL)
1061 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1062 	dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
1063 	mutex_exit(&dlp->dl_lock);
1064 }
1065 
1066 void
1067 dls_link_remove(dls_link_t *dlp, dls_impl_t *dip)
1068 {
1069 	mod_hash_t	*hash = dlp->dl_impl_hash;
1070 	dls_impl_t	**pp;
1071 	dls_impl_t	*p;
1072 	dls_head_t	*dhp;
1073 	mac_rx_t	rx;
1074 
1075 	/*
1076 	 * We need dl_lock here because we want to be able to walk
1077 	 * the hash table *and* set the mac rx func atomically. if
1078 	 * these two operations are separate, someone else could
1079 	 * insert/remove dls_impl_t from the hash table after we
1080 	 * drop the hash lock and this could cause our chosen rx
1081 	 * func to be incorrect. note that we cannot call mac_rx_add
1082 	 * when holding the hash lock because this can cause deadlock.
1083 	 */
1084 	mutex_enter(&dlp->dl_lock);
1085 	rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1086 
1087 	/*
1088 	 * Poll the hash table entry until all references have been dropped.
1089 	 * We need to drop all locks before sleeping because we don't want
1090 	 * the interrupt handler to block. We set di_removing here to
1091 	 * tell the receive callbacks not to pass up packets anymore.
1092 	 * This is only a hint to quicken the decrease of the refcnt so
1093 	 * the assignment need not be protected by any lock.
1094 	 */
1095 	dhp = dip->di_headp;
1096 	dip->di_removing = B_TRUE;
1097 	while (dhp->dh_ref != 0) {
1098 		rw_exit(&dlp->dl_impl_lock);
1099 		mutex_exit(&dlp->dl_lock);
1100 		delay(drv_usectohz(1000));	/* 1ms delay */
1101 		mutex_enter(&dlp->dl_lock);
1102 		rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1103 	}
1104 
1105 	/*
1106 	 * Walk the list and remove the dls_impl_t.
1107 	 */
1108 	for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->di_nextp)) {
1109 		if (p == dip)
1110 			break;
1111 	}
1112 	ASSERT(p != NULL);
1113 	*pp = p->di_nextp;
1114 	p->di_nextp = NULL;
1115 
1116 	ASSERT(dlp->dl_impl_count > 0);
1117 	dlp->dl_impl_count--;
1118 
1119 	if (dhp->dh_list == NULL) {
1120 		mod_hash_val_t	val = NULL;
1121 
1122 		/*
1123 		 * The list is empty so remove the hash table entry.
1124 		 */
1125 		(void) mod_hash_remove(hash, dhp->dh_key, &val);
1126 		ASSERT(dhp == (dls_head_t *)val);
1127 		i_dls_head_free(dhp);
1128 	}
1129 	dip->di_removing = B_FALSE;
1130 
1131 	/*
1132 	 * If there are no dls_impl_t then there's no need to register a
1133 	 * receive function with the mac.
1134 	 */
1135 	if (dlp->dl_impl_count == 0) {
1136 		rw_exit(&dlp->dl_impl_lock);
1137 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1138 		dlp->dl_mrh = NULL;
1139 	} else {
1140 		boolean_t promisc = B_FALSE;
1141 
1142 		/*
1143 		 * Walk the bound dls_impl_t to see if there are any
1144 		 * in promiscuous 'all sap' mode.
1145 		 */
1146 		mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
1147 		rw_exit(&dlp->dl_impl_lock);
1148 
1149 		/*
1150 		 * If there are then we need to use a receive routine
1151 		 * which will route packets to those dls_impl_t as well
1152 		 * as ones bound to the  DLSAP of the packet.
1153 		 */
1154 		if (promisc)
1155 			rx = i_dls_link_ether_rx_promisc;
1156 		else
1157 			rx = i_dls_link_ether_rx;
1158 
1159 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1160 		dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
1161 	}
1162 	mutex_exit(&dlp->dl_lock);
1163 }
1164