xref: /titanic_50/usr/src/uts/common/io/dls/dls_link.c (revision 554ff184129088135ad2643c1c9832174a17be88)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Data-Link Services Module
31  */
32 
33 #include	<sys/types.h>
34 #include	<sys/stream.h>
35 #include	<sys/strsun.h>
36 #include	<sys/strsubr.h>
37 #include	<sys/sysmacros.h>
38 #include	<sys/atomic.h>
39 #include	<sys/ght.h>
40 #include	<sys/dlpi.h>
41 #include	<sys/ethernet.h>
42 #include	<sys/byteorder.h>
43 #include	<sys/vlan.h>
44 #include	<sys/mac.h>
45 #include	<sys/sdt.h>
46 
47 #include	<sys/dls.h>
48 #include	<sys/dld_impl.h>
49 #include	<sys/dls_impl.h>
50 
51 static kmem_cache_t	*i_dls_link_cachep;
52 static ght_t		i_dls_link_hash;
53 
54 #define		LINK_HASHSZ	67	/* prime */
55 #define		IMPL_HASHSZ	67	/* prime */
56 
57 /*
58  * Construct a hash key encompassing both DLSAP value and VLAN idenitifier.
59  */
60 #define	MAKE_KEY(_sap, _vid)						\
61 	GHT_SCALAR_TO_KEY(((_sap) << VLAN_ID_SIZE) | (_vid) & VLAN_ID_MASK)
62 
63 /*
64  * Extract the DLSAP value from the hash key.
65  */
66 #define	KEY_SAP(_key)							\
67 	(((uint32_t)(uintptr_t)(_key)) >> VLAN_ID_SIZE)
68 
69 /*
70  * Private functions.
71  */
72 
73 /*ARGSUSED*/
74 static int
75 i_dls_link_constructor(void *buf, void *arg, int kmflag)
76 {
77 	dls_link_t	*dlp = buf;
78 	char		name[MAXNAMELEN];
79 	int		err;
80 
81 	bzero(buf, sizeof (dls_link_t));
82 
83 	(void) sprintf(name, "dls_link_t_%p_impl_hash", buf);
84 	err = ght_scalar_create(name, IMPL_HASHSZ, &(dlp->dl_impl_hash));
85 	ASSERT(err == 0);
86 
87 	mutex_init(&dlp->dl_lock, NULL, MUTEX_DEFAULT, NULL);
88 	mutex_init(&dlp->dl_promisc_lock, NULL, MUTEX_DEFAULT, NULL);
89 	return (0);
90 }
91 
92 /*ARGSUSED*/
93 static void
94 i_dls_link_destructor(void *buf, void *arg)
95 {
96 	dls_link_t	*dlp = buf;
97 	int		err;
98 
99 	ASSERT(dlp->dl_ref == 0);
100 	ASSERT(dlp->dl_hte == NULL);
101 	ASSERT(dlp->dl_mh == NULL);
102 	ASSERT(dlp->dl_unknowns == 0);
103 
104 	err = ght_destroy(dlp->dl_impl_hash);
105 	ASSERT(err == 0);
106 
107 	mutex_destroy(&dlp->dl_lock);
108 	mutex_destroy(&dlp->dl_promisc_lock);
109 }
110 
/*
 * Compare the 16-bit word at index _idx of two packet headers.
 */
#define	ETHER_WORD_EQ(_pkt_a, _pkt_b, _idx)				\
	(((uint16_t *)(_pkt_a))[(_idx)] == ((uint16_t *)(_pkt_b))[(_idx)])

/*
 * Two headers match when 16-bit words 0, 1, 2 and 6 are equal. Words
 * 3, 4 and 5 are deliberately not compared.
 */
#define	ETHER_MATCH(_pkt_a, _pkt_b)					\
	(ETHER_WORD_EQ(_pkt_a, _pkt_b, 0) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 1) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 2) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 6))

/*
 * As ETHER_MATCH(), but additionally requires words 7 and 8 to be equal
 * (used for headers carrying a VLAN tag).
 */
#define	ETHER_VLAN_MATCH(_pkt_a, _pkt_b)				\
	(ETHER_WORD_EQ(_pkt_a, _pkt_b, 0) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 1) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 2) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 6) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 7) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 8))
124 
125 static mblk_t *
126 i_dls_link_ether_subchain(mblk_t *mp, uint_t *header_lengthp,
127     uint8_t **daddrp, uint16_t *type_lengthp, uint16_t *vidp,
128     uint_t *countp)
129 {
130 	struct ether_header		*ehp;
131 	struct ether_vlan_header	*evhp;
132 	mblk_t				**pp;
133 	mblk_t				*p;
134 	uint_t				npacket;
135 
136 	/*
137 	 * Packets should always be at least 16 bit aligned.
138 	 */
139 	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
140 
141 	/*
142 	 * Determine whether this is a VLAN or non-VLAN packet.
143 	 */
144 	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
145 	ehp = (struct ether_header *)mp->b_rptr;
146 	if ((*type_lengthp = ntohs(ehp->ether_type)) == VLAN_TPID)
147 		goto vlan;
148 
149 	/*
150 	 * It is a non-VLAN header.
151 	 */
152 	*header_lengthp = sizeof (struct ether_header);
153 
154 	/*
155 	 * Parse the rest of the header information that we need.
156 	 */
157 	*daddrp = (uint8_t *)&(ehp->ether_dhost);
158 	*vidp = VLAN_ID_NONE;
159 
160 	/*
161 	 * Compare with subsequent headers until we find one that has
162 	 * differing header information. After checking each packet skip over
163 	 * the header.
164 	 */
165 	npacket = 1;
166 	for (pp = &(mp->b_next); (p = *pp) != NULL; pp = &(p->b_next)) {
167 		if (!ETHER_MATCH(p->b_rptr, mp->b_rptr) != 0)
168 			break;
169 		p->b_rptr += sizeof (struct ether_header);
170 		npacket++;
171 	}
172 
173 	/*
174 	 * Skip over the initial packet's header.
175 	 */
176 	mp->b_rptr += sizeof (struct ether_header);
177 	goto done;
178 
179 vlan:
180 	/*
181 	 * It is a VLAN header.
182 	 */
183 	evhp = (struct ether_vlan_header *)mp->b_rptr;
184 	*header_lengthp = sizeof (struct ether_vlan_header);
185 
186 	/*
187 	 * Parse the header information.
188 	 */
189 	*daddrp = (uint8_t *)&(evhp->ether_dhost);
190 	*vidp = VLAN_ID(ntohs(evhp->ether_tci));
191 	*type_lengthp = ntohs(evhp->ether_type);
192 
193 	/*
194 	 * Compare with subsequent headers until we find one that has
195 	 * differing header information. After checking each packet skip over
196 	 * the header.
197 	 */
198 	npacket = 1;
199 	for (pp = &(mp->b_next); (p = *pp) != NULL; pp = &(p->b_next)) {
200 		if (!ETHER_VLAN_MATCH(p->b_rptr, mp->b_rptr) != 0)
201 			break;
202 		p->b_rptr += sizeof (struct ether_vlan_header);
203 		npacket++;
204 	}
205 
206 	/*
207 	 * Skip over the initial packet's header.
208 	 */
209 	mp->b_rptr += sizeof (struct ether_vlan_header);
210 
211 done:
212 	/*
213 	 * Break the chain at this point and return a pointer to the next
214 	 * sub-chain.
215 	 */
216 	*pp = NULL;
217 	*countp = npacket;
218 	return (p);
219 }
220 
221 static void
222 i_dls_link_ether_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
223 {
224 	dls_link_t			*dlp = arg;
225 	ght_t				hash = dlp->dl_impl_hash;
226 	mblk_t				*nextp;
227 	uint_t				header_length;
228 	uint8_t				*daddr;
229 	uint16_t			type_length;
230 	uint16_t			vid;
231 	uint16_t			sap;
232 	ghte_t				hte;
233 	dls_impl_t			*dip;
234 	dls_impl_t			*ndip;
235 	mblk_t				*nmp;
236 	ght_key_t			key;
237 	uint_t				npacket;
238 	boolean_t			accepted;
239 
240 	/*
241 	 * Walk the packet chain.
242 	 */
243 	while (mp != NULL) {
244 		/*
245 		 * Wipe the accepted state.
246 		 */
247 		accepted = B_FALSE;
248 
249 		/*
250 		 * Grab the longest sub-chain we can process as a single
251 		 * unit.
252 		 */
253 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
254 		    &type_length, &vid, &npacket);
255 
256 		/*
257 		 * Calculate the DLSAP: LLC (0) if the type/length field is
258 		 * interpreted as a length, otherwise it is the value of the
259 		 * type/length field.
260 		 */
261 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
262 
263 		/*
264 		 * Construct a hash key from the VLAN identifier and the
265 		 * DLSAP.
266 		 */
267 		key = MAKE_KEY(sap, vid);
268 
269 		/*
270 		 * Search the has table for dls_impl_t eligible to receive
271 		 * a packet chain for this DLSAP/VLAN combination.
272 		 */
273 		ght_lock(hash, GHT_READ);
274 		if (ght_find(hash, key, &hte) != 0) {
275 			ght_unlock(hash);
276 			freemsgchain(mp);
277 			goto loop;
278 		}
279 
280 		/*
281 		 * Place a hold the chain of dls_impl_t to make sure none are
282 		 * removed from under our feet.
283 		 */
284 		ght_hold(hte);
285 		ght_unlock(hash);
286 
287 		/*
288 		 * Find the first dls_impl_t that will accept the sub-chain.
289 		 */
290 		for (dip = (dls_impl_t *)GHT_VAL(hte); dip != NULL;
291 		    dip = dip->di_nextp)
292 			if (dls_accept(dip, daddr))
293 				break;
294 
295 		/*
296 		 * If we did not find any dls_impl_t willing to accept the
297 		 * sub-chain then throw it away.
298 		 */
299 		if (dip == NULL) {
300 			ght_rele(hte);
301 			freemsgchain(mp);
302 			goto loop;
303 		}
304 
305 		/*
306 		 * We have at least one acceptor.
307 		 */
308 		accepted = B_TRUE;
309 		for (;;) {
310 			/*
311 			 * Find the next dls_impl_t that will accept the
312 			 * sub-chain.
313 			 */
314 			for (ndip = dip->di_nextp; ndip != NULL;
315 			    ndip = ndip->di_nextp)
316 				if (dls_accept(ndip, daddr))
317 					break;
318 
319 			/*
320 			 * If there are no more dls_impl_t that are willing
321 			 * to accept the sub-chain then we don't need to dup
322 			 * it before handing it to the current one.
323 			 */
324 			if (ndip == NULL) {
325 				dip->di_rx(dip->di_rx_arg, mrh, mp,
326 				    header_length);
327 
328 				/*
329 				 * Since there are no more dls_impl_t, we're
330 				 * done.
331 				 */
332 				break;
333 			}
334 
335 			/*
336 			 * There are more dls_impl_t so dup the sub-chain.
337 			 */
338 			if ((nmp = copymsgchain(mp)) != NULL)
339 				dip->di_rx(dip->di_rx_arg, mrh, nmp,
340 				    header_length);
341 
342 			dip = ndip;
343 		}
344 
345 		/*
346 		 * Release the hold on the dls_impl_t chain now that we have
347 		 * finished walking it.
348 		 */
349 		ght_rele(hte);
350 
351 loop:
352 		/*
353 		 * If there were no acceptors then add the packet count to the
354 		 * 'unknown' count.
355 		 */
356 		if (!accepted)
357 			atomic_add_32(&(dlp->dl_unknowns), npacket);
358 
359 		/*
360 		 * Move onto the next sub-chain.
361 		 */
362 		mp = nextp;
363 	}
364 }
365 
366 static void
367 i_dls_link_ether_rx_promisc(void *arg, mac_resource_handle_t mrh,
368     mblk_t *mp)
369 {
370 	dls_link_t			*dlp = arg;
371 	ght_t				hash = dlp->dl_impl_hash;
372 	mblk_t				*nextp;
373 	uint_t				header_length;
374 	uint8_t				*daddr;
375 	uint16_t			type_length;
376 	uint16_t			vid;
377 	uint16_t			sap;
378 	ghte_t				hte;
379 	dls_impl_t			*dip;
380 	dls_impl_t			*ndip;
381 	mblk_t				*nmp;
382 	ght_key_t			key;
383 	uint_t				npacket;
384 	boolean_t			accepted;
385 
386 	/*
387 	 * Walk the packet chain.
388 	 */
389 	while (mp != NULL) {
390 		/*
391 		 * Wipe the accepted state.
392 		 */
393 		accepted = B_FALSE;
394 
395 		/*
396 		 * Grab the longest sub-chain we can process as a single
397 		 * unit.
398 		 */
399 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
400 		    &type_length, &vid, &npacket);
401 
402 		/*
403 		 * Construct a hash key from the VLAN identifier and the
404 		 * DLSAP that represents dls_impl_t in promiscuous mode.
405 		 */
406 		key = MAKE_KEY(DLS_SAP_PROMISC, vid);
407 
408 		/*
409 		 * Search the has table for dls_impl_t eligible to receive
410 		 * a packet chain for this DLSAP/VLAN combination.
411 		 */
412 		ght_lock(hash, GHT_READ);
413 		if (ght_find(hash, key, &hte) != 0) {
414 			ght_unlock(hash);
415 			goto non_promisc;
416 		}
417 
418 		/*
419 		 * Place a hold the chain of dls_impl_t to make sure none are
420 		 * removed from under our feet.
421 		 */
422 		ght_hold(hte);
423 		ght_unlock(hash);
424 
425 		/*
426 		 * Find dls_impl_t that will accept the sub-chain.
427 		 */
428 		for (dip = (dls_impl_t *)GHT_VAL(hte); dip != NULL;
429 		    dip = dip->di_nextp) {
430 			if (!dls_accept(dip, daddr))
431 				continue;
432 
433 			/*
434 			 * We have at least one acceptor.
435 			 */
436 			accepted = B_TRUE;
437 
438 			/*
439 			 * There will normally be at least more dls_impl_t
440 			 * (since we've yet to check for non-promiscuous
441 			 * dls_impl_t) so dup the sub-chain.
442 			 */
443 			if ((nmp = copymsgchain(mp)) != NULL)
444 				dip->di_rx(dip->di_rx_arg, mrh, nmp,
445 				    header_length);
446 		}
447 
448 		/*
449 		 * Release the hold on the dls_impl_t chain now that we have
450 		 * finished walking it.
451 		 */
452 		ght_rele(hte);
453 
454 non_promisc:
455 		/*
456 		 * Calculate the DLSAP: LLC (0) if the type/length field is
457 		 * interpreted as a length, otherwise it is the value of the
458 		 * type/length field.
459 		 */
460 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
461 
462 		/*
463 		 * Construct a hash key from the VLAN identifier and the
464 		 * DLSAP.
465 		 */
466 		key = MAKE_KEY(sap, vid);
467 
468 		/*
469 		 * Search the has table for dls_impl_t eligible to receive
470 		 * a packet chain for this DLSAP/VLAN combination.
471 		 */
472 		ght_lock(hash, GHT_READ);
473 		if (ght_find(hash, key, &hte) != 0) {
474 			ght_unlock(hash);
475 			freemsgchain(mp);
476 			goto loop;
477 		}
478 
479 		/*
480 		 * Place a hold the chain of dls_impl_t to make sure none are
481 		 * removed from under our feet.
482 		 */
483 		ght_hold(hte);
484 		ght_unlock(hash);
485 
486 		/*
487 		 * Find the first dls_impl_t that will accept the sub-chain.
488 		 */
489 		for (dip = (dls_impl_t *)GHT_VAL(hte); dip != NULL;
490 		    dip = dip->di_nextp)
491 			if (dls_accept(dip, daddr))
492 				break;
493 
494 		/*
495 		 * If we did not find any dls_impl_t willing to accept the
496 		 * sub-chain then throw it away.
497 		 */
498 		if (dip == NULL) {
499 			ght_rele(hte);
500 			freemsgchain(mp);
501 			goto loop;
502 		}
503 
504 		/*
505 		 * We have at least one acceptor.
506 		 */
507 		accepted = B_TRUE;
508 		for (;;) {
509 			/*
510 			 * Find the next dls_impl_t that will accept the
511 			 * sub-chain.
512 			 */
513 			for (ndip = dip->di_nextp; ndip != NULL;
514 			    ndip = ndip->di_nextp)
515 				if (dls_accept(ndip, daddr))
516 					break;
517 
518 			/*
519 			 * If there are no more dls_impl_t that are willing
520 			 * to accept the sub-chain then we don't need to dup
521 			 * it before handing it to the current one.
522 			 */
523 			if (ndip == NULL) {
524 				dip->di_rx(dip->di_rx_arg, mrh, mp,
525 				    header_length);
526 
527 				/*
528 				 * Since there are no more dls_impl_t, we're
529 				 * done.
530 				 */
531 				break;
532 			}
533 
534 			/*
535 			 * There are more dls_impl_t so dup the sub-chain.
536 			 */
537 			if ((nmp = copymsgchain(mp)) != NULL)
538 				dip->di_rx(dip->di_rx_arg, mrh, nmp,
539 				    header_length);
540 
541 			dip = ndip;
542 		}
543 
544 		/*
545 		 * Release the hold on the dls_impl_t chain now that we have
546 		 * finished walking it.
547 		 */
548 		ght_rele(hte);
549 
550 loop:
551 		/*
552 		 * If there were no acceptors then add the packet count to the
553 		 * 'unknown' count.
554 		 */
555 		if (!accepted)
556 			atomic_add_32(&(dlp->dl_unknowns), npacket);
557 
558 		/*
559 		 * Move onto the next sub-chain.
560 		 */
561 		mp = nextp;
562 	}
563 }
564 
565 static void
566 i_dls_link_ether_loopback(void *arg, mblk_t *mp)
567 {
568 	dls_link_t			*dlp = arg;
569 	ght_t				hash = dlp->dl_impl_hash;
570 	mblk_t				*nextp;
571 	uint_t				header_length;
572 	uint8_t				*daddr;
573 	uint16_t			type_length;
574 	uint16_t			vid;
575 	uint16_t			sap;
576 	ghte_t				hte;
577 	dls_impl_t			*dip;
578 	dls_impl_t			*ndip;
579 	mblk_t				*nmp;
580 	ght_key_t			key;
581 	uint_t				npacket;
582 
583 	/*
584 	 * Walk the packet chain.
585 	 */
586 	while (mp != NULL) {
587 		/*
588 		 * Grab the longest sub-chain we can process as a single
589 		 * unit.
590 		 */
591 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
592 		    &type_length, &vid, &npacket);
593 
594 		/*
595 		 * Calculate the DLSAP: LLC (0) if the type/length field is
596 		 * interpreted as a length, otherwise it is the value of the
597 		 * type/length field.
598 		 */
599 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
600 
601 		/*
602 		 * Construct a hash key from the VLAN identifier and the
603 		 * DLSAP.
604 		 */
605 		key = MAKE_KEY(sap, vid);
606 
607 		/*
608 		 * Search the has table for dls_impl_t eligible to receive
609 		 * a packet chain for this DLSAP/VLAN combination.
610 		 */
611 		ght_lock(hash, GHT_READ);
612 		if (ght_find(hash, key, &hte) != 0) {
613 			ght_unlock(hash);
614 			goto promisc;
615 		}
616 
617 		/*
618 		 * Place a hold the chain of dls_impl_t to make sure none are
619 		 * removed from under our feet.
620 		 */
621 		ght_hold(hte);
622 		ght_unlock(hash);
623 
624 		/*
625 		 * Find dls_impl_t that will accept the sub-chain.
626 		 */
627 		for (dip = (dls_impl_t *)GHT_VAL(hte); dip != NULL;
628 		    dip = dip->di_nextp) {
629 			if (!dls_accept_loopback(dip, daddr))
630 				continue;
631 
632 			/*
633 			 * There should be at least more dls_impl_t (since
634 			 * we've yet to check for dls_impl_t in promiscuous
635 			 * mode) so dup the sub-chain.
636 			 */
637 			if ((nmp = copymsgchain(mp)) != NULL)
638 				dip->di_rx(dip->di_rx_arg, NULL, nmp,
639 				    header_length);
640 		}
641 
642 		/*
643 		 * Release the hold on the dls_impl_t chain now that we have
644 		 * finished walking it.
645 		 */
646 		ght_rele(hte);
647 
648 promisc:
649 		/*
650 		 * Construct a hash key from the VLAN identifier and the
651 		 * DLSAP that represents dls_impl_t in promiscuous mode.
652 		 */
653 		key = MAKE_KEY(DLS_SAP_PROMISC, vid);
654 
655 		/*
656 		 * Search the has table for dls_impl_t eligible to receive
657 		 * a packet chain for this DLSAP/VLAN combination.
658 		 */
659 		ght_lock(hash, GHT_READ);
660 		if (ght_find(hash, key, &hte) != 0) {
661 			ght_unlock(hash);
662 			freemsgchain(mp);
663 			goto loop;
664 		}
665 
666 		/*
667 		 * Place a hold the chain of dls_impl_t to make sure none are
668 		 * removed from under our feet.
669 		 */
670 		ght_hold(hte);
671 		ght_unlock(hash);
672 
673 		/*
674 		 * Find the first dls_impl_t that will accept the sub-chain.
675 		 */
676 		for (dip = (dls_impl_t *)GHT_VAL(hte); dip != NULL;
677 		    dip = dip->di_nextp)
678 			if (dls_accept_loopback(dip, daddr))
679 				break;
680 
681 		/*
682 		 * If we did not find any dls_impl_t willing to accept the
683 		 * sub-chain then throw it away.
684 		 */
685 		if (dip == NULL) {
686 			ght_rele(hte);
687 			freemsgchain(mp);
688 			goto loop;
689 		}
690 
691 		for (;;) {
692 			/*
693 			 * Find the next dls_impl_t that will accept the
694 			 * sub-chain.
695 			 */
696 			for (ndip = dip->di_nextp; ndip != NULL;
697 			    ndip = ndip->di_nextp)
698 				if (dls_accept_loopback(ndip, daddr))
699 					break;
700 
701 			/*
702 			 * If there are no more dls_impl_t that are willing
703 			 * to accept the sub-chain then we don't need to dup
704 			 * it before handing it to the current one.
705 			 */
706 			if (ndip == NULL) {
707 				dip->di_rx(dip->di_rx_arg, NULL, mp,
708 				    header_length);
709 
710 				/*
711 				 * Since there are no more dls_impl_t, we're
712 				 * done.
713 				 */
714 				break;
715 			}
716 
717 			/*
718 			 * There are more dls_impl_t so dup the sub-chain.
719 			 */
720 			if ((nmp = copymsgchain(mp)) != NULL)
721 				dip->di_rx(dip->di_rx_arg, NULL, nmp,
722 				    header_length);
723 
724 			dip = ndip;
725 		}
726 
727 		/*
728 		 * Release the hold on the dls_impl_t chain now that we have
729 		 * finished walking it.
730 		 */
731 		ght_rele(hte);
732 
733 loop:
734 		/*
735 		 * Move onto the next sub-chain.
736 		 */
737 		mp = nextp;
738 	}
739 }
740 
741 static boolean_t
742 i_dls_link_walk(void *arg, ghte_t hte)
743 {
744 	boolean_t	*promiscp = arg;
745 	ght_key_t	key = GHT_KEY(hte);
746 	uint32_t	sap = KEY_SAP(key);
747 
748 	if (sap == DLS_SAP_PROMISC) {
749 		*promiscp = B_TRUE;
750 		return (B_FALSE);	/* terminate walk */
751 	}
752 
753 	return (B_TRUE);
754 }
755 
756 static int
757 i_dls_link_create(const char *dev, uint_t port, dls_link_t **dlpp)
758 {
759 	dls_link_t		*dlp;
760 	int			err;
761 	mac_handle_t		mh;
762 
763 	/*
764 	 * Check that the MAC exists, and (for now) that it's
765 	 * of type DL_ETHER.
766 	 */
767 	if ((err = mac_open(dev, port, &mh)) != 0)
768 		return (err);
769 
770 	ASSERT(mac_info(mh)->mi_media == DL_ETHER);
771 	mac_close(mh);
772 
773 	/*
774 	 * Allocate a new dls_link_t structure.
775 	 */
776 	dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
777 
778 	/*
779 	 * Name the dls_link_t after the MAC interface it represents.
780 	 */
781 	MAC_NAME(dlp->dl_name, dev, port);
782 	(void) strlcpy(dlp->dl_dev, dev, MAXNAMELEN);
783 	dlp->dl_port = port;
784 
785 	/*
786 	 * Set the initial packet receive function.
787 	 */
788 	ASSERT(ght_count(dlp->dl_impl_hash) == 0);
789 
790 	/*
791 	 * Set the packet loopback function for use when the MAC is in
792 	 * promiscuous mode, and initialize promiscuous bookeeping fields.
793 	 */
794 	dlp->dl_loopback = i_dls_link_ether_loopback;
795 	dlp->dl_npromisc = 0;
796 	dlp->dl_mth = NULL;
797 
798 	*dlpp = dlp;
799 	return (0);
800 }
801 
802 static void
803 i_dls_link_destroy(dls_link_t *dlp)
804 {
805 	ASSERT(dlp->dl_npromisc == 0);
806 	ASSERT(dlp->dl_nactive == 0);
807 	ASSERT(dlp->dl_mth == NULL);
808 	ASSERT(dlp->dl_macref == 0);
809 	ASSERT(dlp->dl_mh == NULL);
810 	ASSERT(dlp->dl_mip == NULL);
811 
812 	/*
813 	 * Free the structure back to the cache.
814 	 */
815 	dlp->dl_mrh = NULL;
816 	dlp->dl_unknowns = 0;
817 	kmem_cache_free(i_dls_link_cachep, dlp);
818 
819 }
820 
821 /*
822  * Module initialization functions.
823  */
824 
825 void
826 dls_link_init(void)
827 {
828 	int	err;
829 
830 	/*
831 	 * Create a kmem_cache of dls_link_t structures.
832 	 */
833 	i_dls_link_cachep = kmem_cache_create("dls_link_cache",
834 	    sizeof (dls_link_t), 0, i_dls_link_constructor,
835 	    i_dls_link_destructor, NULL, NULL, NULL, 0);
836 	ASSERT(i_dls_link_cachep != NULL);
837 
838 	/*
839 	 * Create a global hash tables to be keyed by a name.
840 	 */
841 	err = ght_str_create("dls_link_hash", LINK_HASHSZ, &i_dls_link_hash);
842 	ASSERT(err == 0);
843 }
844 
845 int
846 dls_link_fini(void)
847 {
848 	int	err;
849 
850 	/*
851 	 * Destroy the hash table. This will return EBUSY if there are
852 	 * still entries present.
853 	 */
854 	if ((err = ght_destroy(i_dls_link_hash)) != 0)
855 		return (err);
856 
857 	/*
858 	 * Destroy the kmem_cache.
859 	 */
860 	kmem_cache_destroy(i_dls_link_cachep);
861 	return (0);
862 }
863 
864 /*
865  * Exported functions.
866  */
867 
868 int
869 dls_link_hold(const char *dev, uint_t port, dls_link_t **dlpp)
870 {
871 	char			name[MAXNAMELEN];
872 	dls_link_t		*dlp;
873 	int			err;
874 	ghte_t			hte;
875 	ghte_t			nhte;
876 
877 	/*
878 	 * Allocate a new hash table entry.
879 	 */
880 	nhte = ght_alloc(i_dls_link_hash, KM_SLEEP);
881 
882 	/*
883 	 * Construct a copy of the name used to identify any existing
884 	 * dls_link_t.
885 	 */
886 	MAC_NAME(name, dev, port);
887 
888 	/*
889 	 * Look up a dls_link_t corresponding to the given mac_handle_t
890 	 * in the global hash table.
891 	 */
892 	ght_lock(i_dls_link_hash, GHT_WRITE);
893 	if ((err = ght_find(i_dls_link_hash, GHT_PTR_TO_KEY(name),
894 	    &hte)) == 0) {
895 		dlp = (dls_link_t *)GHT_VAL(hte);
896 		ght_free(nhte);
897 		goto done;
898 	}
899 	ASSERT(err == ENOENT);
900 
901 	/*
902 	 * We didn't find anything so we need to create one.
903 	 */
904 	if ((err = i_dls_link_create(dev, port, &dlp)) != 0) {
905 		ght_free(nhte);
906 		ght_unlock(i_dls_link_hash);
907 		return (err);
908 	}
909 
910 	GHT_KEY(nhte) = GHT_PTR_TO_KEY(dlp->dl_name);
911 	GHT_VAL(nhte) = GHT_PTR_TO_VAL(dlp);
912 	dlp->dl_hte = nhte;
913 	/*
914 	 * Insert the entry.
915 	 */
916 	err = ght_insert(nhte);
917 	ASSERT(err == 0);
918 
919 done:
920 	/*
921 	 * Bump the reference count and hand back the reference.
922 	 */
923 	dlp->dl_ref++;
924 	*dlpp = dlp;
925 	ght_unlock(i_dls_link_hash);
926 	return (err);
927 }
928 
929 void
930 dls_link_rele(dls_link_t *dlp)
931 {
932 	ghte_t		hte;
933 
934 	ght_lock(i_dls_link_hash, GHT_WRITE);
935 
936 	/*
937 	 * Check if there are any more references.
938 	 */
939 	if (--dlp->dl_ref != 0) {
940 		/*
941 		 * There are more references so there's nothing more to do.
942 		 */
943 		goto done;
944 	}
945 
946 	hte = dlp->dl_hte;
947 	dlp->dl_hte = NULL;
948 
949 	/*
950 	 * Remove the hash table entry.
951 	 */
952 	ght_remove(hte);
953 	ght_free(hte);
954 
955 	/*
956 	 * Destroy the dls_link_t.
957 	 */
958 	i_dls_link_destroy(dlp);
959 
960 done:
961 	ght_unlock(i_dls_link_hash);
962 }
963 
964 int
965 dls_mac_hold(dls_link_t *dlp)
966 {
967 	int err = 0;
968 
969 	mutex_enter(&dlp->dl_lock);
970 
971 	ASSERT(IMPLY(dlp->dl_macref != 0, dlp->dl_mh != NULL));
972 	ASSERT(IMPLY(dlp->dl_macref == 0, dlp->dl_mh == NULL));
973 
974 	if (dlp->dl_macref == 0) {
975 		/*
976 		 * First reference; hold open the MAC interface.
977 		 */
978 		err = mac_open(dlp->dl_dev, dlp->dl_port, &dlp->dl_mh);
979 		if (err != 0)
980 			goto done;
981 
982 		dlp->dl_mip = mac_info(dlp->dl_mh);
983 	}
984 
985 	dlp->dl_macref++;
986 done:
987 	mutex_exit(&dlp->dl_lock);
988 	return (err);
989 }
990 
991 void
992 dls_mac_rele(dls_link_t *dlp)
993 {
994 	mutex_enter(&dlp->dl_lock);
995 	ASSERT(dlp->dl_mh != NULL);
996 
997 	if (--dlp->dl_macref == 0) {
998 		mac_close(dlp->dl_mh);
999 		dlp->dl_mh = NULL;
1000 		dlp->dl_mip = NULL;
1001 	}
1002 	mutex_exit(&dlp->dl_lock);
1003 }
1004 
1005 void
1006 dls_link_add(dls_link_t *dlp, uint32_t sap, dls_impl_t *dip)
1007 {
1008 	dls_vlan_t	*dvp = dip->di_dvp;
1009 	ght_t		hash = dlp->dl_impl_hash;
1010 	ghte_t		hte;
1011 	ghte_t		nhte;
1012 	ght_key_t	key;
1013 	dls_impl_t	**pp;
1014 	dls_impl_t	*p;
1015 	mac_rx_t	rx;
1016 	int		err;
1017 	uint_t		impl_count;
1018 
1019 	ASSERT(dip->di_nextp == NULL);
1020 
1021 	/*
1022 	 * For ethernet media, sap values less than or equal to
1023 	 * ETHERMTU (1500) represent LLC channels. (See PSARC 2003/150).
1024 	 * We strictly use 0 to represent LLC channels.
1025 	 */
1026 	sap = (sap <= ETHERMTU) ? 0 : sap;
1027 
1028 	/*
1029 	 * Make the appropriate key value depending on whether the
1030 	 * dls_impl_t is in promiscuous mode or not.
1031 	 */
1032 	key = MAKE_KEY(sap, dvp->dv_id);
1033 
1034 	/*
1035 	 * We need dl_lock here because we want to be able to walk
1036 	 * the hash table *and* set the mac rx func atomically. if
1037 	 * these two operations are separate, someone else could
1038 	 * insert/remove dls_impl_t from the ght after we drop the
1039 	 * ght lock and this could cause our chosen rx func to be
1040 	 * incorrect. note that we cannot call mac_rx_set when
1041 	 * holding the ght lock because this can cause deadlock.
1042 	 */
1043 	mutex_enter(&dlp->dl_lock);
1044 	/*
1045 	 * Allocate a new entry.
1046 	 */
1047 	nhte = ght_alloc(hash, KM_SLEEP);
1048 
1049 	/*
1050 	 * Search the table for any existing entry with this key.
1051 	 */
1052 	ght_lock(hash, GHT_WRITE);
1053 	if ((err = ght_find(hash, key, &hte)) != 0) {
1054 		ASSERT(err == ENOENT);
1055 
1056 		GHT_KEY(nhte) = key;
1057 		GHT_VAL(nhte) = GHT_PTR_TO_VAL(dip);
1058 
1059 		/*
1060 		 * Insert it in the table to be the head of a new list.
1061 		 */
1062 		err = ght_insert(nhte);
1063 		ASSERT(err == 0);
1064 
1065 		/*
1066 		 * Cache a reference to the hash table entry.
1067 		 */
1068 		ASSERT(dip->di_hte == NULL);
1069 		dip->di_hte = nhte;
1070 
1071 		goto done;
1072 	}
1073 
1074 	/*
1075 	 * Free the unused hash table entry.
1076 	 */
1077 	ght_free(nhte);
1078 
1079 	/*
1080 	 * Add the dls_impl_t to the end of the list. We can't add to the head
1081 	 * because the hash table internals already have a reference to the
1082 	 * head of the list.
1083 	 */
1084 	for (pp = (dls_impl_t **)&(GHT_VAL(hte)); (p = *pp) != NULL;
1085 	    pp = &(p->di_nextp))
1086 		ASSERT(p != dip);
1087 
1088 	*pp = dip;
1089 
1090 	/*
1091 	 * Cache a reference to the hash table entry.
1092 	 */
1093 	ASSERT(dip->di_hte == NULL);
1094 	dip->di_hte = hte;
1095 
1096 done:
1097 	/*
1098 	 * If there are no dls_impl_t then we can just drop all received
1099 	 * packets on the floor.
1100 	 */
1101 	impl_count = ght_count(hash);
1102 	if (impl_count == 0) {
1103 		ght_unlock(hash);
1104 	} else {
1105 		boolean_t promisc = B_FALSE;
1106 
1107 		/*
1108 		 * Walk the bound dls_impl_t to see if there are any
1109 		 * in promiscuous 'all sap' mode.
1110 		 */
1111 		ght_walk(hash, i_dls_link_walk, (void *)&promisc);
1112 
1113 		/*
1114 		 * If there are then we need to use a receive routine
1115 		 * which will route packets to those dls_impl_t as well
1116 		 * as ones bound to the  DLSAP of the packet.
1117 		 */
1118 		if (promisc)
1119 			rx = i_dls_link_ether_rx_promisc;
1120 		else
1121 			rx = i_dls_link_ether_rx;
1122 
1123 		ght_unlock(hash);
1124 
1125 		/* Replace the existing receive function if there is one. */
1126 		if (dlp->dl_mrh != NULL)
1127 			mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1128 		dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
1129 	}
1130 	mutex_exit(&dlp->dl_lock);
1131 }
1132 
1133 void
1134 dls_link_remove(dls_link_t *dlp, dls_impl_t *dip)
1135 {
1136 	ght_t		hash = dlp->dl_impl_hash;
1137 	ghte_t		hte;
1138 	dls_impl_t	**pp;
1139 	dls_impl_t	*p;
1140 	mac_rx_t	rx;
1141 
1142 	/*
1143 	 * We need dl_lock here because we want to be able to walk
1144 	 * the hash table *and* set the mac rx func atomically. if
1145 	 * these two operations are separate, someone else could
1146 	 * insert/remove dls_impl_t from the ght after we drop the
1147 	 * ght lock and this could cause our chosen rx func to be
1148 	 * incorrect. note that we cannot call mac_rx_add when
1149 	 * holding the ght lock because this can cause deadlock.
1150 	 */
1151 	mutex_enter(&dlp->dl_lock);
1152 
1153 	ght_lock(hash, GHT_WRITE);
1154 
1155 	/*
1156 	 * Get the cached hash table entry reference.
1157 	 */
1158 	hte = dip->di_hte;
1159 	ASSERT(hte != NULL);
1160 
1161 	/*
1162 	 * Poll the hash table entry until all references have been dropped.
1163 	 * We need to drop all locks before sleeping because we don't want
1164 	 * the interrupt handler to block. We set di_removing here to
1165 	 * tell the receive callbacks not to pass up packets anymore.
1166 	 * This is only a hint to quicken the decrease of the refcnt so
1167 	 * the assignment need not be protected by any lock.
1168 	 */
1169 	dip->di_removing = B_TRUE;
1170 	while (ght_ref(hte) != 0) {
1171 		ght_unlock(hash);
1172 		mutex_exit(&dlp->dl_lock);
1173 		delay(drv_usectohz(1000));	/* 1ms delay */
1174 		mutex_enter(&dlp->dl_lock);
1175 		ght_lock(hash, GHT_WRITE);
1176 	}
1177 
1178 	/*
1179 	 * Walk the list and remove the dls_impl_t.
1180 	 */
1181 	for (pp = (dls_impl_t **)&(GHT_VAL(hte)); (p = *pp) != NULL;
1182 	    pp = &(p->di_nextp)) {
1183 		if (p == dip)
1184 			break;
1185 	}
1186 	ASSERT(p != NULL);
1187 
1188 	*pp = p->di_nextp;
1189 	p->di_nextp = NULL;
1190 	dip->di_hte = NULL;
1191 
1192 	if (GHT_VAL(hte) == NULL) {
1193 		/*
1194 		 * The list is empty so remove the hash table entry.
1195 		 */
1196 		ght_remove(hte);
1197 		ght_free(hte);
1198 	}
1199 	dip->di_removing = B_FALSE;
1200 
1201 	/*
1202 	 * If there are no dls_impl_t then there's no need to register a
1203 	 * receive function with the mac.
1204 	 */
1205 	if (ght_count(hash) == 0) {
1206 		ght_unlock(hash);
1207 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1208 		dlp->dl_mrh = NULL;
1209 	} else {
1210 		boolean_t promisc = B_FALSE;
1211 
1212 		/*
1213 		 * Walk the bound dls_impl_t to see if there are any
1214 		 * in promiscuous 'all sap' mode.
1215 		 */
1216 		ght_walk(hash, i_dls_link_walk, (void *)&promisc);
1217 
1218 		/*
1219 		 * If there are then we need to use a receive routine
1220 		 * which will route packets to those dls_impl_t as well
1221 		 * as ones bound to the  DLSAP of the packet.
1222 		 */
1223 		if (promisc)
1224 			rx = i_dls_link_ether_rx_promisc;
1225 		else
1226 			rx = i_dls_link_ether_rx;
1227 
1228 		ght_unlock(hash);
1229 
1230 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1231 		dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
1232 	}
1233 	mutex_exit(&dlp->dl_lock);
1234 }
1235