xref: /titanic_44/usr/src/uts/common/io/dls/dls_link.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Data-Link Services Module
31  */
32 
33 #include	<sys/types.h>
34 #include	<sys/stream.h>
35 #include	<sys/strsun.h>
36 #include	<sys/strsubr.h>
37 #include	<sys/sysmacros.h>
38 #include	<sys/atomic.h>
39 #include	<sys/ght.h>
40 #include	<sys/dlpi.h>
41 #include	<sys/ethernet.h>
42 #include	<sys/byteorder.h>
43 #include	<sys/vlan.h>
44 #include	<sys/mac.h>
45 #include	<sys/sdt.h>
46 
47 #include	<sys/dls.h>
48 #include	<sys/dld_impl.h>
49 #include	<sys/dls_impl.h>
50 
51 static kmem_cache_t	*i_dls_link_cachep;
52 static ght_t		i_dls_link_hash;
53 
54 #define		LINK_HASHSZ	67	/* prime */
55 #define		IMPL_HASHSZ	67	/* prime */
56 
/*
 * Build a hash key from a DLSAP value and a VLAN identifier. The SAP is
 * shifted up by VLAN_ID_SIZE bits and the identifier, masked with
 * VLAN_ID_MASK, fills the low bits.
 */
#define	MAKE_KEY(_sap, _vid)						\
	GHT_SCALAR_TO_KEY((((_sap) << VLAN_ID_SIZE) |			\
	    ((_vid) & VLAN_ID_MASK)))
62 
/*
 * Recover the DLSAP value from a hash key by shifting out the low
 * VLAN_ID_SIZE bits (the inverse of the SAP placement done by MAKE_KEY).
 */
#define	KEY_SAP(_key)							\
	((uint32_t)(uintptr_t)(_key) >> VLAN_ID_SIZE)
68 
69 /*
70  * Private functions.
71  */
72 
73 /*ARGSUSED*/
74 static int
75 i_dls_link_constructor(void *buf, void *arg, int kmflag)
76 {
77 	dls_link_t	*dlp = buf;
78 	char		name[MAXNAMELEN];
79 	int		err;
80 
81 	bzero(buf, sizeof (dls_link_t));
82 
83 	(void) sprintf(name, "dls_link_t_%p_impl_hash", buf);
84 	err = ght_scalar_create(name, IMPL_HASHSZ, &(dlp->dl_impl_hash));
85 	ASSERT(err == 0);
86 
87 	mutex_init(&dlp->dl_lock, NULL, MUTEX_DEFAULT, NULL);
88 	return (0);
89 }
90 
91 /*ARGSUSED*/
92 static void
93 i_dls_link_destructor(void *buf, void *arg)
94 {
95 	dls_link_t	*dlp = buf;
96 	int		err;
97 
98 	ASSERT(dlp->dl_ref == 0);
99 	ASSERT(dlp->dl_hte == NULL);
100 	ASSERT(dlp->dl_mh == NULL);
101 	ASSERT(dlp->dl_unknowns == 0);
102 
103 	err = ght_destroy(dlp->dl_impl_hash);
104 	ASSERT(err == 0);
105 
106 	mutex_destroy(&dlp->dl_lock);
107 }
108 
/*
 * Compare one 16-bit word, at index _idx, of two packet headers.
 */
#define	ETHER_WORD_EQ(_pkt_a, _pkt_b, _idx)				\
	(((uint16_t *)(_pkt_a))[(_idx)] == ((uint16_t *)(_pkt_b))[(_idx)])

/*
 * Compare two ethernet headers viewed as arrays of 16-bit words: words
 * 0-2 and word 6 must be equal; words 3-5 are not compared. The
 * arguments are evaluated more than once.
 */
#define	ETHER_MATCH(_pkt_a, _pkt_b)					\
	(ETHER_WORD_EQ(_pkt_a, _pkt_b, 0) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 1) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 2) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 6))
114 
/*
 * Compare one 16-bit word, at index _idx, of two packet headers.
 */
#define	ETHER_WORD_EQ(_pkt_a, _pkt_b, _idx)				\
	(((uint16_t *)(_pkt_a))[(_idx)] == ((uint16_t *)(_pkt_b))[(_idx)])

/*
 * Compare two VLAN-tagged ethernet headers viewed as arrays of 16-bit
 * words: words 0-2 and words 6-8 must be equal; words 3-5 are not
 * compared. The arguments are evaluated more than once.
 */
#define	ETHER_VLAN_MATCH(_pkt_a, _pkt_b)				\
	(ETHER_WORD_EQ(_pkt_a, _pkt_b, 0) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 1) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 2) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 6) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 7) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 8))
122 
123 static mblk_t *
124 i_dls_link_ether_subchain(mblk_t *mp, uint_t *header_lengthp,
125     uint8_t **daddrp, uint16_t *type_lengthp, uint16_t *vidp,
126     uint_t *countp)
127 {
128 	struct ether_header		*ehp;
129 	struct ether_vlan_header	*evhp;
130 	mblk_t				**pp;
131 	mblk_t				*p;
132 	uint_t				npacket;
133 
134 	/*
135 	 * Packets should always be at least 16 bit aligned.
136 	 */
137 	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
138 
139 	/*
140 	 * Determine whether this is a VLAN or non-VLAN packet.
141 	 */
142 	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
143 	ehp = (struct ether_header *)mp->b_rptr;
144 	if ((*type_lengthp = ntohs(ehp->ether_type)) == VLAN_TPID)
145 		goto vlan;
146 
147 	/*
148 	 * It is a non-VLAN header.
149 	 */
150 	*header_lengthp = sizeof (struct ether_header);
151 
152 	/*
153 	 * Parse the rest of the header information that we need.
154 	 */
155 	*daddrp = (uint8_t *)&(ehp->ether_dhost);
156 	*vidp = VLAN_ID_NONE;
157 
158 	/*
159 	 * Compare with subsequent headers until we find one that has
160 	 * differing header information. After checking each packet skip over
161 	 * the header.
162 	 */
163 	npacket = 1;
164 	for (pp = &(mp->b_next); (p = *pp) != NULL; pp = &(p->b_next)) {
165 		if (!ETHER_MATCH(p->b_rptr, mp->b_rptr) != 0)
166 			break;
167 		p->b_rptr += sizeof (struct ether_header);
168 		npacket++;
169 	}
170 
171 	/*
172 	 * Skip over the initial packet's header.
173 	 */
174 	mp->b_rptr += sizeof (struct ether_header);
175 	goto done;
176 
177 vlan:
178 	/*
179 	 * It is a VLAN header.
180 	 */
181 	evhp = (struct ether_vlan_header *)mp->b_rptr;
182 	*header_lengthp = sizeof (struct ether_vlan_header);
183 
184 	/*
185 	 * Parse the header information.
186 	 */
187 	*daddrp = (uint8_t *)&(evhp->ether_dhost);
188 	*vidp = VLAN_ID(ntohs(evhp->ether_tci));
189 	*type_lengthp = ntohs(evhp->ether_type);
190 
191 	/*
192 	 * Compare with subsequent headers until we find one that has
193 	 * differing header information. After checking each packet skip over
194 	 * the header.
195 	 */
196 	npacket = 1;
197 	for (pp = &(mp->b_next); (p = *pp) != NULL; pp = &(p->b_next)) {
198 		if (!ETHER_VLAN_MATCH(p->b_rptr, mp->b_rptr) != 0)
199 			break;
200 		p->b_rptr += sizeof (struct ether_vlan_header);
201 		npacket++;
202 	}
203 
204 	/*
205 	 * Skip over the initial packet's header.
206 	 */
207 	mp->b_rptr += sizeof (struct ether_vlan_header);
208 
209 done:
210 	/*
211 	 * Break the chain at this point and return a pointer to the next
212 	 * sub-chain.
213 	 */
214 	*pp = NULL;
215 	*countp = npacket;
216 	return (p);
217 }
218 
219 static void
220 i_dls_link_ether_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
221 {
222 	dls_link_t			*dlp = arg;
223 	ght_t				hash = dlp->dl_impl_hash;
224 	mblk_t				*nextp;
225 	uint_t				header_length;
226 	uint8_t				*daddr;
227 	uint16_t			type_length;
228 	uint16_t			vid;
229 	uint16_t			sap;
230 	ghte_t				hte;
231 	dls_impl_t			*dip;
232 	dls_impl_t			*ndip;
233 	mblk_t				*nmp;
234 	ght_key_t			key;
235 	uint_t				npacket;
236 	boolean_t			accepted;
237 
238 	/*
239 	 * Walk the packet chain.
240 	 */
241 	while (mp != NULL) {
242 		/*
243 		 * Wipe the accepted state.
244 		 */
245 		accepted = B_FALSE;
246 
247 		/*
248 		 * Grab the longest sub-chain we can process as a single
249 		 * unit.
250 		 */
251 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
252 		    &type_length, &vid, &npacket);
253 
254 		/*
255 		 * Calculate the DLSAP: LLC (0) if the type/length field is
256 		 * interpreted as a length, otherwise it is the value of the
257 		 * type/length field.
258 		 */
259 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
260 
261 		/*
262 		 * Construct a hash key from the VLAN identifier and the
263 		 * DLSAP.
264 		 */
265 		key = MAKE_KEY(sap, vid);
266 
267 		/*
268 		 * Search the has table for dls_impl_t eligible to receive
269 		 * a packet chain for this DLSAP/VLAN combination.
270 		 */
271 		ght_lock(hash, GHT_READ);
272 		if (ght_find(hash, key, &hte) != 0) {
273 			ght_unlock(hash);
274 			freemsgchain(mp);
275 			goto loop;
276 		}
277 
278 		/*
279 		 * Place a hold the chain of dls_impl_t to make sure none are
280 		 * removed from under our feet.
281 		 */
282 		ght_hold(hte);
283 		ght_unlock(hash);
284 
285 		/*
286 		 * Find the first dls_impl_t that will accept the sub-chain.
287 		 */
288 		for (dip = (dls_impl_t *)GHT_VAL(hte); dip != NULL;
289 		    dip = dip->di_nextp)
290 			if (dls_accept(dip, daddr))
291 				break;
292 
293 		/*
294 		 * If we did not find any dls_impl_t willing to accept the
295 		 * sub-chain then throw it away.
296 		 */
297 		if (dip == NULL) {
298 			ght_rele(hte);
299 			freemsgchain(mp);
300 			goto loop;
301 		}
302 
303 		/*
304 		 * We have at least one acceptor.
305 		 */
306 		accepted = B_TRUE;
307 		for (;;) {
308 			/*
309 			 * Find the next dls_impl_t that will accept the
310 			 * sub-chain.
311 			 */
312 			for (ndip = dip->di_nextp; ndip != NULL;
313 			    ndip = ndip->di_nextp)
314 				if (dls_accept(ndip, daddr))
315 					break;
316 
317 			/*
318 			 * If there are no more dls_impl_t that are willing
319 			 * to accept the sub-chain then we don't need to dup
320 			 * it before handing it to the current one.
321 			 */
322 			if (ndip == NULL) {
323 				dip->di_rx(dip->di_rx_arg, mrh, mp,
324 				    header_length);
325 
326 				/*
327 				 * Since there are no more dls_impl_t, we're
328 				 * done.
329 				 */
330 				break;
331 			}
332 
333 			/*
334 			 * There are more dls_impl_t so dup the sub-chain.
335 			 */
336 			if ((nmp = copymsgchain(mp)) != NULL)
337 				dip->di_rx(dip->di_rx_arg, mrh, nmp,
338 				    header_length);
339 
340 			dip = ndip;
341 		}
342 
343 		/*
344 		 * Release the hold on the dls_impl_t chain now that we have
345 		 * finished walking it.
346 		 */
347 		ght_rele(hte);
348 
349 loop:
350 		/*
351 		 * If there were no acceptors then add the packet count to the
352 		 * 'unknown' count.
353 		 */
354 		if (!accepted)
355 			atomic_add_32(&(dlp->dl_unknowns), npacket);
356 
357 		/*
358 		 * Move onto the next sub-chain.
359 		 */
360 		mp = nextp;
361 	}
362 }
363 
364 static void
365 i_dls_link_ether_rx_promisc(void *arg, mac_resource_handle_t mrh,
366     mblk_t *mp)
367 {
368 	dls_link_t			*dlp = arg;
369 	ght_t				hash = dlp->dl_impl_hash;
370 	mblk_t				*nextp;
371 	uint_t				header_length;
372 	uint8_t				*daddr;
373 	uint16_t			type_length;
374 	uint16_t			vid;
375 	uint16_t			sap;
376 	ghte_t				hte;
377 	dls_impl_t			*dip;
378 	dls_impl_t			*ndip;
379 	mblk_t				*nmp;
380 	ght_key_t			key;
381 	uint_t				npacket;
382 	boolean_t			accepted;
383 
384 	/*
385 	 * Walk the packet chain.
386 	 */
387 	while (mp != NULL) {
388 		/*
389 		 * Wipe the accepted state.
390 		 */
391 		accepted = B_FALSE;
392 
393 		/*
394 		 * Grab the longest sub-chain we can process as a single
395 		 * unit.
396 		 */
397 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
398 		    &type_length, &vid, &npacket);
399 
400 		/*
401 		 * Construct a hash key from the VLAN identifier and the
402 		 * DLSAP that represents dls_impl_t in promiscuous mode.
403 		 */
404 		key = MAKE_KEY(DLS_SAP_PROMISC, vid);
405 
406 		/*
407 		 * Search the has table for dls_impl_t eligible to receive
408 		 * a packet chain for this DLSAP/VLAN combination.
409 		 */
410 		ght_lock(hash, GHT_READ);
411 		if (ght_find(hash, key, &hte) != 0) {
412 			ght_unlock(hash);
413 			goto non_promisc;
414 		}
415 
416 		/*
417 		 * Place a hold the chain of dls_impl_t to make sure none are
418 		 * removed from under our feet.
419 		 */
420 		ght_hold(hte);
421 		ght_unlock(hash);
422 
423 		/*
424 		 * Find dls_impl_t that will accept the sub-chain.
425 		 */
426 		for (dip = (dls_impl_t *)GHT_VAL(hte); dip != NULL;
427 		    dip = dip->di_nextp) {
428 			if (!dls_accept(dip, daddr))
429 				continue;
430 
431 			/*
432 			 * We have at least one acceptor.
433 			 */
434 			accepted = B_TRUE;
435 
436 			/*
437 			 * There will normally be at least more dls_impl_t
438 			 * (since we've yet to check for non-promiscuous
439 			 * dls_impl_t) so dup the sub-chain.
440 			 */
441 			if ((nmp = copymsgchain(mp)) != NULL)
442 				dip->di_rx(dip->di_rx_arg, mrh, nmp,
443 				    header_length);
444 		}
445 
446 		/*
447 		 * Release the hold on the dls_impl_t chain now that we have
448 		 * finished walking it.
449 		 */
450 		ght_rele(hte);
451 
452 non_promisc:
453 		/*
454 		 * Calculate the DLSAP: LLC (0) if the type/length field is
455 		 * interpreted as a length, otherwise it is the value of the
456 		 * type/length field.
457 		 */
458 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
459 
460 		/*
461 		 * Construct a hash key from the VLAN identifier and the
462 		 * DLSAP.
463 		 */
464 		key = MAKE_KEY(sap, vid);
465 
466 		/*
467 		 * Search the has table for dls_impl_t eligible to receive
468 		 * a packet chain for this DLSAP/VLAN combination.
469 		 */
470 		ght_lock(hash, GHT_READ);
471 		if (ght_find(hash, key, &hte) != 0) {
472 			ght_unlock(hash);
473 			freemsgchain(mp);
474 			goto loop;
475 		}
476 
477 		/*
478 		 * Place a hold the chain of dls_impl_t to make sure none are
479 		 * removed from under our feet.
480 		 */
481 		ght_hold(hte);
482 		ght_unlock(hash);
483 
484 		/*
485 		 * Find the first dls_impl_t that will accept the sub-chain.
486 		 */
487 		for (dip = (dls_impl_t *)GHT_VAL(hte); dip != NULL;
488 		    dip = dip->di_nextp)
489 			if (dls_accept(dip, daddr))
490 				break;
491 
492 		/*
493 		 * If we did not find any dls_impl_t willing to accept the
494 		 * sub-chain then throw it away.
495 		 */
496 		if (dip == NULL) {
497 			ght_rele(hte);
498 			freemsgchain(mp);
499 			goto loop;
500 		}
501 
502 		/*
503 		 * We have at least one acceptor.
504 		 */
505 		accepted = B_TRUE;
506 		for (;;) {
507 			/*
508 			 * Find the next dls_impl_t that will accept the
509 			 * sub-chain.
510 			 */
511 			for (ndip = dip->di_nextp; ndip != NULL;
512 			    ndip = ndip->di_nextp)
513 				if (dls_accept(ndip, daddr))
514 					break;
515 
516 			/*
517 			 * If there are no more dls_impl_t that are willing
518 			 * to accept the sub-chain then we don't need to dup
519 			 * it before handing it to the current one.
520 			 */
521 			if (ndip == NULL) {
522 				dip->di_rx(dip->di_rx_arg, mrh, mp,
523 				    header_length);
524 
525 				/*
526 				 * Since there are no more dls_impl_t, we're
527 				 * done.
528 				 */
529 				break;
530 			}
531 
532 			/*
533 			 * There are more dls_impl_t so dup the sub-chain.
534 			 */
535 			if ((nmp = copymsgchain(mp)) != NULL)
536 				dip->di_rx(dip->di_rx_arg, mrh, nmp,
537 				    header_length);
538 
539 			dip = ndip;
540 		}
541 
542 		/*
543 		 * Release the hold on the dls_impl_t chain now that we have
544 		 * finished walking it.
545 		 */
546 		ght_rele(hte);
547 
548 loop:
549 		/*
550 		 * If there were no acceptors then add the packet count to the
551 		 * 'unknown' count.
552 		 */
553 		if (!accepted)
554 			atomic_add_32(&(dlp->dl_unknowns), npacket);
555 
556 		/*
557 		 * Move onto the next sub-chain.
558 		 */
559 		mp = nextp;
560 	}
561 }
562 
563 static void
564 i_dls_link_ether_loopback(void *arg, mblk_t *mp)
565 {
566 	dls_link_t			*dlp = arg;
567 	ght_t				hash = dlp->dl_impl_hash;
568 	mblk_t				*nextp;
569 	uint_t				header_length;
570 	uint8_t				*daddr;
571 	uint16_t			type_length;
572 	uint16_t			vid;
573 	uint16_t			sap;
574 	ghte_t				hte;
575 	dls_impl_t			*dip;
576 	dls_impl_t			*ndip;
577 	mblk_t				*nmp;
578 	ght_key_t			key;
579 	uint_t				npacket;
580 
581 	/*
582 	 * Walk the packet chain.
583 	 */
584 	while (mp != NULL) {
585 		/*
586 		 * Grab the longest sub-chain we can process as a single
587 		 * unit.
588 		 */
589 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
590 		    &type_length, &vid, &npacket);
591 
592 		/*
593 		 * Calculate the DLSAP: LLC (0) if the type/length field is
594 		 * interpreted as a length, otherwise it is the value of the
595 		 * type/length field.
596 		 */
597 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
598 
599 		/*
600 		 * Construct a hash key from the VLAN identifier and the
601 		 * DLSAP.
602 		 */
603 		key = MAKE_KEY(sap, vid);
604 
605 		/*
606 		 * Search the has table for dls_impl_t eligible to receive
607 		 * a packet chain for this DLSAP/VLAN combination.
608 		 */
609 		ght_lock(hash, GHT_READ);
610 		if (ght_find(hash, key, &hte) != 0) {
611 			ght_unlock(hash);
612 			goto promisc;
613 		}
614 
615 		/*
616 		 * Place a hold the chain of dls_impl_t to make sure none are
617 		 * removed from under our feet.
618 		 */
619 		ght_hold(hte);
620 		ght_unlock(hash);
621 
622 		/*
623 		 * Find dls_impl_t that will accept the sub-chain.
624 		 */
625 		for (dip = (dls_impl_t *)GHT_VAL(hte); dip != NULL;
626 		    dip = dip->di_nextp) {
627 			if (!dls_accept_loopback(dip, daddr))
628 				continue;
629 
630 			/*
631 			 * There should be at least more dls_impl_t (since
632 			 * we've yet to check for dls_impl_t in promiscuous
633 			 * mode) so dup the sub-chain.
634 			 */
635 			if ((nmp = copymsgchain(mp)) != NULL)
636 				dip->di_rx(dip->di_rx_arg, NULL, nmp,
637 				    header_length);
638 		}
639 
640 		/*
641 		 * Release the hold on the dls_impl_t chain now that we have
642 		 * finished walking it.
643 		 */
644 		ght_rele(hte);
645 
646 promisc:
647 		/*
648 		 * Construct a hash key from the VLAN identifier and the
649 		 * DLSAP that represents dls_impl_t in promiscuous mode.
650 		 */
651 		key = MAKE_KEY(DLS_SAP_PROMISC, vid);
652 
653 		/*
654 		 * Search the has table for dls_impl_t eligible to receive
655 		 * a packet chain for this DLSAP/VLAN combination.
656 		 */
657 		ght_lock(hash, GHT_READ);
658 		if (ght_find(hash, key, &hte) != 0) {
659 			ght_unlock(hash);
660 			freemsgchain(mp);
661 			goto loop;
662 		}
663 
664 		/*
665 		 * Place a hold the chain of dls_impl_t to make sure none are
666 		 * removed from under our feet.
667 		 */
668 		ght_hold(hte);
669 		ght_unlock(hash);
670 
671 		/*
672 		 * Find the first dls_impl_t that will accept the sub-chain.
673 		 */
674 		for (dip = (dls_impl_t *)GHT_VAL(hte); dip != NULL;
675 		    dip = dip->di_nextp)
676 			if (dls_accept_loopback(dip, daddr))
677 				break;
678 
679 		/*
680 		 * If we did not find any dls_impl_t willing to accept the
681 		 * sub-chain then throw it away.
682 		 */
683 		if (dip == NULL) {
684 			ght_rele(hte);
685 			freemsgchain(mp);
686 			goto loop;
687 		}
688 
689 		for (;;) {
690 			/*
691 			 * Find the next dls_impl_t that will accept the
692 			 * sub-chain.
693 			 */
694 			for (ndip = dip->di_nextp; ndip != NULL;
695 			    ndip = ndip->di_nextp)
696 				if (dls_accept_loopback(ndip, daddr))
697 					break;
698 
699 			/*
700 			 * If there are no more dls_impl_t that are willing
701 			 * to accept the sub-chain then we don't need to dup
702 			 * it before handing it to the current one.
703 			 */
704 			if (ndip == NULL) {
705 				dip->di_rx(dip->di_rx_arg, NULL, mp,
706 				    header_length);
707 
708 				/*
709 				 * Since there are no more dls_impl_t, we're
710 				 * done.
711 				 */
712 				break;
713 			}
714 
715 			/*
716 			 * There are more dls_impl_t so dup the sub-chain.
717 			 */
718 			if ((nmp = copymsgchain(mp)) != NULL)
719 				dip->di_rx(dip->di_rx_arg, NULL, nmp,
720 				    header_length);
721 
722 			dip = ndip;
723 		}
724 
725 		/*
726 		 * Release the hold on the dls_impl_t chain now that we have
727 		 * finished walking it.
728 		 */
729 		ght_rele(hte);
730 
731 loop:
732 		/*
733 		 * Move onto the next sub-chain.
734 		 */
735 		mp = nextp;
736 	}
737 }
738 
739 static boolean_t
740 i_dls_link_walk(void *arg, ghte_t hte)
741 {
742 	boolean_t	*promiscp = arg;
743 	ght_key_t	key = GHT_KEY(hte);
744 	uint32_t	sap = KEY_SAP(key);
745 
746 	if (sap == DLS_SAP_PROMISC) {
747 		*promiscp = B_TRUE;
748 		return (B_FALSE);	/* terminate walk */
749 	}
750 
751 	return (B_TRUE);
752 }
753 
754 static int
755 i_dls_link_create(const char *dev, uint_t port, dls_link_t **dlpp)
756 {
757 	dls_link_t		*dlp;
758 	int			err;
759 	mac_handle_t		mh;
760 
761 	/*
762 	 * Check that the MAC exists, and (for now) that it's
763 	 * of type DL_ETHER.
764 	 */
765 	if ((err = mac_open(dev, port, &mh)) != 0)
766 		return (err);
767 
768 	ASSERT(mac_info(mh)->mi_media == DL_ETHER);
769 	mac_close(mh);
770 
771 	/*
772 	 * Allocate a new dls_link_t structure.
773 	 */
774 	dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
775 
776 	/*
777 	 * Name the dls_link_t after the MAC interface it represents.
778 	 */
779 	MAC_NAME(dlp->dl_name, dev, port);
780 	(void) strlcpy(dlp->dl_dev, dev, MAXNAMELEN);
781 	dlp->dl_port = port;
782 
783 	/*
784 	 * Set the initial packet receive function.
785 	 */
786 	ASSERT(ght_count(dlp->dl_impl_hash) == 0);
787 
788 	/*
789 	 * Set the packet loopback function for use when the MAC is in
790 	 * promiscuous mode, and initialize promiscuous bookeeping fields.
791 	 */
792 	dlp->dl_loopback = i_dls_link_ether_loopback;
793 	dlp->dl_npromisc = 0;
794 	dlp->dl_mth = NULL;
795 
796 	*dlpp = dlp;
797 	return (0);
798 }
799 
800 static void
801 i_dls_link_destroy(dls_link_t *dlp)
802 {
803 	ASSERT(dlp->dl_npromisc == 0);
804 	ASSERT(dlp->dl_nactive == 0);
805 	ASSERT(dlp->dl_mth == NULL);
806 	ASSERT(dlp->dl_macref == 0);
807 	ASSERT(dlp->dl_mh == NULL);
808 	ASSERT(dlp->dl_mip == NULL);
809 
810 	/*
811 	 * Free the structure back to the cache.
812 	 */
813 	dlp->dl_mrh = NULL;
814 	dlp->dl_unknowns = 0;
815 	kmem_cache_free(i_dls_link_cachep, dlp);
816 
817 }
818 
819 /*
820  * Module initialization functions.
821  */
822 
823 void
824 dls_link_init(void)
825 {
826 	int	err;
827 
828 	/*
829 	 * Create a kmem_cache of dls_link_t structures.
830 	 */
831 	i_dls_link_cachep = kmem_cache_create("dls_link_cache",
832 	    sizeof (dls_link_t), 0, i_dls_link_constructor,
833 	    i_dls_link_destructor, NULL, NULL, NULL, 0);
834 	ASSERT(i_dls_link_cachep != NULL);
835 
836 	/*
837 	 * Create a global hash tables to be keyed by a name.
838 	 */
839 	err = ght_str_create("dls_link_hash", LINK_HASHSZ, &i_dls_link_hash);
840 	ASSERT(err == 0);
841 }
842 
843 int
844 dls_link_fini(void)
845 {
846 	int	err;
847 
848 	/*
849 	 * Destroy the hash table. This will return EBUSY if there are
850 	 * still entries present.
851 	 */
852 	if ((err = ght_destroy(i_dls_link_hash)) != 0)
853 		return (err);
854 
855 	/*
856 	 * Destroy the kmem_cache.
857 	 */
858 	kmem_cache_destroy(i_dls_link_cachep);
859 	return (0);
860 }
861 
862 /*
863  * Exported functions.
864  */
865 
866 int
867 dls_link_hold(const char *dev, uint_t port, dls_link_t **dlpp)
868 {
869 	char			name[MAXNAMELEN];
870 	dls_link_t		*dlp;
871 	int			err;
872 	ghte_t			hte;
873 	ghte_t			nhte;
874 
875 	/*
876 	 * Allocate a new hash table entry.
877 	 */
878 	nhte = ght_alloc(i_dls_link_hash, KM_SLEEP);
879 
880 	/*
881 	 * Construct a copy of the name used to identify any existing
882 	 * dls_link_t.
883 	 */
884 	MAC_NAME(name, dev, port);
885 
886 	/*
887 	 * Look up a dls_link_t corresponding to the given mac_handle_t
888 	 * in the global hash table.
889 	 */
890 	ght_lock(i_dls_link_hash, GHT_WRITE);
891 	if ((err = ght_find(i_dls_link_hash, GHT_PTR_TO_KEY(name),
892 	    &hte)) == 0) {
893 		dlp = (dls_link_t *)GHT_VAL(hte);
894 		ght_free(nhte);
895 		goto done;
896 	}
897 	ASSERT(err == ENOENT);
898 
899 	/*
900 	 * We didn't find anything so we need to create one.
901 	 */
902 	if ((err = i_dls_link_create(dev, port, &dlp)) != 0) {
903 		ght_free(nhte);
904 		ght_unlock(i_dls_link_hash);
905 		return (err);
906 	}
907 
908 	GHT_KEY(nhte) = GHT_PTR_TO_KEY(dlp->dl_name);
909 	GHT_VAL(nhte) = GHT_PTR_TO_VAL(dlp);
910 	dlp->dl_hte = nhte;
911 	/*
912 	 * Insert the entry.
913 	 */
914 	err = ght_insert(nhte);
915 	ASSERT(err == 0);
916 
917 done:
918 	/*
919 	 * Bump the reference count and hand back the reference.
920 	 */
921 	dlp->dl_ref++;
922 	*dlpp = dlp;
923 	ght_unlock(i_dls_link_hash);
924 	return (err);
925 }
926 
927 void
928 dls_link_rele(dls_link_t *dlp)
929 {
930 	ghte_t		hte;
931 
932 	ght_lock(i_dls_link_hash, GHT_WRITE);
933 
934 	/*
935 	 * Check if there are any more references.
936 	 */
937 	if (--dlp->dl_ref != 0) {
938 		/*
939 		 * There are more references so there's nothing more to do.
940 		 */
941 		goto done;
942 	}
943 
944 	hte = dlp->dl_hte;
945 	dlp->dl_hte = NULL;
946 
947 	/*
948 	 * Remove the hash table entry.
949 	 */
950 	ght_remove(hte);
951 	ght_free(hte);
952 
953 	/*
954 	 * Destroy the dls_link_t.
955 	 */
956 	i_dls_link_destroy(dlp);
957 
958 done:
959 	ght_unlock(i_dls_link_hash);
960 }
961 
962 int
963 dls_mac_hold(dls_link_t *dlp)
964 {
965 	int err = 0;
966 
967 	mutex_enter(&dlp->dl_lock);
968 
969 	ASSERT(IMPLY(dlp->dl_macref != 0, dlp->dl_mh != NULL));
970 	ASSERT(IMPLY(dlp->dl_macref == 0, dlp->dl_mh == NULL));
971 
972 	if (dlp->dl_macref == 0) {
973 		/*
974 		 * First reference; hold open the MAC interface.
975 		 */
976 		err = mac_open(dlp->dl_dev, dlp->dl_port, &dlp->dl_mh);
977 		if (err != 0)
978 			goto done;
979 
980 		dlp->dl_mip = mac_info(dlp->dl_mh);
981 	}
982 
983 	dlp->dl_macref++;
984 done:
985 	mutex_exit(&dlp->dl_lock);
986 	return (err);
987 }
988 
989 void
990 dls_mac_rele(dls_link_t *dlp)
991 {
992 	mutex_enter(&dlp->dl_lock);
993 	ASSERT(dlp->dl_mh != NULL);
994 
995 	if (--dlp->dl_macref == 0) {
996 		mac_close(dlp->dl_mh);
997 		dlp->dl_mh = NULL;
998 		dlp->dl_mip = NULL;
999 	}
1000 	mutex_exit(&dlp->dl_lock);
1001 }
1002 
1003 void
1004 dls_link_add(dls_link_t *dlp, uint32_t sap, dls_impl_t *dip)
1005 {
1006 	dls_vlan_t	*dvp = dip->di_dvp;
1007 	ght_t		hash = dlp->dl_impl_hash;
1008 	ghte_t		hte;
1009 	ghte_t		nhte;
1010 	ght_key_t	key;
1011 	dls_impl_t	**pp;
1012 	dls_impl_t	*p;
1013 	mac_rx_t	rx;
1014 	int		err;
1015 	uint_t		impl_count;
1016 
1017 	ASSERT(dip->di_nextp == NULL);
1018 
1019 	/*
1020 	 * For ethernet media, sap values less than or equal to
1021 	 * ETHERMTU (1500) represent LLC channels. (See PSARC 2003/150).
1022 	 * We strictly use 0 to represent LLC channels.
1023 	 */
1024 	sap = (sap <= ETHERMTU) ? 0 : sap;
1025 
1026 	/*
1027 	 * Make the appropriate key value depending on whether the
1028 	 * dls_impl_t is in promiscuous mode or not.
1029 	 */
1030 	key = MAKE_KEY(sap, dvp->dv_id);
1031 
1032 	/*
1033 	 * We need dl_lock here because we want to be able to walk
1034 	 * the hash table *and* set the mac rx func atomically. if
1035 	 * these two operations are separate, someone else could
1036 	 * insert/remove dls_impl_t from the ght after we drop the
1037 	 * ght lock and this could cause our chosen rx func to be
1038 	 * incorrect. note that we cannot call mac_rx_set when
1039 	 * holding the ght lock because this can cause deadlock.
1040 	 */
1041 	mutex_enter(&dlp->dl_lock);
1042 	/*
1043 	 * Allocate a new entry.
1044 	 */
1045 	nhte = ght_alloc(hash, KM_SLEEP);
1046 
1047 	/*
1048 	 * Search the table for any existing entry with this key.
1049 	 */
1050 	ght_lock(hash, GHT_WRITE);
1051 	if ((err = ght_find(hash, key, &hte)) != 0) {
1052 		ASSERT(err == ENOENT);
1053 
1054 		GHT_KEY(nhte) = key;
1055 		GHT_VAL(nhte) = GHT_PTR_TO_VAL(dip);
1056 
1057 		/*
1058 		 * Insert it in the table to be the head of a new list.
1059 		 */
1060 		err = ght_insert(nhte);
1061 		ASSERT(err == 0);
1062 
1063 		/*
1064 		 * Cache a reference to the hash table entry.
1065 		 */
1066 		ASSERT(dip->di_hte == NULL);
1067 		dip->di_hte = nhte;
1068 
1069 		goto done;
1070 	}
1071 
1072 	/*
1073 	 * Free the unused hash table entry.
1074 	 */
1075 	ght_free(nhte);
1076 
1077 	/*
1078 	 * Add the dls_impl_t to the end of the list. We can't add to the head
1079 	 * because the hash table internals already have a reference to the
1080 	 * head of the list.
1081 	 */
1082 	for (pp = (dls_impl_t **)&(GHT_VAL(hte)); (p = *pp) != NULL;
1083 	    pp = &(p->di_nextp))
1084 		ASSERT(p != dip);
1085 
1086 	*pp = dip;
1087 
1088 	/*
1089 	 * Cache a reference to the hash table entry.
1090 	 */
1091 	ASSERT(dip->di_hte == NULL);
1092 	dip->di_hte = hte;
1093 
1094 done:
1095 	/*
1096 	 * If there are no dls_impl_t then we can just drop all received
1097 	 * packets on the floor.
1098 	 */
1099 	impl_count = ght_count(hash);
1100 	if (impl_count == 0) {
1101 		ght_unlock(hash);
1102 	} else {
1103 		boolean_t promisc = B_FALSE;
1104 
1105 		/*
1106 		 * Walk the bound dls_impl_t to see if there are any
1107 		 * in promiscuous 'all sap' mode.
1108 		 */
1109 		ght_walk(hash, i_dls_link_walk, (void *)&promisc);
1110 
1111 		/*
1112 		 * If there are then we need to use a receive routine
1113 		 * which will route packets to those dls_impl_t as well
1114 		 * as ones bound to the  DLSAP of the packet.
1115 		 */
1116 		if (promisc)
1117 			rx = i_dls_link_ether_rx_promisc;
1118 		else
1119 			rx = i_dls_link_ether_rx;
1120 
1121 		ght_unlock(hash);
1122 
1123 		/* Replace the existing receive function if there is one. */
1124 		if (dlp->dl_mrh != NULL)
1125 			mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1126 		dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
1127 	}
1128 	mutex_exit(&dlp->dl_lock);
1129 }
1130 
1131 void
1132 dls_link_remove(dls_link_t *dlp, dls_impl_t *dip)
1133 {
1134 	ght_t		hash = dlp->dl_impl_hash;
1135 	ghte_t		hte;
1136 	dls_impl_t	**pp;
1137 	dls_impl_t	*p;
1138 	mac_rx_t	rx;
1139 
1140 	/*
1141 	 * We need dl_lock here because we want to be able to walk
1142 	 * the hash table *and* set the mac rx func atomically. if
1143 	 * these two operations are separate, someone else could
1144 	 * insert/remove dls_impl_t from the ght after we drop the
1145 	 * ght lock and this could cause our chosen rx func to be
1146 	 * incorrect. note that we cannot call mac_rx_add when
1147 	 * holding the ght lock because this can cause deadlock.
1148 	 */
1149 	mutex_enter(&dlp->dl_lock);
1150 
1151 	ght_lock(hash, GHT_WRITE);
1152 
1153 	/*
1154 	 * Get the cached hash table entry reference.
1155 	 */
1156 	hte = dip->di_hte;
1157 	ASSERT(hte != NULL);
1158 
1159 	/*
1160 	 * Poll the hash table entry until all references have been dropped.
1161 	 * We need to drop all locks before sleeping because we don't want
1162 	 * the interrupt handler to block. We set di_removing here to
1163 	 * tell the receive callbacks not to pass up packets anymore.
1164 	 * This is only a hint to quicken the decrease of the refcnt so
1165 	 * the assignment need not be protected by any lock.
1166 	 */
1167 	dip->di_removing = B_TRUE;
1168 	while (ght_ref(hte) != 0) {
1169 		ght_unlock(hash);
1170 		mutex_exit(&dlp->dl_lock);
1171 		delay(drv_usectohz(1000));	/* 1ms delay */
1172 		mutex_enter(&dlp->dl_lock);
1173 		ght_lock(hash, GHT_WRITE);
1174 	}
1175 
1176 	/*
1177 	 * Walk the list and remove the dls_impl_t.
1178 	 */
1179 	for (pp = (dls_impl_t **)&(GHT_VAL(hte)); (p = *pp) != NULL;
1180 	    pp = &(p->di_nextp)) {
1181 		if (p == dip)
1182 			break;
1183 	}
1184 	ASSERT(p != NULL);
1185 
1186 	*pp = p->di_nextp;
1187 	p->di_nextp = NULL;
1188 	dip->di_hte = NULL;
1189 
1190 	if (GHT_VAL(hte) == NULL) {
1191 		/*
1192 		 * The list is empty so remove the hash table entry.
1193 		 */
1194 		ght_remove(hte);
1195 		ght_free(hte);
1196 	}
1197 	dip->di_removing = B_FALSE;
1198 
1199 	/*
1200 	 * If there are no dls_impl_t then there's no need to register a
1201 	 * receive function with the mac.
1202 	 */
1203 	if (ght_count(hash) == 0) {
1204 		ght_unlock(hash);
1205 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1206 		dlp->dl_mrh = NULL;
1207 	} else {
1208 		boolean_t promisc = B_FALSE;
1209 
1210 		/*
1211 		 * Walk the bound dls_impl_t to see if there are any
1212 		 * in promiscuous 'all sap' mode.
1213 		 */
1214 		ght_walk(hash, i_dls_link_walk, (void *)&promisc);
1215 
1216 		/*
1217 		 * If there are then we need to use a receive routine
1218 		 * which will route packets to those dls_impl_t as well
1219 		 * as ones bound to the  DLSAP of the packet.
1220 		 */
1221 		if (promisc)
1222 			rx = i_dls_link_ether_rx_promisc;
1223 		else
1224 			rx = i_dls_link_ether_rx;
1225 
1226 		ght_unlock(hash);
1227 
1228 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1229 		dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
1230 	}
1231 	mutex_exit(&dlp->dl_lock);
1232 }
1233