xref: /titanic_41/usr/src/uts/common/io/dls/dls_link.c (revision 23a276b1252962c987a613be470dde26561247b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Data-Link Services Module
30  */
31 
32 #include	<sys/types.h>
33 #include	<sys/stream.h>
34 #include	<sys/strsun.h>
35 #include	<sys/strsubr.h>
36 #include	<sys/sysmacros.h>
37 #include	<sys/atomic.h>
38 #include	<sys/modhash.h>
39 #include	<sys/dlpi.h>
40 #include	<sys/ethernet.h>
41 #include	<sys/byteorder.h>
42 #include	<sys/vlan.h>
43 #include	<sys/mac.h>
44 #include	<sys/sdt.h>
45 
46 #include	<sys/dls.h>
47 #include	<sys/dld_impl.h>
48 #include	<sys/dls_impl.h>
49 
50 static kmem_cache_t	*i_dls_link_cachep;
51 static mod_hash_t	*i_dls_link_hash;
52 static uint_t		i_dls_link_count;
53 static krwlock_t	i_dls_link_lock;
54 
/*
 * Hash table sizes (both prime): LINK_HASHSZ for the table of links,
 * IMPL_HASHSZ for the per-link table of bound dls_impl_t lists.
 */
#define		LINK_HASHSZ	67
#define		IMPL_HASHSZ	67
57 
/*
 * Construct a hash key encompassing both DLSAP value and VLAN identifier.
 * The VLAN identifier, masked with VLAN_ID_MASK, occupies the low
 * VLAN_ID_SIZE bits; the DLSAP value sits above it.
 */
#define	MAKE_KEY(_sap, _vid)						\
	((mod_hash_key_t)(uintptr_t)					\
	(((_sap) << VLAN_ID_SIZE) | ((_vid) & VLAN_ID_MASK)))
64 
/*
 * Recover the DLSAP value from a hash key built by MAKE_KEY(): truncate
 * the key to 32 bits and shift the VLAN identifier bits out.
 */
#define	KEY_SAP(_key)							\
	((uint32_t)(uintptr_t)(_key) >> VLAN_ID_SIZE)
70 
/*
 * Trim trailing padding from message p.  When pktsize is non-zero and the
 * message holds more data than pktsize bytes (as measured by msgdsize()),
 * the excess is removed by passing the negative difference to adjmsg().
 * A pktsize of 0 disables the check.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement and can be used as the body
 * of an unbraced if/else.  Arguments are parenthesized so expressions may
 * be passed safely; note that pktsize is still evaluated more than once.
 */
#define	DLS_STRIP_PADDING(pktsize, p) do {				\
	if ((pktsize) != 0) {						\
		ssize_t delta = (pktsize) - msgdsize(p);		\
									\
		if (delta < 0)						\
			(void) adjmsg((p), delta);			\
	}								\
	/*CONSTCOND*/							\
} while (0)
79 
80 /*
81  * Private functions.
82  */
83 
84 /*ARGSUSED*/
85 static int
86 i_dls_link_constructor(void *buf, void *arg, int kmflag)
87 {
88 	dls_link_t	*dlp = buf;
89 	char		name[MAXNAMELEN];
90 
91 	bzero(buf, sizeof (dls_link_t));
92 
93 	(void) sprintf(name, "dls_link_t_%p_hash", buf);
94 	dlp->dl_impl_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
95 	    mod_hash_null_valdtor);
96 
97 	mutex_init(&dlp->dl_lock, NULL, MUTEX_DEFAULT, NULL);
98 	mutex_init(&dlp->dl_promisc_lock, NULL, MUTEX_DEFAULT, NULL);
99 	rw_init(&dlp->dl_impl_lock, NULL, RW_DEFAULT, NULL);
100 	return (0);
101 }
102 
103 /*ARGSUSED*/
104 static void
105 i_dls_link_destructor(void *buf, void *arg)
106 {
107 	dls_link_t	*dlp = buf;
108 
109 	ASSERT(dlp->dl_ref == 0);
110 	ASSERT(dlp->dl_mh == NULL);
111 	ASSERT(dlp->dl_unknowns == 0);
112 
113 	mod_hash_destroy_idhash(dlp->dl_impl_hash);
114 	dlp->dl_impl_hash = NULL;
115 
116 	mutex_destroy(&dlp->dl_lock);
117 	mutex_destroy(&dlp->dl_promisc_lock);
118 	rw_destroy(&dlp->dl_impl_lock);
119 }
120 
121 /*
122  * Truncate the chain starting at mp such that all packets in the chain
123  * have identical source and destination addresses, saps, and VLAN tags (if
124  * any).  It returns a pointer to the mblk following the chain, NULL if
125  * there is no further packet following the processed chain.  The countp
126  * argument is set to the number of valid packets in the chain.  It is set
127  * to 0 if the function encountered a problem with the first packet.
128  */
129 static mblk_t *
130 i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, mac_header_info_t *mhip,
131     uint16_t *vidp, uint_t *countp)
132 {
133 	mblk_t		**pp;
134 	mblk_t		*p;
135 	uint_t		npacket;
136 	size_t		addr_size = dlp->dl_mip->mi_addr_length;
137 
138 	/*
139 	 * Packets should always be at least 16 bit aligned.
140 	 */
141 	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
142 
143 	if (dls_link_header_info(dlp, mp, mhip, vidp) != 0) {
144 		/*
145 		 * Something is wrong with the initial header.  No chain is
146 		 * possible.
147 		 */
148 		p = mp->b_next;
149 		mp->b_next = NULL;
150 		*countp = 0;
151 		return (p);
152 	}
153 
154 	/*
155 	 * Compare with subsequent headers until we find one that has
156 	 * differing header information. After checking each packet
157 	 * strip padding and skip over the header.
158 	 */
159 	npacket = 1;
160 	for (pp = &(mp->b_next); (p = *pp) != NULL; pp = &(p->b_next)) {
161 		mac_header_info_t cmhi;
162 		uint16_t cvid;
163 
164 		if (dls_link_header_info(dlp, p, &cmhi, &cvid) != 0)
165 			break;
166 
167 		/*
168 		 * The source, destination, sap, and vlan id must all match
169 		 * in a given subchain.
170 		 */
171 		if (memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 ||
172 		    memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 ||
173 		    mhip->mhi_bindsap != cmhi.mhi_bindsap) {
174 			break;
175 		}
176 
177 		if (cvid != *vidp)
178 			break;
179 
180 		DLS_STRIP_PADDING(cmhi.mhi_pktsize, p);
181 		p->b_rptr += cmhi.mhi_hdrsize;
182 		npacket++;
183 	}
184 
185 	/*
186 	 * Strip padding and skip over the initial packet's header.
187 	 */
188 	DLS_STRIP_PADDING(mhip->mhi_pktsize, mp);
189 	mp->b_rptr += mhip->mhi_hdrsize;
190 
191 	/*
192 	 * Break the chain at this point and return a pointer to the next
193 	 * sub-chain.
194 	 */
195 	*pp = NULL;
196 	*countp = npacket;
197 	return (p);
198 }
199 
200 static void
201 i_dls_head_hold(dls_head_t *dhp)
202 {
203 	atomic_inc_32(&dhp->dh_ref);
204 }
205 
206 static void
207 i_dls_head_rele(dls_head_t *dhp)
208 {
209 	atomic_dec_32(&dhp->dh_ref);
210 }
211 
212 static dls_head_t *
213 i_dls_head_alloc(mod_hash_key_t key)
214 {
215 	dls_head_t	*dhp;
216 
217 	dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
218 	dhp->dh_key = key;
219 	return (dhp);
220 }
221 
222 static void
223 i_dls_head_free(dls_head_t *dhp)
224 {
225 	ASSERT(dhp->dh_ref == 0);
226 	kmem_free(dhp, sizeof (dls_head_t));
227 }
228 
229 static void
230 i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
231 {
232 	dls_link_t			*dlp = arg;
233 	mod_hash_t			*hash = dlp->dl_impl_hash;
234 	mblk_t				*nextp;
235 	mac_header_info_t		mhi;
236 	uint16_t			vid;
237 	dls_head_t			*dhp;
238 	dls_impl_t			*dip;
239 	dls_impl_t			*ndip;
240 	mblk_t				*nmp;
241 	mod_hash_key_t			key;
242 	uint_t				npacket;
243 	boolean_t			accepted;
244 	dls_rx_t			di_rx, ndi_rx;
245 	void				*di_rx_arg, *ndi_rx_arg;
246 
247 	/*
248 	 * Walk the packet chain.
249 	 */
250 	while (mp != NULL) {
251 		/*
252 		 * Wipe the accepted state.
253 		 */
254 		accepted = B_FALSE;
255 
256 		/*
257 		 * Grab the longest sub-chain we can process as a single
258 		 * unit.
259 		 */
260 		nextp = i_dls_link_subchain(dlp, mp, &mhi, &vid, &npacket);
261 
262 		if (npacket == 0) {
263 			/*
264 			 * The first packet had an unrecognized header.
265 			 * Modify npacket so that this stray can be
266 			 * accounted for.
267 			 */
268 			npacket = 1;
269 			freemsg(mp);
270 			goto loop;
271 		}
272 
273 		/*
274 		 * Construct a hash key from the VLAN identifier and the
275 		 * DLSAP.
276 		 */
277 		key = MAKE_KEY(mhi.mhi_bindsap, vid);
278 
279 		/*
280 		 * Search the has table for dls_impl_t eligible to receive
281 		 * a packet chain for this DLSAP/VLAN combination.
282 		 */
283 		rw_enter(&dlp->dl_impl_lock, RW_READER);
284 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
285 			rw_exit(&dlp->dl_impl_lock);
286 			freemsgchain(mp);
287 			goto loop;
288 		}
289 		i_dls_head_hold(dhp);
290 		rw_exit(&dlp->dl_impl_lock);
291 
292 		/*
293 		 * Find the first dls_impl_t that will accept the sub-chain.
294 		 */
295 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
296 			if (dls_accept(dip, &mhi, &di_rx, &di_rx_arg))
297 				break;
298 
299 		/*
300 		 * If we did not find any dls_impl_t willing to accept the
301 		 * sub-chain then throw it away.
302 		 */
303 		if (dip == NULL) {
304 			i_dls_head_rele(dhp);
305 			freemsgchain(mp);
306 			goto loop;
307 		}
308 
309 		/*
310 		 * We have at least one acceptor.
311 		 */
312 		accepted = B_TRUE;
313 		for (;;) {
314 			/*
315 			 * Find the next dls_impl_t that will accept the
316 			 * sub-chain.
317 			 */
318 			for (ndip = dip->di_nextp; ndip != NULL;
319 			    ndip = ndip->di_nextp)
320 				if (dls_accept(ndip, &mhi, &ndi_rx,
321 				    &ndi_rx_arg))
322 					break;
323 
324 			/*
325 			 * If there are no more dls_impl_t that are willing
326 			 * to accept the sub-chain then we don't need to dup
327 			 * it before handing it to the current one.
328 			 */
329 			if (ndip == NULL) {
330 				di_rx(di_rx_arg, mrh, mp, mhi.mhi_hdrsize);
331 
332 				/*
333 				 * Since there are no more dls_impl_t, we're
334 				 * done.
335 				 */
336 				break;
337 			}
338 
339 			/*
340 			 * There are more dls_impl_t so dup the sub-chain.
341 			 */
342 			if ((nmp = copymsgchain(mp)) != NULL)
343 				di_rx(di_rx_arg, mrh, nmp, mhi.mhi_hdrsize);
344 
345 			dip = ndip;
346 			di_rx = ndi_rx;
347 			di_rx_arg = ndi_rx_arg;
348 		}
349 
350 		/*
351 		 * Release the hold on the dls_impl_t chain now that we have
352 		 * finished walking it.
353 		 */
354 		i_dls_head_rele(dhp);
355 
356 loop:
357 		/*
358 		 * If there were no acceptors then add the packet count to the
359 		 * 'unknown' count.
360 		 */
361 		if (!accepted)
362 			atomic_add_32(&(dlp->dl_unknowns), npacket);
363 
364 		/*
365 		 * Move onto the next sub-chain.
366 		 */
367 		mp = nextp;
368 	}
369 }
370 
371 static void
372 i_dls_link_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
373 {
374 	dls_link_t			*dlp = arg;
375 	mod_hash_t			*hash = dlp->dl_impl_hash;
376 	mblk_t				*nextp;
377 	mac_header_info_t		mhi;
378 	uint16_t			vid;
379 	dls_head_t			*dhp;
380 	dls_impl_t			*dip;
381 	dls_impl_t			*ndip;
382 	mblk_t				*nmp;
383 	mod_hash_key_t			key;
384 	uint_t				npacket;
385 	boolean_t			accepted;
386 	dls_rx_t			di_rx, ndi_rx;
387 	void				*di_rx_arg, *ndi_rx_arg;
388 
389 	/*
390 	 * Walk the packet chain.
391 	 */
392 	while (mp != NULL) {
393 		/*
394 		 * Wipe the accepted state.
395 		 */
396 		accepted = B_FALSE;
397 
398 		/*
399 		 * Grab the longest sub-chain we can process as a single
400 		 * unit.
401 		 */
402 		nextp = i_dls_link_subchain(dlp, mp, &mhi, &vid, &npacket);
403 
404 		if (npacket == 0) {
405 			/*
406 			 * The first packet had an unrecognized header.
407 			 * Modify npacket so that this stray can be
408 			 * accounted for.
409 			 */
410 			npacket = 1;
411 			freemsg(mp);
412 			goto loop;
413 		}
414 
415 		/*
416 		 * Construct a hash key from the VLAN identifier and the
417 		 * DLSAP that represents dls_impl_t in promiscuous mode.
418 		 */
419 		key = MAKE_KEY(DLS_SAP_PROMISC, vid);
420 
421 		/*
422 		 * Search the has table for dls_impl_t eligible to receive
423 		 * a packet chain for this DLSAP/VLAN combination.
424 		 */
425 		rw_enter(&dlp->dl_impl_lock, RW_READER);
426 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
427 			rw_exit(&dlp->dl_impl_lock);
428 			goto non_promisc;
429 		}
430 		i_dls_head_hold(dhp);
431 		rw_exit(&dlp->dl_impl_lock);
432 
433 		/*
434 		 * Find dls_impl_t that will accept the sub-chain.
435 		 */
436 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
437 			if (!dls_accept(dip, &mhi, &di_rx, &di_rx_arg))
438 				continue;
439 
440 			/*
441 			 * We have at least one acceptor.
442 			 */
443 			accepted = B_TRUE;
444 
445 			/*
446 			 * There will normally be at least more dls_impl_t
447 			 * (since we've yet to check for non-promiscuous
448 			 * dls_impl_t) so dup the sub-chain.
449 			 */
450 			if ((nmp = copymsgchain(mp)) != NULL)
451 				di_rx(di_rx_arg, mrh, nmp, mhi.mhi_hdrsize);
452 		}
453 
454 		/*
455 		 * Release the hold on the dls_impl_t chain now that we have
456 		 * finished walking it.
457 		 */
458 		i_dls_head_rele(dhp);
459 
460 non_promisc:
461 		/*
462 		 * Construct a hash key from the VLAN identifier and the
463 		 * DLSAP.
464 		 */
465 		key = MAKE_KEY(mhi.mhi_bindsap, vid);
466 
467 		/*
468 		 * Search the has table for dls_impl_t eligible to receive
469 		 * a packet chain for this DLSAP/VLAN combination.
470 		 */
471 		rw_enter(&dlp->dl_impl_lock, RW_READER);
472 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
473 			rw_exit(&dlp->dl_impl_lock);
474 			freemsgchain(mp);
475 			goto loop;
476 		}
477 		i_dls_head_hold(dhp);
478 		rw_exit(&dlp->dl_impl_lock);
479 
480 		/*
481 		 * Find the first dls_impl_t that will accept the sub-chain.
482 		 */
483 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
484 			if (dls_accept(dip, &mhi, &di_rx, &di_rx_arg))
485 				break;
486 
487 		/*
488 		 * If we did not find any dls_impl_t willing to accept the
489 		 * sub-chain then throw it away.
490 		 */
491 		if (dip == NULL) {
492 			i_dls_head_rele(dhp);
493 			freemsgchain(mp);
494 			goto loop;
495 		}
496 
497 		/*
498 		 * We have at least one acceptor.
499 		 */
500 		accepted = B_TRUE;
501 		for (;;) {
502 			/*
503 			 * Find the next dls_impl_t that will accept the
504 			 * sub-chain.
505 			 */
506 			for (ndip = dip->di_nextp; ndip != NULL;
507 			    ndip = ndip->di_nextp)
508 				if (dls_accept(ndip, &mhi, &ndi_rx,
509 				    &ndi_rx_arg))
510 					break;
511 
512 			/*
513 			 * If there are no more dls_impl_t that are willing
514 			 * to accept the sub-chain then we don't need to dup
515 			 * it before handing it to the current one.
516 			 */
517 			if (ndip == NULL) {
518 				di_rx(di_rx_arg, mrh, mp, mhi.mhi_hdrsize);
519 
520 				/*
521 				 * Since there are no more dls_impl_t, we're
522 				 * done.
523 				 */
524 				break;
525 			}
526 
527 			/*
528 			 * There are more dls_impl_t so dup the sub-chain.
529 			 */
530 			if ((nmp = copymsgchain(mp)) != NULL)
531 				di_rx(di_rx_arg, mrh, nmp, mhi.mhi_hdrsize);
532 
533 			dip = ndip;
534 			di_rx = ndi_rx;
535 			di_rx_arg = ndi_rx_arg;
536 		}
537 
538 		/*
539 		 * Release the hold on the dls_impl_t chain now that we have
540 		 * finished walking it.
541 		 */
542 		i_dls_head_rele(dhp);
543 
544 loop:
545 		/*
546 		 * If there were no acceptors then add the packet count to the
547 		 * 'unknown' count.
548 		 */
549 		if (!accepted)
550 			atomic_add_32(&(dlp->dl_unknowns), npacket);
551 
552 		/*
553 		 * Move onto the next sub-chain.
554 		 */
555 		mp = nextp;
556 	}
557 }
558 
559 static void
560 i_dls_link_txloop(void *arg, mblk_t *mp)
561 {
562 	dls_link_t			*dlp = arg;
563 	mod_hash_t			*hash = dlp->dl_impl_hash;
564 	mblk_t				*nextp;
565 	mac_header_info_t		mhi;
566 	uint16_t			vid;
567 	dls_head_t			*dhp;
568 	dls_impl_t			*dip;
569 	dls_impl_t			*ndip;
570 	mblk_t				*nmp;
571 	mod_hash_key_t			key;
572 	uint_t				npacket;
573 	dls_rx_t			di_rx, ndi_rx;
574 	void				*di_rx_arg, *ndi_rx_arg;
575 
576 	/*
577 	 * Walk the packet chain.
578 	 */
579 	while (mp != NULL) {
580 		/*
581 		 * Grab the longest sub-chain we can process as a single
582 		 * unit.
583 		 */
584 		nextp = i_dls_link_subchain(dlp, mp, &mhi, &vid, &npacket);
585 
586 		if (npacket == 0) {
587 			freemsg(mp);
588 			goto loop;
589 		}
590 
591 		/*
592 		 * Construct a hash key from the VLAN identifier and the
593 		 * DLSAP.
594 		 */
595 		key = MAKE_KEY(mhi.mhi_bindsap, vid);
596 
597 		/*
598 		 * Search the has table for dls_impl_t eligible to receive
599 		 * a packet chain for this DLSAP/VLAN combination.
600 		 */
601 		rw_enter(&dlp->dl_impl_lock, RW_READER);
602 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
603 			rw_exit(&dlp->dl_impl_lock);
604 			goto promisc;
605 		}
606 		i_dls_head_hold(dhp);
607 		rw_exit(&dlp->dl_impl_lock);
608 
609 		/*
610 		 * Find dls_impl_t that will accept the sub-chain.
611 		 */
612 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
613 			if (!dls_accept_loopback(dip, &di_rx, &di_rx_arg))
614 				continue;
615 
616 			/*
617 			 * There should be at least more dls_impl_t (since
618 			 * we've yet to check for dls_impl_t in promiscuous
619 			 * mode) so dup the sub-chain.
620 			 */
621 			if ((nmp = copymsgchain(mp)) != NULL)
622 				di_rx(di_rx_arg, NULL, nmp, mhi.mhi_hdrsize);
623 		}
624 
625 		/*
626 		 * Release the hold on the dls_impl_t chain now that we have
627 		 * finished walking it.
628 		 */
629 		i_dls_head_rele(dhp);
630 
631 promisc:
632 		/*
633 		 * Construct a hash key from the VLAN identifier and the
634 		 * DLSAP that represents dls_impl_t in promiscuous mode.
635 		 */
636 		key = MAKE_KEY(DLS_SAP_PROMISC, vid);
637 
638 		/*
639 		 * Search the has table for dls_impl_t eligible to receive
640 		 * a packet chain for this DLSAP/VLAN combination.
641 		 */
642 		rw_enter(&dlp->dl_impl_lock, RW_READER);
643 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
644 			rw_exit(&dlp->dl_impl_lock);
645 			freemsgchain(mp);
646 			goto loop;
647 		}
648 		i_dls_head_hold(dhp);
649 		rw_exit(&dlp->dl_impl_lock);
650 
651 		/*
652 		 * Find the first dls_impl_t that will accept the sub-chain.
653 		 */
654 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
655 			if (dls_accept_loopback(dip, &di_rx, &di_rx_arg))
656 				break;
657 
658 		/*
659 		 * If we did not find any dls_impl_t willing to accept the
660 		 * sub-chain then throw it away.
661 		 */
662 		if (dip == NULL) {
663 			i_dls_head_rele(dhp);
664 			freemsgchain(mp);
665 			goto loop;
666 		}
667 
668 		for (;;) {
669 			/*
670 			 * Find the next dls_impl_t that will accept the
671 			 * sub-chain.
672 			 */
673 			for (ndip = dip->di_nextp; ndip != NULL;
674 			    ndip = ndip->di_nextp)
675 				if (dls_accept_loopback(ndip, &ndi_rx,
676 				    &ndi_rx_arg)) {
677 					break;
678 				}
679 
680 			/*
681 			 * If there are no more dls_impl_t that are willing
682 			 * to accept the sub-chain then we don't need to dup
683 			 * it before handing it to the current one.
684 			 */
685 			if (ndip == NULL) {
686 				di_rx(di_rx_arg, NULL, mp, mhi.mhi_hdrsize);
687 
688 				/*
689 				 * Since there are no more dls_impl_t, we're
690 				 * done.
691 				 */
692 				break;
693 			}
694 
695 			/*
696 			 * There are more dls_impl_t so dup the sub-chain.
697 			 */
698 			if ((nmp = copymsgchain(mp)) != NULL)
699 				di_rx(di_rx_arg, NULL, nmp, mhi.mhi_hdrsize);
700 
701 			dip = ndip;
702 			di_rx = ndi_rx;
703 			di_rx_arg = ndi_rx_arg;
704 		}
705 
706 		/*
707 		 * Release the hold on the dls_impl_t chain now that we have
708 		 * finished walking it.
709 		 */
710 		i_dls_head_rele(dhp);
711 
712 loop:
713 		/*
714 		 * Move onto the next sub-chain.
715 		 */
716 		mp = nextp;
717 	}
718 }
719 
720 /*ARGSUSED*/
721 static uint_t
722 i_dls_link_walk(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
723 {
724 	boolean_t	*promiscp = arg;
725 	uint32_t	sap = KEY_SAP(key);
726 
727 	if (sap == DLS_SAP_PROMISC) {
728 		*promiscp = B_TRUE;
729 		return (MH_WALK_TERMINATE);
730 	}
731 
732 	return (MH_WALK_CONTINUE);
733 }
734 
735 static int
736 i_dls_link_create(const char *name, uint_t ddi_instance, dls_link_t **dlpp)
737 {
738 	dls_link_t		*dlp;
739 
740 	/*
741 	 * Allocate a new dls_link_t structure.
742 	 */
743 	dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
744 
745 	/*
746 	 * Name the dls_link_t after the MAC interface it represents.
747 	 */
748 	(void) strlcpy(dlp->dl_name, name, sizeof (dlp->dl_name));
749 	dlp->dl_ddi_instance = ddi_instance;
750 
751 	/*
752 	 * Set the packet loopback function for use when the MAC is in
753 	 * promiscuous mode, and initialize promiscuous bookeeping fields.
754 	 */
755 	dlp->dl_txloop = i_dls_link_txloop;
756 	dlp->dl_npromisc = 0;
757 	dlp->dl_mth = NULL;
758 
759 	*dlpp = dlp;
760 	return (0);
761 }
762 
763 static void
764 i_dls_link_destroy(dls_link_t *dlp)
765 {
766 	ASSERT(dlp->dl_npromisc == 0);
767 	ASSERT(dlp->dl_nactive == 0);
768 	ASSERT(dlp->dl_mth == NULL);
769 	ASSERT(dlp->dl_macref == 0);
770 	ASSERT(dlp->dl_mh == NULL);
771 	ASSERT(dlp->dl_mip == NULL);
772 	ASSERT(dlp->dl_impl_count == 0);
773 	ASSERT(dlp->dl_mrh == NULL);
774 
775 	/*
776 	 * Free the structure back to the cache.
777 	 */
778 	dlp->dl_unknowns = 0;
779 	kmem_cache_free(i_dls_link_cachep, dlp);
780 }
781 
782 /*
783  * Module initialization functions.
784  */
785 
786 void
787 dls_link_init(void)
788 {
789 	/*
790 	 * Create a kmem_cache of dls_link_t structures.
791 	 */
792 	i_dls_link_cachep = kmem_cache_create("dls_link_cache",
793 	    sizeof (dls_link_t), 0, i_dls_link_constructor,
794 	    i_dls_link_destructor, NULL, NULL, NULL, 0);
795 	ASSERT(i_dls_link_cachep != NULL);
796 
797 	/*
798 	 * Create a dls_link_t hash table and associated lock.
799 	 */
800 	i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
801 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
802 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
803 	rw_init(&i_dls_link_lock, NULL, RW_DEFAULT, NULL);
804 	i_dls_link_count = 0;
805 }
806 
807 int
808 dls_link_fini(void)
809 {
810 	if (i_dls_link_count > 0)
811 		return (EBUSY);
812 
813 	/*
814 	 * Destroy the kmem_cache.
815 	 */
816 	kmem_cache_destroy(i_dls_link_cachep);
817 
818 	/*
819 	 * Destroy the hash table and associated lock.
820 	 */
821 	mod_hash_destroy_hash(i_dls_link_hash);
822 	rw_destroy(&i_dls_link_lock);
823 	return (0);
824 }
825 
826 /*
827  * Exported functions.
828  */
829 
830 int
831 dls_link_hold(const char *name, uint_t ddi_instance, dls_link_t **dlpp)
832 {
833 	dls_link_t		*dlp;
834 	int			err;
835 
836 	/*
837 	 * Look up a dls_link_t corresponding to the given mac_handle_t
838 	 * in the global hash table. We need to hold i_dls_link_lock in
839 	 * order to atomically find and insert a dls_link_t into the
840 	 * hash table.
841 	 */
842 	rw_enter(&i_dls_link_lock, RW_WRITER);
843 	if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
844 	    (mod_hash_val_t *)&dlp)) == 0)
845 		goto done;
846 
847 	ASSERT(err == MH_ERR_NOTFOUND);
848 
849 	/*
850 	 * We didn't find anything so we need to create one.
851 	 */
852 	if ((err = i_dls_link_create(name, ddi_instance, &dlp)) != 0) {
853 		rw_exit(&i_dls_link_lock);
854 		return (err);
855 	}
856 
857 	/*
858 	 * Insert the dls_link_t.
859 	 */
860 	err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)name,
861 	    (mod_hash_val_t)dlp);
862 	ASSERT(err == 0);
863 
864 	i_dls_link_count++;
865 	ASSERT(i_dls_link_count != 0);
866 
867 done:
868 	/*
869 	 * Bump the reference count and hand back the reference.
870 	 */
871 	dlp->dl_ref++;
872 	*dlpp = dlp;
873 	rw_exit(&i_dls_link_lock);
874 	return (0);
875 }
876 
877 void
878 dls_link_rele(dls_link_t *dlp)
879 {
880 	mod_hash_val_t	val;
881 
882 	rw_enter(&i_dls_link_lock, RW_WRITER);
883 
884 	/*
885 	 * Check if there are any more references.
886 	 */
887 	if (--dlp->dl_ref != 0) {
888 		/*
889 		 * There are more references so there's nothing more to do.
890 		 */
891 		goto done;
892 	}
893 
894 	(void) mod_hash_remove(i_dls_link_hash,
895 	    (mod_hash_key_t)dlp->dl_name, &val);
896 	ASSERT(dlp == (dls_link_t *)val);
897 
898 	/*
899 	 * Destroy the dls_link_t.
900 	 */
901 	i_dls_link_destroy(dlp);
902 	ASSERT(i_dls_link_count > 0);
903 	i_dls_link_count--;
904 done:
905 	rw_exit(&i_dls_link_lock);
906 }
907 
908 int
909 dls_mac_hold(dls_link_t *dlp)
910 {
911 	int err = 0;
912 
913 	mutex_enter(&dlp->dl_lock);
914 
915 	ASSERT(IMPLY(dlp->dl_macref != 0, dlp->dl_mh != NULL));
916 	ASSERT(IMPLY(dlp->dl_macref == 0, dlp->dl_mh == NULL));
917 
918 	if (dlp->dl_macref == 0) {
919 		/*
920 		 * First reference; hold open the MAC interface.
921 		 */
922 		err = mac_open(dlp->dl_name, dlp->dl_ddi_instance, &dlp->dl_mh);
923 		if (err != 0)
924 			goto done;
925 
926 		dlp->dl_mip = mac_info(dlp->dl_mh);
927 	}
928 
929 	dlp->dl_macref++;
930 done:
931 	mutex_exit(&dlp->dl_lock);
932 	return (err);
933 }
934 
935 void
936 dls_mac_rele(dls_link_t *dlp)
937 {
938 	mutex_enter(&dlp->dl_lock);
939 	ASSERT(dlp->dl_mh != NULL);
940 
941 	if (--dlp->dl_macref == 0) {
942 		mac_close(dlp->dl_mh);
943 		dlp->dl_mh = NULL;
944 		dlp->dl_mip = NULL;
945 	}
946 	mutex_exit(&dlp->dl_lock);
947 }
948 
949 void
950 dls_link_add(dls_link_t *dlp, uint32_t sap, dls_impl_t *dip)
951 {
952 	dls_vlan_t	*dvp = dip->di_dvp;
953 	mod_hash_t	*hash = dlp->dl_impl_hash;
954 	mod_hash_key_t	key;
955 	dls_head_t	*dhp;
956 	dls_impl_t	*p;
957 	mac_rx_t	rx;
958 	int		err;
959 	boolean_t	promisc = B_FALSE;
960 
961 	/*
962 	 * Generate a hash key based on the sap and the VLAN id.
963 	 */
964 	key = MAKE_KEY(sap, dvp->dv_id);
965 
966 	/*
967 	 * We need dl_lock here because we want to be able to walk
968 	 * the hash table *and* set the mac rx func atomically. if
969 	 * these two operations are separate, someone else could
970 	 * insert/remove dls_impl_t from the hash table after we
971 	 * drop the hash lock and this could cause our chosen rx
972 	 * func to be incorrect. note that we cannot call mac_rx_add
973 	 * when holding the hash lock because this can cause deadlock.
974 	 */
975 	mutex_enter(&dlp->dl_lock);
976 
977 	/*
978 	 * Search the table for a list head with this key.
979 	 */
980 	rw_enter(&dlp->dl_impl_lock, RW_WRITER);
981 
982 	if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
983 		ASSERT(err == MH_ERR_NOTFOUND);
984 
985 		dhp = i_dls_head_alloc(key);
986 		err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
987 		ASSERT(err == 0);
988 	}
989 
990 	/*
991 	 * Add the dls_impl_t to the head of the list.
992 	 */
993 	ASSERT(dip->di_nextp == NULL);
994 	p = dhp->dh_list;
995 	dip->di_nextp = p;
996 	dhp->dh_list = dip;
997 
998 	/*
999 	 * Save a pointer to the list head.
1000 	 */
1001 	dip->di_headp = dhp;
1002 	dlp->dl_impl_count++;
1003 
1004 	/*
1005 	 * Walk the bound dls_impl_t to see if there are any
1006 	 * in promiscuous 'all sap' mode.
1007 	 */
1008 	mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
1009 	rw_exit(&dlp->dl_impl_lock);
1010 
1011 	/*
1012 	 * If there are then we need to use a receive routine
1013 	 * which will route packets to those dls_impl_t as well
1014 	 * as ones bound to the  DLSAP of the packet.
1015 	 */
1016 	if (promisc)
1017 		rx = i_dls_link_rx_promisc;
1018 	else
1019 		rx = i_dls_link_rx;
1020 
1021 	/* Replace the existing receive function if there is one. */
1022 	if (dlp->dl_mrh != NULL)
1023 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1024 	dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
1025 	mutex_exit(&dlp->dl_lock);
1026 }
1027 
1028 void
1029 dls_link_remove(dls_link_t *dlp, dls_impl_t *dip)
1030 {
1031 	mod_hash_t	*hash = dlp->dl_impl_hash;
1032 	dls_impl_t	**pp;
1033 	dls_impl_t	*p;
1034 	dls_head_t	*dhp;
1035 	mac_rx_t	rx;
1036 
1037 	/*
1038 	 * We need dl_lock here because we want to be able to walk
1039 	 * the hash table *and* set the mac rx func atomically. if
1040 	 * these two operations are separate, someone else could
1041 	 * insert/remove dls_impl_t from the hash table after we
1042 	 * drop the hash lock and this could cause our chosen rx
1043 	 * func to be incorrect. note that we cannot call mac_rx_add
1044 	 * when holding the hash lock because this can cause deadlock.
1045 	 */
1046 	mutex_enter(&dlp->dl_lock);
1047 	rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1048 
1049 	/*
1050 	 * Poll the hash table entry until all references have been dropped.
1051 	 * We need to drop all locks before sleeping because we don't want
1052 	 * the interrupt handler to block. We set di_removing here to
1053 	 * tell the receive callbacks not to pass up packets anymore.
1054 	 * This is only a hint to quicken the decrease of the refcnt so
1055 	 * the assignment need not be protected by any lock.
1056 	 */
1057 	dhp = dip->di_headp;
1058 	dip->di_removing = B_TRUE;
1059 	while (dhp->dh_ref != 0) {
1060 		rw_exit(&dlp->dl_impl_lock);
1061 		mutex_exit(&dlp->dl_lock);
1062 		delay(drv_usectohz(1000));	/* 1ms delay */
1063 		mutex_enter(&dlp->dl_lock);
1064 		rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1065 	}
1066 
1067 	/*
1068 	 * Walk the list and remove the dls_impl_t.
1069 	 */
1070 	for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->di_nextp)) {
1071 		if (p == dip)
1072 			break;
1073 	}
1074 	ASSERT(p != NULL);
1075 	*pp = p->di_nextp;
1076 	p->di_nextp = NULL;
1077 
1078 	ASSERT(dlp->dl_impl_count > 0);
1079 	dlp->dl_impl_count--;
1080 
1081 	if (dhp->dh_list == NULL) {
1082 		mod_hash_val_t	val = NULL;
1083 
1084 		/*
1085 		 * The list is empty so remove the hash table entry.
1086 		 */
1087 		(void) mod_hash_remove(hash, dhp->dh_key, &val);
1088 		ASSERT(dhp == (dls_head_t *)val);
1089 		i_dls_head_free(dhp);
1090 	}
1091 	dip->di_removing = B_FALSE;
1092 
1093 	/*
1094 	 * If there are no dls_impl_t then there's no need to register a
1095 	 * receive function with the mac.
1096 	 */
1097 	if (dlp->dl_impl_count == 0) {
1098 		rw_exit(&dlp->dl_impl_lock);
1099 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1100 		dlp->dl_mrh = NULL;
1101 	} else {
1102 		boolean_t promisc = B_FALSE;
1103 
1104 		/*
1105 		 * Walk the bound dls_impl_t to see if there are any
1106 		 * in promiscuous 'all sap' mode.
1107 		 */
1108 		mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
1109 		rw_exit(&dlp->dl_impl_lock);
1110 
1111 		/*
1112 		 * If there are then we need to use a receive routine
1113 		 * which will route packets to those dls_impl_t as well
1114 		 * as ones bound to the  DLSAP of the packet.
1115 		 */
1116 		if (promisc)
1117 			rx = i_dls_link_rx_promisc;
1118 		else
1119 			rx = i_dls_link_rx;
1120 
1121 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1122 		dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
1123 	}
1124 	mutex_exit(&dlp->dl_lock);
1125 }
1126 
1127 int
1128 dls_link_header_info(dls_link_t *dlp, mblk_t *mp, mac_header_info_t *mhip,
1129     uint16_t *vidp)
1130 {
1131 	boolean_t	is_ethernet = (dlp->dl_mip->mi_media == DL_ETHER);
1132 	int		err = 0;
1133 
1134 	if ((err = mac_header_info(dlp->dl_mh, mp, mhip)) != 0)
1135 		return (err);
1136 
1137 	/*
1138 	 * If this is a VLAN-tagged Ethernet packet, then the SAP in the
1139 	 * mac_header_info_t as returned by mac_header_info() is VLAN_TPID.
1140 	 * We need to grab the ethertype from the VLAN header.
1141 	 */
1142 	if (is_ethernet && (mhip->mhi_bindsap == VLAN_TPID)) {
1143 		struct ether_vlan_header *evhp;
1144 		uint16_t sap;
1145 
1146 		evhp = (struct ether_vlan_header *)mp->b_rptr;
1147 		sap = ntohs(evhp->ether_type);
1148 		(void) mac_sap_verify(dlp->dl_mh, sap, &mhip->mhi_bindsap);
1149 		mhip->mhi_hdrsize = sizeof (struct ether_vlan_header);
1150 		if (vidp != NULL)
1151 			*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1152 	} else if (vidp != NULL) {
1153 		*vidp = VLAN_ID_NONE;
1154 	}
1155 	return (0);
1156 }
1157