xref: /titanic_52/usr/src/uts/common/io/dls/dls_link.c (revision 753d2d2e8e7fd0c9bcf736d9bf2f2faf4d6234cc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Data-Link Services Module
30  */
31 
32 #include	<sys/types.h>
33 #include	<sys/stream.h>
34 #include	<sys/strsun.h>
35 #include	<sys/strsubr.h>
36 #include	<sys/sysmacros.h>
37 #include	<sys/atomic.h>
38 #include	<sys/modhash.h>
39 #include	<sys/dlpi.h>
40 #include	<sys/ethernet.h>
41 #include	<sys/byteorder.h>
42 #include	<sys/vlan.h>
43 #include	<sys/mac.h>
44 #include	<sys/sdt.h>
45 
46 #include	<sys/dls.h>
47 #include	<sys/dld_impl.h>
48 #include	<sys/dls_impl.h>
49 
50 static kmem_cache_t	*i_dls_link_cachep;
51 static mod_hash_t	*i_dls_link_hash;
52 static uint_t		i_dls_link_count;
53 static krwlock_t	i_dls_link_lock;
54 
55 #define		LINK_HASHSZ	67	/* prime */
56 #define		IMPL_HASHSZ	67	/* prime */
57 
/*
 * Construct a hash key encompassing both DLSAP value and VLAN identifier:
 * the DLSAP value is shifted left by VLAN_ID_SIZE and OR-ed with the VLAN
 * identifier masked by VLAN_ID_MASK.
 */
#define	MAKE_KEY(_sap, _vid)						\
	((mod_hash_key_t)(uintptr_t)					\
	(((_sap) << VLAN_ID_SIZE) | ((_vid) & VLAN_ID_MASK)))
64 
/*
 * Recover the DLSAP value from a key built by MAKE_KEY(): the key is
 * narrowed to 32 bits and the VLAN identifier bits are shifted out.
 */
#define	KEY_SAP(_key)	(((uint32_t)(uintptr_t)(_key)) >> VLAN_ID_SIZE)
70 
/*
 * Trim the message p down to pktsize bytes when it carries more data than
 * that. A pktsize of zero disables the check and leaves p untouched.
 *
 * The excess is removed by handing adjmsg() the (negative) difference
 * between pktsize and the message data size. Both arguments are evaluated
 * more than once, so they must be free of side effects; they are
 * parenthesized in the expansion so that arbitrary expressions are safe.
 *
 * Expands to a compound statement.
 */
#define	DLS_STRIP_PADDING(pktsize, p) {					\
	if ((pktsize) != 0) {						\
		ssize_t delta = (pktsize) - msgdsize((p));		\
									\
		if (delta < 0)						\
			(void) adjmsg((p), delta);			\
	}								\
}
79 
80 /*
81  * Private functions.
82  */
83 
84 /*ARGSUSED*/
85 static int
86 i_dls_link_constructor(void *buf, void *arg, int kmflag)
87 {
88 	dls_link_t	*dlp = buf;
89 	char		name[MAXNAMELEN];
90 
91 	bzero(buf, sizeof (dls_link_t));
92 
93 	(void) sprintf(name, "dls_link_t_%p_hash", buf);
94 	dlp->dl_impl_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
95 	    mod_hash_null_valdtor);
96 
97 	mutex_init(&dlp->dl_lock, NULL, MUTEX_DEFAULT, NULL);
98 	mutex_init(&dlp->dl_promisc_lock, NULL, MUTEX_DEFAULT, NULL);
99 	rw_init(&dlp->dl_impl_lock, NULL, RW_DEFAULT, NULL);
100 	return (0);
101 }
102 
103 /*ARGSUSED*/
104 static void
105 i_dls_link_destructor(void *buf, void *arg)
106 {
107 	dls_link_t	*dlp = buf;
108 
109 	ASSERT(dlp->dl_ref == 0);
110 	ASSERT(dlp->dl_mh == NULL);
111 	ASSERT(dlp->dl_unknowns == 0);
112 
113 	mod_hash_destroy_idhash(dlp->dl_impl_hash);
114 	dlp->dl_impl_hash = NULL;
115 
116 	mutex_destroy(&dlp->dl_lock);
117 	mutex_destroy(&dlp->dl_promisc_lock);
118 	rw_destroy(&dlp->dl_impl_lock);
119 }
120 
/*
 * Prepare the packet mp for delivery:
 * - Parse its MAC header into mhip via dls_link_header_info(); err receives
 *   the result.
 * - On success, strip the padding and advance b_rptr past the header.
 *   If the first mblk is shorter than the header, the whole message is
 *   first pulled up into a new message which replaces mp (the b_next
 *   linkage is carried over), so mp may be modified by this macro.
 *   A pullup failure sets err to EINVAL and leaves mp in place.
 *
 * When err is non-zero the packet has not been advanced past its header.
 *
 * The macro-local variables carry a dpp_ prefix so they cannot collide
 * with the caller's own names.
 *
 * We choose to use a macro for performance reasons.
 */
#define	DLS_PREPARE_PKT(dlp, mp, mhip, err) {				\
	mblk_t *dpp_nextp = (mp)->b_next;				\
	(err) = dls_link_header_info((dlp), (mp), (mhip));		\
	if ((err) == 0) {						\
		DLS_STRIP_PADDING((mhip)->mhi_pktsize, (mp));		\
		if (MBLKL((mp)) >= (mhip)->mhi_hdrsize) {		\
			(mp)->b_rptr += (mhip)->mhi_hdrsize;		\
		} else {						\
			mblk_t *dpp_pullp = msgpullup((mp), -1);	\
			if (dpp_pullp != NULL) {			\
				freemsg((mp));				\
				(mp) = dpp_pullp;			\
				(mp)->b_next = dpp_nextp;		\
				(mp)->b_rptr += (mhip)->mhi_hdrsize;	\
			} else {					\
				(err) = EINVAL;				\
			}						\
		}							\
	}								\
}
150 
151 /*
152  * Truncate the chain starting at mp such that all packets in the chain
153  * have identical source and destination addresses, saps, and tag types
154  * (see below).  It returns a pointer to the mblk following the chain,
155  * NULL if there is no further packet following the processed chain.
156  * The countp argument is set to the number of valid packets in the chain.
157  * Note that the whole MAC header (including the VLAN tag if any) in each
158  * packet will be stripped.
159  */
160 static mblk_t *
161 i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, const mac_header_info_t *mhip,
162     uint_t *countp)
163 {
164 	mblk_t		*prevp;
165 	uint_t		npacket = 1;
166 	size_t		addr_size = dlp->dl_mip->mi_addr_length;
167 	uint16_t	vid = VLAN_ID(mhip->mhi_tci);
168 	uint16_t	pri = VLAN_PRI(mhip->mhi_tci);
169 
170 	/*
171 	 * Compare with subsequent headers until we find one that has
172 	 * differing header information. After checking each packet
173 	 * strip padding and skip over the header.
174 	 */
175 	for (prevp = mp; (mp = mp->b_next) != NULL; prevp = mp) {
176 		mac_header_info_t cmhi;
177 		uint16_t cvid, cpri;
178 		int err;
179 
180 		DLS_PREPARE_PKT(dlp, mp, &cmhi, err);
181 		if (err != 0)
182 			break;
183 
184 		prevp->b_next = mp;
185 
186 		/*
187 		 * The source, destination, sap, and vlan id must all match
188 		 * in a given subchain.
189 		 */
190 		if (memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 ||
191 		    memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 ||
192 		    mhip->mhi_bindsap != cmhi.mhi_bindsap) {
193 			/*
194 			 * Note that we don't need to restore the padding.
195 			 */
196 			mp->b_rptr -= cmhi.mhi_hdrsize;
197 			break;
198 		}
199 
200 		cvid = VLAN_ID(cmhi.mhi_tci);
201 		cpri = VLAN_PRI(cmhi.mhi_tci);
202 
203 		/*
204 		 * There are several types of packets. Packets don't match
205 		 * if they are classified to different type or if they are
206 		 * VLAN packets but belong to different VLANs:
207 		 *
208 		 * packet type		tagged		vid		pri
209 		 * ---------------------------------------------------------
210 		 * untagged		No		zero		zero
211 		 * VLAN packets		Yes		non-zero	-
212 		 * priority tagged	Yes		zero		non-zero
213 		 * 0 tagged		Yes		zero		zero
214 		 */
215 		if ((mhip->mhi_istagged != cmhi.mhi_istagged) ||
216 		    (vid != cvid) || ((vid == VLAN_ID_NONE) &&
217 		    (((pri == 0) && (cpri != 0)) ||
218 		    ((pri != 0) && (cpri == 0))))) {
219 			mp->b_rptr -= cmhi.mhi_hdrsize;
220 			break;
221 		}
222 
223 		npacket++;
224 	}
225 
226 	/*
227 	 * Break the chain at this point and return a pointer to the next
228 	 * sub-chain.
229 	 */
230 	prevp->b_next = NULL;
231 	*countp = npacket;
232 	return (mp);
233 }
234 
235 static void
236 i_dls_head_hold(dls_head_t *dhp)
237 {
238 	atomic_inc_32(&dhp->dh_ref);
239 }
240 
241 static void
242 i_dls_head_rele(dls_head_t *dhp)
243 {
244 	atomic_dec_32(&dhp->dh_ref);
245 }
246 
247 static dls_head_t *
248 i_dls_head_alloc(mod_hash_key_t key)
249 {
250 	dls_head_t	*dhp;
251 
252 	dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
253 	dhp->dh_key = key;
254 	return (dhp);
255 }
256 
257 static void
258 i_dls_head_free(dls_head_t *dhp)
259 {
260 	ASSERT(dhp->dh_ref == 0);
261 	kmem_free(dhp, sizeof (dls_head_t));
262 }
263 
264 /*
265  * Try to send mp up to the streams of the given sap and vid. Return B_TRUE
266  * if this message is sent to any streams.
267  * Note that this function will copy the message chain and the original
268  * mp will remain valid after this function
269  */
270 static uint_t
271 i_dls_link_rx_func(dls_link_t *dlp, mac_resource_handle_t mrh,
272     mac_header_info_t *mhip, mblk_t *mp, uint32_t sap, uint16_t vid,
273     boolean_t (*acceptfunc)())
274 {
275 	mod_hash_t	*hash = dlp->dl_impl_hash;
276 	mod_hash_key_t	key;
277 	dls_head_t	*dhp;
278 	dls_impl_t	*dip;
279 	mblk_t		*nmp;
280 	dls_rx_t	di_rx;
281 	void		*di_rx_arg;
282 	uint_t		naccepted = 0;
283 
284 	/*
285 	 * Construct a hash key from the VLAN identifier and the
286 	 * DLSAP that represents dls_impl_t in promiscuous mode.
287 	 */
288 	key = MAKE_KEY(sap, vid);
289 
290 	/*
291 	 * Search the hash table for dls_impl_t eligible to receive
292 	 * a packet chain for this DLSAP/VLAN combination.
293 	 */
294 	rw_enter(&dlp->dl_impl_lock, RW_READER);
295 	if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
296 		rw_exit(&dlp->dl_impl_lock);
297 		return (B_FALSE);
298 	}
299 	i_dls_head_hold(dhp);
300 	rw_exit(&dlp->dl_impl_lock);
301 
302 	/*
303 	 * Find dls_impl_t that will accept the sub-chain.
304 	 */
305 	for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
306 		if (!acceptfunc(dip, mhip, &di_rx, &di_rx_arg))
307 			continue;
308 
309 		/*
310 		 * We have at least one acceptor.
311 		 */
312 		naccepted ++;
313 
314 		/*
315 		 * There will normally be at least more dls_impl_t
316 		 * (since we've yet to check for non-promiscuous
317 		 * dls_impl_t) so dup the sub-chain.
318 		 */
319 		if ((nmp = copymsgchain(mp)) != NULL)
320 			di_rx(di_rx_arg, mrh, nmp, mhip);
321 	}
322 
323 	/*
324 	 * Release the hold on the dls_impl_t chain now that we have
325 	 * finished walking it.
326 	 */
327 	i_dls_head_rele(dhp);
328 	return (naccepted);
329 }
330 
331 static void
332 i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
333 {
334 	dls_link_t			*dlp = arg;
335 	mod_hash_t			*hash = dlp->dl_impl_hash;
336 	mblk_t				*nextp;
337 	mac_header_info_t		mhi;
338 	dls_head_t			*dhp;
339 	dls_impl_t			*dip;
340 	dls_impl_t			*ndip;
341 	mblk_t				*nmp;
342 	mod_hash_key_t			key;
343 	uint_t				npacket;
344 	boolean_t			accepted;
345 	dls_rx_t			di_rx, ndi_rx;
346 	void				*di_rx_arg, *ndi_rx_arg;
347 	uint16_t			vid;
348 	int				err;
349 
350 	/*
351 	 * Walk the packet chain.
352 	 */
353 	for (; mp != NULL; mp = nextp) {
354 		/*
355 		 * Wipe the accepted state.
356 		 */
357 		accepted = B_FALSE;
358 
359 		DLS_PREPARE_PKT(dlp, mp, &mhi, err);
360 		if (err != 0) {
361 			atomic_add_32(&(dlp->dl_unknowns), 1);
362 			nextp = mp->b_next;
363 			freemsg(mp);
364 			continue;
365 		}
366 
367 		/*
368 		 * Grab the longest sub-chain we can process as a single
369 		 * unit.
370 		 */
371 		nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
372 		ASSERT(npacket != 0);
373 
374 		vid = VLAN_ID(mhi.mhi_tci);
375 
376 		if (mhi.mhi_istagged) {
377 			/*
378 			 * If it is tagged traffic, send it upstream to
379 			 * all dls_impl_t which are attached to the physical
380 			 * link and bound to SAP 0x8100.
381 			 */
382 			if (i_dls_link_rx_func(dlp, mrh, &mhi, mp,
383 			    ETHERTYPE_VLAN, VLAN_ID_NONE, dls_accept) > 0) {
384 				accepted = B_TRUE;
385 			}
386 
387 			/*
388 			 * Don't pass the packets up if they are tagged
389 			 * packets and:
390 			 *  - their VID and priority are both zero (invalid
391 			 *    packets).
392 			 *  - their sap is ETHERTYPE_VLAN and their VID is
393 			 *    zero as they have already been sent upstreams.
394 			 */
395 			if ((vid == VLAN_ID_NONE &&
396 			    VLAN_PRI(mhi.mhi_tci) == 0) ||
397 			    (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
398 			    vid == VLAN_ID_NONE)) {
399 				freemsgchain(mp);
400 				goto loop;
401 			}
402 		}
403 
404 		/*
405 		 * Construct a hash key from the VLAN identifier and the
406 		 * DLSAP.
407 		 */
408 		key = MAKE_KEY(mhi.mhi_bindsap, vid);
409 
410 		/*
411 		 * Search the has table for dls_impl_t eligible to receive
412 		 * a packet chain for this DLSAP/VLAN combination.
413 		 */
414 		rw_enter(&dlp->dl_impl_lock, RW_READER);
415 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
416 			rw_exit(&dlp->dl_impl_lock);
417 			freemsgchain(mp);
418 			goto loop;
419 		}
420 		i_dls_head_hold(dhp);
421 		rw_exit(&dlp->dl_impl_lock);
422 
423 		/*
424 		 * Find the first dls_impl_t that will accept the sub-chain.
425 		 */
426 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
427 			if (dls_accept(dip, &mhi, &di_rx, &di_rx_arg))
428 				break;
429 
430 		/*
431 		 * If we did not find any dls_impl_t willing to accept the
432 		 * sub-chain then throw it away.
433 		 */
434 		if (dip == NULL) {
435 			i_dls_head_rele(dhp);
436 			freemsgchain(mp);
437 			goto loop;
438 		}
439 
440 		/*
441 		 * We have at least one acceptor.
442 		 */
443 		accepted = B_TRUE;
444 		for (;;) {
445 			/*
446 			 * Find the next dls_impl_t that will accept the
447 			 * sub-chain.
448 			 */
449 			for (ndip = dip->di_nextp; ndip != NULL;
450 			    ndip = ndip->di_nextp)
451 				if (dls_accept(ndip, &mhi, &ndi_rx,
452 				    &ndi_rx_arg))
453 					break;
454 
455 			/*
456 			 * If there are no more dls_impl_t that are willing
457 			 * to accept the sub-chain then we don't need to dup
458 			 * it before handing it to the current one.
459 			 */
460 			if (ndip == NULL) {
461 				di_rx(di_rx_arg, mrh, mp, &mhi);
462 
463 				/*
464 				 * Since there are no more dls_impl_t, we're
465 				 * done.
466 				 */
467 				break;
468 			}
469 
470 			/*
471 			 * There are more dls_impl_t so dup the sub-chain.
472 			 */
473 			if ((nmp = copymsgchain(mp)) != NULL)
474 				di_rx(di_rx_arg, mrh, nmp, &mhi);
475 
476 			dip = ndip;
477 			di_rx = ndi_rx;
478 			di_rx_arg = ndi_rx_arg;
479 		}
480 
481 		/*
482 		 * Release the hold on the dls_impl_t chain now that we have
483 		 * finished walking it.
484 		 */
485 		i_dls_head_rele(dhp);
486 
487 loop:
488 		/*
489 		 * If there were no acceptors then add the packet count to the
490 		 * 'unknown' count.
491 		 */
492 		if (!accepted)
493 			atomic_add_32(&(dlp->dl_unknowns), npacket);
494 	}
495 }
496 
497 /*
498  * Try to send mp up to the DLS_SAP_PROMISC listeners. Return B_TRUE if this
499  * message is sent to any streams.
500  */
501 static uint_t
502 i_dls_link_rx_common_promisc(dls_link_t *dlp, mac_resource_handle_t mrh,
503     mac_header_info_t *mhip, mblk_t *mp, uint16_t vid,
504     boolean_t (*acceptfunc)())
505 {
506 	uint_t naccepted;
507 
508 	naccepted = i_dls_link_rx_func(dlp, mrh, mhip, mp, DLS_SAP_PROMISC,
509 	    vid, acceptfunc);
510 
511 	if (vid != VLAN_ID_NONE) {
512 		naccepted += i_dls_link_rx_func(dlp, mrh, mhip, mp,
513 		    DLS_SAP_PROMISC, VLAN_ID_NONE, acceptfunc);
514 	}
515 	return (naccepted);
516 }
517 
518 static void
519 i_dls_link_rx_common(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
520     boolean_t (*acceptfunc)())
521 {
522 	dls_link_t			*dlp = arg;
523 	mod_hash_t			*hash = dlp->dl_impl_hash;
524 	mblk_t				*nextp;
525 	mac_header_info_t		mhi;
526 	uint16_t			vid, vidkey, pri;
527 	dls_head_t			*dhp;
528 	dls_impl_t			*dip;
529 	mblk_t				*nmp;
530 	mod_hash_key_t			key;
531 	uint_t				npacket;
532 	uint32_t			sap;
533 	boolean_t			accepted;
534 	dls_rx_t			di_rx, fdi_rx;
535 	void				*di_rx_arg, *fdi_rx_arg;
536 	boolean_t			pass2;
537 	int				err;
538 
539 	/*
540 	 * Walk the packet chain.
541 	 */
542 	for (; mp != NULL; mp = nextp) {
543 		/*
544 		 * Wipe the accepted state and the receive information of
545 		 * the first eligible dls_impl_t.
546 		 */
547 		accepted = B_FALSE;
548 		pass2 = B_FALSE;
549 		fdi_rx = NULL;
550 		fdi_rx_arg = NULL;
551 
552 		DLS_PREPARE_PKT(dlp, mp, &mhi, err);
553 		if (err != 0) {
554 			if (acceptfunc == dls_accept)
555 				atomic_add_32(&(dlp->dl_unknowns), 1);
556 			nextp = mp->b_next;
557 			freemsg(mp);
558 			continue;
559 		}
560 
561 		/*
562 		 * Grab the longest sub-chain we can process as a single
563 		 * unit.
564 		 */
565 		nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
566 		ASSERT(npacket != 0);
567 
568 		vid = VLAN_ID(mhi.mhi_tci);
569 		pri = VLAN_PRI(mhi.mhi_tci);
570 
571 		vidkey = vid;
572 
573 		/*
574 		 * Note that we need to first send to the dls_impl_t
575 		 * in promiscuous mode in order to avoid the packet reordering
576 		 * when snooping.
577 		 */
578 		if (i_dls_link_rx_common_promisc(dlp, mrh, &mhi, mp, vidkey,
579 		    acceptfunc) > 0) {
580 			accepted = B_TRUE;
581 		}
582 
583 		/*
584 		 * Non promisc case. Two passes:
585 		 *   1. send tagged packets to ETHERTYPE_VLAN listeners
586 		 *   2. send packets to listeners bound to the specific SAP.
587 		 */
588 		if (mhi.mhi_istagged) {
589 			vidkey = VLAN_ID_NONE;
590 			sap = ETHERTYPE_VLAN;
591 		} else {
592 			goto non_promisc_loop;
593 		}
594 non_promisc:
595 		/*
596 		 * Construct a hash key from the VLAN identifier and the
597 		 * DLSAP.
598 		 */
599 		key = MAKE_KEY(sap, vidkey);
600 
601 		/*
602 		 * Search the has table for dls_impl_t eligible to receive
603 		 * a packet chain for this DLSAP/VLAN combination.
604 		 */
605 		rw_enter(&dlp->dl_impl_lock, RW_READER);
606 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
607 			rw_exit(&dlp->dl_impl_lock);
608 			goto non_promisc_loop;
609 		}
610 		i_dls_head_hold(dhp);
611 		rw_exit(&dlp->dl_impl_lock);
612 
613 		/*
614 		 * Find the first dls_impl_t that will accept the sub-chain.
615 		 */
616 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
617 			if (!acceptfunc(dip, &mhi, &di_rx, &di_rx_arg))
618 				continue;
619 
620 			accepted = B_TRUE;
621 
622 			/*
623 			 * To avoid the extra copymsgchain(), if this
624 			 * is the first eligible dls_impl_t, remember required
625 			 * information and send up the message afterwards.
626 			 */
627 			if (fdi_rx == NULL) {
628 				fdi_rx = di_rx;
629 				fdi_rx_arg = di_rx_arg;
630 				continue;
631 			}
632 
633 			if ((nmp = copymsgchain(mp)) != NULL)
634 				di_rx(di_rx_arg, mrh, nmp, &mhi);
635 		}
636 
637 		/*
638 		 * Release the hold on the dls_impl_t chain now that we have
639 		 * finished walking it.
640 		 */
641 		i_dls_head_rele(dhp);
642 
643 non_promisc_loop:
644 		/*
645 		 * Don't pass the packets up again if:
646 		 * - First pass is done and the packets are tagged and their:
647 		 *	- VID and priority are both zero (invalid packets).
648 		 *	- their sap is ETHERTYPE_VLAN and their VID is zero
649 		 *	  (they have already been sent upstreams).
650 		 *  - Second pass is done:
651 		 */
652 		if (pass2 || (mhi.mhi_istagged &&
653 		    ((vid == VLAN_ID_NONE && pri == 0) ||
654 		    (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
655 		    vid == VLAN_ID_NONE)))) {
656 			/*
657 			 * Send the message up to the first eligible dls_impl_t.
658 			 */
659 			if (fdi_rx != NULL)
660 				fdi_rx(fdi_rx_arg, mrh, mp, &mhi);
661 			else
662 				freemsgchain(mp);
663 		} else {
664 			vidkey = vid;
665 			sap = mhi.mhi_bindsap;
666 			pass2 = B_TRUE;
667 			goto non_promisc;
668 		}
669 
670 		/*
671 		 * If there were no acceptors then add the packet count to the
672 		 * 'unknown' count.
673 		 */
674 		if (!accepted && (acceptfunc == dls_accept))
675 			atomic_add_32(&(dlp->dl_unknowns), npacket);
676 	}
677 }
678 
679 static void
680 i_dls_link_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
681 {
682 	i_dls_link_rx_common(arg, mrh, mp, dls_accept);
683 }
684 
685 static void
686 i_dls_link_txloop(void *arg, mblk_t *mp)
687 {
688 	i_dls_link_rx_common(arg, NULL, mp, dls_accept_loopback);
689 }
690 
691 /*ARGSUSED*/
692 static uint_t
693 i_dls_link_walk(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
694 {
695 	boolean_t	*promiscp = arg;
696 	uint32_t	sap = KEY_SAP(key);
697 
698 	if (sap == DLS_SAP_PROMISC) {
699 		*promiscp = B_TRUE;
700 		return (MH_WALK_TERMINATE);
701 	}
702 
703 	return (MH_WALK_CONTINUE);
704 }
705 
706 static int
707 i_dls_link_create(const char *name, uint_t ddi_instance, dls_link_t **dlpp)
708 {
709 	dls_link_t		*dlp;
710 
711 	/*
712 	 * Allocate a new dls_link_t structure.
713 	 */
714 	dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
715 
716 	/*
717 	 * Name the dls_link_t after the MAC interface it represents.
718 	 */
719 	(void) strlcpy(dlp->dl_name, name, sizeof (dlp->dl_name));
720 	dlp->dl_ddi_instance = ddi_instance;
721 
722 	/*
723 	 * Set the packet loopback function for use when the MAC is in
724 	 * promiscuous mode, and initialize promiscuous bookeeping fields.
725 	 */
726 	dlp->dl_txloop = i_dls_link_txloop;
727 	dlp->dl_npromisc = 0;
728 	dlp->dl_mth = NULL;
729 
730 	*dlpp = dlp;
731 	return (0);
732 }
733 
734 static void
735 i_dls_link_destroy(dls_link_t *dlp)
736 {
737 	ASSERT(dlp->dl_npromisc == 0);
738 	ASSERT(dlp->dl_nactive == 0);
739 	ASSERT(dlp->dl_mth == NULL);
740 	ASSERT(dlp->dl_macref == 0);
741 	ASSERT(dlp->dl_mh == NULL);
742 	ASSERT(dlp->dl_mip == NULL);
743 	ASSERT(dlp->dl_impl_count == 0);
744 	ASSERT(dlp->dl_mrh == NULL);
745 
746 	/*
747 	 * Free the structure back to the cache.
748 	 */
749 	dlp->dl_unknowns = 0;
750 	kmem_cache_free(i_dls_link_cachep, dlp);
751 }
752 
753 /*
754  * Module initialization functions.
755  */
756 
757 void
758 dls_link_init(void)
759 {
760 	/*
761 	 * Create a kmem_cache of dls_link_t structures.
762 	 */
763 	i_dls_link_cachep = kmem_cache_create("dls_link_cache",
764 	    sizeof (dls_link_t), 0, i_dls_link_constructor,
765 	    i_dls_link_destructor, NULL, NULL, NULL, 0);
766 	ASSERT(i_dls_link_cachep != NULL);
767 
768 	/*
769 	 * Create a dls_link_t hash table and associated lock.
770 	 */
771 	i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
772 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
773 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
774 	rw_init(&i_dls_link_lock, NULL, RW_DEFAULT, NULL);
775 	i_dls_link_count = 0;
776 }
777 
778 int
779 dls_link_fini(void)
780 {
781 	if (i_dls_link_count > 0)
782 		return (EBUSY);
783 
784 	/*
785 	 * Destroy the kmem_cache.
786 	 */
787 	kmem_cache_destroy(i_dls_link_cachep);
788 
789 	/*
790 	 * Destroy the hash table and associated lock.
791 	 */
792 	mod_hash_destroy_hash(i_dls_link_hash);
793 	rw_destroy(&i_dls_link_lock);
794 	return (0);
795 }
796 
797 /*
798  * Exported functions.
799  */
800 
801 int
802 dls_link_hold(const char *name, uint_t ddi_instance, dls_link_t **dlpp)
803 {
804 	dls_link_t		*dlp;
805 	int			err;
806 
807 	/*
808 	 * Look up a dls_link_t corresponding to the given mac_handle_t
809 	 * in the global hash table. We need to hold i_dls_link_lock in
810 	 * order to atomically find and insert a dls_link_t into the
811 	 * hash table.
812 	 */
813 	rw_enter(&i_dls_link_lock, RW_WRITER);
814 	if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
815 	    (mod_hash_val_t *)&dlp)) == 0)
816 		goto done;
817 
818 	ASSERT(err == MH_ERR_NOTFOUND);
819 
820 	/*
821 	 * We didn't find anything so we need to create one.
822 	 */
823 	if ((err = i_dls_link_create(name, ddi_instance, &dlp)) != 0) {
824 		rw_exit(&i_dls_link_lock);
825 		return (err);
826 	}
827 
828 	/*
829 	 * Insert the dls_link_t.
830 	 */
831 	err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)name,
832 	    (mod_hash_val_t)dlp);
833 	ASSERT(err == 0);
834 
835 	i_dls_link_count++;
836 	ASSERT(i_dls_link_count != 0);
837 
838 done:
839 	/*
840 	 * Bump the reference count and hand back the reference.
841 	 */
842 	dlp->dl_ref++;
843 	*dlpp = dlp;
844 	rw_exit(&i_dls_link_lock);
845 	return (0);
846 }
847 
848 void
849 dls_link_rele(dls_link_t *dlp)
850 {
851 	mod_hash_val_t	val;
852 
853 	rw_enter(&i_dls_link_lock, RW_WRITER);
854 
855 	/*
856 	 * Check if there are any more references.
857 	 */
858 	if (--dlp->dl_ref != 0) {
859 		/*
860 		 * There are more references so there's nothing more to do.
861 		 */
862 		goto done;
863 	}
864 
865 	(void) mod_hash_remove(i_dls_link_hash,
866 	    (mod_hash_key_t)dlp->dl_name, &val);
867 	ASSERT(dlp == (dls_link_t *)val);
868 
869 	/*
870 	 * Destroy the dls_link_t.
871 	 */
872 	i_dls_link_destroy(dlp);
873 	ASSERT(i_dls_link_count > 0);
874 	i_dls_link_count--;
875 done:
876 	rw_exit(&i_dls_link_lock);
877 }
878 
879 int
880 dls_mac_hold(dls_link_t *dlp)
881 {
882 	int err = 0;
883 
884 	mutex_enter(&dlp->dl_lock);
885 
886 	ASSERT(IMPLY(dlp->dl_macref != 0, dlp->dl_mh != NULL));
887 	ASSERT(IMPLY(dlp->dl_macref == 0, dlp->dl_mh == NULL));
888 
889 	if (dlp->dl_macref == 0) {
890 		/*
891 		 * First reference; hold open the MAC interface.
892 		 */
893 		err = mac_open(dlp->dl_name, dlp->dl_ddi_instance, &dlp->dl_mh);
894 		if (err != 0)
895 			goto done;
896 
897 		dlp->dl_mip = mac_info(dlp->dl_mh);
898 	}
899 
900 	dlp->dl_macref++;
901 done:
902 	mutex_exit(&dlp->dl_lock);
903 	return (err);
904 }
905 
906 void
907 dls_mac_rele(dls_link_t *dlp)
908 {
909 	mutex_enter(&dlp->dl_lock);
910 	ASSERT(dlp->dl_mh != NULL);
911 
912 	if (--dlp->dl_macref == 0) {
913 		mac_close(dlp->dl_mh);
914 		dlp->dl_mh = NULL;
915 		dlp->dl_mip = NULL;
916 	}
917 	mutex_exit(&dlp->dl_lock);
918 }
919 
920 void
921 dls_link_add(dls_link_t *dlp, uint32_t sap, dls_impl_t *dip)
922 {
923 	dls_vlan_t	*dvp = dip->di_dvp;
924 	mod_hash_t	*hash = dlp->dl_impl_hash;
925 	mod_hash_key_t	key;
926 	dls_head_t	*dhp;
927 	dls_impl_t	*p;
928 	mac_rx_t	rx;
929 	int		err;
930 	boolean_t	promisc = B_FALSE;
931 
932 	/*
933 	 * Generate a hash key based on the sap and the VLAN id.
934 	 */
935 	key = MAKE_KEY(sap, dvp->dv_id);
936 
937 	/*
938 	 * We need dl_lock here because we want to be able to walk
939 	 * the hash table *and* set the mac rx func atomically. if
940 	 * these two operations are separate, someone else could
941 	 * insert/remove dls_impl_t from the hash table after we
942 	 * drop the hash lock and this could cause our chosen rx
943 	 * func to be incorrect. note that we cannot call mac_rx_add
944 	 * when holding the hash lock because this can cause deadlock.
945 	 */
946 	mutex_enter(&dlp->dl_lock);
947 
948 	/*
949 	 * Search the table for a list head with this key.
950 	 */
951 	rw_enter(&dlp->dl_impl_lock, RW_WRITER);
952 
953 	if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
954 		ASSERT(err == MH_ERR_NOTFOUND);
955 
956 		dhp = i_dls_head_alloc(key);
957 		err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
958 		ASSERT(err == 0);
959 	}
960 
961 	/*
962 	 * Add the dls_impl_t to the head of the list.
963 	 */
964 	ASSERT(dip->di_nextp == NULL);
965 	p = dhp->dh_list;
966 	dip->di_nextp = p;
967 	dhp->dh_list = dip;
968 
969 	/*
970 	 * Save a pointer to the list head.
971 	 */
972 	dip->di_headp = dhp;
973 	dlp->dl_impl_count++;
974 
975 	/*
976 	 * Walk the bound dls_impl_t to see if there are any
977 	 * in promiscuous 'all sap' mode.
978 	 */
979 	mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
980 	rw_exit(&dlp->dl_impl_lock);
981 
982 	/*
983 	 * If there are then we need to use a receive routine
984 	 * which will route packets to those dls_impl_t as well
985 	 * as ones bound to the  DLSAP of the packet.
986 	 */
987 	if (promisc)
988 		rx = i_dls_link_rx_promisc;
989 	else
990 		rx = i_dls_link_rx;
991 
992 	/* Replace the existing receive function if there is one. */
993 	if (dlp->dl_mrh != NULL)
994 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
995 	dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
996 	mutex_exit(&dlp->dl_lock);
997 }
998 
999 void
1000 dls_link_remove(dls_link_t *dlp, dls_impl_t *dip)
1001 {
1002 	mod_hash_t	*hash = dlp->dl_impl_hash;
1003 	dls_impl_t	**pp;
1004 	dls_impl_t	*p;
1005 	dls_head_t	*dhp;
1006 	mac_rx_t	rx;
1007 
1008 	/*
1009 	 * We need dl_lock here because we want to be able to walk
1010 	 * the hash table *and* set the mac rx func atomically. if
1011 	 * these two operations are separate, someone else could
1012 	 * insert/remove dls_impl_t from the hash table after we
1013 	 * drop the hash lock and this could cause our chosen rx
1014 	 * func to be incorrect. note that we cannot call mac_rx_add
1015 	 * when holding the hash lock because this can cause deadlock.
1016 	 */
1017 	mutex_enter(&dlp->dl_lock);
1018 	rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1019 
1020 	/*
1021 	 * Poll the hash table entry until all references have been dropped.
1022 	 * We need to drop all locks before sleeping because we don't want
1023 	 * the interrupt handler to block. We set di_removing here to
1024 	 * tell the receive callbacks not to pass up packets anymore.
1025 	 * This is only a hint to quicken the decrease of the refcnt so
1026 	 * the assignment need not be protected by any lock.
1027 	 */
1028 	dhp = dip->di_headp;
1029 	dip->di_removing = B_TRUE;
1030 	while (dhp->dh_ref != 0) {
1031 		rw_exit(&dlp->dl_impl_lock);
1032 		mutex_exit(&dlp->dl_lock);
1033 		delay(drv_usectohz(1000));	/* 1ms delay */
1034 		mutex_enter(&dlp->dl_lock);
1035 		rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1036 	}
1037 
1038 	/*
1039 	 * Walk the list and remove the dls_impl_t.
1040 	 */
1041 	for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->di_nextp)) {
1042 		if (p == dip)
1043 			break;
1044 	}
1045 	ASSERT(p != NULL);
1046 	*pp = p->di_nextp;
1047 	p->di_nextp = NULL;
1048 
1049 	ASSERT(dlp->dl_impl_count > 0);
1050 	dlp->dl_impl_count--;
1051 
1052 	if (dhp->dh_list == NULL) {
1053 		mod_hash_val_t	val = NULL;
1054 
1055 		/*
1056 		 * The list is empty so remove the hash table entry.
1057 		 */
1058 		(void) mod_hash_remove(hash, dhp->dh_key, &val);
1059 		ASSERT(dhp == (dls_head_t *)val);
1060 		i_dls_head_free(dhp);
1061 	}
1062 	dip->di_removing = B_FALSE;
1063 
1064 	/*
1065 	 * If there are no dls_impl_t then there's no need to register a
1066 	 * receive function with the mac.
1067 	 */
1068 	if (dlp->dl_impl_count == 0) {
1069 		rw_exit(&dlp->dl_impl_lock);
1070 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1071 		dlp->dl_mrh = NULL;
1072 	} else {
1073 		boolean_t promisc = B_FALSE;
1074 
1075 		/*
1076 		 * Walk the bound dls_impl_t to see if there are any
1077 		 * in promiscuous 'all sap' mode.
1078 		 */
1079 		mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
1080 		rw_exit(&dlp->dl_impl_lock);
1081 
1082 		/*
1083 		 * If there are then we need to use a receive routine
1084 		 * which will route packets to those dls_impl_t as well
1085 		 * as ones bound to the  DLSAP of the packet.
1086 		 */
1087 		if (promisc)
1088 			rx = i_dls_link_rx_promisc;
1089 		else
1090 			rx = i_dls_link_rx;
1091 
1092 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1093 		dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
1094 	}
1095 	mutex_exit(&dlp->dl_lock);
1096 }
1097 
1098 int
1099 dls_link_header_info(dls_link_t *dlp, mblk_t *mp, mac_header_info_t *mhip)
1100 {
1101 	boolean_t	is_ethernet = (dlp->dl_mip->mi_media == DL_ETHER);
1102 	int		err = 0;
1103 
1104 	/*
1105 	 * Packets should always be at least 16 bit aligned.
1106 	 */
1107 	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
1108 
1109 	if ((err = mac_header_info(dlp->dl_mh, mp, mhip)) != 0)
1110 		return (err);
1111 
1112 	/*
1113 	 * If this is a VLAN-tagged Ethernet packet, then the SAP in the
1114 	 * mac_header_info_t as returned by mac_header_info() is
1115 	 * ETHERTYPE_VLAN. We need to grab the ethertype from the VLAN header.
1116 	 */
1117 	if (is_ethernet && (mhip->mhi_bindsap == ETHERTYPE_VLAN)) {
1118 		struct ether_vlan_header *evhp;
1119 		uint16_t sap;
1120 		mblk_t *tmp = NULL;
1121 		size_t size;
1122 
1123 		size = sizeof (struct ether_vlan_header);
1124 		if (MBLKL(mp) < size) {
1125 			/*
1126 			 * Pullup the message in order to get the MAC header
1127 			 * infomation. Note that this is a read-only function,
1128 			 * we keep the input packet intact.
1129 			 */
1130 			if ((tmp = msgpullup(mp, size)) == NULL)
1131 				return (EINVAL);
1132 
1133 			mp = tmp;
1134 		}
1135 		evhp = (struct ether_vlan_header *)mp->b_rptr;
1136 		sap = ntohs(evhp->ether_type);
1137 		(void) mac_sap_verify(dlp->dl_mh, sap, &mhip->mhi_bindsap);
1138 		mhip->mhi_hdrsize = sizeof (struct ether_vlan_header);
1139 		mhip->mhi_tci = ntohs(evhp->ether_tci);
1140 		mhip->mhi_istagged = B_TRUE;
1141 		freemsg(tmp);
1142 
1143 		if (VLAN_CFI(mhip->mhi_tci) != ETHER_CFI)
1144 			return (EINVAL);
1145 	} else {
1146 		mhip->mhi_istagged = B_FALSE;
1147 		mhip->mhi_tci = 0;
1148 	}
1149 	return (0);
1150 }
1151