xref: /titanic_41/usr/src/uts/common/io/dls/dls_link.c (revision a6d42e7d71324c5193c3b94d57d96ba2925d52e1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Data-Link Services Module
30  */
31 
32 #include	<sys/types.h>
33 #include	<sys/stream.h>
34 #include	<sys/strsun.h>
35 #include	<sys/strsubr.h>
36 #include	<sys/sysmacros.h>
37 #include	<sys/atomic.h>
38 #include	<sys/modhash.h>
39 #include	<sys/dlpi.h>
40 #include	<sys/ethernet.h>
41 #include	<sys/byteorder.h>
42 #include	<sys/vlan.h>
43 #include	<sys/mac.h>
44 #include	<sys/sdt.h>
45 
46 #include	<sys/dls.h>
47 #include	<sys/dld_impl.h>
48 #include	<sys/dls_impl.h>
49 
50 static kmem_cache_t	*i_dls_link_cachep;
51 static mod_hash_t	*i_dls_link_hash;
52 static uint_t		i_dls_link_count;
53 static krwlock_t	i_dls_link_lock;
54 
55 #define		LINK_HASHSZ	67	/* prime */
56 #define		IMPL_HASHSZ	67	/* prime */
57 
/*
 * Build the hash key for a DLSAP value / VLAN identifier pair: the DLSAP
 * value is shifted left by VLAN_ID_SIZE bits and OR-ed with the VLAN
 * identifier masked by VLAN_ID_MASK.
 */
#define	MAKE_KEY(_sap, _vid)						\
	((mod_hash_key_t)(uintptr_t)					\
	(((_sap) << VLAN_ID_SIZE) | ((_vid) & VLAN_ID_MASK)))
64 
/*
 * Recover the DLSAP value from a hash key by truncating the key to 32 bits
 * and shifting out the low VLAN_ID_SIZE bits.
 */
#define	KEY_SAP(_key)	((uint32_t)(uintptr_t)(_key) >> VLAN_ID_SIZE)
70 
/*
 * Trim trailing bytes from message p so that it carries no more than
 * pktsize bytes of data.  Nothing is done when pktsize is 0 or when the
 * message is not longer than pktsize; otherwise adjmsg() is called with
 * the (negative) difference.
 *
 * Every use of a macro parameter is parenthesized so that expression
 * arguments expand safely, and the local is given a macro-specific name so
 * that it cannot capture an identifier used in the arguments.  Note that
 * both arguments are evaluated more than once.
 */
#define	DLS_STRIP_PADDING(pktsize, p) {					\
	if ((pktsize) != 0) {						\
		ssize_t dsp_delta = (pktsize) - msgdsize((p));		\
									\
		if (dsp_delta < 0)					\
			(void) adjmsg((p), dsp_delta);			\
	}								\
}
79 
80 /*
81  * Private functions.
82  */
83 
84 /*ARGSUSED*/
85 static int
86 i_dls_link_constructor(void *buf, void *arg, int kmflag)
87 {
88 	dls_link_t	*dlp = buf;
89 	char		name[MAXNAMELEN];
90 
91 	bzero(buf, sizeof (dls_link_t));
92 
93 	(void) snprintf(name, MAXNAMELEN, "dls_link_t_%p_hash", buf);
94 	dlp->dl_impl_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
95 	    mod_hash_null_valdtor);
96 
97 	mutex_init(&dlp->dl_lock, NULL, MUTEX_DEFAULT, NULL);
98 	mutex_init(&dlp->dl_promisc_lock, NULL, MUTEX_DEFAULT, NULL);
99 	rw_init(&dlp->dl_impl_lock, NULL, RW_DEFAULT, NULL);
100 	return (0);
101 }
102 
103 /*ARGSUSED*/
104 static void
105 i_dls_link_destructor(void *buf, void *arg)
106 {
107 	dls_link_t	*dlp = buf;
108 
109 	ASSERT(dlp->dl_ref == 0);
110 	ASSERT(dlp->dl_mh == NULL);
111 	ASSERT(dlp->dl_unknowns == 0);
112 
113 	mod_hash_destroy_idhash(dlp->dl_impl_hash);
114 	dlp->dl_impl_hash = NULL;
115 
116 	mutex_destroy(&dlp->dl_lock);
117 	mutex_destroy(&dlp->dl_promisc_lock);
118 	rw_destroy(&dlp->dl_impl_lock);
119 }
120 
/*
 * - Parse the mac header information of the given packet.
 * - Strip the padding and skip over the header. Note that because some
 *   DLS consumers only check the db_ref count of the first mblk, we
 *   pullup the message into a single mblk. Because the original message
 *   is freed as the result of message pulling up, dls_link_header_info()
 *   is called again to update the mhi_saddr and mhi_daddr pointers in the
 *   mhip. Further, the dls_link_header_info() function ensures that the
 *   size of the pulled message is greater than the MAC header size,
 *   therefore we can directly advance b_rptr to point at the payload.
 *
 * Arguments:
 *   dlp  - the dls_link_t the packet was received on.
 *   mp   - the packet; must be an lvalue because it is reassigned to the
 *          pulled-up copy when the first mblk is shorter than the header.
 *          The b_next linkage of the original is carried over to the copy.
 *   mhip - mac_header_info_t filled in by dls_link_header_info().
 *   err  - lvalue that receives 0 on success, the error returned by
 *          dls_link_header_info(), or EINVAL when msgpullup() fails.
 *
 * The pull-up is only performed when the first mblk holds fewer than
 * mhi_hdrsize bytes.  On failure b_rptr is not advanced and mp still
 * refers to the original message.
 *
 * Note that the macro declares a local named nextp in its own scope and
 * evaluates its arguments several times.
 *
 * We choose to use a macro for performance reasons.
 */
#define	DLS_PREPARE_PKT(dlp, mp, mhip, err) {				\
	mblk_t *nextp = (mp)->b_next;					\
	if (((err) = dls_link_header_info((dlp), (mp), (mhip))) == 0) {	\
		DLS_STRIP_PADDING((mhip)->mhi_pktsize, (mp));		\
		if (MBLKL((mp)) < (mhip)->mhi_hdrsize) {		\
			mblk_t *newmp;					\
			if ((newmp = msgpullup((mp), -1)) == NULL) {	\
				(err) = EINVAL;				\
			} else {					\
				(mp)->b_next = NULL;			\
				freemsg((mp));				\
				(mp) = newmp;				\
				VERIFY(dls_link_header_info((dlp),	\
				    (mp), (mhip)) == 0);		\
				(mp)->b_next = nextp;			\
				(mp)->b_rptr += (mhip)->mhi_hdrsize;	\
			}						\
		} else {						\
			(mp)->b_rptr += (mhip)->mhi_hdrsize;		\
		}							\
	}								\
}
156 
/*
 * Truncate the chain starting at mp such that all packets in the chain
 * have identical source and destination addresses, saps, and tag types
 * (see below).  It returns a pointer to the mblk following the chain,
 * NULL if there is no further packet following the processed chain.
 * The countp argument is set to the number of valid packets in the chain.
 * Note that the whole MAC header (including the VLAN tag if any) in each
 * packet will be stripped.
 *
 * mhip is the header information of the first packet (mp); every following
 * packet is parsed with DLS_PREPARE_PKT() and compared against it.  The
 * first packet itself is not parsed again here and is always counted, so
 * *countp is at least 1.
 */
static mblk_t *
i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, const mac_header_info_t *mhip,
    uint_t *countp)
{
	mblk_t		*prevp;
	uint_t		npacket = 1;
	size_t		addr_size = dlp->dl_mip->mi_addr_length;
	uint16_t	vid = VLAN_ID(mhip->mhi_tci);
	uint16_t	pri = VLAN_PRI(mhip->mhi_tci);

	/*
	 * Compare with subsequent headers until we find one that has
	 * differing header information. After checking each packet
	 * strip padding and skip over the header.
	 */
	for (prevp = mp; (mp = mp->b_next) != NULL; prevp = mp) {
		mac_header_info_t cmhi;
		uint16_t cvid, cpri;
		int err;

		/*
		 * A packet that cannot be prepared ends the sub-chain; it
		 * becomes the head of the remainder returned to the caller.
		 */
		DLS_PREPARE_PKT(dlp, mp, &cmhi, err);
		if (err != 0)
			break;

		/*
		 * DLS_PREPARE_PKT() may have replaced mp with a pulled-up
		 * copy, so relink it behind the previous packet.
		 */
		prevp->b_next = mp;

		/*
		 * The source, destination, sap, vlan id and the MSGNOLOOP
		 * flag must all match in a given subchain.
		 */
		if (memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 ||
		    memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 ||
		    mhip->mhi_bindsap != cmhi.mhi_bindsap ||
		    mhip->mhi_prom_looped != cmhi.mhi_prom_looped) {
			/*
			 * Undo the header skip so that the packet can be
			 * parsed again as the head of the next sub-chain.
			 * Note that we don't need to restore the padding.
			 */
			mp->b_rptr -= cmhi.mhi_hdrsize;
			break;
		}

		cvid = VLAN_ID(cmhi.mhi_tci);
		cpri = VLAN_PRI(cmhi.mhi_tci);

		/*
		 * There are several types of packets. Packets don't match
		 * if they are classified to different type or if they are
		 * VLAN packets but belong to different VLANs:
		 *
		 * packet type		tagged		vid		pri
		 * ---------------------------------------------------------
		 * untagged		No		zero		zero
		 * VLAN packets		Yes		non-zero	-
		 * priority tagged	Yes		zero		non-zero
		 * 0 tagged		Yes		zero		zero
		 */
		if ((mhip->mhi_istagged != cmhi.mhi_istagged) ||
		    (vid != cvid) || ((vid == VLAN_ID_NONE) &&
		    (((pri == 0) && (cpri != 0)) ||
		    ((pri != 0) && (cpri == 0))))) {
			/* Same restoration as above for a type mismatch. */
			mp->b_rptr -= cmhi.mhi_hdrsize;
			break;
		}

		npacket++;
	}

	/*
	 * Break the chain at this point and return a pointer to the next
	 * sub-chain.
	 */
	prevp->b_next = NULL;
	*countp = npacket;
	return (mp);
}
241 
242 static void
243 i_dls_head_hold(dls_head_t *dhp)
244 {
245 	atomic_inc_32(&dhp->dh_ref);
246 }
247 
248 static void
249 i_dls_head_rele(dls_head_t *dhp)
250 {
251 	atomic_dec_32(&dhp->dh_ref);
252 }
253 
254 static dls_head_t *
255 i_dls_head_alloc(mod_hash_key_t key)
256 {
257 	dls_head_t	*dhp;
258 
259 	dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
260 	dhp->dh_key = key;
261 	return (dhp);
262 }
263 
264 static void
265 i_dls_head_free(dls_head_t *dhp)
266 {
267 	ASSERT(dhp->dh_ref == 0);
268 	kmem_free(dhp, sizeof (dls_head_t));
269 }
270 
271 /*
272  * Try to send mp up to the streams of the given sap and vid. Return B_TRUE
273  * if this message is sent to any streams.
274  * Note that this function will copy the message chain and the original
275  * mp will remain valid after this function
276  */
277 static uint_t
278 i_dls_link_rx_func(dls_link_t *dlp, mac_resource_handle_t mrh,
279     mac_header_info_t *mhip, mblk_t *mp, uint32_t sap, uint16_t vid,
280     boolean_t (*acceptfunc)())
281 {
282 	mod_hash_t	*hash = dlp->dl_impl_hash;
283 	mod_hash_key_t	key;
284 	dls_head_t	*dhp;
285 	dls_impl_t	*dip;
286 	mblk_t		*nmp;
287 	dls_rx_t	di_rx;
288 	void		*di_rx_arg;
289 	uint_t		naccepted = 0;
290 
291 	/*
292 	 * Construct a hash key from the VLAN identifier and the
293 	 * DLSAP that represents dls_impl_t in promiscuous mode.
294 	 */
295 	key = MAKE_KEY(sap, vid);
296 
297 	/*
298 	 * Search the hash table for dls_impl_t eligible to receive
299 	 * a packet chain for this DLSAP/VLAN combination.
300 	 */
301 	rw_enter(&dlp->dl_impl_lock, RW_READER);
302 	if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
303 		rw_exit(&dlp->dl_impl_lock);
304 		return (B_FALSE);
305 	}
306 	i_dls_head_hold(dhp);
307 	rw_exit(&dlp->dl_impl_lock);
308 
309 	/*
310 	 * Find dls_impl_t that will accept the sub-chain.
311 	 */
312 	for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
313 		if (!acceptfunc(dip, mhip, &di_rx, &di_rx_arg))
314 			continue;
315 
316 		/*
317 		 * We have at least one acceptor.
318 		 */
319 		naccepted ++;
320 
321 		/*
322 		 * There will normally be at least more dls_impl_t
323 		 * (since we've yet to check for non-promiscuous
324 		 * dls_impl_t) so dup the sub-chain.
325 		 */
326 		if ((nmp = copymsgchain(mp)) != NULL)
327 			di_rx(di_rx_arg, mrh, nmp, mhip);
328 	}
329 
330 	/*
331 	 * Release the hold on the dls_impl_t chain now that we have
332 	 * finished walking it.
333 	 */
334 	i_dls_head_rele(dhp);
335 	return (naccepted);
336 }
337 
/*
 * MAC receive callback used when no dls_impl_t is bound to
 * DLS_SAP_PROMISC (see the selection made in dls_link_add() and
 * dls_link_remove()).  arg is the dls_link_t.
 *
 * The incoming chain is cut into sub-chains of packets with matching
 * headers.  Each sub-chain is:
 *  - if tagged, first copied to the acceptors hashed under
 *    ETHERTYPE_VLAN / VLAN_ID_NONE;
 *  - then delivered to the acceptors hashed under the packet's bind sap
 *    and VLAN id.  All but the last acceptor receive a copy; the last one
 *    receives the original chain.
 * Packets that cannot be parsed, and sub-chains nobody accepts, are freed
 * and added to dl_unknowns.
 */
static void
i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
{
	dls_link_t			*dlp = arg;
	mod_hash_t			*hash = dlp->dl_impl_hash;
	mblk_t				*nextp;
	mac_header_info_t		mhi;
	dls_head_t			*dhp;
	dls_impl_t			*dip;
	dls_impl_t			*ndip;
	mblk_t				*nmp;
	mod_hash_key_t			key;
	uint_t				npacket;
	boolean_t			accepted;
	dls_rx_t			di_rx, ndi_rx;
	void				*di_rx_arg, *ndi_rx_arg;
	uint16_t			vid;
	int				err;

	/*
	 * Walk the packet chain.
	 */
	for (; mp != NULL; mp = nextp) {
		/*
		 * Wipe the accepted state.
		 */
		accepted = B_FALSE;

		/*
		 * Parse and strip the header of the first packet. A packet
		 * that fails is counted as unknown and dropped on its own.
		 */
		DLS_PREPARE_PKT(dlp, mp, &mhi, err);
		if (err != 0) {
			atomic_add_32(&(dlp->dl_unknowns), 1);
			nextp = mp->b_next;
			mp->b_next = NULL;
			freemsg(mp);
			continue;
		}

		/*
		 * Grab the longest sub-chain we can process as a single
		 * unit. From here on mp is a NULL-terminated sub-chain and
		 * nextp is the remainder of the original chain.
		 */
		nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
		ASSERT(npacket != 0);

		vid = VLAN_ID(mhi.mhi_tci);

		if (mhi.mhi_istagged) {
			/*
			 * If it is tagged traffic, send it upstream to
			 * all dls_impl_t which are attached to the physical
			 * link and bound to SAP 0x8100.
			 */
			if (i_dls_link_rx_func(dlp, mrh, &mhi, mp,
			    ETHERTYPE_VLAN, VLAN_ID_NONE, dls_accept) > 0) {
				accepted = B_TRUE;
			}

			/*
			 * Don't pass the packets up if they are tagged
			 * packets and:
			 *  - their VID and priority are both zero (invalid
			 *    packets).
			 *  - their sap is ETHERTYPE_VLAN and their VID is
			 *    zero as they have already been sent upstream.
			 */
			if ((vid == VLAN_ID_NONE &&
			    VLAN_PRI(mhi.mhi_tci) == 0) ||
			    (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
			    vid == VLAN_ID_NONE)) {
				freemsgchain(mp);
				goto loop;
			}
		}

		/*
		 * Construct a hash key from the VLAN identifier and the
		 * DLSAP.
		 */
		key = MAKE_KEY(mhi.mhi_bindsap, vid);

		/*
		 * Search the hash table for dls_impl_t eligible to receive
		 * a packet chain for this DLSAP/VLAN combination. The hold
		 * on the list head is taken before the hash lock is dropped.
		 */
		rw_enter(&dlp->dl_impl_lock, RW_READER);
		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
			rw_exit(&dlp->dl_impl_lock);
			freemsgchain(mp);
			goto loop;
		}
		i_dls_head_hold(dhp);
		rw_exit(&dlp->dl_impl_lock);

		/*
		 * Find the first dls_impl_t that will accept the sub-chain.
		 */
		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
			if (dls_accept(dip, &mhi, &di_rx, &di_rx_arg))
				break;

		/*
		 * If we did not find any dls_impl_t willing to accept the
		 * sub-chain then throw it away.
		 */
		if (dip == NULL) {
			i_dls_head_rele(dhp);
			freemsgchain(mp);
			goto loop;
		}

		/*
		 * We have at least one acceptor.
		 */
		accepted = B_TRUE;
		for (;;) {
			/*
			 * Find the next dls_impl_t that will accept the
			 * sub-chain.
			 */
			for (ndip = dip->di_nextp; ndip != NULL;
			    ndip = ndip->di_nextp)
				if (dls_accept(ndip, &mhi, &ndi_rx,
				    &ndi_rx_arg))
					break;

			/*
			 * If there are no more dls_impl_t that are willing
			 * to accept the sub-chain then we don't need to dup
			 * it before handing it to the current one.
			 */
			if (ndip == NULL) {
				di_rx(di_rx_arg, mrh, mp, &mhi);

				/*
				 * Since there are no more dls_impl_t, we're
				 * done.
				 */
				break;
			}

			/*
			 * There are more dls_impl_t so dup the sub-chain.
			 * If the copy fails the current dls_impl_t simply
			 * does not receive this sub-chain.
			 */
			if ((nmp = copymsgchain(mp)) != NULL)
				di_rx(di_rx_arg, mrh, nmp, &mhi);

			dip = ndip;
			di_rx = ndi_rx;
			di_rx_arg = ndi_rx_arg;
		}

		/*
		 * Release the hold on the dls_impl_t chain now that we have
		 * finished walking it.
		 */
		i_dls_head_rele(dhp);

loop:
		/*
		 * If there were no acceptors then add the packet count to the
		 * 'unknown' count.
		 */
		if (!accepted)
			atomic_add_32(&(dlp->dl_unknowns), npacket);
	}
}
504 
505 /*
506  * Try to send mp up to the DLS_SAP_PROMISC listeners. Return B_TRUE if this
507  * message is sent to any streams.
508  */
509 static uint_t
510 i_dls_link_rx_common_promisc(dls_link_t *dlp, mac_resource_handle_t mrh,
511     mac_header_info_t *mhip, mblk_t *mp, uint16_t vid,
512     boolean_t (*acceptfunc)())
513 {
514 	uint_t naccepted;
515 
516 	naccepted = i_dls_link_rx_func(dlp, mrh, mhip, mp, DLS_SAP_PROMISC,
517 	    vid, acceptfunc);
518 
519 	if (vid != VLAN_ID_NONE) {
520 		naccepted += i_dls_link_rx_func(dlp, mrh, mhip, mp,
521 		    DLS_SAP_PROMISC, VLAN_ID_NONE, acceptfunc);
522 	}
523 	return (naccepted);
524 }
525 
/*
 * Common receive path used by i_dls_link_rx_promisc() (acceptfunc is
 * dls_accept) and dls_link_txloop() (acceptfunc is dls_accept_loopback).
 * arg is the dls_link_t.
 *
 * The incoming chain is cut into sub-chains of packets with matching
 * headers.  Each sub-chain is first copied to the DLS_SAP_PROMISC
 * listeners and then offered to the non-promiscuous listeners in up to
 * two passes (ETHERTYPE_VLAN listeners for tagged packets, then the
 * listeners of the packet's own sap/vid).  To save one copy, the first
 * non-promiscuous acceptor found is remembered and is handed the original
 * chain once all passes are done; every other acceptor gets a copy.
 *
 * dl_unknowns is only updated when acceptfunc is dls_accept.
 */
static void
i_dls_link_rx_common(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t (*acceptfunc)())
{
	dls_link_t			*dlp = arg;
	mod_hash_t			*hash = dlp->dl_impl_hash;
	mblk_t				*nextp;
	mac_header_info_t		mhi;
	uint16_t			vid, vidkey, pri;
	dls_head_t			*dhp;
	dls_impl_t			*dip;
	mblk_t				*nmp;
	mod_hash_key_t			key;
	uint_t				npacket;
	uint32_t			sap;
	boolean_t			accepted;
	dls_rx_t			di_rx, fdi_rx;
	void				*di_rx_arg, *fdi_rx_arg;
	boolean_t			pass2;
	int				err;

	/*
	 * Walk the packet chain.
	 */
	for (; mp != NULL; mp = nextp) {
		/*
		 * Wipe the accepted state and the receive information of
		 * the first eligible dls_impl_t.
		 */
		accepted = B_FALSE;
		pass2 = B_FALSE;
		fdi_rx = NULL;
		fdi_rx_arg = NULL;

		/*
		 * Parse and strip the header of the first packet. A packet
		 * that fails is dropped on its own.
		 */
		DLS_PREPARE_PKT(dlp, mp, &mhi, err);
		if (err != 0) {
			if (acceptfunc == dls_accept)
				atomic_add_32(&(dlp->dl_unknowns), 1);
			nextp = mp->b_next;
			mp->b_next = NULL;
			freemsg(mp);
			continue;
		}

		/*
		 * Grab the longest sub-chain we can process as a single
		 * unit. From here on mp is a NULL-terminated sub-chain and
		 * nextp is the remainder of the original chain.
		 */
		nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
		ASSERT(npacket != 0);

		vid = VLAN_ID(mhi.mhi_tci);
		pri = VLAN_PRI(mhi.mhi_tci);

		vidkey = vid;

		/*
		 * Note that we need to first send to the dls_impl_t
		 * in promiscuous mode in order to avoid the packet reordering
		 * when snooping.
		 */
		if (i_dls_link_rx_common_promisc(dlp, mrh, &mhi, mp, vidkey,
		    acceptfunc) > 0) {
			accepted = B_TRUE;
		}

		/*
		 * Non promisc case. Two passes:
		 *   1. send tagged packets to ETHERTYPE_VLAN listeners
		 *   2. send packets to listeners bound to the specific SAP.
		 *
		 * Untagged packets skip the first pass; sap and vidkey are
		 * then set up for the second pass at non_promisc_loop.
		 */
		if (mhi.mhi_istagged) {
			vidkey = VLAN_ID_NONE;
			sap = ETHERTYPE_VLAN;
		} else {
			goto non_promisc_loop;
		}
non_promisc:
		/*
		 * Construct a hash key from the VLAN identifier and the
		 * DLSAP.
		 */
		key = MAKE_KEY(sap, vidkey);

		/*
		 * Search the hash table for dls_impl_t eligible to receive
		 * a packet chain for this DLSAP/VLAN combination. The hold
		 * on the list head is taken before the hash lock is dropped.
		 */
		rw_enter(&dlp->dl_impl_lock, RW_READER);
		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
			rw_exit(&dlp->dl_impl_lock);
			goto non_promisc_loop;
		}
		i_dls_head_hold(dhp);
		rw_exit(&dlp->dl_impl_lock);

		/*
		 * Walk every dls_impl_t on the list and deliver to each one
		 * that accepts the sub-chain.
		 */
		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
			if (!acceptfunc(dip, &mhi, &di_rx, &di_rx_arg))
				continue;

			accepted = B_TRUE;

			/*
			 * To avoid the extra copymsgchain(), if this
			 * is the first eligible dls_impl_t, remember required
			 * information and send up the message afterwards.
			 */
			if (fdi_rx == NULL) {
				fdi_rx = di_rx;
				fdi_rx_arg = di_rx_arg;
				continue;
			}

			if ((nmp = copymsgchain(mp)) != NULL)
				di_rx(di_rx_arg, mrh, nmp, &mhi);
		}

		/*
		 * Release the hold on the dls_impl_t chain now that we have
		 * finished walking it.
		 */
		i_dls_head_rele(dhp);

non_promisc_loop:
		/*
		 * Don't pass the packets up again if:
		 * - First pass is done and the packets are tagged and their:
		 *	- VID and priority are both zero (invalid packets).
		 *	- their sap is ETHERTYPE_VLAN and their VID is zero
		 *	  (they have already been sent upstream).
		 * - Second pass is done.
		 */
		if (pass2 || (mhi.mhi_istagged &&
		    ((vid == VLAN_ID_NONE && pri == 0) ||
		    (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
		    vid == VLAN_ID_NONE)))) {
			/*
			 * Send the message up to the first eligible
			 * dls_impl_t, which consumes the original chain. If
			 * there is none, the chain is freed here.
			 */
			if (fdi_rx != NULL)
				fdi_rx(fdi_rx_arg, mrh, mp, &mhi);
			else
				freemsgchain(mp);
		} else {
			/*
			 * Set up and run the second pass using the packet's
			 * own sap and VLAN id.
			 */
			vidkey = vid;
			sap = mhi.mhi_bindsap;
			pass2 = B_TRUE;
			goto non_promisc;
		}

		/*
		 * If there were no acceptors then add the packet count to the
		 * 'unknown' count.
		 */
		if (!accepted && (acceptfunc == dls_accept))
			atomic_add_32(&(dlp->dl_unknowns), npacket);
	}
}
687 
688 static void
689 i_dls_link_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
690 {
691 	i_dls_link_rx_common(arg, mrh, mp, dls_accept);
692 }
693 
694 void
695 dls_link_txloop(void *arg, mblk_t *mp)
696 {
697 	i_dls_link_rx_common(arg, NULL, mp, dls_accept_loopback);
698 }
699 
700 /*ARGSUSED*/
701 static uint_t
702 i_dls_link_walk(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
703 {
704 	boolean_t 	*promiscp = arg;
705 	uint32_t	sap = KEY_SAP(key);
706 
707 	if (sap == DLS_SAP_PROMISC) {
708 		*promiscp = B_TRUE;
709 		return (MH_WALK_TERMINATE);
710 	}
711 
712 	return (MH_WALK_CONTINUE);
713 }
714 
715 static int
716 i_dls_link_create(const char *name, dls_link_t **dlpp)
717 {
718 	dls_link_t		*dlp;
719 
720 	/*
721 	 * Allocate a new dls_link_t structure.
722 	 */
723 	dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
724 
725 	/*
726 	 * Name the dls_link_t after the MAC interface it represents.
727 	 */
728 	(void) strlcpy(dlp->dl_name, name, sizeof (dlp->dl_name));
729 
730 	/*
731 	 * Initialize promiscuous bookkeeping fields.
732 	 */
733 	dlp->dl_npromisc = 0;
734 	dlp->dl_mth = NULL;
735 
736 	*dlpp = dlp;
737 	return (0);
738 }
739 
740 static void
741 i_dls_link_destroy(dls_link_t *dlp)
742 {
743 	ASSERT(dlp->dl_npromisc == 0);
744 	ASSERT(dlp->dl_nactive == 0);
745 	ASSERT(dlp->dl_mth == NULL);
746 	ASSERT(dlp->dl_macref == 0);
747 	ASSERT(dlp->dl_mh == NULL);
748 	ASSERT(dlp->dl_mip == NULL);
749 	ASSERT(dlp->dl_impl_count == 0);
750 	ASSERT(dlp->dl_mrh == NULL);
751 
752 	/*
753 	 * Free the structure back to the cache.
754 	 */
755 	dlp->dl_unknowns = 0;
756 	kmem_cache_free(i_dls_link_cachep, dlp);
757 }
758 
759 /*
760  * Module initialization functions.
761  */
762 
763 void
764 dls_link_init(void)
765 {
766 	/*
767 	 * Create a kmem_cache of dls_link_t structures.
768 	 */
769 	i_dls_link_cachep = kmem_cache_create("dls_link_cache",
770 	    sizeof (dls_link_t), 0, i_dls_link_constructor,
771 	    i_dls_link_destructor, NULL, NULL, NULL, 0);
772 	ASSERT(i_dls_link_cachep != NULL);
773 
774 	/*
775 	 * Create a dls_link_t hash table and associated lock.
776 	 */
777 	i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
778 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
779 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
780 	rw_init(&i_dls_link_lock, NULL, RW_DEFAULT, NULL);
781 	i_dls_link_count = 0;
782 }
783 
784 int
785 dls_link_fini(void)
786 {
787 	if (i_dls_link_count > 0)
788 		return (EBUSY);
789 
790 	/*
791 	 * Destroy the kmem_cache.
792 	 */
793 	kmem_cache_destroy(i_dls_link_cachep);
794 
795 	/*
796 	 * Destroy the hash table and associated lock.
797 	 */
798 	mod_hash_destroy_hash(i_dls_link_hash);
799 	rw_destroy(&i_dls_link_lock);
800 	return (0);
801 }
802 
803 /*
804  * Exported functions.
805  */
806 
807 int
808 dls_link_hold(const char *name, dls_link_t **dlpp)
809 {
810 	dls_link_t		*dlp;
811 	int			err;
812 
813 	/*
814 	 * Look up a dls_link_t corresponding to the given mac_handle_t
815 	 * in the global hash table. We need to hold i_dls_link_lock in
816 	 * order to atomically find and insert a dls_link_t into the
817 	 * hash table.
818 	 */
819 	rw_enter(&i_dls_link_lock, RW_WRITER);
820 	if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
821 	    (mod_hash_val_t *)&dlp)) == 0)
822 		goto done;
823 
824 	ASSERT(err == MH_ERR_NOTFOUND);
825 
826 	/*
827 	 * We didn't find anything so we need to create one.
828 	 */
829 	if ((err = i_dls_link_create(name, &dlp)) != 0) {
830 		rw_exit(&i_dls_link_lock);
831 		return (err);
832 	}
833 
834 	/*
835 	 * Insert the dls_link_t.
836 	 */
837 	err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name,
838 	    (mod_hash_val_t)dlp);
839 	ASSERT(err == 0);
840 
841 	i_dls_link_count++;
842 	ASSERT(i_dls_link_count != 0);
843 
844 done:
845 
846 	/*
847 	 * Bump the reference count and hand back the reference.
848 	 */
849 	dlp->dl_ref++;
850 	*dlpp = dlp;
851 	rw_exit(&i_dls_link_lock);
852 	return (0);
853 }
854 
855 void
856 dls_link_rele(dls_link_t *dlp)
857 {
858 	mod_hash_val_t	val;
859 
860 	rw_enter(&i_dls_link_lock, RW_WRITER);
861 
862 	/*
863 	 * Check if there are any more references.
864 	 */
865 	if (--dlp->dl_ref != 0) {
866 		/*
867 		 * There are more references so there's nothing more to do.
868 		 */
869 		goto done;
870 	}
871 
872 	(void) mod_hash_remove(i_dls_link_hash,
873 	    (mod_hash_key_t)dlp->dl_name, &val);
874 	ASSERT(dlp == (dls_link_t *)val);
875 
876 	/*
877 	 * Destroy the dls_link_t.
878 	 */
879 	i_dls_link_destroy(dlp);
880 	ASSERT(i_dls_link_count > 0);
881 	i_dls_link_count--;
882 done:
883 	rw_exit(&i_dls_link_lock);
884 }
885 
886 int
887 dls_mac_hold(dls_link_t *dlp)
888 {
889 	mac_handle_t mh;
890 	int err = 0;
891 
892 	err = mac_open(dlp->dl_name, &mh);
893 
894 	mutex_enter(&dlp->dl_lock);
895 
896 	ASSERT(IMPLY(dlp->dl_macref != 0, dlp->dl_mh != NULL));
897 	ASSERT(IMPLY(dlp->dl_macref == 0, dlp->dl_mh == NULL));
898 	if (err == 0) {
899 		ASSERT(dlp->dl_mh == NULL || dlp->dl_mh == mh);
900 		if (dlp->dl_mh == NULL) {
901 			dlp->dl_mh = mh;
902 			dlp->dl_mip = mac_info(mh);
903 		}
904 		dlp->dl_macref++;
905 	}
906 
907 	mutex_exit(&dlp->dl_lock);
908 	return (err);
909 }
910 
911 void
912 dls_mac_rele(dls_link_t *dlp)
913 {
914 	mutex_enter(&dlp->dl_lock);
915 	ASSERT(dlp->dl_mh != NULL);
916 
917 	mac_close(dlp->dl_mh);
918 
919 	if (--dlp->dl_macref == 0) {
920 		dlp->dl_mh = NULL;
921 		dlp->dl_mip = NULL;
922 	}
923 	mutex_exit(&dlp->dl_lock);
924 }
925 
926 void
927 dls_link_add(dls_link_t *dlp, uint32_t sap, dls_impl_t *dip)
928 {
929 	dls_vlan_t	*dvp = dip->di_dvp;
930 	mod_hash_t	*hash = dlp->dl_impl_hash;
931 	mod_hash_key_t	key;
932 	dls_head_t	*dhp;
933 	dls_impl_t	*p;
934 	mac_rx_t	rx;
935 	int		err;
936 	boolean_t	promisc = B_FALSE;
937 
938 	/*
939 	 * Generate a hash key based on the sap and the VLAN id.
940 	 */
941 	key = MAKE_KEY(sap, dvp->dv_id);
942 
943 	/*
944 	 * We need dl_lock here because we want to be able to walk
945 	 * the hash table *and* set the mac rx func atomically. if
946 	 * these two operations are separate, someone else could
947 	 * insert/remove dls_impl_t from the hash table after we
948 	 * drop the hash lock and this could cause our chosen rx
949 	 * func to be incorrect. note that we cannot call mac_rx_add
950 	 * when holding the hash lock because this can cause deadlock.
951 	 */
952 	mutex_enter(&dlp->dl_lock);
953 
954 	/*
955 	 * Search the table for a list head with this key.
956 	 */
957 	rw_enter(&dlp->dl_impl_lock, RW_WRITER);
958 
959 	if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
960 		ASSERT(err == MH_ERR_NOTFOUND);
961 
962 		dhp = i_dls_head_alloc(key);
963 		err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
964 		ASSERT(err == 0);
965 	}
966 
967 	/*
968 	 * Add the dls_impl_t to the head of the list.
969 	 */
970 	ASSERT(dip->di_nextp == NULL);
971 	p = dhp->dh_list;
972 	dip->di_nextp = p;
973 	dhp->dh_list = dip;
974 
975 	/*
976 	 * Save a pointer to the list head.
977 	 */
978 	dip->di_headp = dhp;
979 	dlp->dl_impl_count++;
980 
981 	/*
982 	 * Walk the bound dls_impl_t to see if there are any
983 	 * in promiscuous 'all sap' mode.
984 	 */
985 	mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
986 	rw_exit(&dlp->dl_impl_lock);
987 
988 	/*
989 	 * If there are then we need to use a receive routine
990 	 * which will route packets to those dls_impl_t as well
991 	 * as ones bound to the  DLSAP of the packet.
992 	 */
993 	if (promisc)
994 		rx = i_dls_link_rx_promisc;
995 	else
996 		rx = i_dls_link_rx;
997 
998 	/* Replace the existing receive function if there is one. */
999 	if (dlp->dl_mrh != NULL)
1000 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_TRUE);
1001 	dlp->dl_mrh = mac_active_rx_add(dlp->dl_mh, rx, (void *)dlp);
1002 	mutex_exit(&dlp->dl_lock);
1003 }
1004 
/*
 * Unbind dip from link dlp.
 *
 * The function waits (polling once per millisecond, with all locks
 * dropped) until nobody holds a reference on dip's list head, unlinks dip
 * from the list and frees the list head if it became empty.  Finally the
 * MAC receive function is removed (no dls_impl_t left) or re-registered
 * according to whether any remaining entry is bound to DLS_SAP_PROMISC.
 */
void
dls_link_remove(dls_link_t *dlp, dls_impl_t *dip)
{
	mod_hash_t	*hash = dlp->dl_impl_hash;
	dls_impl_t	**pp;
	dls_impl_t	*p;
	dls_head_t	*dhp;
	mac_rx_t	rx;

	/*
	 * We need dl_lock here because we want to be able to walk
	 * the hash table *and* set the mac rx func atomically. if
	 * these two operations are separate, someone else could
	 * insert/remove dls_impl_t from the hash table after we
	 * drop the hash lock and this could cause our chosen rx
	 * func to be incorrect. note that we cannot call mac_rx_add
	 * when holding the hash lock because this can cause deadlock.
	 */
	mutex_enter(&dlp->dl_lock);
	rw_enter(&dlp->dl_impl_lock, RW_WRITER);

	/*
	 * Poll the hash table entry until all references have been dropped.
	 * We need to drop all locks before sleeping because we don't want
	 * the interrupt handler to block. We set di_removing here to
	 * tell the receive callbacks not to pass up packets anymore.
	 * This is only a hint to quicken the decrease of the refcnt so
	 * the assignment need not be protected by any lock.
	 */
	dhp = dip->di_headp;
	dip->di_removing = B_TRUE;
	while (dhp->dh_ref != 0) {
		rw_exit(&dlp->dl_impl_lock);
		mutex_exit(&dlp->dl_lock);
		delay(drv_usectohz(1000));	/* 1ms delay */
		mutex_enter(&dlp->dl_lock);
		rw_enter(&dlp->dl_impl_lock, RW_WRITER);
	}

	/*
	 * Walk the list and remove the dls_impl_t. pp always points at the
	 * link that refers to p, so the unlink below works for the first
	 * element as well.
	 */
	for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->di_nextp)) {
		if (p == dip)
			break;
	}
	ASSERT(p != NULL);
	*pp = p->di_nextp;
	p->di_nextp = NULL;

	ASSERT(dlp->dl_impl_count > 0);
	dlp->dl_impl_count--;

	if (dhp->dh_list == NULL) {
		mod_hash_val_t	val = NULL;

		/*
		 * The list is empty so remove the hash table entry.
		 */
		(void) mod_hash_remove(hash, dhp->dh_key, &val);
		ASSERT(dhp == (dls_head_t *)val);
		i_dls_head_free(dhp);
	}
	dip->di_removing = B_FALSE;

	/*
	 * If there are no dls_impl_t then there's no need to register a
	 * receive function with the mac.
	 */
	if (dlp->dl_impl_count == 0) {
		rw_exit(&dlp->dl_impl_lock);
		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_TRUE);
		dlp->dl_mrh = NULL;
	} else {
		boolean_t promisc = B_FALSE;

		/*
		 * Walk the bound dls_impl_t to see if there are any
		 * in promiscuous 'all sap' mode.
		 */
		mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
		rw_exit(&dlp->dl_impl_lock);

		/*
		 * If there are then we need to use a receive routine
		 * which will route packets to those dls_impl_t as well
		 * as ones bound to the  DLSAP of the packet.
		 */
		if (promisc)
			rx = i_dls_link_rx_promisc;
		else
			rx = i_dls_link_rx;

		/* Replace the currently registered receive function. */
		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_TRUE);
		dlp->dl_mrh = mac_active_rx_add(dlp->dl_mh, rx, (void *)dlp);
	}
	mutex_exit(&dlp->dl_lock);
}
1103 
1104 int
1105 dls_link_header_info(dls_link_t *dlp, mblk_t *mp, mac_header_info_t *mhip)
1106 {
1107 	boolean_t	is_ethernet = (dlp->dl_mip->mi_media == DL_ETHER);
1108 	int		err = 0;
1109 
1110 	/*
1111 	 * Packets should always be at least 16 bit aligned.
1112 	 */
1113 	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
1114 
1115 	if ((err = mac_header_info(dlp->dl_mh, mp, mhip)) != 0)
1116 		return (err);
1117 
1118 	/*
1119 	 * If this is a VLAN-tagged Ethernet packet, then the SAP in the
1120 	 * mac_header_info_t as returned by mac_header_info() is
1121 	 * ETHERTYPE_VLAN. We need to grab the ethertype from the VLAN header.
1122 	 */
1123 	if (is_ethernet && (mhip->mhi_bindsap == ETHERTYPE_VLAN)) {
1124 		struct ether_vlan_header *evhp;
1125 		uint16_t sap;
1126 		mblk_t *tmp = NULL;
1127 		size_t size;
1128 
1129 		size = sizeof (struct ether_vlan_header);
1130 		if (MBLKL(mp) < size) {
1131 			/*
1132 			 * Pullup the message in order to get the MAC header
1133 			 * infomation. Note that this is a read-only function,
1134 			 * we keep the input packet intact.
1135 			 */
1136 			if ((tmp = msgpullup(mp, size)) == NULL)
1137 				return (EINVAL);
1138 
1139 			mp = tmp;
1140 		}
1141 		evhp = (struct ether_vlan_header *)mp->b_rptr;
1142 		sap = ntohs(evhp->ether_type);
1143 		(void) mac_sap_verify(dlp->dl_mh, sap, &mhip->mhi_bindsap);
1144 		mhip->mhi_hdrsize = sizeof (struct ether_vlan_header);
1145 		mhip->mhi_tci = ntohs(evhp->ether_tci);
1146 		mhip->mhi_istagged = B_TRUE;
1147 		freemsg(tmp);
1148 
1149 		if (VLAN_CFI(mhip->mhi_tci) != ETHER_CFI)
1150 			return (EINVAL);
1151 	} else {
1152 		mhip->mhi_istagged = B_FALSE;
1153 		mhip->mhi_tci = 0;
1154 	}
1155 
1156 	/*
1157 	 * The messsage is looped back from the underlying driver.
1158 	 */
1159 	mhip->mhi_prom_looped = (mp->b_flag & MSGNOLOOP);
1160 
1161 	return (0);
1162 }
1163