xref: /illumos-gate/usr/src/uts/common/io/dls/dls_link.c (revision abddfefb3168362a915cd681eb5a6498ec6c9e09)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Data-Link Services Module
28  */
29 
30 #include	<sys/sysmacros.h>
31 #include	<sys/strsubr.h>
32 #include	<sys/strsun.h>
33 #include	<sys/vlan.h>
34 #include	<sys/dld_impl.h>
35 #include	<sys/sdt.h>
36 #include	<sys/atomic.h>
37 
38 static kmem_cache_t	*i_dls_link_cachep;
39 mod_hash_t		*i_dls_link_hash;
40 static uint_t		i_dls_link_count;
41 
42 #define		LINK_HASHSZ	67	/* prime */
43 #define		IMPL_HASHSZ	67	/* prime */
44 
/*
 * Construct a mod_hash key from a DLSAP value: the SAP is shifted left by
 * VLAN_ID_SIZE bits and cast to a mod_hash_key_t. No VLAN identifier is
 * folded into the key by this macro.
 */
#define	MAKE_KEY(sapval)					\
	((mod_hash_key_t)(uintptr_t)((sapval) << VLAN_ID_SIZE))
50 
/*
 * If pktsize is non-zero and the message p carries more data than pktsize,
 * hand the (negative) difference to adjmsg() so the excess is removed from
 * the message. A pktsize of zero means "size unknown" here and leaves the
 * message untouched.
 *
 * Both arguments are parenthesized at every use so that callers may pass
 * arbitrary expressions.
 */
#define	DLS_STRIP_PADDING(pktsize, p) {				\
	if ((pktsize) != 0) {					\
		ssize_t delta = (pktsize) - msgdsize((p));	\
								\
		if (delta < 0)					\
			(void) adjmsg((p), delta);		\
	}							\
}
59 
60 /*
61  * Private functions.
62  */
63 
64 /*ARGSUSED*/
65 static int
66 i_dls_link_constructor(void *buf, void *arg, int kmflag)
67 {
68 	dls_link_t	*dlp = buf;
69 	char		name[MAXNAMELEN];
70 
71 	bzero(buf, sizeof (dls_link_t));
72 
73 	(void) snprintf(name, MAXNAMELEN, "dls_link_t_%p_hash", buf);
74 	dlp->dl_str_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
75 	    mod_hash_null_valdtor);
76 
77 	return (0);
78 }
79 
80 /*ARGSUSED*/
81 static void
82 i_dls_link_destructor(void *buf, void *arg)
83 {
84 	dls_link_t	*dlp = buf;
85 
86 	ASSERT(dlp->dl_ref == 0);
87 	ASSERT(dlp->dl_mh == NULL);
88 	ASSERT(dlp->dl_mah == NULL);
89 	ASSERT(dlp->dl_unknowns == 0);
90 
91 	mod_hash_destroy_idhash(dlp->dl_str_hash);
92 	dlp->dl_str_hash = NULL;
93 
94 }
95 
/*
 * - Parse the mac header information of the given packet.
 * - Strip the padding and skip over the header. Note that because some
 *   DLS consumers only check the db_ref count of the first mblk, we
 *   pullup the message into a single mblk. Because the original message
 *   is freed as the result of message pulling up, dls_link_header_info()
 *   is called again to update the mhi_saddr and mhi_daddr pointers in the
 *   mhip. Further, the dls_link_header_info() function ensures that the
 *   size of the pulled message is greater than the MAC header size,
 *   therefore we can directly advance b_rptr to point at the payload.
 *
 * On return (err) is 0 on success, the dls_link_header_info() error if
 * parsing failed, or EINVAL if the pullup failed; on any failure (mp) is
 * left pointing at the original, unfreed message. On success (mp) may have
 * been replaced by the pulled-up copy, with its b_next link preserved.
 *
 * The saved b_next pointer uses a macro-private name so that it can neither
 * shadow nor collide with a caller's own "nextp" variable.
 *
 * We choose to use a macro for performance reasons.
 */
#define	DLS_PREPARE_PKT(dlp, mp, mhip, err) {				\
	mblk_t *dls_pp_nextp = (mp)->b_next;				\
	if (((err) = dls_link_header_info((dlp), (mp), (mhip))) == 0) {	\
		DLS_STRIP_PADDING((mhip)->mhi_pktsize, (mp));		\
		if (MBLKL((mp)) < (mhip)->mhi_hdrsize) {		\
			mblk_t *dls_pp_newmp;				\
			if ((dls_pp_newmp = msgpullup((mp), -1)) ==	\
			    NULL) {					\
				(err) = EINVAL;				\
			} else {					\
				(mp)->b_next = NULL;			\
				freemsg((mp));				\
				(mp) = dls_pp_newmp;			\
				VERIFY(dls_link_header_info((dlp),	\
				    (mp), (mhip)) == 0);		\
				(mp)->b_next = dls_pp_nextp;		\
				(mp)->b_rptr += (mhip)->mhi_hdrsize;	\
			}						\
		} else {						\
			(mp)->b_rptr += (mhip)->mhi_hdrsize;		\
		}							\
	}								\
}
131 
132 /*
133  * Truncate the chain starting at mp such that all packets in the chain
134  * have identical source and destination addresses, saps, and tag types
135  * (see below).  It returns a pointer to the mblk following the chain,
136  * NULL if there is no further packet following the processed chain.
137  * The countp argument is set to the number of valid packets in the chain.
138  * Note that the whole MAC header (including the VLAN tag if any) in each
139  * packet will be stripped.
140  */
141 static mblk_t *
142 i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, const mac_header_info_t *mhip,
143     uint_t *countp)
144 {
145 	mblk_t		*prevp;
146 	uint_t		npacket = 1;
147 	size_t		addr_size = dlp->dl_mip->mi_addr_length;
148 	uint16_t	vid = VLAN_ID(mhip->mhi_tci);
149 	uint16_t	pri = VLAN_PRI(mhip->mhi_tci);
150 
151 	/*
152 	 * Compare with subsequent headers until we find one that has
153 	 * differing header information. After checking each packet
154 	 * strip padding and skip over the header.
155 	 */
156 	for (prevp = mp; (mp = mp->b_next) != NULL; prevp = mp) {
157 		mac_header_info_t cmhi;
158 		uint16_t cvid, cpri;
159 		int err;
160 
161 		DLS_PREPARE_PKT(dlp, mp, &cmhi, err);
162 		if (err != 0)
163 			break;
164 
165 		prevp->b_next = mp;
166 
167 		/*
168 		 * The source, destination, sap, vlan id and the MSGNOLOOP
169 		 * flag must all match in a given subchain.
170 		 */
171 		if (memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 ||
172 		    memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 ||
173 		    mhip->mhi_bindsap != cmhi.mhi_bindsap) {
174 			/*
175 			 * Note that we don't need to restore the padding.
176 			 */
177 			mp->b_rptr -= cmhi.mhi_hdrsize;
178 			break;
179 		}
180 
181 		cvid = VLAN_ID(cmhi.mhi_tci);
182 		cpri = VLAN_PRI(cmhi.mhi_tci);
183 
184 		/*
185 		 * There are several types of packets. Packets don't match
186 		 * if they are classified to different type or if they are
187 		 * VLAN packets but belong to different VLANs:
188 		 *
189 		 * packet type		tagged		vid		pri
190 		 * ---------------------------------------------------------
191 		 * untagged		No		zero		zero
192 		 * VLAN packets		Yes		non-zero	-
193 		 * priority tagged	Yes		zero		non-zero
194 		 * 0 tagged		Yes		zero		zero
195 		 */
196 		if ((mhip->mhi_istagged != cmhi.mhi_istagged) ||
197 		    (vid != cvid) || ((vid == VLAN_ID_NONE) &&
198 		    (((pri == 0) && (cpri != 0)) ||
199 		    ((pri != 0) && (cpri == 0))))) {
200 			mp->b_rptr -= cmhi.mhi_hdrsize;
201 			break;
202 		}
203 
204 		npacket++;
205 	}
206 
207 	/*
208 	 * Break the chain at this point and return a pointer to the next
209 	 * sub-chain.
210 	 */
211 	prevp->b_next = NULL;
212 	*countp = npacket;
213 	return (mp);
214 }
215 
216 /* ARGSUSED */
217 static int
218 i_dls_head_hold(mod_hash_key_t key, mod_hash_val_t val)
219 {
220 	dls_head_t *dhp = (dls_head_t *)val;
221 
222 	/*
223 	 * The lock order is  mod_hash's internal lock -> dh_lock as in the
224 	 * call to i_dls_link_rx -> mod_hash_find_cb_rval -> i_dls_head_hold
225 	 */
226 	mutex_enter(&dhp->dh_lock);
227 	if (dhp->dh_removing) {
228 		mutex_exit(&dhp->dh_lock);
229 		return (-1);
230 	}
231 	dhp->dh_ref++;
232 	mutex_exit(&dhp->dh_lock);
233 	return (0);
234 }
235 
236 void
237 i_dls_head_rele(dls_head_t *dhp)
238 {
239 	mutex_enter(&dhp->dh_lock);
240 	dhp->dh_ref--;
241 	if (dhp->dh_ref == 0 && dhp->dh_removing != 0)
242 		cv_broadcast(&dhp->dh_cv);
243 	mutex_exit(&dhp->dh_lock);
244 }
245 
246 static dls_head_t *
247 i_dls_head_alloc(mod_hash_key_t key)
248 {
249 	dls_head_t	*dhp;
250 
251 	dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
252 	dhp->dh_key = key;
253 	return (dhp);
254 }
255 
256 static void
257 i_dls_head_free(dls_head_t *dhp)
258 {
259 	ASSERT(dhp->dh_ref == 0);
260 	kmem_free(dhp, sizeof (dls_head_t));
261 }
262 
263 /*
264  * Try to send mp up to the streams of the given sap and vid. Return B_TRUE
265  * if this message is sent to any streams.
266  * Note that this function will copy the message chain and the original
267  * mp will remain valid after this function
268  */
269 static uint_t
270 i_dls_link_rx_func(dls_link_t *dlp, mac_resource_handle_t mrh,
271     mac_header_info_t *mhip, mblk_t *mp, uint32_t sap,
272     boolean_t (*acceptfunc)())
273 {
274 	mod_hash_t	*hash = dlp->dl_str_hash;
275 	mod_hash_key_t	key;
276 	dls_head_t	*dhp;
277 	dld_str_t	*dsp;
278 	mblk_t		*nmp;
279 	dls_rx_t	ds_rx;
280 	void		*ds_rx_arg;
281 	uint_t		naccepted = 0;
282 	int		rval;
283 
284 	/*
285 	 * Construct a hash key from the VLAN identifier and the
286 	 * DLSAP that represents dld_str_t in promiscuous mode.
287 	 */
288 	key = MAKE_KEY(sap);
289 
290 	/*
291 	 * Search the hash table for dld_str_t eligible to receive
292 	 * a packet chain for this DLSAP/VLAN combination. The mod hash's
293 	 * internal lock serializes find/insert/remove from the mod hash list.
294 	 * Incrementing the dh_ref (while holding the mod hash lock) ensures
295 	 * dls_link_remove will wait for the upcall to finish.
296 	 */
297 	if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
298 	    i_dls_head_hold, &rval) != 0 || (rval != 0)) {
299 		return (B_FALSE);
300 	}
301 
302 	/*
303 	 * Find dld_str_t that will accept the sub-chain.
304 	 */
305 	for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next) {
306 		if (!acceptfunc(dsp, mhip, &ds_rx, &ds_rx_arg))
307 			continue;
308 
309 		/*
310 		 * We have at least one acceptor.
311 		 */
312 		naccepted++;
313 
314 		/*
315 		 * There will normally be at least more dld_str_t
316 		 * (since we've yet to check for non-promiscuous
317 		 * dld_str_t) so dup the sub-chain.
318 		 */
319 		if ((nmp = copymsgchain(mp)) != NULL)
320 			ds_rx(ds_rx_arg, mrh, nmp, mhip);
321 	}
322 
323 	/*
324 	 * Release the hold on the dld_str_t chain now that we have
325 	 * finished walking it.
326 	 */
327 	i_dls_head_rele(dhp);
328 	return (naccepted);
329 }
330 
331 /* ARGSUSED */
332 void
333 i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
334     boolean_t loopback)
335 {
336 	dls_link_t			*dlp = arg;
337 	mod_hash_t			*hash = dlp->dl_str_hash;
338 	mblk_t				*nextp;
339 	mac_header_info_t		mhi;
340 	dls_head_t			*dhp;
341 	dld_str_t			*dsp;
342 	dld_str_t			*ndsp;
343 	mblk_t				*nmp;
344 	mod_hash_key_t			key;
345 	uint_t				npacket;
346 	boolean_t			accepted;
347 	dls_rx_t			ds_rx, nds_rx;
348 	void				*ds_rx_arg, *nds_rx_arg;
349 	uint16_t			vid;
350 	int				err, rval;
351 
352 	/*
353 	 * Walk the packet chain.
354 	 */
355 	for (; mp != NULL; mp = nextp) {
356 		/*
357 		 * Wipe the accepted state.
358 		 */
359 		accepted = B_FALSE;
360 
361 		DLS_PREPARE_PKT(dlp, mp, &mhi, err);
362 		if (err != 0) {
363 			atomic_add_32(&(dlp->dl_unknowns), 1);
364 			nextp = mp->b_next;
365 			mp->b_next = NULL;
366 			freemsg(mp);
367 			continue;
368 		}
369 
370 		/*
371 		 * Grab the longest sub-chain we can process as a single
372 		 * unit.
373 		 */
374 		nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
375 		ASSERT(npacket != 0);
376 
377 		vid = VLAN_ID(mhi.mhi_tci);
378 
379 		if (mhi.mhi_istagged) {
380 			/*
381 			 * If it is tagged traffic, send it upstream to
382 			 * all dld_str_t which are attached to the physical
383 			 * link and bound to SAP 0x8100.
384 			 */
385 			if (i_dls_link_rx_func(dlp, mrh, &mhi, mp,
386 			    ETHERTYPE_VLAN, dls_accept) > 0) {
387 				accepted = B_TRUE;
388 			}
389 
390 			/*
391 			 * Don't pass the packets up if they are tagged
392 			 * packets and:
393 			 *  - their VID and priority are both zero (invalid
394 			 *    packets).
395 			 *  - their sap is ETHERTYPE_VLAN and their VID is
396 			 *    zero as they have already been sent upstreams.
397 			 */
398 			if ((vid == VLAN_ID_NONE &&
399 			    VLAN_PRI(mhi.mhi_tci) == 0) ||
400 			    (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
401 			    vid == VLAN_ID_NONE)) {
402 				freemsgchain(mp);
403 				goto loop;
404 			}
405 		}
406 
407 		/*
408 		 * Construct a hash key from the VLAN identifier and the
409 		 * DLSAP.
410 		 */
411 		key = MAKE_KEY(mhi.mhi_bindsap);
412 
413 		/*
414 		 * Search the has table for dld_str_t eligible to receive
415 		 * a packet chain for this DLSAP/VLAN combination.
416 		 */
417 		if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
418 		    i_dls_head_hold, &rval) != 0 || (rval != 0)) {
419 			freemsgchain(mp);
420 			goto loop;
421 		}
422 
423 		/*
424 		 * Find the first dld_str_t that will accept the sub-chain.
425 		 */
426 		for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next)
427 			if (dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
428 				break;
429 
430 		/*
431 		 * If we did not find any dld_str_t willing to accept the
432 		 * sub-chain then throw it away.
433 		 */
434 		if (dsp == NULL) {
435 			i_dls_head_rele(dhp);
436 			freemsgchain(mp);
437 			goto loop;
438 		}
439 
440 		/*
441 		 * We have at least one acceptor.
442 		 */
443 		accepted = B_TRUE;
444 		for (;;) {
445 			/*
446 			 * Find the next dld_str_t that will accept the
447 			 * sub-chain.
448 			 */
449 			for (ndsp = dsp->ds_next; ndsp != NULL;
450 			    ndsp = ndsp->ds_next)
451 				if (dls_accept(ndsp, &mhi, &nds_rx,
452 				    &nds_rx_arg))
453 					break;
454 
455 			/*
456 			 * If there are no more dld_str_t that are willing
457 			 * to accept the sub-chain then we don't need to dup
458 			 * it before handing it to the current one.
459 			 */
460 			if (ndsp == NULL) {
461 				ds_rx(ds_rx_arg, mrh, mp, &mhi);
462 
463 				/*
464 				 * Since there are no more dld_str_t, we're
465 				 * done.
466 				 */
467 				break;
468 			}
469 
470 			/*
471 			 * There are more dld_str_t so dup the sub-chain.
472 			 */
473 			if ((nmp = copymsgchain(mp)) != NULL)
474 				ds_rx(ds_rx_arg, mrh, nmp, &mhi);
475 
476 			dsp = ndsp;
477 			ds_rx = nds_rx;
478 			ds_rx_arg = nds_rx_arg;
479 		}
480 
481 		/*
482 		 * Release the hold on the dld_str_t chain now that we have
483 		 * finished walking it.
484 		 */
485 		i_dls_head_rele(dhp);
486 
487 loop:
488 		/*
489 		 * If there were no acceptors then add the packet count to the
490 		 * 'unknown' count.
491 		 */
492 		if (!accepted)
493 			atomic_add_32(&(dlp->dl_unknowns), npacket);
494 	}
495 }
496 
497 /* ARGSUSED */
498 void
499 dls_rx_vlan_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
500     boolean_t loopback)
501 {
502 	dld_str_t			*dsp = arg;
503 	dls_link_t			*dlp = dsp->ds_dlp;
504 	mac_header_info_t		mhi;
505 	dls_rx_t			ds_rx;
506 	void				*ds_rx_arg;
507 	int				err;
508 
509 	DLS_PREPARE_PKT(dlp, mp, &mhi, err);
510 	if (err != 0)
511 		goto drop;
512 
513 	/*
514 	 * If there is promiscuous handle for vlan, we filter out the untagged
515 	 * pkts and pkts that are not for the primary unicast address.
516 	 */
517 	if (dsp->ds_vlan_mph != NULL) {
518 		uint8_t prim_addr[MAXMACADDRLEN];
519 		size_t	addr_length = dsp->ds_mip->mi_addr_length;
520 
521 		if (!(mhi.mhi_istagged))
522 			goto drop;
523 		ASSERT(dsp->ds_mh != NULL);
524 		mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)prim_addr);
525 		if (memcmp(mhi.mhi_daddr, prim_addr, addr_length) != 0)
526 			goto drop;
527 
528 		if (!dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
529 			goto drop;
530 
531 		ds_rx(ds_rx_arg, NULL, mp, &mhi);
532 		return;
533 	}
534 
535 drop:
536 	atomic_add_32(&dlp->dl_unknowns, 1);
537 	freemsg(mp);
538 }
539 
540 /* ARGSUSED */
541 void
542 dls_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
543     boolean_t loopback)
544 {
545 	dld_str_t			*dsp = arg;
546 	dls_link_t			*dlp = dsp->ds_dlp;
547 	mac_header_info_t		mhi;
548 	dls_rx_t			ds_rx;
549 	void				*ds_rx_arg;
550 	int				err;
551 	dls_head_t			*dhp;
552 	mod_hash_key_t			key;
553 
554 	DLS_PREPARE_PKT(dlp, mp, &mhi, err);
555 	if (err != 0)
556 		goto drop;
557 
558 	/*
559 	 * In order to filter out sap pkt that no dls channel listens, search
560 	 * the hash table trying to find a dld_str_t eligible to receive the pkt
561 	 */
562 	if ((dsp->ds_promisc & DLS_PROMISC_SAP) == 0) {
563 		key = MAKE_KEY(mhi.mhi_bindsap);
564 		if (mod_hash_find(dsp->ds_dlp->dl_str_hash, key,
565 		    (mod_hash_val_t *)&dhp) != 0)
566 			goto drop;
567 	}
568 
569 	if (!dls_accept_promisc(dsp, &mhi, &ds_rx, &ds_rx_arg, loopback))
570 		goto drop;
571 
572 	ds_rx(ds_rx_arg, NULL, mp, &mhi);
573 	return;
574 
575 drop:
576 	atomic_add_32(&dlp->dl_unknowns, 1);
577 	freemsg(mp);
578 }
579 
580 static void
581 i_dls_link_destroy(dls_link_t *dlp)
582 {
583 	ASSERT(dlp->dl_nactive == 0);
584 	ASSERT(dlp->dl_impl_count == 0);
585 	ASSERT(dlp->dl_zone_ref == 0);
586 
587 	/*
588 	 * Free the structure back to the cache.
589 	 */
590 	if (dlp->dl_mch != NULL)
591 		mac_client_close(dlp->dl_mch, 0);
592 
593 	if (dlp->dl_mh != NULL) {
594 		ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
595 		mac_close(dlp->dl_mh);
596 	}
597 
598 	dlp->dl_mh = NULL;
599 	dlp->dl_mch = NULL;
600 	dlp->dl_mip = NULL;
601 	dlp->dl_unknowns = 0;
602 	kmem_cache_free(i_dls_link_cachep, dlp);
603 }
604 
605 static int
606 i_dls_link_create(const char *name, dls_link_t **dlpp)
607 {
608 	dls_link_t		*dlp;
609 	int			err;
610 
611 	/*
612 	 * Allocate a new dls_link_t structure.
613 	 */
614 	dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
615 
616 	/*
617 	 * Name the dls_link_t after the MAC interface it represents.
618 	 */
619 	(void) strlcpy(dlp->dl_name, name, sizeof (dlp->dl_name));
620 
621 	/*
622 	 * First reference; hold open the MAC interface.
623 	 */
624 	ASSERT(dlp->dl_mh == NULL);
625 	err = mac_open(dlp->dl_name, &dlp->dl_mh);
626 	if (err != 0)
627 		goto bail;
628 
629 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
630 	dlp->dl_mip = mac_info(dlp->dl_mh);
631 
632 	/* DLS is the "primary" MAC client */
633 	ASSERT(dlp->dl_mch == NULL);
634 
635 	err = mac_client_open(dlp->dl_mh, &dlp->dl_mch, NULL,
636 	    MAC_OPEN_FLAGS_USE_DATALINK_NAME);
637 	if (err != 0)
638 		goto bail;
639 
640 	DTRACE_PROBE2(dls__primary__client, char *, dlp->dl_name, void *,
641 	    dlp->dl_mch);
642 
643 	*dlpp = dlp;
644 	return (0);
645 
646 bail:
647 	i_dls_link_destroy(dlp);
648 	return (err);
649 }
650 
651 /*
652  * Module initialization functions.
653  */
654 
655 void
656 dls_link_init(void)
657 {
658 	/*
659 	 * Create a kmem_cache of dls_link_t structures.
660 	 */
661 	i_dls_link_cachep = kmem_cache_create("dls_link_cache",
662 	    sizeof (dls_link_t), 0, i_dls_link_constructor,
663 	    i_dls_link_destructor, NULL, NULL, NULL, 0);
664 	ASSERT(i_dls_link_cachep != NULL);
665 
666 	/*
667 	 * Create a dls_link_t hash table and associated lock.
668 	 */
669 	i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
670 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
671 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
672 	i_dls_link_count = 0;
673 }
674 
675 int
676 dls_link_fini(void)
677 {
678 	if (i_dls_link_count > 0)
679 		return (EBUSY);
680 
681 	/*
682 	 * Destroy the kmem_cache.
683 	 */
684 	kmem_cache_destroy(i_dls_link_cachep);
685 
686 	/*
687 	 * Destroy the hash table and associated lock.
688 	 */
689 	mod_hash_destroy_hash(i_dls_link_hash);
690 	return (0);
691 }
692 
693 /*
694  * Exported functions.
695  */
696 
697 static int
698 dls_link_hold_common(const char *name, dls_link_t **dlpp, boolean_t create)
699 {
700 	dls_link_t		*dlp;
701 	int			err;
702 
703 	/*
704 	 * Look up a dls_link_t corresponding to the given macname in the
705 	 * global hash table. The i_dls_link_hash itself is protected by the
706 	 * mod_hash package's internal lock which synchronizes
707 	 * find/insert/remove into the global mod_hash list. Assumes that
708 	 * inserts and removes are single threaded on a per mac end point
709 	 * by the mac perimeter.
710 	 */
711 	if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
712 	    (mod_hash_val_t *)&dlp)) == 0)
713 		goto done;
714 
715 	ASSERT(err == MH_ERR_NOTFOUND);
716 	if (!create)
717 		return (ENOENT);
718 
719 	/*
720 	 * We didn't find anything so we need to create one.
721 	 */
722 	if ((err = i_dls_link_create(name, &dlp)) != 0)
723 		return (err);
724 
725 	/*
726 	 * Insert the dls_link_t.
727 	 */
728 	err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name,
729 	    (mod_hash_val_t)dlp);
730 	ASSERT(err == 0);
731 
732 	atomic_add_32(&i_dls_link_count, 1);
733 	ASSERT(i_dls_link_count != 0);
734 
735 done:
736 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
737 	/*
738 	 * Bump the reference count and hand back the reference.
739 	 */
740 	dlp->dl_ref++;
741 	*dlpp = dlp;
742 	return (0);
743 }
744 
745 int
746 dls_link_hold_create(const char *name, dls_link_t **dlpp)
747 {
748 	return (dls_link_hold_common(name, dlpp, B_TRUE));
749 }
750 
751 int
752 dls_link_hold(const char *name, dls_link_t **dlpp)
753 {
754 	return (dls_link_hold_common(name, dlpp, B_FALSE));
755 }
756 
757 dev_info_t *
758 dls_link_devinfo(dev_t dev)
759 {
760 	dls_link_t	*dlp;
761 	dev_info_t	*dip;
762 	char	macname[MAXNAMELEN];
763 	char	*drv;
764 	mac_perim_handle_t	mph;
765 
766 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
767 		return (NULL);
768 	(void) snprintf(macname, MAXNAMELEN, "%s%d", drv, getminor(dev) - 1);
769 
770 	/*
771 	 * The code below assumes that the name constructed above is the
772 	 * macname. This is not the case for legacy devices. Currently this
773 	 * is ok because this function is only called in the getinfo(9e) path,
774 	 * which for a legacy device would directly end up in the driver's
775 	 * getinfo, rather than here
776 	 */
777 	if (mac_perim_enter_by_macname(macname, &mph) != 0)
778 		return (NULL);
779 
780 	if (dls_link_hold(macname, &dlp) != 0) {
781 		mac_perim_exit(mph);
782 		return (NULL);
783 	}
784 
785 	dip = mac_devinfo_get(dlp->dl_mh);
786 	dls_link_rele(dlp);
787 	mac_perim_exit(mph);
788 
789 	return (dip);
790 }
791 
792 dev_t
793 dls_link_dev(dls_link_t *dlp)
794 {
795 	return (makedevice(ddi_driver_major(mac_devinfo_get(dlp->dl_mh)),
796 	    mac_minor(dlp->dl_mh)));
797 }
798 
799 void
800 dls_link_rele(dls_link_t *dlp)
801 {
802 	mod_hash_val_t	val;
803 
804 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
805 	/*
806 	 * Check if there are any more references.
807 	 */
808 	if (--dlp->dl_ref == 0) {
809 		(void) mod_hash_remove(i_dls_link_hash,
810 		    (mod_hash_key_t)dlp->dl_name, &val);
811 		ASSERT(dlp == (dls_link_t *)val);
812 
813 		/*
814 		 * Destroy the dls_link_t.
815 		 */
816 		i_dls_link_destroy(dlp);
817 		ASSERT(i_dls_link_count > 0);
818 		atomic_add_32(&i_dls_link_count, -1);
819 	}
820 }
821 
822 int
823 dls_link_rele_by_name(const char *name)
824 {
825 	dls_link_t		*dlp;
826 
827 	if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
828 	    (mod_hash_val_t *)&dlp) != 0)
829 		return (ENOENT);
830 
831 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
832 
833 	/*
834 	 * Must fail detach if mac client is busy.
835 	 */
836 	ASSERT(dlp->dl_ref > 0 && dlp->dl_mch != NULL);
837 	if (mac_link_has_flows(dlp->dl_mch))
838 		return (ENOTEMPTY);
839 
840 	dls_link_rele(dlp);
841 	return (0);
842 }
843 
844 int
845 dls_link_setzid(const char *name, zoneid_t zid)
846 {
847 	dls_link_t	*dlp;
848 	int		err = 0;
849 	zoneid_t	old_zid;
850 
851 	if ((err = dls_link_hold_create(name, &dlp)) != 0)
852 		return (err);
853 
854 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
855 
856 	if ((old_zid = dlp->dl_zid) == zid)
857 		goto done;
858 
859 	/*
860 	 * Check whether this dlp is used by its own zones, if yes,
861 	 * we cannot change its zoneid.
862 	 */
863 	if (dlp->dl_zone_ref != 0) {
864 		err = EBUSY;
865 		goto done;
866 	}
867 
868 	if (zid == GLOBAL_ZONEID) {
869 		/*
870 		 * Move the link from the local zone to the global zone,
871 		 * and release the reference to this link.  At the same time
872 		 * reset the link's active state so that an aggregation is
873 		 * allowed to be created over it.
874 		 */
875 		dlp->dl_zid = zid;
876 		dls_mac_active_clear(dlp);
877 		dls_link_rele(dlp);
878 		goto done;
879 	} else if (old_zid == GLOBAL_ZONEID) {
880 		/*
881 		 * Move the link from the global zone to the local zone,
882 		 * and hold a reference to this link.  Also, set the link
883 		 * to the "active" state so that the global zone is
884 		 * not able to create an aggregation over this link.
885 		 * TODO: revisit once we allow creating aggregations
886 		 * within a local zone.
887 		 */
888 		if ((err = dls_mac_active_set(dlp)) != 0) {
889 			if (err != ENXIO)
890 				err = EBUSY;
891 			goto done;
892 		}
893 		dlp->dl_zid = zid;
894 		return (0);
895 	} else {
896 		/*
897 		 * Move the link from a local zone to another local zone.
898 		 */
899 		dlp->dl_zid = zid;
900 	}
901 
902 done:
903 	dls_link_rele(dlp);
904 	return (err);
905 }
906 
907 void
908 dls_link_add(dls_link_t *dlp, uint32_t sap, dld_str_t *dsp)
909 {
910 	mod_hash_t	*hash = dlp->dl_str_hash;
911 	mod_hash_key_t	key;
912 	dls_head_t	*dhp;
913 	dld_str_t	*p;
914 	int		err;
915 
916 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
917 
918 	/*
919 	 * Generate a hash key based on the sap.
920 	 */
921 	key = MAKE_KEY(sap);
922 
923 	/*
924 	 * Search the table for a list head with this key.
925 	 */
926 	if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
927 		ASSERT(err == MH_ERR_NOTFOUND);
928 
929 		dhp = i_dls_head_alloc(key);
930 		err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
931 		ASSERT(err == 0);
932 	}
933 
934 	/*
935 	 * Add the dld_str_t to the head of the list. List walkers in
936 	 * i_dls_link_rx_* bump up dh_ref to ensure the list does not change
937 	 * while they walk the list. The membar below ensures that list walkers
938 	 * see exactly the old list or the new list.
939 	 */
940 	ASSERT(dsp->ds_next == NULL);
941 	p = dhp->dh_list;
942 	dsp->ds_next = p;
943 
944 	membar_producer();
945 
946 	dhp->dh_list = dsp;
947 
948 	/*
949 	 * Save a pointer to the list head.
950 	 */
951 	dsp->ds_head = dhp;
952 	dlp->dl_impl_count++;
953 }
954 
955 void
956 dls_link_remove(dls_link_t *dlp, dld_str_t *dsp)
957 {
958 	mod_hash_t	*hash = dlp->dl_str_hash;
959 	dld_str_t	**pp;
960 	dld_str_t	*p;
961 	dls_head_t	*dhp;
962 
963 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
964 
965 	/*
966 	 * We set dh_removing here to tell the receive callbacks not to pass
967 	 * up packets anymore. Then wait till the current callbacks are done.
968 	 * This happens either in the close path or in processing the
969 	 * DL_UNBIND_REQ via a taskq thread, and it is ok to cv_wait in either.
970 	 * The dh_ref ensures there aren't and there won't be any upcalls
971 	 * walking or using the dh_list. The mod hash internal lock ensures
972 	 * that the insert/remove of the dls_head_t itself synchronizes with
973 	 * any i_dls_link_rx trying to locate it. The perimeter ensures that
974 	 * there isn't another simultaneous dls_link_add/remove.
975 	 */
976 	dhp = dsp->ds_head;
977 
978 	mutex_enter(&dhp->dh_lock);
979 	dhp->dh_removing = B_TRUE;
980 	while (dhp->dh_ref != 0)
981 		cv_wait(&dhp->dh_cv, &dhp->dh_lock);
982 	mutex_exit(&dhp->dh_lock);
983 
984 	/*
985 	 * Walk the list and remove the dld_str_t.
986 	 */
987 	for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->ds_next)) {
988 		if (p == dsp)
989 			break;
990 	}
991 	ASSERT(p != NULL);
992 	*pp = p->ds_next;
993 	p->ds_next = NULL;
994 	p->ds_head = NULL;
995 
996 	ASSERT(dlp->dl_impl_count != 0);
997 	dlp->dl_impl_count--;
998 
999 	if (dhp->dh_list == NULL) {
1000 		mod_hash_val_t	val = NULL;
1001 
1002 		/*
1003 		 * The list is empty so remove the hash table entry.
1004 		 */
1005 		(void) mod_hash_remove(hash, dhp->dh_key, &val);
1006 		ASSERT(dhp == (dls_head_t *)val);
1007 		i_dls_head_free(dhp);
1008 	} else {
1009 		mutex_enter(&dhp->dh_lock);
1010 		dhp->dh_removing = B_FALSE;
1011 		mutex_exit(&dhp->dh_lock);
1012 	}
1013 }
1014 
1015 int
1016 dls_link_header_info(dls_link_t *dlp, mblk_t *mp, mac_header_info_t *mhip)
1017 {
1018 	boolean_t	is_ethernet = (dlp->dl_mip->mi_media == DL_ETHER);
1019 	int		err = 0;
1020 
1021 	/*
1022 	 * Packets should always be at least 16 bit aligned.
1023 	 */
1024 	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
1025 
1026 	if ((err = mac_header_info(dlp->dl_mh, mp, mhip)) != 0)
1027 		return (err);
1028 
1029 	/*
1030 	 * If this is a VLAN-tagged Ethernet packet, then the SAP in the
1031 	 * mac_header_info_t as returned by mac_header_info() is
1032 	 * ETHERTYPE_VLAN. We need to grab the ethertype from the VLAN header.
1033 	 */
1034 	if (is_ethernet && (mhip->mhi_bindsap == ETHERTYPE_VLAN)) {
1035 		struct ether_vlan_header *evhp;
1036 		uint16_t sap;
1037 		mblk_t *tmp = NULL;
1038 		size_t size;
1039 
1040 		size = sizeof (struct ether_vlan_header);
1041 		if (MBLKL(mp) < size) {
1042 			/*
1043 			 * Pullup the message in order to get the MAC header
1044 			 * infomation. Note that this is a read-only function,
1045 			 * we keep the input packet intact.
1046 			 */
1047 			if ((tmp = msgpullup(mp, size)) == NULL)
1048 				return (EINVAL);
1049 
1050 			mp = tmp;
1051 		}
1052 		evhp = (struct ether_vlan_header *)mp->b_rptr;
1053 		sap = ntohs(evhp->ether_type);
1054 		(void) mac_sap_verify(dlp->dl_mh, sap, &mhip->mhi_bindsap);
1055 		mhip->mhi_hdrsize = sizeof (struct ether_vlan_header);
1056 		mhip->mhi_tci = ntohs(evhp->ether_tci);
1057 		mhip->mhi_istagged = B_TRUE;
1058 		freemsg(tmp);
1059 
1060 		if (VLAN_CFI(mhip->mhi_tci) != ETHER_CFI)
1061 			return (EINVAL);
1062 	} else {
1063 		mhip->mhi_istagged = B_FALSE;
1064 		mhip->mhi_tci = 0;
1065 	}
1066 
1067 	return (0);
1068 }
1069