xref: /titanic_50/usr/src/uts/common/io/dls/dls_link.c (revision 23a1ccea6aac035f084a7a4cdc968687d1b02daf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Data-Link Services Module
28  */
29 
30 #include	<sys/sysmacros.h>
31 #include	<sys/strsubr.h>
32 #include	<sys/strsun.h>
33 #include	<sys/vlan.h>
34 #include	<sys/dld_impl.h>
35 #include	<sys/sdt.h>
36 #include	<sys/atomic.h>
37 
38 static kmem_cache_t	*i_dls_link_cachep;
39 mod_hash_t		*i_dls_link_hash;
40 static uint_t		i_dls_link_count;
41 
42 #define		LINK_HASHSZ	67	/* prime */
43 #define		IMPL_HASHSZ	67	/* prime */
44 
45 /*
46  * Construct a hash key encompassing both DLSAP value and VLAN idenitifier.
47  */
48 #define	MAKE_KEY(_sap)						\
49 	((mod_hash_key_t)(uintptr_t)((_sap) << VLAN_ID_SIZE))
50 
51 #define	DLS_STRIP_PADDING(pktsize, p) {			\
52 	if (pktsize != 0) {				\
53 		ssize_t delta = pktsize - msgdsize(p);	\
54 							\
55 		if (delta < 0)				\
56 			(void) adjmsg(p, delta);	\
57 	}						\
58 }
59 
60 /*
61  * Private functions.
62  */
63 
64 /*ARGSUSED*/
65 static int
66 i_dls_link_constructor(void *buf, void *arg, int kmflag)
67 {
68 	dls_link_t	*dlp = buf;
69 	char		name[MAXNAMELEN];
70 
71 	bzero(buf, sizeof (dls_link_t));
72 
73 	(void) snprintf(name, MAXNAMELEN, "dls_link_t_%p_hash", buf);
74 	dlp->dl_str_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
75 	    mod_hash_null_valdtor);
76 
77 	return (0);
78 }
79 
80 /*ARGSUSED*/
81 static void
82 i_dls_link_destructor(void *buf, void *arg)
83 {
84 	dls_link_t	*dlp = buf;
85 
86 	ASSERT(dlp->dl_ref == 0);
87 	ASSERT(dlp->dl_mh == NULL);
88 	ASSERT(dlp->dl_mah == NULL);
89 	ASSERT(dlp->dl_unknowns == 0);
90 
91 	mod_hash_destroy_idhash(dlp->dl_str_hash);
92 	dlp->dl_str_hash = NULL;
93 
94 }
95 
/*
 * DLS_PREPARE_PKT(mh, mp, mhip, err)
 *
 * - Parse the mac header information of the given packet into *mhip; err
 *   receives the result of mac_vlan_header_info() (0 on success).
 * - Strip the padding and skip over the header. Note that because some
 *   DLS consumers only check the db_ref count of the first mblk, we
 *   pullup the message into a single mblk when the first mblk is shorter
 *   than the MAC header. Because the original message is freed as the
 *   result of message pulling up, mac_vlan_header_info() is called again
 *   to update the mhi_saddr and mhi_daddr pointers in the mhip, and the
 *   original b_next link is carried over to the new message. Further, the
 *   mac_vlan_header_info() function ensures that the size of the pulled
 *   message is greater than the MAC header size, therefore we can
 *   directly advance b_rptr to point at the payload.
 * - If the pullup fails, err is set to EINVAL and mp is left untouched.
 *
 * mp and err are assigned to, so both must be lvalues. On any failure
 * b_rptr is not advanced.
 *
 * We choose to use a macro for performance reasons. It is wrapped in
 * do { } while (0) so that it behaves as a single statement, and its
 * internal locals carry a pp_ prefix so they cannot shadow a caller's
 * variables (callers in this file have their own "nextp").
 */
#define	DLS_PREPARE_PKT(mh, mp, mhip, err) do {				\
	mblk_t *pp_nextp = (mp)->b_next;				\
	if (((err) = mac_vlan_header_info((mh), (mp), (mhip))) == 0) {	\
		DLS_STRIP_PADDING((mhip)->mhi_pktsize, (mp));		\
		if (MBLKL((mp)) < (mhip)->mhi_hdrsize) {		\
			mblk_t *pp_newmp;				\
			if ((pp_newmp = msgpullup((mp), -1)) == NULL) {	\
				(err) = EINVAL;				\
			} else {					\
				(mp)->b_next = NULL;			\
				freemsg((mp));				\
				(mp) = pp_newmp;			\
				VERIFY(mac_vlan_header_info((mh),	\
				    (mp), (mhip)) == 0);		\
				(mp)->b_next = pp_nextp;		\
				(mp)->b_rptr += (mhip)->mhi_hdrsize;	\
			}						\
		} else {						\
			(mp)->b_rptr += (mhip)->mhi_hdrsize;		\
		}							\
	}								\
} while (0)
131 
132 /*
133  * Truncate the chain starting at mp such that all packets in the chain
134  * have identical source and destination addresses, saps, and tag types
135  * (see below).  It returns a pointer to the mblk following the chain,
136  * NULL if there is no further packet following the processed chain.
137  * The countp argument is set to the number of valid packets in the chain.
138  * Note that the whole MAC header (including the VLAN tag if any) in each
139  * packet will be stripped.
140  */
141 static mblk_t *
142 i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, const mac_header_info_t *mhip,
143     uint_t *countp)
144 {
145 	mblk_t		*prevp;
146 	uint_t		npacket = 1;
147 	size_t		addr_size = dlp->dl_mip->mi_addr_length;
148 	uint16_t	vid = VLAN_ID(mhip->mhi_tci);
149 	uint16_t	pri = VLAN_PRI(mhip->mhi_tci);
150 
151 	/*
152 	 * Compare with subsequent headers until we find one that has
153 	 * differing header information. After checking each packet
154 	 * strip padding and skip over the header.
155 	 */
156 	for (prevp = mp; (mp = mp->b_next) != NULL; prevp = mp) {
157 		mac_header_info_t cmhi;
158 		uint16_t cvid, cpri;
159 		int err;
160 
161 		DLS_PREPARE_PKT(dlp->dl_mh, mp, &cmhi, err);
162 		if (err != 0)
163 			break;
164 
165 		prevp->b_next = mp;
166 
167 		/*
168 		 * The source, destination, sap, vlan tag must all match in
169 		 * a given subchain.
170 		 */
171 		if (mhip->mhi_saddr == NULL || cmhi.mhi_saddr == NULL ||
172 		    memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 ||
173 		    memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 ||
174 		    mhip->mhi_bindsap != cmhi.mhi_bindsap) {
175 			/*
176 			 * Note that we don't need to restore the padding.
177 			 */
178 			mp->b_rptr -= cmhi.mhi_hdrsize;
179 			break;
180 		}
181 
182 		cvid = VLAN_ID(cmhi.mhi_tci);
183 		cpri = VLAN_PRI(cmhi.mhi_tci);
184 
185 		/*
186 		 * There are several types of packets. Packets don't match
187 		 * if they are classified to different type or if they are
188 		 * VLAN packets but belong to different VLANs:
189 		 *
190 		 * packet type		tagged		vid		pri
191 		 * ---------------------------------------------------------
192 		 * untagged		No		zero		zero
193 		 * VLAN packets		Yes		non-zero	-
194 		 * priority tagged	Yes		zero		non-zero
195 		 * 0 tagged		Yes		zero		zero
196 		 */
197 		if ((mhip->mhi_istagged != cmhi.mhi_istagged) ||
198 		    (vid != cvid) || ((vid == VLAN_ID_NONE) &&
199 		    (((pri == 0) && (cpri != 0)) ||
200 		    ((pri != 0) && (cpri == 0))))) {
201 			mp->b_rptr -= cmhi.mhi_hdrsize;
202 			break;
203 		}
204 
205 		npacket++;
206 	}
207 
208 	/*
209 	 * Break the chain at this point and return a pointer to the next
210 	 * sub-chain.
211 	 */
212 	prevp->b_next = NULL;
213 	*countp = npacket;
214 	return (mp);
215 }
216 
217 /* ARGSUSED */
218 static int
219 i_dls_head_hold(mod_hash_key_t key, mod_hash_val_t val)
220 {
221 	dls_head_t *dhp = (dls_head_t *)val;
222 
223 	/*
224 	 * The lock order is  mod_hash's internal lock -> dh_lock as in the
225 	 * call to i_dls_link_rx -> mod_hash_find_cb_rval -> i_dls_head_hold
226 	 */
227 	mutex_enter(&dhp->dh_lock);
228 	if (dhp->dh_removing) {
229 		mutex_exit(&dhp->dh_lock);
230 		return (-1);
231 	}
232 	dhp->dh_ref++;
233 	mutex_exit(&dhp->dh_lock);
234 	return (0);
235 }
236 
237 void
238 i_dls_head_rele(dls_head_t *dhp)
239 {
240 	mutex_enter(&dhp->dh_lock);
241 	dhp->dh_ref--;
242 	if (dhp->dh_ref == 0 && dhp->dh_removing != 0)
243 		cv_broadcast(&dhp->dh_cv);
244 	mutex_exit(&dhp->dh_lock);
245 }
246 
247 static dls_head_t *
248 i_dls_head_alloc(mod_hash_key_t key)
249 {
250 	dls_head_t	*dhp;
251 
252 	dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
253 	dhp->dh_key = key;
254 	return (dhp);
255 }
256 
257 static void
258 i_dls_head_free(dls_head_t *dhp)
259 {
260 	ASSERT(dhp->dh_ref == 0);
261 	kmem_free(dhp, sizeof (dls_head_t));
262 }
263 
264 /*
265  * Try to send mp up to the streams of the given sap and vid. Return B_TRUE
266  * if this message is sent to any streams.
267  * Note that this function will copy the message chain and the original
268  * mp will remain valid after this function
269  */
270 static uint_t
271 i_dls_link_rx_func(dls_link_t *dlp, mac_resource_handle_t mrh,
272     mac_header_info_t *mhip, mblk_t *mp, uint32_t sap,
273     boolean_t (*acceptfunc)())
274 {
275 	mod_hash_t	*hash = dlp->dl_str_hash;
276 	mod_hash_key_t	key;
277 	dls_head_t	*dhp;
278 	dld_str_t	*dsp;
279 	mblk_t		*nmp;
280 	dls_rx_t	ds_rx;
281 	void		*ds_rx_arg;
282 	uint_t		naccepted = 0;
283 	int		rval;
284 
285 	/*
286 	 * Construct a hash key from the VLAN identifier and the
287 	 * DLSAP that represents dld_str_t in promiscuous mode.
288 	 */
289 	key = MAKE_KEY(sap);
290 
291 	/*
292 	 * Search the hash table for dld_str_t eligible to receive
293 	 * a packet chain for this DLSAP/VLAN combination. The mod hash's
294 	 * internal lock serializes find/insert/remove from the mod hash list.
295 	 * Incrementing the dh_ref (while holding the mod hash lock) ensures
296 	 * dls_link_remove will wait for the upcall to finish.
297 	 */
298 	if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
299 	    i_dls_head_hold, &rval) != 0 || (rval != 0)) {
300 		return (B_FALSE);
301 	}
302 
303 	/*
304 	 * Find dld_str_t that will accept the sub-chain.
305 	 */
306 	for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next) {
307 		if (!acceptfunc(dsp, mhip, &ds_rx, &ds_rx_arg))
308 			continue;
309 
310 		/*
311 		 * We have at least one acceptor.
312 		 */
313 		naccepted++;
314 
315 		/*
316 		 * There will normally be at least more dld_str_t
317 		 * (since we've yet to check for non-promiscuous
318 		 * dld_str_t) so dup the sub-chain.
319 		 */
320 		if ((nmp = copymsgchain(mp)) != NULL)
321 			ds_rx(ds_rx_arg, mrh, nmp, mhip);
322 	}
323 
324 	/*
325 	 * Release the hold on the dld_str_t chain now that we have
326 	 * finished walking it.
327 	 */
328 	i_dls_head_rele(dhp);
329 	return (naccepted);
330 }
331 
332 /* ARGSUSED */
333 void
334 i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
335     boolean_t loopback)
336 {
337 	dls_link_t			*dlp = arg;
338 	mod_hash_t			*hash = dlp->dl_str_hash;
339 	mblk_t				*nextp;
340 	mac_header_info_t		mhi;
341 	dls_head_t			*dhp;
342 	dld_str_t			*dsp;
343 	dld_str_t			*ndsp;
344 	mblk_t				*nmp;
345 	mod_hash_key_t			key;
346 	uint_t				npacket;
347 	boolean_t			accepted;
348 	dls_rx_t			ds_rx, nds_rx;
349 	void				*ds_rx_arg, *nds_rx_arg;
350 	uint16_t			vid;
351 	int				err, rval;
352 
353 	/*
354 	 * Walk the packet chain.
355 	 */
356 	for (; mp != NULL; mp = nextp) {
357 		/*
358 		 * Wipe the accepted state.
359 		 */
360 		accepted = B_FALSE;
361 
362 		DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
363 		if (err != 0) {
364 			atomic_add_32(&(dlp->dl_unknowns), 1);
365 			nextp = mp->b_next;
366 			mp->b_next = NULL;
367 			freemsg(mp);
368 			continue;
369 		}
370 
371 		/*
372 		 * Grab the longest sub-chain we can process as a single
373 		 * unit.
374 		 */
375 		nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
376 		ASSERT(npacket != 0);
377 
378 		vid = VLAN_ID(mhi.mhi_tci);
379 
380 		if (mhi.mhi_istagged) {
381 			/*
382 			 * If it is tagged traffic, send it upstream to
383 			 * all dld_str_t which are attached to the physical
384 			 * link and bound to SAP 0x8100.
385 			 */
386 			if (i_dls_link_rx_func(dlp, mrh, &mhi, mp,
387 			    ETHERTYPE_VLAN, dls_accept) > 0) {
388 				accepted = B_TRUE;
389 			}
390 
391 			/*
392 			 * Don't pass the packets up if they are tagged
393 			 * packets and:
394 			 *  - their VID and priority are both zero and the
395 			 *    original packet isn't using the PVID (invalid
396 			 *    packets).
397 			 *  - their sap is ETHERTYPE_VLAN and their VID is
398 			 *    zero as they have already been sent upstreams.
399 			 */
400 			if ((vid == VLAN_ID_NONE && !mhi.mhi_ispvid &&
401 			    VLAN_PRI(mhi.mhi_tci) == 0) ||
402 			    (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
403 			    vid == VLAN_ID_NONE)) {
404 				freemsgchain(mp);
405 				goto loop;
406 			}
407 		}
408 
409 		/*
410 		 * Construct a hash key from the VLAN identifier and the
411 		 * DLSAP.
412 		 */
413 		key = MAKE_KEY(mhi.mhi_bindsap);
414 
415 		/*
416 		 * Search the has table for dld_str_t eligible to receive
417 		 * a packet chain for this DLSAP/VLAN combination.
418 		 */
419 		if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
420 		    i_dls_head_hold, &rval) != 0 || (rval != 0)) {
421 			freemsgchain(mp);
422 			goto loop;
423 		}
424 
425 		/*
426 		 * Find the first dld_str_t that will accept the sub-chain.
427 		 */
428 		for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next)
429 			if (dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
430 				break;
431 
432 		/*
433 		 * If we did not find any dld_str_t willing to accept the
434 		 * sub-chain then throw it away.
435 		 */
436 		if (dsp == NULL) {
437 			i_dls_head_rele(dhp);
438 			freemsgchain(mp);
439 			goto loop;
440 		}
441 
442 		/*
443 		 * We have at least one acceptor.
444 		 */
445 		accepted = B_TRUE;
446 		for (;;) {
447 			/*
448 			 * Find the next dld_str_t that will accept the
449 			 * sub-chain.
450 			 */
451 			for (ndsp = dsp->ds_next; ndsp != NULL;
452 			    ndsp = ndsp->ds_next)
453 				if (dls_accept(ndsp, &mhi, &nds_rx,
454 				    &nds_rx_arg))
455 					break;
456 
457 			/*
458 			 * If there are no more dld_str_t that are willing
459 			 * to accept the sub-chain then we don't need to dup
460 			 * it before handing it to the current one.
461 			 */
462 			if (ndsp == NULL) {
463 				ds_rx(ds_rx_arg, mrh, mp, &mhi);
464 
465 				/*
466 				 * Since there are no more dld_str_t, we're
467 				 * done.
468 				 */
469 				break;
470 			}
471 
472 			/*
473 			 * There are more dld_str_t so dup the sub-chain.
474 			 */
475 			if ((nmp = copymsgchain(mp)) != NULL)
476 				ds_rx(ds_rx_arg, mrh, nmp, &mhi);
477 
478 			dsp = ndsp;
479 			ds_rx = nds_rx;
480 			ds_rx_arg = nds_rx_arg;
481 		}
482 
483 		/*
484 		 * Release the hold on the dld_str_t chain now that we have
485 		 * finished walking it.
486 		 */
487 		i_dls_head_rele(dhp);
488 
489 loop:
490 		/*
491 		 * If there were no acceptors then add the packet count to the
492 		 * 'unknown' count.
493 		 */
494 		if (!accepted)
495 			atomic_add_32(&(dlp->dl_unknowns), npacket);
496 	}
497 }
498 
499 /* ARGSUSED */
500 void
501 dls_rx_vlan_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
502     boolean_t loopback)
503 {
504 	dld_str_t			*dsp = arg;
505 	dls_link_t			*dlp = dsp->ds_dlp;
506 	mac_header_info_t		mhi;
507 	dls_rx_t			ds_rx;
508 	void				*ds_rx_arg;
509 	int				err;
510 
511 	DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
512 	if (err != 0)
513 		goto drop;
514 
515 	/*
516 	 * If there is promiscuous handle for vlan, we filter out the untagged
517 	 * pkts and pkts that are not for the primary unicast address.
518 	 */
519 	if (dsp->ds_vlan_mph != NULL) {
520 		uint8_t prim_addr[MAXMACADDRLEN];
521 		size_t	addr_length = dsp->ds_mip->mi_addr_length;
522 
523 		if (!(mhi.mhi_istagged))
524 			goto drop;
525 		ASSERT(dsp->ds_mh != NULL);
526 		mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)prim_addr);
527 		if (memcmp(mhi.mhi_daddr, prim_addr, addr_length) != 0)
528 			goto drop;
529 
530 		if (!dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
531 			goto drop;
532 
533 		ds_rx(ds_rx_arg, NULL, mp, &mhi);
534 		return;
535 	}
536 
537 drop:
538 	atomic_add_32(&dlp->dl_unknowns, 1);
539 	freemsg(mp);
540 }
541 
542 /* ARGSUSED */
543 void
544 dls_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
545     boolean_t loopback)
546 {
547 	dld_str_t			*dsp = arg;
548 	dls_link_t			*dlp = dsp->ds_dlp;
549 	mac_header_info_t		mhi;
550 	dls_rx_t			ds_rx;
551 	void				*ds_rx_arg;
552 	int				err;
553 	dls_head_t			*dhp;
554 	mod_hash_key_t			key;
555 
556 	DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
557 	if (err != 0)
558 		goto drop;
559 
560 	/*
561 	 * In order to filter out sap pkt that no dls channel listens, search
562 	 * the hash table trying to find a dld_str_t eligible to receive the pkt
563 	 */
564 	if ((dsp->ds_promisc & DLS_PROMISC_SAP) == 0) {
565 		key = MAKE_KEY(mhi.mhi_bindsap);
566 		if (mod_hash_find(dsp->ds_dlp->dl_str_hash, key,
567 		    (mod_hash_val_t *)&dhp) != 0)
568 			goto drop;
569 	}
570 
571 	if (!dls_accept_promisc(dsp, &mhi, &ds_rx, &ds_rx_arg, loopback))
572 		goto drop;
573 
574 	ds_rx(ds_rx_arg, NULL, mp, &mhi);
575 	return;
576 
577 drop:
578 	atomic_add_32(&dlp->dl_unknowns, 1);
579 	freemsg(mp);
580 }
581 
582 static void
583 i_dls_link_destroy(dls_link_t *dlp)
584 {
585 	ASSERT(dlp->dl_nactive == 0);
586 	ASSERT(dlp->dl_impl_count == 0);
587 	ASSERT(dlp->dl_zone_ref == 0);
588 
589 	/*
590 	 * Free the structure back to the cache.
591 	 */
592 	if (dlp->dl_mch != NULL)
593 		mac_client_close(dlp->dl_mch, 0);
594 
595 	if (dlp->dl_mh != NULL) {
596 		ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
597 		mac_close(dlp->dl_mh);
598 	}
599 
600 	dlp->dl_mh = NULL;
601 	dlp->dl_mch = NULL;
602 	dlp->dl_mip = NULL;
603 	dlp->dl_unknowns = 0;
604 	dlp->dl_nonip_cnt = 0;
605 	kmem_cache_free(i_dls_link_cachep, dlp);
606 }
607 
608 static int
609 i_dls_link_create(const char *name, dls_link_t **dlpp)
610 {
611 	dls_link_t		*dlp;
612 	int			err;
613 
614 	/*
615 	 * Allocate a new dls_link_t structure.
616 	 */
617 	dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
618 
619 	/*
620 	 * Name the dls_link_t after the MAC interface it represents.
621 	 */
622 	(void) strlcpy(dlp->dl_name, name, sizeof (dlp->dl_name));
623 
624 	/*
625 	 * First reference; hold open the MAC interface.
626 	 */
627 	ASSERT(dlp->dl_mh == NULL);
628 	err = mac_open(dlp->dl_name, &dlp->dl_mh);
629 	if (err != 0)
630 		goto bail;
631 
632 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
633 	dlp->dl_mip = mac_info(dlp->dl_mh);
634 
635 	/* DLS is the "primary" MAC client */
636 	ASSERT(dlp->dl_mch == NULL);
637 
638 	err = mac_client_open(dlp->dl_mh, &dlp->dl_mch, NULL,
639 	    MAC_OPEN_FLAGS_USE_DATALINK_NAME);
640 	if (err != 0)
641 		goto bail;
642 
643 	DTRACE_PROBE2(dls__primary__client, char *, dlp->dl_name, void *,
644 	    dlp->dl_mch);
645 
646 	*dlpp = dlp;
647 	return (0);
648 
649 bail:
650 	i_dls_link_destroy(dlp);
651 	return (err);
652 }
653 
654 /*
655  * Module initialization functions.
656  */
657 
658 void
659 dls_link_init(void)
660 {
661 	/*
662 	 * Create a kmem_cache of dls_link_t structures.
663 	 */
664 	i_dls_link_cachep = kmem_cache_create("dls_link_cache",
665 	    sizeof (dls_link_t), 0, i_dls_link_constructor,
666 	    i_dls_link_destructor, NULL, NULL, NULL, 0);
667 	ASSERT(i_dls_link_cachep != NULL);
668 
669 	/*
670 	 * Create a dls_link_t hash table and associated lock.
671 	 */
672 	i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
673 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
674 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
675 	i_dls_link_count = 0;
676 }
677 
678 int
679 dls_link_fini(void)
680 {
681 	if (i_dls_link_count > 0)
682 		return (EBUSY);
683 
684 	/*
685 	 * Destroy the kmem_cache.
686 	 */
687 	kmem_cache_destroy(i_dls_link_cachep);
688 
689 	/*
690 	 * Destroy the hash table and associated lock.
691 	 */
692 	mod_hash_destroy_hash(i_dls_link_hash);
693 	return (0);
694 }
695 
696 /*
697  * Exported functions.
698  */
699 
700 static int
701 dls_link_hold_common(const char *name, dls_link_t **dlpp, boolean_t create)
702 {
703 	dls_link_t		*dlp;
704 	int			err;
705 
706 	/*
707 	 * Look up a dls_link_t corresponding to the given macname in the
708 	 * global hash table. The i_dls_link_hash itself is protected by the
709 	 * mod_hash package's internal lock which synchronizes
710 	 * find/insert/remove into the global mod_hash list. Assumes that
711 	 * inserts and removes are single threaded on a per mac end point
712 	 * by the mac perimeter.
713 	 */
714 	if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
715 	    (mod_hash_val_t *)&dlp)) == 0)
716 		goto done;
717 
718 	ASSERT(err == MH_ERR_NOTFOUND);
719 	if (!create)
720 		return (ENOENT);
721 
722 	/*
723 	 * We didn't find anything so we need to create one.
724 	 */
725 	if ((err = i_dls_link_create(name, &dlp)) != 0)
726 		return (err);
727 
728 	/*
729 	 * Insert the dls_link_t.
730 	 */
731 	err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name,
732 	    (mod_hash_val_t)dlp);
733 	ASSERT(err == 0);
734 
735 	atomic_add_32(&i_dls_link_count, 1);
736 	ASSERT(i_dls_link_count != 0);
737 
738 done:
739 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
740 	/*
741 	 * Bump the reference count and hand back the reference.
742 	 */
743 	dlp->dl_ref++;
744 	*dlpp = dlp;
745 	return (0);
746 }
747 
748 int
749 dls_link_hold_create(const char *name, dls_link_t **dlpp)
750 {
751 	return (dls_link_hold_common(name, dlpp, B_TRUE));
752 }
753 
754 int
755 dls_link_hold(const char *name, dls_link_t **dlpp)
756 {
757 	return (dls_link_hold_common(name, dlpp, B_FALSE));
758 }
759 
760 dev_info_t *
761 dls_link_devinfo(dev_t dev)
762 {
763 	dls_link_t	*dlp;
764 	dev_info_t	*dip;
765 	char	macname[MAXNAMELEN];
766 	char	*drv;
767 	mac_perim_handle_t	mph;
768 
769 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
770 		return (NULL);
771 	(void) snprintf(macname, MAXNAMELEN, "%s%d", drv,
772 	    DLS_MINOR2INST(getminor(dev)));
773 
774 	/*
775 	 * The code below assumes that the name constructed above is the
776 	 * macname. This is not the case for legacy devices. Currently this
777 	 * is ok because this function is only called in the getinfo(9e) path,
778 	 * which for a legacy device would directly end up in the driver's
779 	 * getinfo, rather than here
780 	 */
781 	if (mac_perim_enter_by_macname(macname, &mph) != 0)
782 		return (NULL);
783 
784 	if (dls_link_hold(macname, &dlp) != 0) {
785 		mac_perim_exit(mph);
786 		return (NULL);
787 	}
788 
789 	dip = mac_devinfo_get(dlp->dl_mh);
790 	dls_link_rele(dlp);
791 	mac_perim_exit(mph);
792 
793 	return (dip);
794 }
795 
796 dev_t
797 dls_link_dev(dls_link_t *dlp)
798 {
799 	return (makedevice(ddi_driver_major(mac_devinfo_get(dlp->dl_mh)),
800 	    mac_minor(dlp->dl_mh)));
801 }
802 
803 void
804 dls_link_rele(dls_link_t *dlp)
805 {
806 	mod_hash_val_t	val;
807 
808 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
809 	/*
810 	 * Check if there are any more references.
811 	 */
812 	if (--dlp->dl_ref == 0) {
813 		(void) mod_hash_remove(i_dls_link_hash,
814 		    (mod_hash_key_t)dlp->dl_name, &val);
815 		ASSERT(dlp == (dls_link_t *)val);
816 
817 		/*
818 		 * Destroy the dls_link_t.
819 		 */
820 		i_dls_link_destroy(dlp);
821 		ASSERT(i_dls_link_count > 0);
822 		atomic_add_32(&i_dls_link_count, -1);
823 	}
824 }
825 
826 int
827 dls_link_rele_by_name(const char *name)
828 {
829 	dls_link_t		*dlp;
830 
831 	if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
832 	    (mod_hash_val_t *)&dlp) != 0)
833 		return (ENOENT);
834 
835 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
836 
837 	/*
838 	 * Must fail detach if mac client is busy.
839 	 */
840 	ASSERT(dlp->dl_ref > 0 && dlp->dl_mch != NULL);
841 	if (mac_link_has_flows(dlp->dl_mch))
842 		return (ENOTEMPTY);
843 
844 	dls_link_rele(dlp);
845 	return (0);
846 }
847 
848 int
849 dls_link_setzid(const char *name, zoneid_t zid)
850 {
851 	dls_link_t	*dlp;
852 	int		err = 0;
853 	zoneid_t	old_zid;
854 
855 	if ((err = dls_link_hold_create(name, &dlp)) != 0)
856 		return (err);
857 
858 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
859 
860 	if ((old_zid = dlp->dl_zid) == zid)
861 		goto done;
862 
863 	/*
864 	 * Check whether this dlp is used by its own zone.  If yes, we cannot
865 	 * change its zoneid.
866 	 */
867 	if (dlp->dl_zone_ref != 0) {
868 		err = EBUSY;
869 		goto done;
870 	}
871 
872 	dlp->dl_zid = zid;
873 
874 	if (zid == GLOBAL_ZONEID) {
875 		/*
876 		 * The link is moving from a non-global zone to the global
877 		 * zone, so we need to release the reference that was held
878 		 * when the link was originally assigned to the non-global
879 		 * zone.
880 		 */
881 		dls_link_rele(dlp);
882 	}
883 
884 done:
885 	/*
886 	 * We only keep the reference to this link open if the link has
887 	 * successfully moved from the global zone to a non-global zone.
888 	 */
889 	if (err != 0 || old_zid != GLOBAL_ZONEID)
890 		dls_link_rele(dlp);
891 	return (err);
892 }
893 
894 int
895 dls_link_getzid(const char *name, zoneid_t *zidp)
896 {
897 	dls_link_t	*dlp;
898 	int		err = 0;
899 
900 	if ((err = dls_link_hold(name, &dlp)) != 0)
901 		return (err);
902 
903 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
904 
905 	*zidp = dlp->dl_zid;
906 
907 	dls_link_rele(dlp);
908 	return (0);
909 }
910 
911 void
912 dls_link_add(dls_link_t *dlp, uint32_t sap, dld_str_t *dsp)
913 {
914 	mod_hash_t	*hash = dlp->dl_str_hash;
915 	mod_hash_key_t	key;
916 	dls_head_t	*dhp;
917 	dld_str_t	*p;
918 	int		err;
919 
920 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
921 
922 	/*
923 	 * Generate a hash key based on the sap.
924 	 */
925 	key = MAKE_KEY(sap);
926 
927 	/*
928 	 * Search the table for a list head with this key.
929 	 */
930 	if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
931 		ASSERT(err == MH_ERR_NOTFOUND);
932 
933 		dhp = i_dls_head_alloc(key);
934 		err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
935 		ASSERT(err == 0);
936 	}
937 
938 	/*
939 	 * Add the dld_str_t to the head of the list. List walkers in
940 	 * i_dls_link_rx_* bump up dh_ref to ensure the list does not change
941 	 * while they walk the list. The membar below ensures that list walkers
942 	 * see exactly the old list or the new list.
943 	 */
944 	ASSERT(dsp->ds_next == NULL);
945 	p = dhp->dh_list;
946 	dsp->ds_next = p;
947 
948 	membar_producer();
949 
950 	dhp->dh_list = dsp;
951 
952 	/*
953 	 * Save a pointer to the list head.
954 	 */
955 	dsp->ds_head = dhp;
956 	dlp->dl_impl_count++;
957 }
958 
959 void
960 dls_link_remove(dls_link_t *dlp, dld_str_t *dsp)
961 {
962 	mod_hash_t	*hash = dlp->dl_str_hash;
963 	dld_str_t	**pp;
964 	dld_str_t	*p;
965 	dls_head_t	*dhp;
966 
967 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
968 
969 	/*
970 	 * We set dh_removing here to tell the receive callbacks not to pass
971 	 * up packets anymore. Then wait till the current callbacks are done.
972 	 * This happens either in the close path or in processing the
973 	 * DL_UNBIND_REQ via a taskq thread, and it is ok to cv_wait in either.
974 	 * The dh_ref ensures there aren't and there won't be any upcalls
975 	 * walking or using the dh_list. The mod hash internal lock ensures
976 	 * that the insert/remove of the dls_head_t itself synchronizes with
977 	 * any i_dls_link_rx trying to locate it. The perimeter ensures that
978 	 * there isn't another simultaneous dls_link_add/remove.
979 	 */
980 	dhp = dsp->ds_head;
981 
982 	mutex_enter(&dhp->dh_lock);
983 	dhp->dh_removing = B_TRUE;
984 	while (dhp->dh_ref != 0)
985 		cv_wait(&dhp->dh_cv, &dhp->dh_lock);
986 	mutex_exit(&dhp->dh_lock);
987 
988 	/*
989 	 * Walk the list and remove the dld_str_t.
990 	 */
991 	for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->ds_next)) {
992 		if (p == dsp)
993 			break;
994 	}
995 	ASSERT(p != NULL);
996 	*pp = p->ds_next;
997 	p->ds_next = NULL;
998 	p->ds_head = NULL;
999 
1000 	ASSERT(dlp->dl_impl_count != 0);
1001 	dlp->dl_impl_count--;
1002 
1003 	if (dhp->dh_list == NULL) {
1004 		mod_hash_val_t	val = NULL;
1005 
1006 		/*
1007 		 * The list is empty so remove the hash table entry.
1008 		 */
1009 		(void) mod_hash_remove(hash, dhp->dh_key, &val);
1010 		ASSERT(dhp == (dls_head_t *)val);
1011 		i_dls_head_free(dhp);
1012 	} else {
1013 		mutex_enter(&dhp->dh_lock);
1014 		dhp->dh_removing = B_FALSE;
1015 		mutex_exit(&dhp->dh_lock);
1016 	}
1017 }
1018