xref: /illumos-gate/usr/src/uts/common/io/dls/dls_link.c (revision d70bcb7258b79267aad36309c42fd499e844458f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2017 Joyent, Inc.
25  */
26 
27 /*
28  * Data-Link Services Module
29  */
30 
31 #include	<sys/sysmacros.h>
32 #include	<sys/strsubr.h>
33 #include	<sys/strsun.h>
34 #include	<sys/vlan.h>
35 #include	<sys/dld_impl.h>
36 #include	<sys/sdt.h>
37 #include	<sys/atomic.h>
38 
39 static kmem_cache_t	*i_dls_link_cachep;
40 mod_hash_t		*i_dls_link_hash;
41 static uint_t		i_dls_link_count;
42 
43 #define		LINK_HASHSZ	67	/* prime */
44 #define		IMPL_HASHSZ	67	/* prime */
45 
46 /*
47  * Construct a hash key from the DLSAP value.
48  */
49 #define	MAKE_KEY(_sap)						\
50 	((mod_hash_key_t)(uintptr_t)((_sap) << VLAN_ID_SIZE))
51 
52 #define	DLS_STRIP_PADDING(pktsize, p) {			\
53 	if (pktsize != 0) {				\
54 		ssize_t delta = pktsize - msgdsize(p);	\
55 							\
56 		if (delta < 0)				\
57 			(void) adjmsg(p, delta);	\
58 	}						\
59 }
60 
61 /*
62  * Private functions.
63  */
64 
65 /*ARGSUSED*/
66 static int
67 i_dls_link_constructor(void *buf, void *arg, int kmflag)
68 {
69 	dls_link_t	*dlp = buf;
70 	char		name[MAXNAMELEN];
71 
72 	bzero(buf, sizeof (dls_link_t));
73 
74 	(void) snprintf(name, MAXNAMELEN, "dls_link_t_%p_hash", buf);
75 	dlp->dl_str_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
76 	    mod_hash_null_valdtor);
77 
78 	return (0);
79 }
80 
81 /*ARGSUSED*/
82 static void
83 i_dls_link_destructor(void *buf, void *arg)
84 {
85 	dls_link_t	*dlp = buf;
86 
87 	ASSERT(dlp->dl_ref == 0);
88 	ASSERT(dlp->dl_mh == NULL);
89 	ASSERT(dlp->dl_mah == NULL);
90 	ASSERT(dlp->dl_unknowns == 0);
91 
92 	mod_hash_destroy_idhash(dlp->dl_str_hash);
93 	dlp->dl_str_hash = NULL;
94 
95 }
96 
/*
 * Prepare a single packet (mp) for delivery:
 *
 * - Parse its MAC header into mhip with mac_vlan_header_info(); a
 *   non-zero result is stored in err and nothing else is done.
 * - Strip any padding.
 * - If the first mblk is shorter than the MAC header, pull the whole
 *   message up into a single mblk (some DLS consumers only check the
 *   db_ref count of the first mblk).  The original message is freed by
 *   this step, so mac_vlan_header_info() is run again to refresh the
 *   mhi_saddr and mhi_daddr pointers in mhip, and the saved b_next link
 *   is carried over to the new message.  A failed pullup sets err to
 *   EINVAL and leaves mp untouched.
 * - On success, advance b_rptr past the MAC header so that it points at
 *   the payload.  mac_vlan_header_info() guarantees the message is
 *   larger than the header, so this is safe.
 *
 * mp may be reassigned, so it must be an lvalue.  This is a macro rather
 * than a function for performance reasons.
 */
#define	DLS_PREPARE_PKT(mh, mp, mhip, err) {				\
	mblk_t *dpp_next = (mp)->b_next;				\
	(err) = mac_vlan_header_info((mh), (mp), (mhip));		\
	if ((err) == 0) {						\
		DLS_STRIP_PADDING((mhip)->mhi_pktsize, (mp));		\
		if (MBLKL((mp)) < (mhip)->mhi_hdrsize) {		\
			mblk_t *dpp_flat = msgpullup((mp), -1);		\
			if (dpp_flat == NULL) {				\
				(err) = EINVAL;				\
			} else {					\
				(mp)->b_next = NULL;			\
				freemsg((mp));				\
				(mp) = dpp_flat;			\
				VERIFY(mac_vlan_header_info((mh),	\
				    (mp), (mhip)) == 0);		\
				(mp)->b_next = dpp_next;		\
			}						\
		}							\
		if ((err) == 0)						\
			(mp)->b_rptr += (mhip)->mhi_hdrsize;		\
	}								\
}
132 
133 /*
134  * Truncate the chain starting at mp such that all packets in the chain
135  * have identical source and destination addresses, saps, and tag types
136  * (see below).  It returns a pointer to the mblk following the chain,
137  * NULL if there is no further packet following the processed chain.
138  * The countp argument is set to the number of valid packets in the chain.
139  * Note that the whole MAC header (including the VLAN tag if any) in each
140  * packet will be stripped.
141  */
142 static mblk_t *
143 i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, const mac_header_info_t *mhip,
144     uint_t *countp)
145 {
146 	mblk_t		*prevp;
147 	uint_t		npacket = 1;
148 	size_t		addr_size = dlp->dl_mip->mi_addr_length;
149 	uint16_t	vid = VLAN_ID(mhip->mhi_tci);
150 	uint16_t	pri = VLAN_PRI(mhip->mhi_tci);
151 
152 	/*
153 	 * Compare with subsequent headers until we find one that has
154 	 * differing header information. After checking each packet
155 	 * strip padding and skip over the header.
156 	 */
157 	for (prevp = mp; (mp = mp->b_next) != NULL; prevp = mp) {
158 		mac_header_info_t cmhi;
159 		uint16_t cvid, cpri;
160 		int err;
161 
162 		DLS_PREPARE_PKT(dlp->dl_mh, mp, &cmhi, err);
163 		if (err != 0)
164 			break;
165 
166 		prevp->b_next = mp;
167 
168 		/*
169 		 * The source, destination, sap, vlan tag must all match in
170 		 * a given subchain.
171 		 */
172 		if (mhip->mhi_saddr == NULL || cmhi.mhi_saddr == NULL ||
173 		    memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 ||
174 		    memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 ||
175 		    mhip->mhi_bindsap != cmhi.mhi_bindsap) {
176 			/*
177 			 * Note that we don't need to restore the padding.
178 			 */
179 			mp->b_rptr -= cmhi.mhi_hdrsize;
180 			break;
181 		}
182 
183 		cvid = VLAN_ID(cmhi.mhi_tci);
184 		cpri = VLAN_PRI(cmhi.mhi_tci);
185 
186 		/*
187 		 * There are several types of packets. Packets don't match
188 		 * if they are classified to different type or if they are
189 		 * VLAN packets but belong to different VLANs:
190 		 *
191 		 * packet type		tagged		vid		pri
192 		 * ---------------------------------------------------------
193 		 * untagged		No		zero		zero
194 		 * VLAN packets		Yes		non-zero	-
195 		 * priority tagged	Yes		zero		non-zero
196 		 * 0 tagged		Yes		zero		zero
197 		 */
198 		if ((mhip->mhi_istagged != cmhi.mhi_istagged) ||
199 		    (vid != cvid) || ((vid == VLAN_ID_NONE) &&
200 		    (((pri == 0) && (cpri != 0)) ||
201 		    ((pri != 0) && (cpri == 0))))) {
202 			mp->b_rptr -= cmhi.mhi_hdrsize;
203 			break;
204 		}
205 
206 		npacket++;
207 	}
208 
209 	/*
210 	 * Break the chain at this point and return a pointer to the next
211 	 * sub-chain.
212 	 */
213 	prevp->b_next = NULL;
214 	*countp = npacket;
215 	return (mp);
216 }
217 
218 /* ARGSUSED */
219 static int
220 i_dls_head_hold(mod_hash_key_t key, mod_hash_val_t val)
221 {
222 	dls_head_t *dhp = (dls_head_t *)val;
223 
224 	/*
225 	 * The lock order is  mod_hash's internal lock -> dh_lock as in the
226 	 * call to i_dls_link_rx -> mod_hash_find_cb_rval -> i_dls_head_hold
227 	 */
228 	mutex_enter(&dhp->dh_lock);
229 	if (dhp->dh_removing) {
230 		mutex_exit(&dhp->dh_lock);
231 		return (-1);
232 	}
233 	dhp->dh_ref++;
234 	mutex_exit(&dhp->dh_lock);
235 	return (0);
236 }
237 
238 void
239 i_dls_head_rele(dls_head_t *dhp)
240 {
241 	mutex_enter(&dhp->dh_lock);
242 	dhp->dh_ref--;
243 	if (dhp->dh_ref == 0 && dhp->dh_removing != 0)
244 		cv_broadcast(&dhp->dh_cv);
245 	mutex_exit(&dhp->dh_lock);
246 }
247 
248 static dls_head_t *
249 i_dls_head_alloc(mod_hash_key_t key)
250 {
251 	dls_head_t	*dhp;
252 
253 	dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
254 	dhp->dh_key = key;
255 	return (dhp);
256 }
257 
258 static void
259 i_dls_head_free(dls_head_t *dhp)
260 {
261 	ASSERT(dhp->dh_ref == 0);
262 	kmem_free(dhp, sizeof (dls_head_t));
263 }
264 
265 /*
266  * Try to send mp up to the streams of the given sap. Return the
267  * number of streams which accepted this message, or 0 if no streams
268  * accepted the message.
269  *
270  * Note that this function copies the message chain and the original
271  * mp remains valid after this function returns.
272  */
273 static uint_t
274 i_dls_link_rx_func(dls_link_t *dlp, mac_resource_handle_t mrh,
275     mac_header_info_t *mhip, mblk_t *mp, uint32_t sap,
276     boolean_t (*acceptfunc)())
277 {
278 	mod_hash_t	*hash = dlp->dl_str_hash;
279 	mod_hash_key_t	key;
280 	dls_head_t	*dhp;
281 	dld_str_t	*dsp;
282 	mblk_t		*nmp;
283 	dls_rx_t	ds_rx;
284 	void		*ds_rx_arg;
285 	uint_t		naccepted = 0;
286 	int		rval;
287 
288 	/*
289 	 * Construct a hash key from the DLSAP.
290 	 */
291 	key = MAKE_KEY(sap);
292 
293 	/*
294 	 * Search the hash table for a dld_str_t eligible to receive a
295 	 * packet chain for this DLSAP. The mod hash's internal lock
296 	 * serializes find/insert/remove from the mod hash list.
297 	 * Incrementing the dh_ref (while holding the mod hash lock)
298 	 * ensures dls_link_remove will wait for the upcall to finish.
299 	 */
300 	if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
301 	    i_dls_head_hold, &rval) != 0 || (rval != 0)) {
302 		return (0);
303 	}
304 
305 	/*
306 	 * Find all dld_str_t that will accept the sub-chain.
307 	 */
308 	for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next) {
309 		if (!acceptfunc(dsp, mhip, &ds_rx, &ds_rx_arg))
310 			continue;
311 
312 		/*
313 		 * We have at least one acceptor.
314 		 */
315 		naccepted++;
316 
317 		/*
318 		 * There will normally be at least one more dld_str_t
319 		 * (since we've yet to check for non-promiscuous
320 		 * dld_str_t) so dup the sub-chain.
321 		 */
322 		if ((nmp = copymsgchain(mp)) != NULL)
323 			ds_rx(ds_rx_arg, mrh, nmp, mhip);
324 	}
325 
326 	/*
327 	 * Release the hold on the dld_str_t chain now that we have
328 	 * finished walking it.
329 	 */
330 	i_dls_head_rele(dhp);
331 	return (naccepted);
332 }
333 
334 /* ARGSUSED */
335 void
336 i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
337     boolean_t loopback)
338 {
339 	dls_link_t			*dlp = arg;
340 	mod_hash_t			*hash = dlp->dl_str_hash;
341 	mblk_t				*nextp;
342 	mac_header_info_t		mhi;
343 	dls_head_t			*dhp;
344 	dld_str_t			*dsp;
345 	dld_str_t			*ndsp;
346 	mblk_t				*nmp;
347 	mod_hash_key_t			key;
348 	uint_t				npacket;
349 	boolean_t			accepted;
350 	dls_rx_t			ds_rx, nds_rx;
351 	void				*ds_rx_arg, *nds_rx_arg;
352 	uint16_t			vid;
353 	int				err, rval;
354 
355 	/*
356 	 * Walk the packet chain.
357 	 */
358 	for (; mp != NULL; mp = nextp) {
359 		/*
360 		 * Wipe the accepted state.
361 		 */
362 		accepted = B_FALSE;
363 
364 		DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
365 		if (err != 0) {
366 			atomic_inc_32(&(dlp->dl_unknowns));
367 			nextp = mp->b_next;
368 			mp->b_next = NULL;
369 			freemsg(mp);
370 			continue;
371 		}
372 
373 		/*
374 		 * Grab the longest sub-chain we can process as a single
375 		 * unit.
376 		 */
377 		nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
378 		ASSERT(npacket != 0);
379 
380 		vid = VLAN_ID(mhi.mhi_tci);
381 
382 		if (mhi.mhi_istagged) {
383 			/*
384 			 * If it is tagged traffic, send it upstream to
385 			 * all dld_str_t which are attached to the physical
386 			 * link and bound to SAP 0x8100.
387 			 */
388 			if (i_dls_link_rx_func(dlp, mrh, &mhi, mp,
389 			    ETHERTYPE_VLAN, dls_accept) > 0) {
390 				accepted = B_TRUE;
391 			}
392 
393 			/*
394 			 * Don't pass the packets up if they are tagged
395 			 * packets and:
396 			 *  - their VID and priority are both zero and the
397 			 *    original packet isn't using the PVID (invalid
398 			 *    packets).
399 			 *  - their sap is ETHERTYPE_VLAN and their VID is
400 			 *    zero as they have already been sent upstreams.
401 			 */
402 			if ((vid == VLAN_ID_NONE && !mhi.mhi_ispvid &&
403 			    VLAN_PRI(mhi.mhi_tci) == 0) ||
404 			    (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
405 			    vid == VLAN_ID_NONE)) {
406 				freemsgchain(mp);
407 				goto loop;
408 			}
409 		}
410 
411 		/*
412 		 * Construct a hash key from the DLSAP.
413 		 */
414 		key = MAKE_KEY(mhi.mhi_bindsap);
415 
416 		/*
417 		 * Search the hash table for dld_str_t eligible to receive
418 		 * a packet chain for this DLSAP.
419 		 */
420 		if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
421 		    i_dls_head_hold, &rval) != 0 || (rval != 0)) {
422 			freemsgchain(mp);
423 			goto loop;
424 		}
425 
426 		/*
427 		 * Find the first dld_str_t that will accept the sub-chain.
428 		 */
429 		for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next)
430 			if (dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
431 				break;
432 
433 		/*
434 		 * If we did not find any dld_str_t willing to accept the
435 		 * sub-chain then throw it away.
436 		 */
437 		if (dsp == NULL) {
438 			i_dls_head_rele(dhp);
439 			freemsgchain(mp);
440 			goto loop;
441 		}
442 
443 		/*
444 		 * We have at least one acceptor.
445 		 */
446 		accepted = B_TRUE;
447 		for (;;) {
448 			/*
449 			 * Find the next dld_str_t that will accept the
450 			 * sub-chain.
451 			 */
452 			for (ndsp = dsp->ds_next; ndsp != NULL;
453 			    ndsp = ndsp->ds_next)
454 				if (dls_accept(ndsp, &mhi, &nds_rx,
455 				    &nds_rx_arg))
456 					break;
457 
458 			/*
459 			 * If there are no more dld_str_t that are willing
460 			 * to accept the sub-chain then we don't need to dup
461 			 * it before handing it to the current one.
462 			 */
463 			if (ndsp == NULL) {
464 				ds_rx(ds_rx_arg, mrh, mp, &mhi);
465 
466 				/*
467 				 * Since there are no more dld_str_t, we're
468 				 * done.
469 				 */
470 				break;
471 			}
472 
473 			/*
474 			 * There are more dld_str_t so dup the sub-chain.
475 			 */
476 			if ((nmp = copymsgchain(mp)) != NULL)
477 				ds_rx(ds_rx_arg, mrh, nmp, &mhi);
478 
479 			dsp = ndsp;
480 			ds_rx = nds_rx;
481 			ds_rx_arg = nds_rx_arg;
482 		}
483 
484 		/*
485 		 * Release the hold on the dld_str_t chain now that we have
486 		 * finished walking it.
487 		 */
488 		i_dls_head_rele(dhp);
489 
490 loop:
491 		/*
492 		 * If there were no acceptors then add the packet count to the
493 		 * 'unknown' count.
494 		 */
495 		if (!accepted)
496 			atomic_add_32(&(dlp->dl_unknowns), npacket);
497 	}
498 }
499 
500 /* ARGSUSED */
501 void
502 dls_rx_vlan_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
503     boolean_t loopback)
504 {
505 	dld_str_t			*dsp = arg;
506 	dls_link_t			*dlp = dsp->ds_dlp;
507 	mac_header_info_t		mhi;
508 	dls_rx_t			ds_rx;
509 	void				*ds_rx_arg;
510 	int				err;
511 
512 	DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
513 	if (err != 0)
514 		goto drop;
515 
516 	/*
517 	 * If there is promiscuous handle for vlan, we filter out the untagged
518 	 * pkts and pkts that are not for the primary unicast address.
519 	 */
520 	if (dsp->ds_vlan_mph != NULL) {
521 		uint8_t prim_addr[MAXMACADDRLEN];
522 		size_t	addr_length = dsp->ds_mip->mi_addr_length;
523 
524 		if (!(mhi.mhi_istagged))
525 			goto drop;
526 		ASSERT(dsp->ds_mh != NULL);
527 		mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)prim_addr);
528 		if (memcmp(mhi.mhi_daddr, prim_addr, addr_length) != 0)
529 			goto drop;
530 
531 		if (!dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
532 			goto drop;
533 
534 		ds_rx(ds_rx_arg, NULL, mp, &mhi);
535 		return;
536 	}
537 
538 drop:
539 	atomic_inc_32(&dlp->dl_unknowns);
540 	freemsg(mp);
541 }
542 
543 /* ARGSUSED */
544 void
545 dls_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
546     boolean_t loopback)
547 {
548 	dld_str_t			*dsp = arg;
549 	dls_link_t			*dlp = dsp->ds_dlp;
550 	mac_header_info_t		mhi;
551 	dls_rx_t			ds_rx;
552 	void				*ds_rx_arg;
553 	int				err;
554 	dls_head_t			*dhp;
555 	mod_hash_key_t			key;
556 
557 	DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
558 	if (err != 0)
559 		goto drop;
560 
561 	/*
562 	 * In order to filter out sap pkt that no dls channel listens, search
563 	 * the hash table trying to find a dld_str_t eligible to receive the pkt
564 	 */
565 	if ((dsp->ds_promisc & DLS_PROMISC_SAP) == 0) {
566 		key = MAKE_KEY(mhi.mhi_bindsap);
567 		if (mod_hash_find(dsp->ds_dlp->dl_str_hash, key,
568 		    (mod_hash_val_t *)&dhp) != 0)
569 			goto drop;
570 	}
571 
572 	if (!dls_accept_promisc(dsp, &mhi, &ds_rx, &ds_rx_arg, loopback))
573 		goto drop;
574 
575 	ds_rx(ds_rx_arg, NULL, mp, &mhi);
576 	return;
577 
578 drop:
579 	atomic_inc_32(&dlp->dl_unknowns);
580 	freemsg(mp);
581 }
582 
583 static void
584 i_dls_link_destroy(dls_link_t *dlp)
585 {
586 	ASSERT(dlp->dl_nactive == 0);
587 	ASSERT(dlp->dl_impl_count == 0);
588 	ASSERT(dlp->dl_zone_ref == 0);
589 
590 	/*
591 	 * Free the structure back to the cache.
592 	 */
593 	if (dlp->dl_mch != NULL)
594 		mac_client_close(dlp->dl_mch, 0);
595 
596 	if (dlp->dl_mh != NULL) {
597 		ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
598 		mac_close(dlp->dl_mh);
599 	}
600 
601 	dlp->dl_mh = NULL;
602 	dlp->dl_mch = NULL;
603 	dlp->dl_mip = NULL;
604 	dlp->dl_unknowns = 0;
605 	dlp->dl_nonip_cnt = 0;
606 	kmem_cache_free(i_dls_link_cachep, dlp);
607 }
608 
609 static int
610 i_dls_link_create(const char *name, dls_link_t **dlpp)
611 {
612 	dls_link_t		*dlp;
613 	int			err;
614 
615 	/*
616 	 * Allocate a new dls_link_t structure.
617 	 */
618 	dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
619 
620 	/*
621 	 * Name the dls_link_t after the MAC interface it represents.
622 	 */
623 	(void) strlcpy(dlp->dl_name, name, sizeof (dlp->dl_name));
624 
625 	/*
626 	 * First reference; hold open the MAC interface.
627 	 */
628 	ASSERT(dlp->dl_mh == NULL);
629 	err = mac_open(dlp->dl_name, &dlp->dl_mh);
630 	if (err != 0)
631 		goto bail;
632 
633 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
634 	dlp->dl_mip = mac_info(dlp->dl_mh);
635 
636 	/* DLS is the "primary" MAC client */
637 	ASSERT(dlp->dl_mch == NULL);
638 
639 	err = mac_client_open(dlp->dl_mh, &dlp->dl_mch, NULL,
640 	    MAC_OPEN_FLAGS_USE_DATALINK_NAME);
641 	if (err != 0)
642 		goto bail;
643 
644 	DTRACE_PROBE2(dls__primary__client, char *, dlp->dl_name, void *,
645 	    dlp->dl_mch);
646 
647 	*dlpp = dlp;
648 	return (0);
649 
650 bail:
651 	i_dls_link_destroy(dlp);
652 	return (err);
653 }
654 
655 /*
656  * Module initialization functions.
657  */
658 
659 void
660 dls_link_init(void)
661 {
662 	/*
663 	 * Create a kmem_cache of dls_link_t structures.
664 	 */
665 	i_dls_link_cachep = kmem_cache_create("dls_link_cache",
666 	    sizeof (dls_link_t), 0, i_dls_link_constructor,
667 	    i_dls_link_destructor, NULL, NULL, NULL, 0);
668 	ASSERT(i_dls_link_cachep != NULL);
669 
670 	/*
671 	 * Create a dls_link_t hash table and associated lock.
672 	 */
673 	i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
674 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
675 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
676 	i_dls_link_count = 0;
677 }
678 
679 int
680 dls_link_fini(void)
681 {
682 	if (i_dls_link_count > 0)
683 		return (EBUSY);
684 
685 	/*
686 	 * Destroy the kmem_cache.
687 	 */
688 	kmem_cache_destroy(i_dls_link_cachep);
689 
690 	/*
691 	 * Destroy the hash table and associated lock.
692 	 */
693 	mod_hash_destroy_hash(i_dls_link_hash);
694 	return (0);
695 }
696 
697 /*
698  * Exported functions.
699  */
700 
701 static int
702 dls_link_hold_common(const char *name, dls_link_t **dlpp, boolean_t create)
703 {
704 	dls_link_t		*dlp;
705 	int			err;
706 
707 	/*
708 	 * Look up a dls_link_t corresponding to the given macname in the
709 	 * global hash table. The i_dls_link_hash itself is protected by the
710 	 * mod_hash package's internal lock which synchronizes
711 	 * find/insert/remove into the global mod_hash list. Assumes that
712 	 * inserts and removes are single threaded on a per mac end point
713 	 * by the mac perimeter.
714 	 */
715 	if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
716 	    (mod_hash_val_t *)&dlp)) == 0)
717 		goto done;
718 
719 	ASSERT(err == MH_ERR_NOTFOUND);
720 	if (!create)
721 		return (ENOENT);
722 
723 	/*
724 	 * We didn't find anything so we need to create one.
725 	 */
726 	if ((err = i_dls_link_create(name, &dlp)) != 0)
727 		return (err);
728 
729 	/*
730 	 * Insert the dls_link_t.
731 	 */
732 	err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name,
733 	    (mod_hash_val_t)dlp);
734 	ASSERT(err == 0);
735 
736 	atomic_inc_32(&i_dls_link_count);
737 	ASSERT(i_dls_link_count != 0);
738 
739 done:
740 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
741 	/*
742 	 * Bump the reference count and hand back the reference.
743 	 */
744 	dlp->dl_ref++;
745 	*dlpp = dlp;
746 	return (0);
747 }
748 
749 int
750 dls_link_hold_create(const char *name, dls_link_t **dlpp)
751 {
752 	return (dls_link_hold_common(name, dlpp, B_TRUE));
753 }
754 
755 int
756 dls_link_hold(const char *name, dls_link_t **dlpp)
757 {
758 	return (dls_link_hold_common(name, dlpp, B_FALSE));
759 }
760 
761 dev_info_t *
762 dls_link_devinfo(dev_t dev)
763 {
764 	dls_link_t	*dlp;
765 	dev_info_t	*dip;
766 	char	macname[MAXNAMELEN];
767 	char	*drv;
768 	mac_perim_handle_t	mph;
769 
770 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
771 		return (NULL);
772 	(void) snprintf(macname, MAXNAMELEN, "%s%d", drv,
773 	    DLS_MINOR2INST(getminor(dev)));
774 
775 	/*
776 	 * The code below assumes that the name constructed above is the
777 	 * macname. This is not the case for legacy devices. Currently this
778 	 * is ok because this function is only called in the getinfo(9e) path,
779 	 * which for a legacy device would directly end up in the driver's
780 	 * getinfo, rather than here
781 	 */
782 	if (mac_perim_enter_by_macname(macname, &mph) != 0)
783 		return (NULL);
784 
785 	if (dls_link_hold(macname, &dlp) != 0) {
786 		mac_perim_exit(mph);
787 		return (NULL);
788 	}
789 
790 	dip = mac_devinfo_get(dlp->dl_mh);
791 	dls_link_rele(dlp);
792 	mac_perim_exit(mph);
793 
794 	return (dip);
795 }
796 
797 dev_t
798 dls_link_dev(dls_link_t *dlp)
799 {
800 	return (makedevice(ddi_driver_major(mac_devinfo_get(dlp->dl_mh)),
801 	    mac_minor(dlp->dl_mh)));
802 }
803 
804 void
805 dls_link_rele(dls_link_t *dlp)
806 {
807 	mod_hash_val_t	val;
808 
809 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
810 	/*
811 	 * Check if there are any more references.
812 	 */
813 	if (--dlp->dl_ref == 0) {
814 		(void) mod_hash_remove(i_dls_link_hash,
815 		    (mod_hash_key_t)dlp->dl_name, &val);
816 		ASSERT(dlp == (dls_link_t *)val);
817 
818 		/*
819 		 * Destroy the dls_link_t.
820 		 */
821 		i_dls_link_destroy(dlp);
822 		ASSERT(i_dls_link_count > 0);
823 		atomic_dec_32(&i_dls_link_count);
824 	}
825 }
826 
827 int
828 dls_link_rele_by_name(const char *name)
829 {
830 	dls_link_t		*dlp;
831 
832 	if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
833 	    (mod_hash_val_t *)&dlp) != 0)
834 		return (ENOENT);
835 
836 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
837 
838 	/*
839 	 * Must fail detach if mac client is busy.
840 	 */
841 	ASSERT(dlp->dl_ref > 0 && dlp->dl_mch != NULL);
842 	if (mac_link_has_flows(dlp->dl_mch))
843 		return (ENOTEMPTY);
844 
845 	dls_link_rele(dlp);
846 	return (0);
847 }
848 
849 int
850 dls_link_setzid(const char *name, zoneid_t zid)
851 {
852 	dls_link_t	*dlp;
853 	int		err = 0;
854 	zoneid_t	old_zid;
855 
856 	if ((err = dls_link_hold_create(name, &dlp)) != 0)
857 		return (err);
858 
859 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
860 
861 	if ((old_zid = dlp->dl_zid) == zid)
862 		goto done;
863 
864 	/*
865 	 * Check whether this dlp is used by its own zone.  If yes, we cannot
866 	 * change its zoneid.
867 	 */
868 	if (dlp->dl_zone_ref != 0) {
869 		err = EBUSY;
870 		goto done;
871 	}
872 
873 	dlp->dl_zid = zid;
874 
875 	if (zid == GLOBAL_ZONEID) {
876 		/*
877 		 * The link is moving from a non-global zone to the global
878 		 * zone, so we need to release the reference that was held
879 		 * when the link was originally assigned to the non-global
880 		 * zone.
881 		 */
882 		dls_link_rele(dlp);
883 	}
884 
885 done:
886 	/*
887 	 * We only keep the reference to this link open if the link has
888 	 * successfully moved from the global zone to a non-global zone.
889 	 */
890 	if (err != 0 || old_zid != GLOBAL_ZONEID)
891 		dls_link_rele(dlp);
892 	return (err);
893 }
894 
895 int
896 dls_link_getzid(const char *name, zoneid_t *zidp)
897 {
898 	dls_link_t	*dlp;
899 	int		err = 0;
900 
901 	if ((err = dls_link_hold(name, &dlp)) != 0)
902 		return (err);
903 
904 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
905 
906 	*zidp = dlp->dl_zid;
907 
908 	dls_link_rele(dlp);
909 	return (0);
910 }
911 
912 void
913 dls_link_add(dls_link_t *dlp, uint32_t sap, dld_str_t *dsp)
914 {
915 	mod_hash_t	*hash = dlp->dl_str_hash;
916 	mod_hash_key_t	key;
917 	dls_head_t	*dhp;
918 	dld_str_t	*p;
919 	int		err;
920 
921 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
922 
923 	/*
924 	 * Generate a hash key based on the sap.
925 	 */
926 	key = MAKE_KEY(sap);
927 
928 	/*
929 	 * Search the table for a list head with this key.
930 	 */
931 	if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
932 		ASSERT(err == MH_ERR_NOTFOUND);
933 
934 		dhp = i_dls_head_alloc(key);
935 		err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
936 		ASSERT(err == 0);
937 	}
938 
939 	/*
940 	 * Add the dld_str_t to the head of the list. List walkers in
941 	 * i_dls_link_rx_* bump up dh_ref to ensure the list does not change
942 	 * while they walk the list. The membar below ensures that list walkers
943 	 * see exactly the old list or the new list.
944 	 */
945 	ASSERT(dsp->ds_next == NULL);
946 	p = dhp->dh_list;
947 	dsp->ds_next = p;
948 
949 	membar_producer();
950 
951 	dhp->dh_list = dsp;
952 
953 	/*
954 	 * Save a pointer to the list head.
955 	 */
956 	dsp->ds_head = dhp;
957 	dlp->dl_impl_count++;
958 }
959 
960 void
961 dls_link_remove(dls_link_t *dlp, dld_str_t *dsp)
962 {
963 	mod_hash_t	*hash = dlp->dl_str_hash;
964 	dld_str_t	**pp;
965 	dld_str_t	*p;
966 	dls_head_t	*dhp;
967 
968 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
969 
970 	/*
971 	 * We set dh_removing here to tell the receive callbacks not to pass
972 	 * up packets anymore. Then wait till the current callbacks are done.
973 	 * This happens either in the close path or in processing the
974 	 * DL_UNBIND_REQ via a taskq thread, and it is ok to cv_wait in either.
975 	 * The dh_ref ensures there aren't and there won't be any upcalls
976 	 * walking or using the dh_list. The mod hash internal lock ensures
977 	 * that the insert/remove of the dls_head_t itself synchronizes with
978 	 * any i_dls_link_rx trying to locate it. The perimeter ensures that
979 	 * there isn't another simultaneous dls_link_add/remove.
980 	 */
981 	dhp = dsp->ds_head;
982 
983 	mutex_enter(&dhp->dh_lock);
984 	dhp->dh_removing = B_TRUE;
985 	while (dhp->dh_ref != 0)
986 		cv_wait(&dhp->dh_cv, &dhp->dh_lock);
987 	mutex_exit(&dhp->dh_lock);
988 
989 	/*
990 	 * Walk the list and remove the dld_str_t.
991 	 */
992 	for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->ds_next)) {
993 		if (p == dsp)
994 			break;
995 	}
996 	ASSERT(p != NULL);
997 	*pp = p->ds_next;
998 	p->ds_next = NULL;
999 	p->ds_head = NULL;
1000 
1001 	ASSERT(dlp->dl_impl_count != 0);
1002 	dlp->dl_impl_count--;
1003 
1004 	if (dhp->dh_list == NULL) {
1005 		mod_hash_val_t	val = NULL;
1006 
1007 		/*
1008 		 * The list is empty so remove the hash table entry.
1009 		 */
1010 		(void) mod_hash_remove(hash, dhp->dh_key, &val);
1011 		ASSERT(dhp == (dls_head_t *)val);
1012 		i_dls_head_free(dhp);
1013 	} else {
1014 		mutex_enter(&dhp->dh_lock);
1015 		dhp->dh_removing = B_FALSE;
1016 		mutex_exit(&dhp->dh_lock);
1017 	}
1018 }
1019