xref: /illumos-gate/usr/src/uts/common/io/dls/dls_mgmt.c (revision 1677a13522f801f59117c9fb50212af5fb87a872)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright (c) 2017 Joyent, Inc.
25  */
26 /*
27  * Copyright (c) 2016 by Delphix. All rights reserved.
28  */
29 
30 /*
31  * Datalink management routines.
32  */
33 
#include <sys/types.h>
#include <sys/door.h>
#include <sys/zone.h>
#include <sys/modctl.h>
#include <sys/file.h>
#include <sys/modhash.h>
#include <sys/kstat.h>
#include <sys/vnode.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include <sys/softmac.h>
#include <sys/dls.h>
#include <sys/dls_impl.h>
#include <sys/stropts.h>
#include <sys/netstack.h>
#include <inet/iptun/iptun_impl.h>
49 
50 /*
51  * This vanity name management module is treated as part of the GLD framework
52  * and we don't hold any GLD framework lock across a call to any mac
53  * function that needs to acquire the mac perimeter. The hierarchy is
54  * mac perimeter -> framework locks
55  */
56 
/*
 * Per-netstack DLS state. One instance is allocated by dls_stack_init() and
 * released by dls_stack_fini().
 */
typedef struct dls_stack {
	zoneid_t	dlss_zoneid;	/* from netstackid_to_zoneid() */
} dls_stack_t;
60 
/* kmem cache from which every dls_devnet_t is allocated */
static kmem_cache_t	*i_dls_devnet_cachep;
/* held while reading or replacing the dlmgmtd door handle (dls_mgmt_dh) */
static kmutex_t		i_dls_mgmt_lock;
/* taken as writer while the two devnet hash tables below are modified */
static krwlock_t	i_dls_devnet_lock;
/* dls_devnet_t lookup keyed by dd_linkid */
static mod_hash_t	*i_dls_devnet_id_hash;
/* dls_devnet_t lookup keyed by dd_mac */
static mod_hash_t	*i_dls_devnet_hash;

/* set to B_TRUE whenever an entry enters or leaves the linkid hash */
boolean_t		devnet_need_rebuild;

/* size passed when creating both devnet hash tables */
#define	VLAN_HASHSZ	67	/* prime */
70 
/*
 * Legacy tunnel device names. Each macro expects a link name whose trailing
 * PPA has already been removed. For backward compatibility with Solaris 10
 * and prior, opening a /dev/net node with one of these names implicitly
 * creates a tunnel link in dls_devnet_hold_by_name().
 */
#define	IS_IPV4_TUN(lname)	(0 == strcmp((lname), "ip.tun"))
#define	IS_IPV6_TUN(lname)	(0 == strcmp((lname), "ip6.tun"))
#define	IS_6TO4_TUN(lname)	(0 == strcmp((lname), "ip.6to4tun"))

/* True if the name matches any of the three legacy tunnel names. */
#define	IS_IPTUN_LINK(lname)						\
	(IS_IPV4_TUN(lname) || IS_IPV6_TUN(lname) || IS_6TO4_TUN(lname))
82 
/*
 * Upcall door handle. NULL while no dlmgmtd door is open; it is read and
 * replaced with i_dls_mgmt_lock held.
 */
static door_handle_t	dls_mgmt_dh = NULL;
85 
/* dls_devnet_t dd_flags */
#define	DD_CONDEMNED		0x1
#define	DD_IMPLICIT_IPTUN	0x2 /* Implicitly-created ip*.*tun* tunnel */
#define	DD_INITIALIZING		0x4

/*
 * If the link is marked as initializing or condemned then it should
 * not be visible outside of the DLS framework.
 *
 * The argument is parenthesized so that a compound expression such as
 * DD_NOT_VISIBLE(a | b) tests the whole value rather than only its last
 * operand.
 */
#define	DD_NOT_VISIBLE(flags)	(					\
	((flags) & (DD_CONDEMNED | DD_INITIALIZING)) != 0)
97 
/*
 * This structure is used to keep the <linkid, macname> mapping.
 * This structure itself is not protected by the mac perimeter, but is
 * protected by the dd_mutex and i_dls_devnet_lock. Thus most of the
 * functions manipulating this structure such as dls_devnet_set/unset etc.
 * may be called while not holding the mac perimeter.
 */
typedef struct dls_devnet_s {
	/* key in i_dls_devnet_id_hash once a linkid has been assigned */
	datalink_id_t	dd_linkid;
	/* link name reported by dlmgmtd for dd_linkid */
	char		dd_linkname[MAXLINKNAMELEN];
	/* mac name; key in i_dls_devnet_hash */
	char		dd_mac[MAXNAMELEN];
	kstat_t		*dd_ksp;	/* kstat in owner_zid */
	kstat_t		*dd_zone_ksp;	/* in dd_zid if != owner_zid */
	/* reference count; dls_devnet_unset() requires a non-zero value */
	uint32_t	dd_ref;
	kmutex_t	dd_mutex;
	/* signalled when dd_tref drops to zero or the prop task completes */
	kcondvar_t	dd_cv;
	/* temporary references, dropped by dls_devnet_rele_tmp() */
	uint32_t	dd_tref;
	uint_t		dd_flags;	/* DD_* flags */
	zoneid_t	dd_owner_zid;	/* zone where node was created */
	zoneid_t	dd_zid;		/* current zone */
	/* set to B_TRUE by dls_devnet_prop_task() when it finishes */
	boolean_t	dd_prop_loaded;
	/* id of the dispatched property task; 0 when none is outstanding */
	taskqid_t	dd_prop_taskid;
	boolean_t	dd_transient;	/* link goes away when zone does */
} dls_devnet_t;
122 
123 static int i_dls_devnet_create_iptun(const char *, const char *,
124     datalink_id_t *);
125 static int i_dls_devnet_destroy_iptun(datalink_id_t);
126 static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t);
127 static int dls_devnet_unset(mac_handle_t, datalink_id_t *, boolean_t);
128 
129 /*ARGSUSED*/
130 static int
131 i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
132 {
133 	dls_devnet_t	*ddp = buf;
134 
135 	bzero(buf, sizeof (dls_devnet_t));
136 	mutex_init(&ddp->dd_mutex, NULL, MUTEX_DEFAULT, NULL);
137 	cv_init(&ddp->dd_cv, NULL, CV_DEFAULT, NULL);
138 	return (0);
139 }
140 
141 /*ARGSUSED*/
142 static void
143 i_dls_devnet_destructor(void *buf, void *arg)
144 {
145 	dls_devnet_t	*ddp = buf;
146 
147 	VERIFY(ddp->dd_ksp == NULL);
148 	VERIFY(ddp->dd_ref == 0);
149 	VERIFY(ddp->dd_tref == 0);
150 	mutex_destroy(&ddp->dd_mutex);
151 	cv_destroy(&ddp->dd_cv);
152 }
153 
154 /* ARGSUSED */
155 static int
156 dls_zone_remove(datalink_id_t linkid, void *arg)
157 {
158 	dls_devnet_t *ddp;
159 
160 	if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
161 		/*
162 		 * Don't bother moving transient links back to the global zone
163 		 * since we will simply delete them in dls_devnet_unset.
164 		 */
165 		if (!ddp->dd_transient)
166 			(void) dls_devnet_setzid(ddp, GLOBAL_ZONEID);
167 		dls_devnet_rele_tmp(ddp);
168 	}
169 	return (0);
170 }
171 
172 /* ARGSUSED */
173 static void *
174 dls_stack_init(netstackid_t stackid, netstack_t *ns)
175 {
176 	dls_stack_t *dlss;
177 
178 	dlss = kmem_zalloc(sizeof (*dlss), KM_SLEEP);
179 	dlss->dlss_zoneid = netstackid_to_zoneid(stackid);
180 	return (dlss);
181 }
182 
183 /* ARGSUSED */
184 static void
185 dls_stack_shutdown(netstackid_t stackid, void *arg)
186 {
187 	dls_stack_t	*dlss = (dls_stack_t *)arg;
188 
189 	/* Move remaining datalinks in this zone back to the global zone. */
190 	(void) zone_datalink_walk(dlss->dlss_zoneid, dls_zone_remove, NULL);
191 }
192 
193 /* ARGSUSED */
194 static void
195 dls_stack_fini(netstackid_t stackid, void *arg)
196 {
197 	dls_stack_t	*dlss = (dls_stack_t *)arg;
198 
199 	kmem_free(dlss, sizeof (*dlss));
200 }
201 
202 /*
203  * Module initialization and finalization functions.
204  */
205 void
206 dls_mgmt_init(void)
207 {
208 	mutex_init(&i_dls_mgmt_lock, NULL, MUTEX_DEFAULT, NULL);
209 	rw_init(&i_dls_devnet_lock, NULL, RW_DEFAULT, NULL);
210 
211 	/*
212 	 * Create a kmem_cache of dls_devnet_t structures.
213 	 */
214 	i_dls_devnet_cachep = kmem_cache_create("dls_devnet_cache",
215 	    sizeof (dls_devnet_t), 0, i_dls_devnet_constructor,
216 	    i_dls_devnet_destructor, NULL, NULL, NULL, 0);
217 	ASSERT(i_dls_devnet_cachep != NULL);
218 
219 	/*
220 	 * Create a hash table, keyed by dd_linkid, of dls_devnet_t.
221 	 */
222 	i_dls_devnet_id_hash = mod_hash_create_idhash("dls_devnet_id_hash",
223 	    VLAN_HASHSZ, mod_hash_null_valdtor);
224 
225 	/*
226 	 * Create a hash table, keyed by dd_mac
227 	 */
228 	i_dls_devnet_hash = mod_hash_create_extended("dls_devnet_hash",
229 	    VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
230 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
231 
232 	devnet_need_rebuild = B_FALSE;
233 
234 	netstack_register(NS_DLS, dls_stack_init, dls_stack_shutdown,
235 	    dls_stack_fini);
236 }
237 
/*
 * Undo dls_mgmt_init(): unregister the netstack callbacks first, then
 * destroy the hash tables, the dls_devnet_t cache and the module locks.
 */
void
dls_mgmt_fini(void)
{
	netstack_unregister(NS_DLS);
	mod_hash_destroy_hash(i_dls_devnet_hash);
	mod_hash_destroy_hash(i_dls_devnet_id_hash);
	kmem_cache_destroy(i_dls_devnet_cachep);
	rw_destroy(&i_dls_devnet_lock);
	mutex_destroy(&i_dls_mgmt_lock);
}
248 
249 int
250 dls_mgmt_door_set(boolean_t start)
251 {
252 	int	err;
253 
254 	/* handle daemon restart */
255 	mutex_enter(&i_dls_mgmt_lock);
256 	if (dls_mgmt_dh != NULL) {
257 		door_ki_rele(dls_mgmt_dh);
258 		dls_mgmt_dh = NULL;
259 	}
260 
261 	if (start && ((err = door_ki_open(DLMGMT_DOOR, &dls_mgmt_dh)) != 0)) {
262 		mutex_exit(&i_dls_mgmt_lock);
263 		return (err);
264 	}
265 
266 	mutex_exit(&i_dls_mgmt_lock);
267 
268 	/*
269 	 * Create and associate <link name, linkid> mapping for network devices
270 	 * which are already attached before the daemon is started.
271 	 */
272 	if (start)
273 		softmac_recreate();
274 	return (0);
275 }
276 
277 static boolean_t
278 i_dls_mgmt_door_revoked(door_handle_t dh)
279 {
280 	struct door_info info;
281 	extern int sys_shutdown;
282 
283 	ASSERT(dh != NULL);
284 
285 	if (sys_shutdown) {
286 		cmn_err(CE_NOTE, "dls_mgmt_door: shutdown observed\n");
287 		return (B_TRUE);
288 	}
289 
290 	if (door_ki_info(dh, &info) != 0)
291 		return (B_TRUE);
292 
293 	return ((info.di_attributes & DOOR_REVOKED) != 0);
294 }
295 
/*
 * Upcall to the datalink management daemon (dlmgmtd).
 *
 * arg/asize describe the request. rbuf/rsize describe the caller-supplied
 * reply buffer; the reply must fit in it and be exactly rsize bytes long.
 *
 * Returns:
 *	EBADF	no door handle is set, or the door has been revoked
 *	EINTR	the door call was interrupted
 *	ENOSPC	the reply did not fit in rbuf
 *	EINVAL	the reply size differs from rsize
 *	other	a door call error, or else the lr_err value read from the
 *		reply through dlmgmt_retval_t
 */
static int
i_dls_mgmt_upcall(void *arg, size_t asize, void *rbuf, size_t rsize)
{
	door_arg_t			darg, save_arg;
	door_handle_t			dh;
	int				err;
	int				retry = 0;

#define	MAXRETRYNUM	3

	ASSERT(arg);
	darg.data_ptr = arg;
	darg.data_size = asize;
	darg.desc_ptr = NULL;
	darg.desc_num = 0;
	darg.rbuf = rbuf;
	darg.rsize = rsize;
	/* darg is restored from this copy after every failed attempt */
	save_arg = darg;

retry:
	/* Take our own hold on the current door handle under the lock. */
	mutex_enter(&i_dls_mgmt_lock);
	dh = dls_mgmt_dh;
	if ((dh == NULL) || i_dls_mgmt_door_revoked(dh)) {
		mutex_exit(&i_dls_mgmt_lock);
		return (EBADF);
	}
	door_ki_hold(dh);
	mutex_exit(&i_dls_mgmt_lock);

	for (;;) {
		/* counts every attempt, across both retry paths */
		retry++;
		if ((err = door_ki_upcall_limited(dh, &darg, zone_kcred(),
		    SIZE_MAX, 0)) == 0)
			break;

		/*
		 * handle door call errors
		 */
		darg = save_arg;
		switch (err) {
		case EINTR:
			/*
			 * If the operation which caused this door upcall gets
			 * interrupted, return directly.
			 */
			goto done;
		case EAGAIN:
			/*
			 * Repeat upcall if the maximum attempt limit has not
			 * been reached.
			 */
			if (retry < MAXRETRYNUM) {
				delay(2 * hz);
				/* leaves the switch; the loop calls again */
				break;
			}
			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
			goto done;
		default:
			/* A fatal door error */
			if (i_dls_mgmt_door_revoked(dh)) {
				cmn_err(CE_NOTE,
				    "dls: dlmgmtd door service revoked\n");

				/*
				 * Drop our hold and start over with whatever
				 * door handle is current.
				 */
				if (retry < MAXRETRYNUM) {
					door_ki_rele(dh);
					goto retry;
				}
			}
			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
			goto done;
		}
	}

	if (darg.rbuf != rbuf) {
		/*
		 * The size of the input rbuf was not big enough, so the
		 * upcall allocated the rbuf itself.  If this happens, assume
		 * that this was an invalid door call request.
		 */
		kmem_free(darg.rbuf, darg.rsize);
		err = ENOSPC;
		goto done;
	}

	if (darg.rsize != rsize) {
		err = EINVAL;
		goto done;
	}

	/* The door call itself succeeded; report the daemon's own status. */
	err = ((dlmgmt_retval_t *)rbuf)->lr_err;

done:
	door_ki_rele(dh);
	return (err);
}
394 
395 /*
396  * Request the datalink management daemon to create a link with the attributes
397  * below.  Upon success, zero is returned and linkidp contains the linkid for
398  * the new link; otherwise, an errno is returned.
399  *
400  *     - dev		physical dev_t.  required for all physical links,
401  *		        including GLDv3 links.  It will be used to force the
402  *		        attachment of a physical device, hence the
403  *		        registration of its mac
404  *     - class		datalink class
405  *     - media type	media type; DL_OTHER means unknown
406  *     - persist	whether to persist the datalink
407  */
408 int
409 dls_mgmt_create(const char *devname, dev_t dev, datalink_class_t class,
410     uint32_t media, boolean_t persist, datalink_id_t *linkidp)
411 {
412 	dlmgmt_upcall_arg_create_t	create;
413 	dlmgmt_create_retval_t		retval;
414 	int				err;
415 
416 	create.ld_cmd = DLMGMT_CMD_DLS_CREATE;
417 	create.ld_class = class;
418 	create.ld_media = media;
419 	create.ld_phymaj = getmajor(dev);
420 	create.ld_phyinst = getminor(dev);
421 	create.ld_persist = persist;
422 	if (strlcpy(create.ld_devname, devname, sizeof (create.ld_devname)) >=
423 	    sizeof (create.ld_devname))
424 		return (EINVAL);
425 
426 	if ((err = i_dls_mgmt_upcall(&create, sizeof (create), &retval,
427 	    sizeof (retval))) == 0) {
428 		*linkidp = retval.lr_linkid;
429 	}
430 	return (err);
431 }
432 
433 /*
434  * Request the datalink management daemon to destroy the specified link.
435  * Returns zero upon success, or an errno upon failure.
436  */
437 int
438 dls_mgmt_destroy(datalink_id_t linkid, boolean_t persist)
439 {
440 	dlmgmt_upcall_arg_destroy_t	destroy;
441 	dlmgmt_destroy_retval_t		retval;
442 
443 	destroy.ld_cmd = DLMGMT_CMD_DLS_DESTROY;
444 	destroy.ld_linkid = linkid;
445 	destroy.ld_persist = persist;
446 
447 	return (i_dls_mgmt_upcall(&destroy, sizeof (destroy),
448 	    &retval, sizeof (retval)));
449 }
450 
451 /*
452  * Request the datalink management daemon to verify/update the information
453  * for a physical link.  Upon success, get its linkid.
454  *
455  *     - media type	media type
456  *     - novanity	whether this physical datalink supports vanity naming.
457  *			physical links that do not use the GLDv3 MAC plugin
458  *			cannot suport vanity naming
459  *
460  * This function could fail with ENOENT or EEXIST.  Two cases return EEXIST:
461  *
462  * 1. A link with devname already exists, but the media type does not match.
463  *    In this case, mediap will bee set to the media type of the existing link.
464  * 2. A link with devname already exists, but its link name does not match
465  *    the device name, although this link does not support vanity naming.
466  */
467 int
468 dls_mgmt_update(const char *devname, uint32_t media, boolean_t novanity,
469     uint32_t *mediap, datalink_id_t *linkidp)
470 {
471 	dlmgmt_upcall_arg_update_t	update;
472 	dlmgmt_update_retval_t		retval;
473 	int				err;
474 
475 	update.ld_cmd = DLMGMT_CMD_DLS_UPDATE;
476 
477 	if (strlcpy(update.ld_devname, devname, sizeof (update.ld_devname)) >=
478 	    sizeof (update.ld_devname))
479 		return (EINVAL);
480 
481 	update.ld_media = media;
482 	update.ld_novanity = novanity;
483 
484 	if ((err = i_dls_mgmt_upcall(&update, sizeof (update), &retval,
485 	    sizeof (retval))) == EEXIST) {
486 		*linkidp = retval.lr_linkid;
487 		*mediap = retval.lr_media;
488 	} else if (err == 0) {
489 		*linkidp = retval.lr_linkid;
490 	}
491 
492 	return (err);
493 }
494 
495 /*
496  * Request the datalink management daemon to get the information for a link.
497  * Returns zero upon success, or an errno upon failure.
498  *
499  * Only fills in information for argument pointers that are non-NULL.
500  * Note that the link argument is expected to be MAXLINKNAMELEN bytes.
501  */
502 int
503 dls_mgmt_get_linkinfo(datalink_id_t linkid, char *link,
504     datalink_class_t *classp, uint32_t *mediap, uint32_t *flagsp)
505 {
506 	dlmgmt_door_getname_t	getname;
507 	dlmgmt_getname_retval_t	retval;
508 	int			err, len;
509 
510 	getname.ld_cmd = DLMGMT_CMD_GETNAME;
511 	getname.ld_linkid = linkid;
512 
513 	if ((err = i_dls_mgmt_upcall(&getname, sizeof (getname), &retval,
514 	    sizeof (retval))) != 0) {
515 		return (err);
516 	}
517 
518 	len = strlen(retval.lr_link);
519 	if (len <= 1 || len >= MAXLINKNAMELEN)
520 		return (EINVAL);
521 
522 	if (link != NULL)
523 		(void) strlcpy(link, retval.lr_link, MAXLINKNAMELEN);
524 	if (classp != NULL)
525 		*classp = retval.lr_class;
526 	if (mediap != NULL)
527 		*mediap = retval.lr_media;
528 	if (flagsp != NULL)
529 		*flagsp = retval.lr_flags;
530 	return (0);
531 }
532 
533 /*
534  * Request the datalink management daemon to get the linkid for a link.
535  * Returns a non-zero error code on failure.  The linkid argument is only
536  * set on success (when zero is returned.)
537  */
538 int
539 dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
540 {
541 	dlmgmt_door_getlinkid_t		getlinkid;
542 	dlmgmt_getlinkid_retval_t	retval;
543 	int				err;
544 
545 	getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
546 	(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
547 
548 	if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
549 	    sizeof (retval))) == 0) {
550 		*linkid = retval.lr_linkid;
551 	}
552 	return (err);
553 }
554 
555 datalink_id_t
556 dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class,
557     datalink_media_t dmedia, uint32_t flags)
558 {
559 	dlmgmt_door_getnext_t	getnext;
560 	dlmgmt_getnext_retval_t	retval;
561 
562 	getnext.ld_cmd = DLMGMT_CMD_GETNEXT;
563 	getnext.ld_class = class;
564 	getnext.ld_dmedia = dmedia;
565 	getnext.ld_flags = flags;
566 	getnext.ld_linkid = linkid;
567 
568 	if (i_dls_mgmt_upcall(&getnext, sizeof (getnext), &retval,
569 	    sizeof (retval)) != 0) {
570 		return (DATALINK_INVALID_LINKID);
571 	}
572 
573 	return (retval.lr_linkid);
574 }
575 
576 static int
577 i_dls_mgmt_get_linkattr(const datalink_id_t linkid, const char *attr,
578     void *attrval, size_t *attrszp)
579 {
580 	dlmgmt_upcall_arg_getattr_t	getattr;
581 	dlmgmt_getattr_retval_t		retval;
582 	int				err;
583 
584 	getattr.ld_cmd = DLMGMT_CMD_DLS_GETATTR;
585 	getattr.ld_linkid = linkid;
586 	(void) strlcpy(getattr.ld_attr, attr, MAXLINKATTRLEN);
587 
588 	if ((err = i_dls_mgmt_upcall(&getattr, sizeof (getattr), &retval,
589 	    sizeof (retval))) == 0) {
590 		if (*attrszp < retval.lr_attrsz)
591 			return (EINVAL);
592 		*attrszp = retval.lr_attrsz;
593 		bcopy(retval.lr_attrval, attrval, retval.lr_attrsz);
594 	}
595 
596 	return (err);
597 }
598 
599 /*
600  * Note that this function can only get devp successfully for non-VLAN link.
601  */
602 int
603 dls_mgmt_get_phydev(datalink_id_t linkid, dev_t *devp)
604 {
605 	uint64_t	maj, inst;
606 	size_t		attrsz = sizeof (uint64_t);
607 
608 	if (i_dls_mgmt_get_linkattr(linkid, FPHYMAJ, &maj, &attrsz) != 0 ||
609 	    attrsz != sizeof (uint64_t) ||
610 	    i_dls_mgmt_get_linkattr(linkid, FPHYINST, &inst, &attrsz) != 0 ||
611 	    attrsz != sizeof (uint64_t)) {
612 		return (EINVAL);
613 	}
614 
615 	*devp = makedevice((major_t)maj, (minor_t)inst);
616 	return (0);
617 }
618 
619 /*
620  * Request the datalink management daemon to push in
621  * all properties associated with the link.
622  * Returns a non-zero error code on failure.
623  */
624 int
625 dls_mgmt_linkprop_init(datalink_id_t linkid)
626 {
627 	dlmgmt_door_linkprop_init_t	li;
628 	dlmgmt_linkprop_init_retval_t	retval;
629 	int				err;
630 
631 	li.ld_cmd = DLMGMT_CMD_LINKPROP_INIT;
632 	li.ld_linkid = linkid;
633 
634 	err = i_dls_mgmt_upcall(&li, sizeof (li), &retval, sizeof (retval));
635 	return (err);
636 }
637 
638 static void
639 dls_devnet_prop_task(void *arg)
640 {
641 	dls_devnet_t		*ddp = arg;
642 
643 	(void) dls_mgmt_linkprop_init(ddp->dd_linkid);
644 
645 	mutex_enter(&ddp->dd_mutex);
646 	ddp->dd_prop_loaded = B_TRUE;
647 	ddp->dd_prop_taskid = 0;
648 	cv_broadcast(&ddp->dd_cv);
649 	mutex_exit(&ddp->dd_mutex);
650 }
651 
652 /*
653  * Ensure property loading task is completed.
654  */
655 void
656 dls_devnet_prop_task_wait(dls_dl_handle_t ddp)
657 {
658 	mutex_enter(&ddp->dd_mutex);
659 	while (ddp->dd_prop_taskid != 0)
660 		cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
661 	mutex_exit(&ddp->dd_mutex);
662 }
663 
664 void
665 dls_devnet_rele_tmp(dls_dl_handle_t dlh)
666 {
667 	dls_devnet_t		*ddp = dlh;
668 
669 	mutex_enter(&ddp->dd_mutex);
670 	ASSERT(ddp->dd_tref != 0);
671 	if (--ddp->dd_tref == 0)
672 		cv_signal(&ddp->dd_cv);
673 	mutex_exit(&ddp->dd_mutex);
674 }
675 
676 int
677 dls_devnet_hold_link(datalink_id_t linkid, dls_dl_handle_t *ddhp,
678     dls_link_t **dlpp)
679 {
680 	dls_dl_handle_t	dlh;
681 	dls_link_t	*dlp;
682 	int		err;
683 
684 	if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
685 		return (err);
686 
687 	if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) {
688 		dls_devnet_rele_tmp(dlh);
689 		return (err);
690 	}
691 
692 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
693 
694 	*ddhp = dlh;
695 	*dlpp = dlp;
696 	return (0);
697 }
698 
/*
 * Release the holds taken by dls_devnet_hold_link(): first the dls_link_t,
 * then the temporary devnet reference. The caller must hold the mac
 * perimeter of the link.
 */
void
dls_devnet_rele_link(dls_dl_handle_t dlh, dls_link_t *dlp)
{
	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));

	dls_link_rele(dlp);
	dls_devnet_rele_tmp(dlh);
}
707 
708 /*
709  * "link" kstats related functions.
710  */
711 
712 /*
713  * Query the "link" kstats.
714  *
715  * We may be called from the kstat subsystem in an arbitrary context.
716  * If the caller is the stack, the context could be an upcall data
717  * thread. Hence we can't acquire the mac perimeter in this function
718  * for fear of deadlock.
719  */
720 static int
721 dls_devnet_stat_update(kstat_t *ksp, int rw)
722 {
723 	datalink_id_t	linkid = (datalink_id_t)(uintptr_t)ksp->ks_private;
724 	dls_devnet_t	*ddp;
725 	dls_link_t	*dlp;
726 	int		err;
727 
728 	if ((err = dls_devnet_hold_tmp(linkid, &ddp)) != 0) {
729 		return (err);
730 	}
731 
732 	/*
733 	 * If a device detach happens at this time, it will block in
734 	 * dls_devnet_unset since the dd_tref has been bumped in
735 	 * dls_devnet_hold_tmp(). So the access to 'dlp' is safe even though
736 	 * we don't hold the mac perimeter.
737 	 */
738 	if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)ddp->dd_mac,
739 	    (mod_hash_val_t *)&dlp) != 0) {
740 		dls_devnet_rele_tmp(ddp);
741 		return (ENOENT);
742 	}
743 
744 	err = dls_stat_update(ksp, dlp, rw);
745 
746 	dls_devnet_rele_tmp(ddp);
747 	return (err);
748 }
749 
750 /*
751  * Create the "link" kstats.
752  */
753 static void
754 dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid)
755 {
756 	kstat_t	*ksp;
757 
758 	if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid,
759 	    dls_devnet_stat_update, (void *)(uintptr_t)ddp->dd_linkid,
760 	    &ksp) == 0) {
761 		ASSERT(ksp != NULL);
762 		if (zoneid == ddp->dd_owner_zid) {
763 			ASSERT(ddp->dd_ksp == NULL);
764 			ddp->dd_ksp = ksp;
765 		} else {
766 			ASSERT(ddp->dd_zone_ksp == NULL);
767 			ddp->dd_zone_ksp = ksp;
768 		}
769 	}
770 }
771 
772 /*
773  * Destroy the "link" kstats.
774  */
775 static void
776 dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
777 {
778 	if (zoneid == ddp->dd_owner_zid) {
779 		if (ddp->dd_ksp != NULL) {
780 			kstat_delete(ddp->dd_ksp);
781 			ddp->dd_ksp = NULL;
782 		}
783 	} else {
784 		if (ddp->dd_zone_ksp != NULL) {
785 			kstat_delete(ddp->dd_zone_ksp);
786 			ddp->dd_zone_ksp = NULL;
787 		}
788 	}
789 }
790 
791 /*
792  * The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
793  * and create the new set using the new name.
794  */
795 static void
796 dls_devnet_stat_rename(dls_devnet_t *ddp)
797 {
798 	if (ddp->dd_ksp != NULL) {
799 		kstat_delete(ddp->dd_ksp);
800 		ddp->dd_ksp = NULL;
801 	}
802 	/* We can't rename a link while it's assigned to a non-global zone. */
803 	ASSERT(ddp->dd_zone_ksp == NULL);
804 	dls_devnet_stat_create(ddp, ddp->dd_owner_zid);
805 }
806 
/*
 * Associate the linkid with the link identified by macname. If this
 * is called on behalf of a physical link then linkid may be
 * DATALINK_INVALID_LINKID. Otherwise, if called on behalf of a
 * virtual link, linkid must have a value.
 *
 *     - mh		mac handle; its mac name keys the devnet
 *     - linkid		linkid to associate, or DATALINK_INVALID_LINKID
 *     - zoneid		zone that will own a newly created devnet
 *     - ddpp		if non-NULL, receives the devnet on success
 *
 * Returns zero on success. Returns EEXIST if the linkid or the mac name
 * already has an association, EINVAL if an existing devnet without a linkid
 * is given no linkid or a linkid that is not of class DATALINK_CLASS_PHYS,
 * or the error from dls_mgmt_get_linkinfo() or i_dls_devnet_setzid().
 */
static int
dls_devnet_set(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid,
    dls_devnet_t **ddpp)
{
	const char		*macname = mac_name(mh);
	dls_devnet_t		*ddp = NULL;
	datalink_class_t	class;
	int			err;
	boolean_t		stat_create = B_FALSE;
	char			linkname[MAXLINKNAMELEN];

	rw_enter(&i_dls_devnet_lock, RW_WRITER);

	/*
	 * Don't allow callers to set a link name with a linkid that already
	 * has a name association (that's what rename is for).
	 */
	if (linkid != DATALINK_INVALID_LINKID) {
		if (mod_hash_find(i_dls_devnet_id_hash,
		    (mod_hash_key_t)(uintptr_t)linkid,
		    (mod_hash_val_t *)&ddp) == 0) {
			err = EEXIST;
			goto done;
		}
		/* linkname and class are only valid when a linkid is given */
		if ((err = dls_mgmt_get_linkinfo(linkid, linkname, &class,
		    NULL, NULL)) != 0)
			goto done;
	}

	if ((err = mod_hash_find(i_dls_devnet_hash,
	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) == 0) {
		if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
			err = EEXIST;
			goto done;
		}

		/*
		 * If we arrive here we know we are attempting to set
		 * the linkid on a physical link. A virtual link
		 * should never arrive here because it should never
		 * call this function without a linkid. Virtual links
		 * are created through dlmgmtd and thus we know
		 * dlmgmtd is alive to assign it a linkid (search for
		 * uses of dladm_create_datalink_id() to prove this to
		 * yourself); we don't have the same guarantee for a
		 * physical link which may perform an upcall for a
		 * linkid while dlmgmtd is down but will continue
		 * creating a devnet without the linkid (see
		 * softmac_create_datalink() to see how physical link
		 * creation works). That is why there is no entry in
		 * the id hash but there is one in the macname hash --
		 * softmac couldn't acquire a linkid the first time it
		 * called this function.
		 *
		 * Because of the check above, we also know that
		 * ddp->dd_linkid is not set. Following this, the link
		 * must still be in the DD_INITIALIZING state because
		 * that flag is removed IFF dd_linkid is set. This is
		 * why we can ASSERT the DD_INITIALIZING flag below if
		 * the call to i_dls_devnet_setzid() fails.
		 */
		if (linkid == DATALINK_INVALID_LINKID ||
		    class != DATALINK_CLASS_PHYS) {
			err = EINVAL;
			goto done;
		}

		ASSERT(ddp->dd_flags & DD_INITIALIZING);

	} else {
		/* No devnet for this mac name yet: create and register one. */
		ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP);
		ddp->dd_flags = DD_INITIALIZING;
		ddp->dd_tref = 0;
		ddp->dd_ref++;
		ddp->dd_owner_zid = zoneid;
		/*
		 * If we are creating a new devnet which will be owned by a NGZ
		 * then mark it as transient. This link has never been in the
		 * GZ, the GZ will not have a hold on its reference, and we do
		 * not want to return it to the GZ when the zone halts.
		 */
		if (zoneid != GLOBAL_ZONEID)
			ddp->dd_transient = B_TRUE;
		(void) strlcpy(ddp->dd_mac, macname, sizeof (ddp->dd_mac));
		VERIFY(mod_hash_insert(i_dls_devnet_hash,
		    (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0);
	}

	if (linkid != DATALINK_INVALID_LINKID) {
		ddp->dd_linkid = linkid;
		(void) strlcpy(ddp->dd_linkname, linkname,
		    sizeof (ddp->dd_linkname));
		VERIFY(mod_hash_insert(i_dls_devnet_id_hash,
		    (mod_hash_key_t)(uintptr_t)linkid,
		    (mod_hash_val_t)ddp) == 0);
		devnet_need_rebuild = B_TRUE;
		/* the kstat itself is created after the lock is dropped */
		stat_create = B_TRUE;
	}
	err = 0;
done:
	/*
	 * It is safe to drop the i_dls_devnet_lock at this point. In the case
	 * of physical devices, the softmac framework will fail the device
	 * detach based on the smac_state or smac_hold_cnt. Other cases like
	 * vnic and aggr use their own scheme to serialize creates and deletes
	 * and ensure that *ddp is valid.
	 */
	rw_exit(&i_dls_devnet_lock);

	if (err == 0 && zoneid != GLOBAL_ZONEID) {
		/*
		 * If this link is being created directly within a non-global
		 * zone, then flag it as transient so that it will be cleaned
		 * up when the zone is shut down.
		 */
		err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE, B_TRUE);
		if (err != 0) {
			/*
			 * At this point the link is marked as
			 * DD_INITIALIZING -- there can be no
			 * outstanding temp refs and therefore no need
			 * to wait for them.
			 */
			ASSERT(ddp->dd_flags & DD_INITIALIZING);
			(void) dls_devnet_unset(mh, &linkid, B_FALSE);
			return (err);
		}
	}

	if (err == 0) {
		/*
		 * The kstat subsystem holds its own locks (rather perimeter)
		 * before calling the ks_update (dls_devnet_stat_update) entry
		 * point which in turn grabs the i_dls_devnet_lock. So the
		 * lock hierarchy is kstat locks -> i_dls_devnet_lock.
		 */
		if (stat_create)
			dls_devnet_stat_create(ddp, zoneid);
		if (ddpp != NULL)
			*ddpp = ddp;

		/*
		 * Dispatch the property loading task once a linkid is known,
		 * unless the properties are already loaded or a task is
		 * already outstanding.
		 */
		mutex_enter(&ddp->dd_mutex);
		if (linkid != DATALINK_INVALID_LINKID &&
		    !ddp->dd_prop_loaded && ddp->dd_prop_taskid == 0) {
			ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
			    dls_devnet_prop_task, ddp, TQ_SLEEP);
		}
		mutex_exit(&ddp->dd_mutex);

	}
	return (err);
}
965 
966 /*
967  * Disassociate the linkid from the link identified by macname. If
968  * wait is B_TRUE, wait until all temporary refs are released and the
969  * prop task is finished.
970  *
971  * If waiting then you SHOULD NOT call this from inside the MAC perim
972  * as deadlock will ensue. Otherwise, this function is safe to call
973  * from inside or outside the MAC perim.
974  */
975 static int
976 dls_devnet_unset(mac_handle_t mh, datalink_id_t *id, boolean_t wait)
977 {
978 	const char	*macname = mac_name(mh);
979 	dls_devnet_t	*ddp;
980 	int		err;
981 	mod_hash_val_t	val;
982 
983 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
984 	if ((err = mod_hash_find(i_dls_devnet_hash,
985 	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) != 0) {
986 		ASSERT(err == MH_ERR_NOTFOUND);
987 		rw_exit(&i_dls_devnet_lock);
988 		return (ENOENT);
989 	}
990 
991 	mutex_enter(&ddp->dd_mutex);
992 
993 	/*
994 	 * Make sure downcalls into softmac_create or softmac_destroy from
995 	 * devfs don't cv_wait on any devfs related condition for fear of
996 	 * deadlock. Return EBUSY if the asynchronous thread started for
997 	 * property loading as part of the post attach hasn't yet completed.
998 	 */
999 	VERIFY(ddp->dd_ref != 0);
1000 	if ((ddp->dd_ref != 1) || (!wait &&
1001 	    (ddp->dd_tref != 0 || ddp->dd_prop_taskid != 0))) {
1002 		int zstatus = 0;
1003 
1004 		/*
1005 		 * There are a couple of alternatives that might be going on
1006 		 * here; a) the zone is shutting down and it has a transient
1007 		 * link assigned, in which case we want to clean it up instead
1008 		 * of moving it back to the global zone, or b) its possible
1009 		 * that we're trying to clean up an orphaned vnic that was
1010 		 * delegated to a zone and which wasn't cleaned up properly
1011 		 * when the zone went away.  Check for either of these cases
1012 		 * before we simply return EBUSY.
1013 		 *
1014 		 * zstatus indicates which situation we are dealing with:
1015 		 *	 0 - means return EBUSY
1016 		 *	 1 - means case (a), cleanup transient link
1017 		 *	-1 - means case (b), orphaned VNIC
1018 		 */
1019 		if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) {
1020 			zone_t	*zp;
1021 
1022 			if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) {
1023 				zstatus = -1;
1024 			} else {
1025 				if (ddp->dd_transient) {
1026 					zone_status_t s = zone_status_get(zp);
1027 
1028 					if (s >= ZONE_IS_SHUTTING_DOWN)
1029 						zstatus = 1;
1030 				}
1031 				zone_rele(zp);
1032 			}
1033 		}
1034 
1035 		if (zstatus == 0) {
1036 			mutex_exit(&ddp->dd_mutex);
1037 			rw_exit(&i_dls_devnet_lock);
1038 			return (EBUSY);
1039 		}
1040 
1041 		/*
1042 		 * We want to delete the link, reset ref to 1;
1043 		 */
1044 		if (zstatus == -1) {
1045 			/* Log a warning, but continue in this case */
1046 			cmn_err(CE_WARN, "clear orphaned datalink: %s\n",
1047 			    ddp->dd_linkname);
1048 		}
1049 		ddp->dd_ref = 1;
1050 	}
1051 
1052 	ddp->dd_flags |= DD_CONDEMNED;
1053 	ddp->dd_ref--;
1054 	*id = ddp->dd_linkid;
1055 
1056 	/*
1057 	 * Remove this dls_devnet_t from the hash table.
1058 	 */
1059 	VERIFY(mod_hash_remove(i_dls_devnet_hash,
1060 	    (mod_hash_key_t)ddp->dd_mac, &val) == 0);
1061 
1062 	if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
1063 		VERIFY(mod_hash_remove(i_dls_devnet_id_hash,
1064 		    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, &val) == 0);
1065 
1066 		devnet_need_rebuild = B_TRUE;
1067 	}
1068 	rw_exit(&i_dls_devnet_lock);
1069 
1070 	/*
1071 	 * It is important to call i_dls_devnet_setzid() WITHOUT the
1072 	 * i_dls_devnet_lock held. The setzid call grabs the MAC
1073 	 * perim; thus causing DLS -> MAC lock ordering if performed
1074 	 * with the i_dls_devnet_lock held. This forces consumers to
1075 	 * grab the MAC perim before calling dls_devnet_unset() (the
1076 	 * locking rules state MAC -> DLS order). By performing the
1077 	 * setzid outside of the i_dls_devnet_lock consumers can
1078 	 * safely call dls_devnet_unset() outside the MAC perim.
1079 	 */
1080 	if (ddp->dd_zid != GLOBAL_ZONEID) {
1081 		/*
1082 		 * We need to release the dd_mutex before we try and destroy the
1083 		 * stat. When we destroy it, we'll need to grab the lock for the
1084 		 * kstat but if there's a concurrent reader of the kstat, we'll
1085 		 * be blocked on it. This will lead to deadlock because these
1086 		 * kstats employ a ks_update function (dls_devnet_stat_update)
1087 		 * which needs the dd_mutex that we currently hold.
1088 		 *
1089 		 * Because we've already flagged the dls_devnet_t as
1090 		 * DD_CONDEMNED and we still have a write lock on
1091 		 * i_dls_devnet_lock, we should be able to release the dd_mutex.
1092 		 */
1093 		mutex_exit(&ddp->dd_mutex);
1094 		dls_devnet_stat_destroy(ddp, ddp->dd_zid);
1095 		mutex_enter(&ddp->dd_mutex);
1096 		(void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE,
1097 		    B_FALSE);
1098 	}
1099 
1100 	if (wait) {
1101 		/*
1102 		 * Wait until all temporary references are released.
1103 		 * The holders of the tref need the MAC perim to
1104 		 * perform their work and release the tref. To avoid
1105 		 * deadlock, assert that the perim is never held here.
1106 		 */
1107 		ASSERT0(MAC_PERIM_HELD(mh));
1108 		while ((ddp->dd_tref != 0) || (ddp->dd_prop_taskid != 0))
1109 			cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
1110 	} else {
1111 		VERIFY(ddp->dd_tref == 0);
1112 		VERIFY(ddp->dd_prop_taskid == 0);
1113 	}
1114 
1115 	if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
1116 		dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
1117 
1118 	ddp->dd_prop_loaded = B_FALSE;
1119 	ddp->dd_linkid = DATALINK_INVALID_LINKID;
1120 	ddp->dd_flags = 0;
1121 	mutex_exit(&ddp->dd_mutex);
1122 	kmem_cache_free(i_dls_devnet_cachep, ddp);
1123 
1124 	return (0);
1125 }
1126 
1127 /*
1128  * This is a private hold routine used when we already have the dls_link_t, thus
1129  * we know that it cannot go away.
1130  */
1131 int
1132 dls_devnet_hold_tmp_by_link(dls_link_t *dlp, dls_dl_handle_t *ddhp)
1133 {
1134 	int err;
1135 	dls_devnet_t *ddp = NULL;
1136 
1137 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
1138 	if ((err = mod_hash_find(i_dls_devnet_hash,
1139 	    (mod_hash_key_t)dlp->dl_name, (mod_hash_val_t *)&ddp)) != 0) {
1140 		ASSERT(err == MH_ERR_NOTFOUND);
1141 		rw_exit(&i_dls_devnet_lock);
1142 		return (ENOENT);
1143 	}
1144 
1145 	mutex_enter(&ddp->dd_mutex);
1146 	VERIFY(ddp->dd_ref > 0);
1147 	if (DD_NOT_VISIBLE(ddp->dd_flags)) {
1148 		mutex_exit(&ddp->dd_mutex);
1149 		rw_exit(&i_dls_devnet_lock);
1150 		return (ENOENT);
1151 	}
1152 	ddp->dd_tref++;
1153 	mutex_exit(&ddp->dd_mutex);
1154 	rw_exit(&i_dls_devnet_lock);
1155 
1156 	*ddhp = ddp;
1157 	return (0);
1158 }
1159 
1160 static int
1161 dls_devnet_hold_common(datalink_id_t linkid, dls_devnet_t **ddpp,
1162     boolean_t tmp_hold)
1163 {
1164 	dls_devnet_t		*ddp;
1165 	int			err;
1166 
1167 	rw_enter(&i_dls_devnet_lock, RW_READER);
1168 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1169 	    (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&ddp)) != 0) {
1170 		ASSERT(err == MH_ERR_NOTFOUND);
1171 		rw_exit(&i_dls_devnet_lock);
1172 		return (ENOENT);
1173 	}
1174 
1175 	mutex_enter(&ddp->dd_mutex);
1176 	VERIFY(ddp->dd_ref > 0);
1177 	if (DD_NOT_VISIBLE(ddp->dd_flags)) {
1178 		mutex_exit(&ddp->dd_mutex);
1179 		rw_exit(&i_dls_devnet_lock);
1180 		return (ENOENT);
1181 	}
1182 	if (tmp_hold)
1183 		ddp->dd_tref++;
1184 	else
1185 		ddp->dd_ref++;
1186 	mutex_exit(&ddp->dd_mutex);
1187 	rw_exit(&i_dls_devnet_lock);
1188 
1189 	*ddpp = ddp;
1190 	return (0);
1191 }
1192 
1193 int
1194 dls_devnet_hold(datalink_id_t linkid, dls_devnet_t **ddpp)
1195 {
1196 	return (dls_devnet_hold_common(linkid, ddpp, B_FALSE));
1197 }
1198 
1199 /*
1200  * Hold the vanity naming structure (dls_devnet_t) temporarily.  The request to
1201  * delete the dls_devnet_t will wait until the temporary reference is released.
1202  */
1203 int
1204 dls_devnet_hold_tmp(datalink_id_t linkid, dls_devnet_t **ddpp)
1205 {
1206 	return (dls_devnet_hold_common(linkid, ddpp, B_TRUE));
1207 }
1208 
1209 /*
1210  * This funtion is called when a DLS client tries to open a device node.
1211  * This dev_t could be a result of a /dev/net node access (returned by
1212  * devnet_create_rvp->dls_devnet_open()) or a direct /dev node access.
1213  * In both cases, this function bumps up the reference count of the
1214  * dls_devnet_t structure. The reference is held as long as the device node
1215  * is open. In the case of /dev/net while it is true that the initial reference
1216  * is held when the devnet_create_rvp->dls_devnet_open call happens, this
1217  * initial reference is released immediately in devnet_inactive_callback ->
1218  * dls_devnet_close(). (Note that devnet_inactive_callback() is called right
1219  * after dld_open completes, not when the /dev/net node is being closed).
1220  * To undo this function, call dls_devnet_rele()
1221  */
1222 int
1223 dls_devnet_hold_by_dev(dev_t dev, dls_dl_handle_t *ddhp)
1224 {
1225 	char			name[MAXNAMELEN];
1226 	char			*drv;
1227 	dls_devnet_t		*ddp;
1228 	int			err;
1229 
1230 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1231 		return (EINVAL);
1232 
1233 	(void) snprintf(name, sizeof (name), "%s%d", drv,
1234 	    DLS_MINOR2INST(getminor(dev)));
1235 
1236 	rw_enter(&i_dls_devnet_lock, RW_READER);
1237 	if ((err = mod_hash_find(i_dls_devnet_hash,
1238 	    (mod_hash_key_t)name, (mod_hash_val_t *)&ddp)) != 0) {
1239 		ASSERT(err == MH_ERR_NOTFOUND);
1240 		rw_exit(&i_dls_devnet_lock);
1241 		return (ENOENT);
1242 	}
1243 	mutex_enter(&ddp->dd_mutex);
1244 	VERIFY(ddp->dd_ref > 0);
1245 	if (DD_NOT_VISIBLE(ddp->dd_flags)) {
1246 		mutex_exit(&ddp->dd_mutex);
1247 		rw_exit(&i_dls_devnet_lock);
1248 		return (ENOENT);
1249 	}
1250 	ddp->dd_ref++;
1251 	mutex_exit(&ddp->dd_mutex);
1252 	rw_exit(&i_dls_devnet_lock);
1253 
1254 	*ddhp = ddp;
1255 	return (0);
1256 }
1257 
1258 void
1259 dls_devnet_rele(dls_devnet_t *ddp)
1260 {
1261 	mutex_enter(&ddp->dd_mutex);
1262 	VERIFY(ddp->dd_ref > 1);
1263 	ddp->dd_ref--;
1264 	if ((ddp->dd_flags & DD_IMPLICIT_IPTUN) && ddp->dd_ref == 1) {
1265 		mutex_exit(&ddp->dd_mutex);
1266 		if (i_dls_devnet_destroy_iptun(ddp->dd_linkid) != 0)
1267 			ddp->dd_flags |= DD_IMPLICIT_IPTUN;
1268 		return;
1269 	}
1270 	mutex_exit(&ddp->dd_mutex);
1271 }
1272 
1273 static int
1274 dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp)
1275 {
1276 	char			drv[MAXLINKNAMELEN];
1277 	uint_t			ppa;
1278 	major_t			major;
1279 	dev_t			phy_dev, tmp_dev;
1280 	datalink_id_t		linkid;
1281 	dls_dev_handle_t	ddh;
1282 	int			err;
1283 
1284 	if ((err = dls_mgmt_get_linkid(link, &linkid)) == 0)
1285 		return (dls_devnet_hold(linkid, ddpp));
1286 
1287 	/*
1288 	 * If we failed to get the link's linkid because the dlmgmtd daemon
1289 	 * has not been started, return ENOENT so that the application can
1290 	 * fallback to open the /dev node.
1291 	 */
1292 	if (err == EBADF)
1293 		return (ENOENT);
1294 
1295 	if (err != ENOENT)
1296 		return (err);
1297 
1298 	/*
1299 	 * If we reach this point it means dlmgmtd is up but has no
1300 	 * mapping for the link name.
1301 	 */
1302 	if (ddi_parse_dlen(link, drv, MAXLINKNAMELEN, &ppa) != DDI_SUCCESS)
1303 		return (ENOENT);
1304 
1305 	if (IS_IPTUN_LINK(drv)) {
1306 		if ((err = i_dls_devnet_create_iptun(link, drv, &linkid)) != 0)
1307 			return (err);
1308 		/*
1309 		 * At this point, an IP tunnel MAC has registered, which
1310 		 * resulted in a link being created.
1311 		 */
1312 		err = dls_devnet_hold(linkid, ddpp);
1313 		if (err != 0) {
1314 			VERIFY(i_dls_devnet_destroy_iptun(linkid) == 0);
1315 			return (err);
1316 		}
1317 		/*
1318 		 * dls_devnet_rele() will know to destroy the implicit IP
1319 		 * tunnel on last reference release if DD_IMPLICIT_IPTUN is
1320 		 * set.
1321 		 */
1322 		(*ddpp)->dd_flags |= DD_IMPLICIT_IPTUN;
1323 		return (0);
1324 	}
1325 
1326 	/*
1327 	 * If this link:
1328 	 * (a) is a physical device, (b) this is the first boot, (c) the MAC
1329 	 * is not registered yet, and (d) we cannot find its linkid, then the
1330 	 * linkname is the same as the devname.
1331 	 *
1332 	 * First filter out invalid names.
1333 	 */
1334 	if ((major = ddi_name_to_major(drv)) == (major_t)-1)
1335 		return (ENOENT);
1336 
1337 	phy_dev = makedevice(major, DLS_PPA2MINOR(ppa));
1338 	if (softmac_hold_device(phy_dev, &ddh) != 0)
1339 		return (ENOENT);
1340 
1341 	/*
1342 	 * At this time, the MAC should be registered, check its phy_dev using
1343 	 * the given name.
1344 	 */
1345 	if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0 ||
1346 	    (err = dls_mgmt_get_phydev(linkid, &tmp_dev)) != 0) {
1347 		softmac_rele_device(ddh);
1348 		return (err);
1349 	}
1350 	if (tmp_dev != phy_dev) {
1351 		softmac_rele_device(ddh);
1352 		return (ENOENT);
1353 	}
1354 
1355 	err = dls_devnet_hold(linkid, ddpp);
1356 	softmac_rele_device(ddh);
1357 	return (err);
1358 }
1359 
1360 int
1361 dls_devnet_macname2linkid(const char *macname, datalink_id_t *linkidp)
1362 {
1363 	dls_devnet_t	*ddp;
1364 
1365 	rw_enter(&i_dls_devnet_lock, RW_READER);
1366 	if (mod_hash_find(i_dls_devnet_hash, (mod_hash_key_t)macname,
1367 	    (mod_hash_val_t *)&ddp) != 0) {
1368 		rw_exit(&i_dls_devnet_lock);
1369 		return (ENOENT);
1370 	}
1371 
1372 	*linkidp = ddp->dd_linkid;
1373 	rw_exit(&i_dls_devnet_lock);
1374 	return (0);
1375 }
1376 
1377 /*
1378  * Get linkid for the given dev.
1379  */
1380 int
1381 dls_devnet_dev2linkid(dev_t dev, datalink_id_t *linkidp)
1382 {
1383 	char	macname[MAXNAMELEN];
1384 	char	*drv;
1385 
1386 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1387 		return (EINVAL);
1388 
1389 	(void) snprintf(macname, sizeof (macname), "%s%d", drv,
1390 	    DLS_MINOR2INST(getminor(dev)));
1391 	return (dls_devnet_macname2linkid(macname, linkidp));
1392 }
1393 
1394 /*
1395  * Get the link's physical dev_t. It this is a VLAN, get the dev_t of the
1396  * link this VLAN is created on.
1397  */
1398 int
1399 dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp)
1400 {
1401 	dls_devnet_t	*ddp;
1402 	int		err;
1403 
1404 	if ((err = dls_devnet_hold_tmp(vlanid, &ddp)) != 0)
1405 		return (err);
1406 
1407 	err = dls_mgmt_get_phydev(ddp->dd_linkid, devp);
1408 	dls_devnet_rele_tmp(ddp);
1409 	return (err);
1410 }
1411 
1412 /*
1413  * Handle the renaming requests.  There are two rename cases:
1414  *
1415  * 1. Request to rename a valid link (id1) to an non-existent link name
1416  *    (id2). In this case id2 is DATALINK_INVALID_LINKID.  Just check whether
1417  *    id1 is held by any applications.
1418  *
1419  *    In this case, the link's kstats need to be updated using the given name.
1420  *
1421  * 2. Request to rename a valid link (id1) to the name of a REMOVED
1422  *    physical link (id2). In this case, check that id1 and its associated
1423  *    mac is not held by any application, and update the link's linkid to id2.
1424  *
1425  *    This case does not change the <link name, linkid> mapping, so the link's
1426  *    kstats need to be updated with using name associated the given id2.
1427  */
1428 int
1429 dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
1430 {
1431 	dls_dev_handle_t	ddh = NULL;
1432 	int			err = 0;
1433 	dev_t			phydev = 0;
1434 	dls_devnet_t		*ddp;
1435 	mac_perim_handle_t	mph = NULL;
1436 	mac_handle_t		mh;
1437 	mod_hash_val_t		val;
1438 
1439 	/*
1440 	 * In the second case, id2 must be a REMOVED physical link.
1441 	 */
1442 	if ((id2 != DATALINK_INVALID_LINKID) &&
1443 	    (dls_mgmt_get_phydev(id2, &phydev) == 0) &&
1444 	    softmac_hold_device(phydev, &ddh) == 0) {
1445 		softmac_rele_device(ddh);
1446 		return (EEXIST);
1447 	}
1448 
1449 	/*
1450 	 * Hold id1 to prevent it from being detached (if a physical link).
1451 	 */
1452 	if (dls_mgmt_get_phydev(id1, &phydev) == 0)
1453 		(void) softmac_hold_device(phydev, &ddh);
1454 
1455 	/*
1456 	 * The framework does not hold hold locks across calls to the
1457 	 * mac perimeter, hence enter the perimeter first. This also waits
1458 	 * for the property loading to finish.
1459 	 */
1460 	if ((err = mac_perim_enter_by_linkid(id1, &mph)) != 0) {
1461 		softmac_rele_device(ddh);
1462 		return (err);
1463 	}
1464 
1465 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
1466 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1467 	    (mod_hash_key_t)(uintptr_t)id1, (mod_hash_val_t *)&ddp)) != 0) {
1468 		ASSERT(err == MH_ERR_NOTFOUND);
1469 		err = ENOENT;
1470 		goto done;
1471 	}
1472 
1473 	mutex_enter(&ddp->dd_mutex);
1474 	if (ddp->dd_ref > 1) {
1475 		mutex_exit(&ddp->dd_mutex);
1476 		err = EBUSY;
1477 		goto done;
1478 	}
1479 	mutex_exit(&ddp->dd_mutex);
1480 
1481 	if (id2 == DATALINK_INVALID_LINKID) {
1482 		(void) strlcpy(ddp->dd_linkname, link,
1483 		    sizeof (ddp->dd_linkname));
1484 
1485 		/* rename mac client name and its flow if exists */
1486 		if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1487 			goto done;
1488 		(void) mac_rename_primary(mh, link);
1489 		mac_close(mh);
1490 		goto done;
1491 	}
1492 
1493 	/*
1494 	 * The second case, check whether the MAC is used by any MAC
1495 	 * user.  This must be a physical link so ddh must not be NULL.
1496 	 */
1497 	if (ddh == NULL) {
1498 		err = EINVAL;
1499 		goto done;
1500 	}
1501 
1502 	if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1503 		goto done;
1504 
1505 	/*
1506 	 * We release the reference of the MAC which mac_open() is
1507 	 * holding. Note that this mac will not be unregistered
1508 	 * because the physical device is held.
1509 	 */
1510 	mac_close(mh);
1511 
1512 	/*
1513 	 * Check if there is any other MAC clients, if not, hold this mac
1514 	 * exclusively until we are done.
1515 	 */
1516 	if ((err = mac_mark_exclusive(mh)) != 0)
1517 		goto done;
1518 
1519 	/*
1520 	 * Update the link's linkid.
1521 	 */
1522 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1523 	    (mod_hash_key_t)(uintptr_t)id2, &val)) != MH_ERR_NOTFOUND) {
1524 		mac_unmark_exclusive(mh);
1525 		err = EEXIST;
1526 		goto done;
1527 	}
1528 
1529 	err = dls_mgmt_get_linkinfo(id2, ddp->dd_linkname, NULL, NULL, NULL);
1530 	if (err != 0) {
1531 		mac_unmark_exclusive(mh);
1532 		goto done;
1533 	}
1534 
1535 	(void) mod_hash_remove(i_dls_devnet_id_hash,
1536 	    (mod_hash_key_t)(uintptr_t)id1, &val);
1537 
1538 	ddp->dd_linkid = id2;
1539 	(void) mod_hash_insert(i_dls_devnet_id_hash,
1540 	    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, (mod_hash_val_t)ddp);
1541 
1542 	mac_unmark_exclusive(mh);
1543 
1544 	/* load properties for new id */
1545 	mutex_enter(&ddp->dd_mutex);
1546 	ddp->dd_prop_loaded = B_FALSE;
1547 	ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
1548 	    dls_devnet_prop_task, ddp, TQ_SLEEP);
1549 	mutex_exit(&ddp->dd_mutex);
1550 
1551 done:
1552 	rw_exit(&i_dls_devnet_lock);
1553 
1554 	if (err == 0)
1555 		dls_devnet_stat_rename(ddp);
1556 
1557 	if (mph != NULL)
1558 		mac_perim_exit(mph);
1559 	softmac_rele_device(ddh);
1560 	return (err);
1561 }
1562 
1563 static int
1564 i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop,
1565     boolean_t transient)
1566 {
1567 	int			err;
1568 	mac_perim_handle_t	mph;
1569 	boolean_t		upcall_done = B_FALSE;
1570 	datalink_id_t		linkid = ddp->dd_linkid;
1571 	zoneid_t		old_zoneid = ddp->dd_zid;
1572 	dlmgmt_door_setzoneid_t	setzid;
1573 	dlmgmt_setzoneid_retval_t retval;
1574 
1575 	if (old_zoneid == new_zoneid)
1576 		return (0);
1577 
1578 	if ((err = mac_perim_enter_by_macname(ddp->dd_mac, &mph)) != 0)
1579 		return (err);
1580 
1581 	/*
1582 	 * When changing the zoneid of an existing link, we need to tell
1583 	 * dlmgmtd about it.  dlmgmtd already knows the zoneid associated with
1584 	 * newly created links.
1585 	 */
1586 	if (setprop) {
1587 		setzid.ld_cmd = DLMGMT_CMD_SETZONEID;
1588 		setzid.ld_linkid = linkid;
1589 		setzid.ld_zoneid = new_zoneid;
1590 		err = i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1591 		    sizeof (retval));
1592 		if (err != 0)
1593 			goto done;
1594 
1595 		/*
1596 		 * We set upcall_done only if the upcall is
1597 		 * successful. This way, if dls_link_setzid() fails,
1598 		 * we know another upcall must be done to reset the
1599 		 * dlmgmtd state.
1600 		 */
1601 		upcall_done = B_TRUE;
1602 	}
1603 	if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
1604 		ddp->dd_zid = new_zoneid;
1605 		ddp->dd_transient = transient;
1606 		devnet_need_rebuild = B_TRUE;
1607 	}
1608 
1609 done:
1610 	if (err != 0 && upcall_done) {
1611 		setzid.ld_zoneid = old_zoneid;
1612 		(void) i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1613 		    sizeof (retval));
1614 	}
1615 	mac_perim_exit(mph);
1616 	return (err);
1617 }
1618 
1619 int
1620 dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
1621 {
1622 	dls_devnet_t	*ddp;
1623 	int		err;
1624 	zoneid_t	old_zid;
1625 	boolean_t	refheld = B_FALSE;
1626 
1627 	old_zid = ddh->dd_zid;
1628 
1629 	if (old_zid == new_zid)
1630 		return (0);
1631 
1632 	/*
1633 	 * Acquire an additional reference to the link if it is being assigned
1634 	 * to a non-global zone from the global zone.
1635 	 */
1636 	if (old_zid == GLOBAL_ZONEID && new_zid != GLOBAL_ZONEID) {
1637 		if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
1638 			return (err);
1639 		refheld = B_TRUE;
1640 	}
1641 
1642 	if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE, B_FALSE)) != 0) {
1643 		if (refheld)
1644 			dls_devnet_rele(ddp);
1645 		return (err);
1646 	}
1647 
1648 	/*
1649 	 * Release the additional reference if the link is returning to the
1650 	 * global zone from a non-global zone.
1651 	 */
1652 	if (old_zid != GLOBAL_ZONEID && new_zid == GLOBAL_ZONEID)
1653 		dls_devnet_rele(ddh);
1654 
1655 	/* Re-create kstats in the appropriate zones. */
1656 	if (old_zid != GLOBAL_ZONEID)
1657 		dls_devnet_stat_destroy(ddh, old_zid);
1658 	if (new_zid != GLOBAL_ZONEID)
1659 		dls_devnet_stat_create(ddh, new_zid);
1660 
1661 	return (0);
1662 }
1663 
1664 zoneid_t
1665 dls_devnet_getzid(dls_dl_handle_t ddh)
1666 {
1667 	return (((dls_devnet_t *)ddh)->dd_zid);
1668 }
1669 
1670 zoneid_t
1671 dls_devnet_getownerzid(dls_dl_handle_t ddh)
1672 {
1673 	return (((dls_devnet_t *)ddh)->dd_owner_zid);
1674 }
1675 
1676 /*
1677  * Is linkid visible from zoneid?  A link is visible if it was created in the
1678  * zone, or if it is currently assigned to the zone.
1679  */
1680 boolean_t
1681 dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid)
1682 {
1683 	dls_devnet_t	*ddp;
1684 	boolean_t	result;
1685 
1686 	if (dls_devnet_hold_tmp(linkid, &ddp) != 0)
1687 		return (B_FALSE);
1688 	result = (ddp->dd_owner_zid == zoneid || ddp->dd_zid == zoneid);
1689 	dls_devnet_rele_tmp(ddp);
1690 	return (result);
1691 }
1692 
1693 /*
1694  * Access a vanity naming node.
1695  */
1696 int
1697 dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
1698 {
1699 	dls_devnet_t	*ddp;
1700 	dls_link_t	*dlp;
1701 	zoneid_t	zid = getzoneid();
1702 	int		err;
1703 	mac_perim_handle_t	mph;
1704 
1705 	if ((err = dls_devnet_hold_by_name(link, &ddp)) != 0)
1706 		return (err);
1707 
1708 	dls_devnet_prop_task_wait(ddp);
1709 
1710 	/*
1711 	 * Opening a link that does not belong to the current non-global zone
1712 	 * is not allowed.
1713 	 */
1714 	if (zid != GLOBAL_ZONEID && ddp->dd_zid != zid) {
1715 		dls_devnet_rele(ddp);
1716 		return (ENOENT);
1717 	}
1718 
1719 	err = mac_perim_enter_by_macname(ddp->dd_mac, &mph);
1720 	if (err != 0) {
1721 		dls_devnet_rele(ddp);
1722 		return (err);
1723 	}
1724 
1725 	err = dls_link_hold_create(ddp->dd_mac, &dlp);
1726 	mac_perim_exit(mph);
1727 
1728 	if (err != 0) {
1729 		dls_devnet_rele(ddp);
1730 		return (err);
1731 	}
1732 
1733 	*dhp = ddp;
1734 	*devp = dls_link_dev(dlp);
1735 	return (0);
1736 }
1737 
1738 /*
1739  * Close access to a vanity naming node.
1740  */
1741 void
1742 dls_devnet_close(dls_dl_handle_t dlh)
1743 {
1744 	dls_devnet_t	*ddp = dlh;
1745 	dls_link_t	*dlp;
1746 	mac_perim_handle_t	mph;
1747 
1748 	VERIFY(mac_perim_enter_by_macname(ddp->dd_mac, &mph) == 0);
1749 	VERIFY(dls_link_hold(ddp->dd_mac, &dlp) == 0);
1750 
1751 	/*
1752 	 * One rele for the hold placed in dls_devnet_open, another for
1753 	 * the hold done just above
1754 	 */
1755 	dls_link_rele(dlp);
1756 	dls_link_rele(dlp);
1757 	mac_perim_exit(mph);
1758 
1759 	dls_devnet_rele(ddp);
1760 }
1761 
1762 /*
1763  * This is used by /dev/net to rebuild the nodes for readdir().  It is not
1764  * critical and no protection is needed.
1765  */
1766 boolean_t
1767 dls_devnet_rebuild()
1768 {
1769 	boolean_t updated = devnet_need_rebuild;
1770 
1771 	devnet_need_rebuild = B_FALSE;
1772 	return (updated);
1773 }
1774 
1775 int
1776 dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid)
1777 {
1778 	dls_link_t	*dlp;
1779 	dls_devnet_t	*ddp;
1780 	int		err;
1781 	mac_perim_handle_t mph;
1782 
1783 	/*
1784 	 * Holding the mac perimeter ensures that the downcall from the
1785 	 * dlmgmt daemon which does the property loading does not proceed
1786 	 * until we relinquish the perimeter.
1787 	 */
1788 	mac_perim_enter_by_mh(mh, &mph);
1789 	/*
1790 	 * Make this association before we call dls_link_hold_create as
1791 	 * we need to use the linkid to get the user name for the link
1792 	 * when we create the MAC client.
1793 	 */
1794 	if ((err = dls_devnet_set(mh, linkid, zoneid, &ddp)) == 0) {
1795 		if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) {
1796 			mac_perim_exit(mph);
1797 			(void) dls_devnet_unset(mh, &linkid, B_FALSE);
1798 			return (err);
1799 		}
1800 
1801 		/*
1802 		 * If dd_linkid is set then the link was successfully
1803 		 * initialized. In this case we can remove the
1804 		 * initializing flag and make the link visible to the
1805 		 * rest of the system.
1806 		 *
1807 		 * If not set then we were called by softmac and it
1808 		 * was unable to obtain a linkid for the physical link
1809 		 * because dlmgmtd is down. In that case softmac will
1810 		 * eventually obtain a linkid and call
1811 		 * dls_devnet_recreate() to complete initialization.
1812 		 */
1813 		mutex_enter(&ddp->dd_mutex);
1814 		if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
1815 			ddp->dd_flags &= ~DD_INITIALIZING;
1816 		mutex_exit(&ddp->dd_mutex);
1817 
1818 	}
1819 
1820 	mac_perim_exit(mph);
1821 	return (err);
1822 }
1823 
1824 /*
1825  * Set the linkid of the dls_devnet_t and add it into the i_dls_devnet_id_hash.
1826  * This is called in the case that the dlmgmtd daemon is started later than
1827  * the physical devices get attached, and the linkid is only known after the
1828  * daemon starts.
1829  */
1830 int
1831 dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid)
1832 {
1833 	dls_devnet_t	*ddp;
1834 	int		err;
1835 
1836 	VERIFY(linkid != DATALINK_INVALID_LINKID);
1837 	if ((err = dls_devnet_set(mh, linkid, GLOBAL_ZONEID, &ddp)) == 0) {
1838 		mutex_enter(&ddp->dd_mutex);
1839 		if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
1840 			ddp->dd_flags &= ~DD_INITIALIZING;
1841 		mutex_exit(&ddp->dd_mutex);
1842 	}
1843 
1844 	return (err);
1845 
1846 }
1847 
1848 int
1849 dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait)
1850 {
1851 	int			err;
1852 	mac_perim_handle_t	mph;
1853 
1854 	*idp = DATALINK_INVALID_LINKID;
1855 	err = dls_devnet_unset(mh, idp, wait);
1856 
1857 	/*
1858 	 * We continue on in the face of ENOENT because the devnet
1859 	 * unset and DLS link release are not atomic and we may have a
1860 	 * scenario where there is no entry in i_dls_devnet_hash for
1861 	 * the MAC name but there is an entry in i_dls_link_hash. For
1862 	 * example, if the following occurred:
1863 	 *
1864 	 * 1. dls_devnet_unset() returns success, and
1865 	 *
1866 	 * 2. dls_link_rele_by_name() fails with ENOTEMPTY because
1867 	 *    flows still exist, and
1868 	 *
1869 	 * 3. dls_devnet_set() fails to set the zone id and calls
1870 	 *    dls_devnet_unset() -- leaving an entry in
1871 	 *    i_dls_link_hash but no corresponding entry in
1872 	 *    i_dls_devnet_hash.
1873 	 *
1874 	 * Even if #3 wasn't true the dls_devnet_set() may fail for
1875 	 * different reasons in the future; the point is that it _can_
1876 	 * fail as part of its contract. We can't rely on it working
1877 	 * so we must assume that these two pieces of state (devnet
1878 	 * and link hashes), which should always be in sync, can get
1879 	 * out of sync and thus even if we get ENOENT from the devnet
1880 	 * hash we should still try to delete from the link hash just
1881 	 * in case.
1882 	 *
1883 	 * We could prevent the ENOTEMPTY from dls_link_rele_by_name()
1884 	 * by calling mac_disable() before calling
1885 	 * dls_devnet_destroy() but that's not currently possible due
1886 	 * to a long-standing bug. OpenSolaris 6791335: The semantics
1887 	 * of mac_disable() were modified by Crossbow such that
1888 	 * dls_devnet_destroy() needs to be called before
1889 	 * mac_disable() can succeed. This is because of the implicit
1890 	 * reference that dls has on the mac_impl_t.
1891 	 */
1892 	if (err != 0 && err != ENOENT)
1893 		return (err);
1894 
1895 	mac_perim_enter_by_mh(mh, &mph);
1896 	err = dls_link_rele_by_name(mac_name(mh));
1897 	mac_perim_exit(mph);
1898 
1899 	if (err != 0) {
1900 		dls_devnet_t	*ddp;
1901 
1902 		/*
1903 		 * XXX It is a general GLDv3 bug that dls_devnet_set() has to
1904 		 * be called to re-set the link when destroy fails.  The
1905 		 * zoneid below will be incorrect if this function is ever
1906 		 * called from kernel context or from a zone other than that
1907 		 * which initially created the link.
1908 		 */
1909 		(void) dls_devnet_set(mh, *idp, crgetzoneid(CRED()), &ddp);
1910 
1911 		/*
1912 		 * You might think dd_linkid should always be set
1913 		 * here, but in the case where dls_devnet_unset()
1914 		 * returns ENOENT it will be DATALINK_INVALID_LINKID.
1915 		 * Stay consistent with the rest of DLS and only
1916 		 * remove the initializing flag if linkid is set.
1917 		 */
1918 		mutex_enter(&ddp->dd_mutex);
1919 		if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
1920 			ddp->dd_flags &= ~DD_INITIALIZING;
1921 		mutex_exit(&ddp->dd_mutex);
1922 	}
1923 	return (err);
1924 }
1925 
1926 /*
1927  * Implicitly create an IP tunnel link.
1928  */
1929 static int
1930 i_dls_devnet_create_iptun(const char *linkname, const char *drvname,
1931     datalink_id_t *linkid)
1932 {
1933 	int		err;
1934 	iptun_kparams_t	ik;
1935 	uint32_t	media;
1936 	netstack_t	*ns;
1937 	major_t		iptun_major;
1938 	dev_info_t	*iptun_dip;
1939 
1940 	/* First ensure that the iptun device is attached. */
1941 	if ((iptun_major = ddi_name_to_major(IPTUN_DRIVER_NAME)) == (major_t)-1)
1942 		return (EINVAL);
1943 	if ((iptun_dip = ddi_hold_devi_by_instance(iptun_major, 0, 0)) == NULL)
1944 		return (EINVAL);
1945 
1946 	if (IS_IPV4_TUN(drvname)) {
1947 		ik.iptun_kparam_type = IPTUN_TYPE_IPV4;
1948 		media = DL_IPV4;
1949 	} else if (IS_6TO4_TUN(drvname)) {
1950 		ik.iptun_kparam_type = IPTUN_TYPE_6TO4;
1951 		media = DL_6TO4;
1952 	} else if (IS_IPV6_TUN(drvname)) {
1953 		ik.iptun_kparam_type = IPTUN_TYPE_IPV6;
1954 		media = DL_IPV6;
1955 	}
1956 	ik.iptun_kparam_flags = (IPTUN_KPARAM_TYPE | IPTUN_KPARAM_IMPLICIT);
1957 
1958 	/* Obtain a datalink id for this tunnel. */
1959 	err = dls_mgmt_create((char *)linkname, 0, DATALINK_CLASS_IPTUN, media,
1960 	    B_FALSE, &ik.iptun_kparam_linkid);
1961 	if (err != 0) {
1962 		ddi_release_devi(iptun_dip);
1963 		return (err);
1964 	}
1965 
1966 	ns = netstack_get_current();
1967 	err = iptun_create(&ik, CRED());
1968 	netstack_rele(ns);
1969 
1970 	if (err != 0)
1971 		VERIFY(dls_mgmt_destroy(ik.iptun_kparam_linkid, B_FALSE) == 0);
1972 	else
1973 		*linkid = ik.iptun_kparam_linkid;
1974 
1975 	ddi_release_devi(iptun_dip);
1976 	return (err);
1977 }
1978 
1979 static int
1980 i_dls_devnet_destroy_iptun(datalink_id_t linkid)
1981 {
1982 	int err;
1983 
1984 	/*
1985 	 * Note the use of zone_kcred() here as opposed to CRED().  This is
1986 	 * because the process that does the last close of this /dev/net node
1987 	 * may not have necessary privileges to delete this IP tunnel, but the
1988 	 * tunnel must always be implicitly deleted on last close.
1989 	 */
1990 	if ((err = iptun_delete(linkid, zone_kcred())) == 0)
1991 		(void) dls_mgmt_destroy(linkid, B_FALSE);
1992 	return (err);
1993 }
1994 
1995 const char *
1996 dls_devnet_link(dls_dl_handle_t ddh)
1997 {
1998 	return (ddh->dd_linkname);
1999 }
2000 
2001 const char *
2002 dls_devnet_mac(dls_dl_handle_t ddh)
2003 {
2004 	return (ddh->dd_mac);
2005 }
2006 
2007 datalink_id_t
2008 dls_devnet_linkid(dls_dl_handle_t ddh)
2009 {
2010 	return (ddh->dd_linkid);
2011 }
2012