xref: /illumos-gate/usr/src/uts/common/io/dls/dls_mgmt.c (revision 8509e9caaaa43d21ab1a18a2aa45b43322c378ac)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2016 by Delphix. All rights reserved.
27  */
28 
29 /*
30  * Datalink management routines.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/door.h>
35 #include <sys/zone.h>
36 #include <sys/modctl.h>
37 #include <sys/file.h>
38 #include <sys/modhash.h>
39 #include <sys/kstat.h>
40 #include <sys/vnode.h>
41 #include <sys/cmn_err.h>
42 #include <sys/softmac.h>
43 #include <sys/dls.h>
44 #include <sys/dls_impl.h>
45 #include <sys/stropts.h>
46 #include <sys/netstack.h>
47 #include <inet/iptun/iptun_impl.h>
48 
49 /*
50  * This vanity name management module is treated as part of the GLD framework
51  * and we don't hold any GLD framework lock across a call to any mac
52  * function that needs to acquire the mac perimeter. The hierarchy is
53  * mac perimeter -> framework locks
54  */
55 
/*
 * Per-netstack DLS state.  One instance is allocated by dls_stack_init()
 * for each netstack and released by dls_stack_fini().
 */
typedef struct dls_stack {
	zoneid_t	dlss_zoneid;	/* zone derived from the netstack id */
} dls_stack_t;
59 
/* kmem cache from which every dls_devnet_t is allocated */
static kmem_cache_t	*i_dls_devnet_cachep;
/* serializes access to the dlmgmtd upcall door handle (dls_mgmt_dh) */
static kmutex_t		i_dls_mgmt_lock;
/* held across lookups/updates of the two dls_devnet_t hash tables below */
static krwlock_t	i_dls_devnet_lock;
/* dls_devnet_t keyed by dd_linkid */
static mod_hash_t	*i_dls_devnet_id_hash;
/* dls_devnet_t keyed by dd_mac */
static mod_hash_t	*i_dls_devnet_hash;

/* set whenever a linkid mapping is added to or removed from the id hash */
boolean_t		devnet_need_rebuild;
67 
68 #define	VLAN_HASHSZ	67	/* prime */
69 
/*
 * Predicates on a link name that has already had its trailing PPA removed.
 * For backward compatibility with Solaris 10 and earlier, opening a
 * /dev/net node whose name matches one of these causes
 * dls_devnet_hold_by_name() to implicitly create a tunnel link.
 */
#define	DLS_NAME_IS(name, lit)	(strcmp((name), (lit)) == 0)
#define	IS_IPV4_TUN(name)	DLS_NAME_IS(name, "ip.tun")
#define	IS_IPV6_TUN(name)	DLS_NAME_IS(name, "ip6.tun")
#define	IS_6TO4_TUN(name)	DLS_NAME_IS(name, "ip.6to4tun")
#define	IS_IPTUN_LINK(name)	\
	(IS_IPV4_TUN(name) || IS_IPV6_TUN(name) || IS_6TO4_TUN(name))
81 
82 /* Upcall door handle */
83 static door_handle_t	dls_mgmt_dh = NULL;
84 
85 /* dls_devnet_t dd_flags */
86 #define	DD_CONDEMNED		0x1
87 #define	DD_IMPLICIT_IPTUN	0x2 /* Implicitly-created ip*.*tun* tunnel */
88 
/*
 * This structure is used to keep the <linkid, macname> mapping.
 * This structure itself is not protected by the mac perimeter, but is
 * protected by the dd_mutex and i_dls_devnet_lock. Thus most of the
 * functions manipulating this structure such as dls_devnet_set/unset etc.
 * may be called while not holding the mac perimeter.
 */
typedef struct dls_devnet_s {
	/* linkid, or DATALINK_INVALID_LINKID when none is associated */
	datalink_id_t	dd_linkid;
	/* link name reported by dlmgmtd for dd_linkid */
	char		dd_linkname[MAXLINKNAMELEN];
	/* mac name; key of this entry in i_dls_devnet_hash */
	char		dd_mac[MAXNAMELEN];
	kstat_t		*dd_ksp;	/* kstat in owner_zid */
	kstat_t		*dd_zone_ksp;	/* in dd_zid if != owner_zid */
	/* long-term holds; the creation reference counts as one */
	uint32_t	dd_ref;
	kmutex_t	dd_mutex;
	/* signalled when dd_tref drops to 0 and when the prop task ends */
	kcondvar_t	dd_cv;
	/* temporary holds; dls_devnet_unset() can wait for these to drain */
	uint32_t	dd_tref;
	uint_t		dd_flags;	/* DD_* flags */
	zoneid_t	dd_owner_zid;	/* zone where node was created */
	zoneid_t	dd_zid;		/* current zone */
	/* B_TRUE once dls_devnet_prop_task() has run for this link */
	boolean_t	dd_prop_loaded;
	/* id of the dispatched property-load task; 0 when none is pending */
	taskqid_t	dd_prop_taskid;
} dls_devnet_t;
112 
113 static int i_dls_devnet_create_iptun(const char *, const char *,
114     datalink_id_t *);
115 static int i_dls_devnet_destroy_iptun(datalink_id_t);
116 static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t);
117 static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
118 
119 /*ARGSUSED*/
120 static int
121 i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
122 {
123 	dls_devnet_t	*ddp = buf;
124 
125 	bzero(buf, sizeof (dls_devnet_t));
126 	mutex_init(&ddp->dd_mutex, NULL, MUTEX_DEFAULT, NULL);
127 	cv_init(&ddp->dd_cv, NULL, CV_DEFAULT, NULL);
128 	return (0);
129 }
130 
131 /*ARGSUSED*/
132 static void
133 i_dls_devnet_destructor(void *buf, void *arg)
134 {
135 	dls_devnet_t	*ddp = buf;
136 
137 	ASSERT(ddp->dd_ksp == NULL);
138 	ASSERT(ddp->dd_ref == 0);
139 	ASSERT(ddp->dd_tref == 0);
140 	mutex_destroy(&ddp->dd_mutex);
141 	cv_destroy(&ddp->dd_cv);
142 }
143 
144 /* ARGSUSED */
145 static int
146 dls_zone_remove(datalink_id_t linkid, void *arg)
147 {
148 	dls_devnet_t *ddp;
149 
150 	if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
151 		(void) dls_devnet_setzid(ddp, GLOBAL_ZONEID);
152 		dls_devnet_rele_tmp(ddp);
153 	}
154 	return (0);
155 }
156 
157 /* ARGSUSED */
158 static void *
159 dls_stack_init(netstackid_t stackid, netstack_t *ns)
160 {
161 	dls_stack_t *dlss;
162 
163 	dlss = kmem_zalloc(sizeof (*dlss), KM_SLEEP);
164 	dlss->dlss_zoneid = netstackid_to_zoneid(stackid);
165 	return (dlss);
166 }
167 
168 /* ARGSUSED */
169 static void
170 dls_stack_shutdown(netstackid_t stackid, void *arg)
171 {
172 	dls_stack_t	*dlss = (dls_stack_t *)arg;
173 
174 	/* Move remaining datalinks in this zone back to the global zone. */
175 	(void) zone_datalink_walk(dlss->dlss_zoneid, dls_zone_remove, NULL);
176 }
177 
178 /* ARGSUSED */
179 static void
180 dls_stack_fini(netstackid_t stackid, void *arg)
181 {
182 	dls_stack_t	*dlss = (dls_stack_t *)arg;
183 
184 	kmem_free(dlss, sizeof (*dlss));
185 }
186 
187 /*
188  * Module initialization and finalization functions.
189  */
190 void
191 dls_mgmt_init(void)
192 {
193 	mutex_init(&i_dls_mgmt_lock, NULL, MUTEX_DEFAULT, NULL);
194 	rw_init(&i_dls_devnet_lock, NULL, RW_DEFAULT, NULL);
195 
196 	/*
197 	 * Create a kmem_cache of dls_devnet_t structures.
198 	 */
199 	i_dls_devnet_cachep = kmem_cache_create("dls_devnet_cache",
200 	    sizeof (dls_devnet_t), 0, i_dls_devnet_constructor,
201 	    i_dls_devnet_destructor, NULL, NULL, NULL, 0);
202 	ASSERT(i_dls_devnet_cachep != NULL);
203 
204 	/*
205 	 * Create a hash table, keyed by dd_linkid, of dls_devnet_t.
206 	 */
207 	i_dls_devnet_id_hash = mod_hash_create_idhash("dls_devnet_id_hash",
208 	    VLAN_HASHSZ, mod_hash_null_valdtor);
209 
210 	/*
211 	 * Create a hash table, keyed by dd_mac
212 	 */
213 	i_dls_devnet_hash = mod_hash_create_extended("dls_devnet_hash",
214 	    VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
215 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
216 
217 	devnet_need_rebuild = B_FALSE;
218 
219 	netstack_register(NS_DLS, dls_stack_init, dls_stack_shutdown,
220 	    dls_stack_fini);
221 }
222 
/*
 * Undo dls_mgmt_init(): unregister the netstack callbacks, then destroy the
 * hash tables, the dls_devnet_t cache and the locks.
 */
void
dls_mgmt_fini(void)
{
	netstack_unregister(NS_DLS);
	mod_hash_destroy_hash(i_dls_devnet_hash);
	mod_hash_destroy_hash(i_dls_devnet_id_hash);
	kmem_cache_destroy(i_dls_devnet_cachep);
	rw_destroy(&i_dls_devnet_lock);
	mutex_destroy(&i_dls_mgmt_lock);
}
233 
234 int
235 dls_mgmt_door_set(boolean_t start)
236 {
237 	int	err;
238 
239 	/* handle daemon restart */
240 	mutex_enter(&i_dls_mgmt_lock);
241 	if (dls_mgmt_dh != NULL) {
242 		door_ki_rele(dls_mgmt_dh);
243 		dls_mgmt_dh = NULL;
244 	}
245 
246 	if (start && ((err = door_ki_open(DLMGMT_DOOR, &dls_mgmt_dh)) != 0)) {
247 		mutex_exit(&i_dls_mgmt_lock);
248 		return (err);
249 	}
250 
251 	mutex_exit(&i_dls_mgmt_lock);
252 
253 	/*
254 	 * Create and associate <link name, linkid> mapping for network devices
255 	 * which are already attached before the daemon is started.
256 	 */
257 	if (start)
258 		softmac_recreate();
259 	return (0);
260 }
261 
262 static boolean_t
263 i_dls_mgmt_door_revoked(door_handle_t dh)
264 {
265 	struct door_info info;
266 	extern int sys_shutdown;
267 
268 	ASSERT(dh != NULL);
269 
270 	if (sys_shutdown) {
271 		cmn_err(CE_NOTE, "dls_mgmt_door: shutdown observed\n");
272 		return (B_TRUE);
273 	}
274 
275 	if (door_ki_info(dh, &info) != 0)
276 		return (B_TRUE);
277 
278 	return ((info.di_attributes & DOOR_REVOKED) != 0);
279 }
280 
/*
 * Upcall to the datalink management daemon (dlmgmtd).
 *
 *     - arg, asize	request buffer and its size
 *     - rbuf, rsize	caller-supplied reply buffer and its expected size
 *
 * Returns EBADF when no usable door is installed, the door call error when
 * the upcall itself fails, ENOSPC when the reply did not fit in rbuf,
 * EINVAL when the reply size differs from rsize, and otherwise the lr_err
 * value the daemon placed at the start of the reply.
 */
static int
i_dls_mgmt_upcall(void *arg, size_t asize, void *rbuf, size_t rsize)
{
	door_arg_t			darg, save_arg;
	door_handle_t			dh;
	int				err;
	int				retry = 0;

#define	MAXRETRYNUM	3

	ASSERT(arg);
	darg.data_ptr = arg;
	darg.data_size = asize;
	darg.desc_ptr = NULL;
	darg.desc_num = 0;
	darg.rbuf = rbuf;
	darg.rsize = rsize;
	/* pristine copy, restored before every repeated attempt */
	save_arg = darg;

retry:
	/*
	 * Take our own hold on the current door handle under the lock so it
	 * stays valid for the duration of the call.  Note that 'retry' is
	 * not reset when we come back here.
	 */
	mutex_enter(&i_dls_mgmt_lock);
	dh = dls_mgmt_dh;
	if ((dh == NULL) || i_dls_mgmt_door_revoked(dh)) {
		mutex_exit(&i_dls_mgmt_lock);
		return (EBADF);
	}
	door_ki_hold(dh);
	mutex_exit(&i_dls_mgmt_lock);

	for (;;) {
		retry++;
		if ((err = door_ki_upcall_limited(dh, &darg, zone_kcred(),
		    SIZE_MAX, 0)) == 0)
			break;

		/*
		 * handle door call errors
		 */
		darg = save_arg;
		switch (err) {
		case EINTR:
			/*
			 * If the operation which caused this door upcall gets
			 * interrupted, return directly.
			 */
			goto done;
		case EAGAIN:
			/*
			 * Repeat upcall if the maximum attempt limit has not
			 * been reached.
			 */
			if (retry < MAXRETRYNUM) {
				delay(2 * hz);
				/* leaves the switch; the for loop repeats */
				break;
			}
			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
			goto done;
		default:
			/* A fatal door error */
			if (i_dls_mgmt_door_revoked(dh)) {
				cmn_err(CE_NOTE,
				    "dls: dlmgmtd door service revoked\n");

				if (retry < MAXRETRYNUM) {
					/*
					 * Drop the hold on this handle and
					 * start over with whatever handle is
					 * installed now.
					 */
					door_ki_rele(dh);
					goto retry;
				}
			}
			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
			goto done;
		}
	}

	if (darg.rbuf != rbuf) {
		/*
		 * The size of the input rbuf was not big enough, so the
		 * upcall allocated the rbuf itself.  If this happens, assume
		 * that this was an invalid door call request.
		 */
		kmem_free(darg.rbuf, darg.rsize);
		err = ENOSPC;
		goto done;
	}

	if (darg.rsize != rsize) {
		err = EINVAL;
		goto done;
	}

	/* the reply is read as a dlmgmt_retval_t to extract lr_err */
	err = ((dlmgmt_retval_t *)rbuf)->lr_err;

done:
	door_ki_rele(dh);
	return (err);
}
379 
380 /*
381  * Request the datalink management daemon to create a link with the attributes
382  * below.  Upon success, zero is returned and linkidp contains the linkid for
383  * the new link; otherwise, an errno is returned.
384  *
385  *     - dev		physical dev_t.  required for all physical links,
386  *		        including GLDv3 links.  It will be used to force the
387  *		        attachment of a physical device, hence the
388  *		        registration of its mac
389  *     - class		datalink class
390  *     - media type	media type; DL_OTHER means unknown
391  *     - persist	whether to persist the datalink
392  */
393 int
394 dls_mgmt_create(const char *devname, dev_t dev, datalink_class_t class,
395     uint32_t media, boolean_t persist, datalink_id_t *linkidp)
396 {
397 	dlmgmt_upcall_arg_create_t	create;
398 	dlmgmt_create_retval_t		retval;
399 	int				err;
400 
401 	create.ld_cmd = DLMGMT_CMD_DLS_CREATE;
402 	create.ld_class = class;
403 	create.ld_media = media;
404 	create.ld_phymaj = getmajor(dev);
405 	create.ld_phyinst = getminor(dev);
406 	create.ld_persist = persist;
407 	if (strlcpy(create.ld_devname, devname, sizeof (create.ld_devname)) >=
408 	    sizeof (create.ld_devname))
409 		return (EINVAL);
410 
411 	if ((err = i_dls_mgmt_upcall(&create, sizeof (create), &retval,
412 	    sizeof (retval))) == 0) {
413 		*linkidp = retval.lr_linkid;
414 	}
415 	return (err);
416 }
417 
418 /*
419  * Request the datalink management daemon to destroy the specified link.
420  * Returns zero upon success, or an errno upon failure.
421  */
422 int
423 dls_mgmt_destroy(datalink_id_t linkid, boolean_t persist)
424 {
425 	dlmgmt_upcall_arg_destroy_t	destroy;
426 	dlmgmt_destroy_retval_t		retval;
427 
428 	destroy.ld_cmd = DLMGMT_CMD_DLS_DESTROY;
429 	destroy.ld_linkid = linkid;
430 	destroy.ld_persist = persist;
431 
432 	return (i_dls_mgmt_upcall(&destroy, sizeof (destroy),
433 	    &retval, sizeof (retval)));
434 }
435 
436 /*
437  * Request the datalink management daemon to verify/update the information
438  * for a physical link.  Upon success, get its linkid.
439  *
440  *     - media type	media type
441  *     - novanity	whether this physical datalink supports vanity naming.
442  *			physical links that do not use the GLDv3 MAC plugin
443  *			cannot suport vanity naming
444  *
445  * This function could fail with ENOENT or EEXIST.  Two cases return EEXIST:
446  *
447  * 1. A link with devname already exists, but the media type does not match.
448  *    In this case, mediap will bee set to the media type of the existing link.
449  * 2. A link with devname already exists, but its link name does not match
450  *    the device name, although this link does not support vanity naming.
451  */
452 int
453 dls_mgmt_update(const char *devname, uint32_t media, boolean_t novanity,
454     uint32_t *mediap, datalink_id_t *linkidp)
455 {
456 	dlmgmt_upcall_arg_update_t	update;
457 	dlmgmt_update_retval_t		retval;
458 	int				err;
459 
460 	update.ld_cmd = DLMGMT_CMD_DLS_UPDATE;
461 
462 	if (strlcpy(update.ld_devname, devname, sizeof (update.ld_devname)) >=
463 	    sizeof (update.ld_devname))
464 		return (EINVAL);
465 
466 	update.ld_media = media;
467 	update.ld_novanity = novanity;
468 
469 	if ((err = i_dls_mgmt_upcall(&update, sizeof (update), &retval,
470 	    sizeof (retval))) == EEXIST) {
471 		*linkidp = retval.lr_linkid;
472 		*mediap = retval.lr_media;
473 	} else if (err == 0) {
474 		*linkidp = retval.lr_linkid;
475 	}
476 
477 	return (err);
478 }
479 
480 /*
481  * Request the datalink management daemon to get the information for a link.
482  * Returns zero upon success, or an errno upon failure.
483  *
484  * Only fills in information for argument pointers that are non-NULL.
485  * Note that the link argument is expected to be MAXLINKNAMELEN bytes.
486  */
487 int
488 dls_mgmt_get_linkinfo(datalink_id_t linkid, char *link,
489     datalink_class_t *classp, uint32_t *mediap, uint32_t *flagsp)
490 {
491 	dlmgmt_door_getname_t	getname;
492 	dlmgmt_getname_retval_t	retval;
493 	int			err, len;
494 
495 	getname.ld_cmd = DLMGMT_CMD_GETNAME;
496 	getname.ld_linkid = linkid;
497 
498 	if ((err = i_dls_mgmt_upcall(&getname, sizeof (getname), &retval,
499 	    sizeof (retval))) != 0) {
500 		return (err);
501 	}
502 
503 	len = strlen(retval.lr_link);
504 	if (len <= 1 || len >= MAXLINKNAMELEN)
505 		return (EINVAL);
506 
507 	if (link != NULL)
508 		(void) strlcpy(link, retval.lr_link, MAXLINKNAMELEN);
509 	if (classp != NULL)
510 		*classp = retval.lr_class;
511 	if (mediap != NULL)
512 		*mediap = retval.lr_media;
513 	if (flagsp != NULL)
514 		*flagsp = retval.lr_flags;
515 	return (0);
516 }
517 
518 /*
519  * Request the datalink management daemon to get the linkid for a link.
520  * Returns a non-zero error code on failure.  The linkid argument is only
521  * set on success (when zero is returned.)
522  */
523 int
524 dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
525 {
526 	dlmgmt_door_getlinkid_t		getlinkid;
527 	dlmgmt_getlinkid_retval_t	retval;
528 	int				err;
529 
530 	getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
531 	(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
532 
533 	if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
534 	    sizeof (retval))) == 0) {
535 		*linkid = retval.lr_linkid;
536 	}
537 	return (err);
538 }
539 
540 datalink_id_t
541 dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class,
542     datalink_media_t dmedia, uint32_t flags)
543 {
544 	dlmgmt_door_getnext_t	getnext;
545 	dlmgmt_getnext_retval_t	retval;
546 
547 	getnext.ld_cmd = DLMGMT_CMD_GETNEXT;
548 	getnext.ld_class = class;
549 	getnext.ld_dmedia = dmedia;
550 	getnext.ld_flags = flags;
551 	getnext.ld_linkid = linkid;
552 
553 	if (i_dls_mgmt_upcall(&getnext, sizeof (getnext), &retval,
554 	    sizeof (retval)) != 0) {
555 		return (DATALINK_INVALID_LINKID);
556 	}
557 
558 	return (retval.lr_linkid);
559 }
560 
561 static int
562 i_dls_mgmt_get_linkattr(const datalink_id_t linkid, const char *attr,
563     void *attrval, size_t *attrszp)
564 {
565 	dlmgmt_upcall_arg_getattr_t	getattr;
566 	dlmgmt_getattr_retval_t		retval;
567 	int				err;
568 
569 	getattr.ld_cmd = DLMGMT_CMD_DLS_GETATTR;
570 	getattr.ld_linkid = linkid;
571 	(void) strlcpy(getattr.ld_attr, attr, MAXLINKATTRLEN);
572 
573 	if ((err = i_dls_mgmt_upcall(&getattr, sizeof (getattr), &retval,
574 	    sizeof (retval))) == 0) {
575 		if (*attrszp < retval.lr_attrsz)
576 			return (EINVAL);
577 		*attrszp = retval.lr_attrsz;
578 		bcopy(retval.lr_attrval, attrval, retval.lr_attrsz);
579 	}
580 
581 	return (err);
582 }
583 
584 /*
585  * Note that this function can only get devp successfully for non-VLAN link.
586  */
587 int
588 dls_mgmt_get_phydev(datalink_id_t linkid, dev_t *devp)
589 {
590 	uint64_t	maj, inst;
591 	size_t		attrsz = sizeof (uint64_t);
592 
593 	if (i_dls_mgmt_get_linkattr(linkid, FPHYMAJ, &maj, &attrsz) != 0 ||
594 	    attrsz != sizeof (uint64_t) ||
595 	    i_dls_mgmt_get_linkattr(linkid, FPHYINST, &inst, &attrsz) != 0 ||
596 	    attrsz != sizeof (uint64_t)) {
597 		return (EINVAL);
598 	}
599 
600 	*devp = makedevice((major_t)maj, (minor_t)inst);
601 	return (0);
602 }
603 
604 /*
605  * Request the datalink management daemon to push in
606  * all properties associated with the link.
607  * Returns a non-zero error code on failure.
608  */
609 int
610 dls_mgmt_linkprop_init(datalink_id_t linkid)
611 {
612 	dlmgmt_door_linkprop_init_t	li;
613 	dlmgmt_linkprop_init_retval_t	retval;
614 	int				err;
615 
616 	li.ld_cmd = DLMGMT_CMD_LINKPROP_INIT;
617 	li.ld_linkid = linkid;
618 
619 	err = i_dls_mgmt_upcall(&li, sizeof (li), &retval, sizeof (retval));
620 	return (err);
621 }
622 
623 static void
624 dls_devnet_prop_task(void *arg)
625 {
626 	dls_devnet_t		*ddp = arg;
627 
628 	(void) dls_mgmt_linkprop_init(ddp->dd_linkid);
629 
630 	mutex_enter(&ddp->dd_mutex);
631 	ddp->dd_prop_loaded = B_TRUE;
632 	ddp->dd_prop_taskid = 0;
633 	cv_broadcast(&ddp->dd_cv);
634 	mutex_exit(&ddp->dd_mutex);
635 }
636 
637 /*
638  * Ensure property loading task is completed.
639  */
640 void
641 dls_devnet_prop_task_wait(dls_dl_handle_t ddp)
642 {
643 	mutex_enter(&ddp->dd_mutex);
644 	while (ddp->dd_prop_taskid != 0)
645 		cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
646 	mutex_exit(&ddp->dd_mutex);
647 }
648 
649 void
650 dls_devnet_rele_tmp(dls_dl_handle_t dlh)
651 {
652 	dls_devnet_t		*ddp = dlh;
653 
654 	mutex_enter(&ddp->dd_mutex);
655 	ASSERT(ddp->dd_tref != 0);
656 	if (--ddp->dd_tref == 0)
657 		cv_signal(&ddp->dd_cv);
658 	mutex_exit(&ddp->dd_mutex);
659 }
660 
661 int
662 dls_devnet_hold_link(datalink_id_t linkid, dls_dl_handle_t *ddhp,
663     dls_link_t **dlpp)
664 {
665 	dls_dl_handle_t	dlh;
666 	dls_link_t	*dlp;
667 	int		err;
668 
669 	if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
670 		return (err);
671 
672 	if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) {
673 		dls_devnet_rele_tmp(dlh);
674 		return (err);
675 	}
676 
677 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
678 
679 	*ddhp = dlh;
680 	*dlpp = dlp;
681 	return (0);
682 }
683 
/*
 * Release the holds taken by dls_devnet_hold_link(): first the dls_link_t,
 * then the temporary devnet hold.  The mac perimeter of the link is
 * asserted to be held on entry.
 */
void
dls_devnet_rele_link(dls_dl_handle_t dlh, dls_link_t *dlp)
{
	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));

	dls_link_rele(dlp);
	dls_devnet_rele_tmp(dlh);
}
692 
693 /*
694  * "link" kstats related functions.
695  */
696 
697 /*
698  * Query the "link" kstats.
699  *
700  * We may be called from the kstat subsystem in an arbitrary context.
701  * If the caller is the stack, the context could be an upcall data
702  * thread. Hence we can't acquire the mac perimeter in this function
703  * for fear of deadlock.
704  */
705 static int
706 dls_devnet_stat_update(kstat_t *ksp, int rw)
707 {
708 	datalink_id_t	linkid = (datalink_id_t)(uintptr_t)ksp->ks_private;
709 	dls_devnet_t	*ddp;
710 	dls_link_t	*dlp;
711 	int		err;
712 
713 	if ((err = dls_devnet_hold_tmp(linkid, &ddp)) != 0) {
714 		return (err);
715 	}
716 
717 	/*
718 	 * If a device detach happens at this time, it will block in
719 	 * dls_devnet_unset since the dd_tref has been bumped in
720 	 * dls_devnet_hold_tmp(). So the access to 'dlp' is safe even though
721 	 * we don't hold the mac perimeter.
722 	 */
723 	if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)ddp->dd_mac,
724 	    (mod_hash_val_t *)&dlp) != 0) {
725 		dls_devnet_rele_tmp(ddp);
726 		return (ENOENT);
727 	}
728 
729 	err = dls_stat_update(ksp, dlp, rw);
730 
731 	dls_devnet_rele_tmp(ddp);
732 	return (err);
733 }
734 
735 /*
736  * Create the "link" kstats.
737  */
738 static void
739 dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid)
740 {
741 	kstat_t	*ksp;
742 
743 	if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid,
744 	    dls_devnet_stat_update, (void *)(uintptr_t)ddp->dd_linkid,
745 	    &ksp) == 0) {
746 		ASSERT(ksp != NULL);
747 		if (zoneid == ddp->dd_owner_zid) {
748 			ASSERT(ddp->dd_ksp == NULL);
749 			ddp->dd_ksp = ksp;
750 		} else {
751 			ASSERT(ddp->dd_zone_ksp == NULL);
752 			ddp->dd_zone_ksp = ksp;
753 		}
754 	}
755 }
756 
757 /*
758  * Destroy the "link" kstats.
759  */
760 static void
761 dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
762 {
763 	if (zoneid == ddp->dd_owner_zid) {
764 		if (ddp->dd_ksp != NULL) {
765 			kstat_delete(ddp->dd_ksp);
766 			ddp->dd_ksp = NULL;
767 		}
768 	} else {
769 		if (ddp->dd_zone_ksp != NULL) {
770 			kstat_delete(ddp->dd_zone_ksp);
771 			ddp->dd_zone_ksp = NULL;
772 		}
773 	}
774 }
775 
776 /*
777  * The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
778  * and create the new set using the new name.
779  */
780 static void
781 dls_devnet_stat_rename(dls_devnet_t *ddp)
782 {
783 	if (ddp->dd_ksp != NULL) {
784 		kstat_delete(ddp->dd_ksp);
785 		ddp->dd_ksp = NULL;
786 	}
787 	/* We can't rename a link while it's assigned to a non-global zone. */
788 	ASSERT(ddp->dd_zone_ksp == NULL);
789 	dls_devnet_stat_create(ddp, ddp->dd_owner_zid);
790 }
791 
792 /*
793  * Associate a linkid with a given link (identified by macname)
794  */
795 static int
796 dls_devnet_set(const char *macname, datalink_id_t linkid, zoneid_t zoneid,
797     dls_devnet_t **ddpp)
798 {
799 	dls_devnet_t		*ddp = NULL;
800 	datalink_class_t	class;
801 	int			err;
802 	boolean_t		stat_create = B_FALSE;
803 	char			linkname[MAXLINKNAMELEN];
804 
805 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
806 
807 	/*
808 	 * Don't allow callers to set a link name with a linkid that already
809 	 * has a name association (that's what rename is for).
810 	 */
811 	if (linkid != DATALINK_INVALID_LINKID) {
812 		if (mod_hash_find(i_dls_devnet_id_hash,
813 		    (mod_hash_key_t)(uintptr_t)linkid,
814 		    (mod_hash_val_t *)&ddp) == 0) {
815 			err = EEXIST;
816 			goto done;
817 		}
818 		if ((err = dls_mgmt_get_linkinfo(linkid, linkname, &class,
819 		    NULL, NULL)) != 0)
820 			goto done;
821 	}
822 
823 	if ((err = mod_hash_find(i_dls_devnet_hash,
824 	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) == 0) {
825 		if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
826 			err = EEXIST;
827 			goto done;
828 		}
829 
830 		/*
831 		 * This might be a physical link that has already
832 		 * been created, but which does not have a linkid
833 		 * because dlmgmtd was not running when it was created.
834 		 */
835 		if (linkid == DATALINK_INVALID_LINKID ||
836 		    class != DATALINK_CLASS_PHYS) {
837 			err = EINVAL;
838 			goto done;
839 		}
840 	} else {
841 		ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP);
842 		ddp->dd_tref = 0;
843 		ddp->dd_ref++;
844 		ddp->dd_owner_zid = zoneid;
845 		(void) strlcpy(ddp->dd_mac, macname, sizeof (ddp->dd_mac));
846 		VERIFY(mod_hash_insert(i_dls_devnet_hash,
847 		    (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0);
848 	}
849 
850 	if (linkid != DATALINK_INVALID_LINKID) {
851 		ddp->dd_linkid = linkid;
852 		(void) strlcpy(ddp->dd_linkname, linkname,
853 		    sizeof (ddp->dd_linkname));
854 		VERIFY(mod_hash_insert(i_dls_devnet_id_hash,
855 		    (mod_hash_key_t)(uintptr_t)linkid,
856 		    (mod_hash_val_t)ddp) == 0);
857 		devnet_need_rebuild = B_TRUE;
858 		stat_create = B_TRUE;
859 		mutex_enter(&ddp->dd_mutex);
860 		if (!ddp->dd_prop_loaded && (ddp->dd_prop_taskid == 0)) {
861 			ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
862 			    dls_devnet_prop_task, ddp, TQ_SLEEP);
863 		}
864 		mutex_exit(&ddp->dd_mutex);
865 	}
866 	err = 0;
867 done:
868 	/*
869 	 * It is safe to drop the i_dls_devnet_lock at this point. In the case
870 	 * of physical devices, the softmac framework will fail the device
871 	 * detach based on the smac_state or smac_hold_cnt. Other cases like
872 	 * vnic and aggr use their own scheme to serialize creates and deletes
873 	 * and ensure that *ddp is valid.
874 	 */
875 	rw_exit(&i_dls_devnet_lock);
876 	if (err == 0) {
877 		if (zoneid != GLOBAL_ZONEID &&
878 		    (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE)) != 0)
879 			(void) dls_devnet_unset(macname, &linkid, B_TRUE);
880 		/*
881 		 * The kstat subsystem holds its own locks (rather perimeter)
882 		 * before calling the ks_update (dls_devnet_stat_update) entry
883 		 * point which in turn grabs the i_dls_devnet_lock. So the
884 		 * lock hierarchy is kstat locks -> i_dls_devnet_lock.
885 		 */
886 		if (stat_create)
887 			dls_devnet_stat_create(ddp, zoneid);
888 		if (ddpp != NULL)
889 			*ddpp = ddp;
890 	}
891 	return (err);
892 }
893 
/*
 * Disassociate a linkid with a given link (identified by macname)
 * This waits until temporary references to the dls_devnet_t are gone.
 *
 *     - macname	mac name of the link; key into i_dls_devnet_hash
 *     - id		receives the linkid the entry had
 *     - wait		B_TRUE to block until temporary holds and the
 *			property task are gone; B_FALSE to fail with EBUSY
 *			if any are outstanding
 *
 * Returns 0 on success (the dls_devnet_t has been freed), ENOENT if no
 * entry exists for macname, or EBUSY if the entry is still in use.
 */
static int
dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait)
{
	dls_devnet_t	*ddp;
	int		err;
	mod_hash_val_t	val;

	rw_enter(&i_dls_devnet_lock, RW_WRITER);
	if ((err = mod_hash_find(i_dls_devnet_hash,
	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) != 0) {
		ASSERT(err == MH_ERR_NOTFOUND);
		rw_exit(&i_dls_devnet_lock);
		return (ENOENT);
	}

	mutex_enter(&ddp->dd_mutex);

	/*
	 * Make sure downcalls into softmac_create or softmac_destroy from
	 * devfs don't cv_wait on any devfs related condition for fear of
	 * deadlock. Return EBUSY if the asynchronous thread started for
	 * property loading as part of the post attach hasn't yet completed.
	 */
	ASSERT(ddp->dd_ref != 0);
	/* Only the creation reference may remain. */
	if ((ddp->dd_ref != 1) || (!wait &&
	    (ddp->dd_tref != 0 || ddp->dd_prop_taskid != 0))) {
		mutex_exit(&ddp->dd_mutex);
		rw_exit(&i_dls_devnet_lock);
		return (EBUSY);
	}

	/* From here on the hold routines refuse this entry with ENOENT. */
	ddp->dd_flags |= DD_CONDEMNED;
	ddp->dd_ref--;
	*id = ddp->dd_linkid;

	if (ddp->dd_zid != GLOBAL_ZONEID)
		(void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);

	/*
	 * Remove this dls_devnet_t from the hash table.
	 */
	VERIFY(mod_hash_remove(i_dls_devnet_hash,
	    (mod_hash_key_t)ddp->dd_mac, &val) == 0);

	if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
		VERIFY(mod_hash_remove(i_dls_devnet_id_hash,
		    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, &val) == 0);

		devnet_need_rebuild = B_TRUE;
	}
	/* dd_mutex stays held; only the global lock is dropped here. */
	rw_exit(&i_dls_devnet_lock);

	if (wait) {
		/*
		 * Wait until all temporary references are released.
		 */
		while ((ddp->dd_tref != 0) || (ddp->dd_prop_taskid != 0))
			cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
	} else {
		ASSERT(ddp->dd_tref == 0 &&
		    ddp->dd_prop_taskid == (taskqid_t)NULL);
	}

	if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
		dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);

	/* Reset the fields touched above before the object is freed. */
	ddp->dd_prop_loaded = B_FALSE;
	ddp->dd_linkid = DATALINK_INVALID_LINKID;
	ddp->dd_flags = 0;
	mutex_exit(&ddp->dd_mutex);
	kmem_cache_free(i_dls_devnet_cachep, ddp);

	return (0);
}
972 
973 /*
974  * This is a private hold routine used when we already have the dls_link_t, thus
975  * we know that it cannot go away.
976  */
977 int
978 dls_devnet_hold_tmp_by_link(dls_link_t *dlp, dls_dl_handle_t *ddhp)
979 {
980 	int err;
981 	dls_devnet_t *ddp = NULL;
982 
983 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
984 	if ((err = mod_hash_find(i_dls_devnet_hash,
985 	    (mod_hash_key_t)dlp->dl_name, (mod_hash_val_t *)&ddp)) != 0) {
986 		ASSERT(err == MH_ERR_NOTFOUND);
987 		rw_exit(&i_dls_devnet_lock);
988 		return (ENOENT);
989 	}
990 
991 	mutex_enter(&ddp->dd_mutex);
992 	ASSERT(ddp->dd_ref > 0);
993 	if (ddp->dd_flags & DD_CONDEMNED) {
994 		mutex_exit(&ddp->dd_mutex);
995 		rw_exit(&i_dls_devnet_lock);
996 		return (ENOENT);
997 	}
998 	ddp->dd_tref++;
999 	mutex_exit(&ddp->dd_mutex);
1000 	rw_exit(&i_dls_devnet_lock);
1001 
1002 	*ddhp = ddp;
1003 	return (0);
1004 }
1005 
1006 static int
1007 dls_devnet_hold_common(datalink_id_t linkid, dls_devnet_t **ddpp,
1008     boolean_t tmp_hold)
1009 {
1010 	dls_devnet_t		*ddp;
1011 	int			err;
1012 
1013 	rw_enter(&i_dls_devnet_lock, RW_READER);
1014 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1015 	    (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&ddp)) != 0) {
1016 		ASSERT(err == MH_ERR_NOTFOUND);
1017 		rw_exit(&i_dls_devnet_lock);
1018 		return (ENOENT);
1019 	}
1020 
1021 	mutex_enter(&ddp->dd_mutex);
1022 	ASSERT(ddp->dd_ref > 0);
1023 	if (ddp->dd_flags & DD_CONDEMNED) {
1024 		mutex_exit(&ddp->dd_mutex);
1025 		rw_exit(&i_dls_devnet_lock);
1026 		return (ENOENT);
1027 	}
1028 	if (tmp_hold)
1029 		ddp->dd_tref++;
1030 	else
1031 		ddp->dd_ref++;
1032 	mutex_exit(&ddp->dd_mutex);
1033 	rw_exit(&i_dls_devnet_lock);
1034 
1035 	*ddpp = ddp;
1036 	return (0);
1037 }
1038 
/*
 * Take a regular (dd_ref) hold on the dls_devnet_t for linkid.  Returns
 * ENOENT if there is no such devnet or it is condemned.
 */
int
dls_devnet_hold(datalink_id_t linkid, dls_devnet_t **ddpp)
{
	return (dls_devnet_hold_common(linkid, ddpp, B_FALSE));
}
1044 
/*
 * Hold the vanity naming structure (dls_devnet_t) temporarily.  The request to
 * delete the dls_devnet_t will wait until the temporary reference is released.
 * Release the hold with dls_devnet_rele_tmp().  Returns ENOENT if there is
 * no devnet for linkid or it is condemned.
 */
int
dls_devnet_hold_tmp(datalink_id_t linkid, dls_devnet_t **ddpp)
{
	return (dls_devnet_hold_common(linkid, ddpp, B_TRUE));
}
1054 
1055 /*
1056  * This funtion is called when a DLS client tries to open a device node.
1057  * This dev_t could be a result of a /dev/net node access (returned by
1058  * devnet_create_rvp->dls_devnet_open()) or a direct /dev node access.
1059  * In both cases, this function bumps up the reference count of the
1060  * dls_devnet_t structure. The reference is held as long as the device node
1061  * is open. In the case of /dev/net while it is true that the initial reference
1062  * is held when the devnet_create_rvp->dls_devnet_open call happens, this
1063  * initial reference is released immediately in devnet_inactive_callback ->
1064  * dls_devnet_close(). (Note that devnet_inactive_callback() is called right
1065  * after dld_open completes, not when the /dev/net node is being closed).
1066  * To undo this function, call dls_devnet_rele()
1067  */
1068 int
1069 dls_devnet_hold_by_dev(dev_t dev, dls_dl_handle_t *ddhp)
1070 {
1071 	char			name[MAXNAMELEN];
1072 	char			*drv;
1073 	dls_devnet_t		*ddp;
1074 	int			err;
1075 
1076 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1077 		return (EINVAL);
1078 
1079 	(void) snprintf(name, sizeof (name), "%s%d", drv,
1080 	    DLS_MINOR2INST(getminor(dev)));
1081 
1082 	rw_enter(&i_dls_devnet_lock, RW_READER);
1083 	if ((err = mod_hash_find(i_dls_devnet_hash,
1084 	    (mod_hash_key_t)name, (mod_hash_val_t *)&ddp)) != 0) {
1085 		ASSERT(err == MH_ERR_NOTFOUND);
1086 		rw_exit(&i_dls_devnet_lock);
1087 		return (ENOENT);
1088 	}
1089 	mutex_enter(&ddp->dd_mutex);
1090 	ASSERT(ddp->dd_ref > 0);
1091 	if (ddp->dd_flags & DD_CONDEMNED) {
1092 		mutex_exit(&ddp->dd_mutex);
1093 		rw_exit(&i_dls_devnet_lock);
1094 		return (ENOENT);
1095 	}
1096 	ddp->dd_ref++;
1097 	mutex_exit(&ddp->dd_mutex);
1098 	rw_exit(&i_dls_devnet_lock);
1099 
1100 	*ddhp = ddp;
1101 	return (0);
1102 }
1103 
1104 void
1105 dls_devnet_rele(dls_devnet_t *ddp)
1106 {
1107 	mutex_enter(&ddp->dd_mutex);
1108 	ASSERT(ddp->dd_ref > 1);
1109 	ddp->dd_ref--;
1110 	if ((ddp->dd_flags & DD_IMPLICIT_IPTUN) && ddp->dd_ref == 1) {
1111 		mutex_exit(&ddp->dd_mutex);
1112 		if (i_dls_devnet_destroy_iptun(ddp->dd_linkid) != 0)
1113 			ddp->dd_flags |= DD_IMPLICIT_IPTUN;
1114 		return;
1115 	}
1116 	mutex_exit(&ddp->dd_mutex);
1117 }
1118 
1119 static int
1120 dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp)
1121 {
1122 	char			drv[MAXLINKNAMELEN];
1123 	uint_t			ppa;
1124 	major_t			major;
1125 	dev_t			phy_dev, tmp_dev;
1126 	datalink_id_t		linkid;
1127 	dls_dev_handle_t	ddh;
1128 	int			err;
1129 
1130 	if ((err = dls_mgmt_get_linkid(link, &linkid)) == 0)
1131 		return (dls_devnet_hold(linkid, ddpp));
1132 
1133 	/*
1134 	 * If we failed to get the link's linkid because the dlmgmtd daemon
1135 	 * has not been started, return ENOENT so that the application can
1136 	 * fallback to open the /dev node.
1137 	 */
1138 	if (err == EBADF)
1139 		return (ENOENT);
1140 
1141 	if (err != ENOENT)
1142 		return (err);
1143 
1144 	/*
1145 	 * If we reach this point it means dlmgmtd is up but has no
1146 	 * mapping for the link name.
1147 	 */
1148 	if (ddi_parse(link, drv, &ppa) != DDI_SUCCESS)
1149 		return (ENOENT);
1150 
1151 	if (IS_IPTUN_LINK(drv)) {
1152 		if ((err = i_dls_devnet_create_iptun(link, drv, &linkid)) != 0)
1153 			return (err);
1154 		/*
1155 		 * At this point, an IP tunnel MAC has registered, which
1156 		 * resulted in a link being created.
1157 		 */
1158 		err = dls_devnet_hold(linkid, ddpp);
1159 		if (err != 0) {
1160 			VERIFY(i_dls_devnet_destroy_iptun(linkid) == 0);
1161 			return (err);
1162 		}
1163 		/*
1164 		 * dls_devnet_rele() will know to destroy the implicit IP
1165 		 * tunnel on last reference release if DD_IMPLICIT_IPTUN is
1166 		 * set.
1167 		 */
1168 		(*ddpp)->dd_flags |= DD_IMPLICIT_IPTUN;
1169 		return (0);
1170 	}
1171 
1172 	/*
1173 	 * If this link:
1174 	 * (a) is a physical device, (b) this is the first boot, (c) the MAC
1175 	 * is not registered yet, and (d) we cannot find its linkid, then the
1176 	 * linkname is the same as the devname.
1177 	 *
1178 	 * First filter out invalid names.
1179 	 */
1180 	if ((major = ddi_name_to_major(drv)) == (major_t)-1)
1181 		return (ENOENT);
1182 
1183 	phy_dev = makedevice(major, DLS_PPA2MINOR(ppa));
1184 	if (softmac_hold_device(phy_dev, &ddh) != 0)
1185 		return (ENOENT);
1186 
1187 	/*
1188 	 * At this time, the MAC should be registered, check its phy_dev using
1189 	 * the given name.
1190 	 */
1191 	if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0 ||
1192 	    (err = dls_mgmt_get_phydev(linkid, &tmp_dev)) != 0) {
1193 		softmac_rele_device(ddh);
1194 		return (err);
1195 	}
1196 	if (tmp_dev != phy_dev) {
1197 		softmac_rele_device(ddh);
1198 		return (ENOENT);
1199 	}
1200 
1201 	err = dls_devnet_hold(linkid, ddpp);
1202 	softmac_rele_device(ddh);
1203 	return (err);
1204 }
1205 
1206 int
1207 dls_devnet_macname2linkid(const char *macname, datalink_id_t *linkidp)
1208 {
1209 	dls_devnet_t	*ddp;
1210 
1211 	rw_enter(&i_dls_devnet_lock, RW_READER);
1212 	if (mod_hash_find(i_dls_devnet_hash, (mod_hash_key_t)macname,
1213 	    (mod_hash_val_t *)&ddp) != 0) {
1214 		rw_exit(&i_dls_devnet_lock);
1215 		return (ENOENT);
1216 	}
1217 
1218 	*linkidp = ddp->dd_linkid;
1219 	rw_exit(&i_dls_devnet_lock);
1220 	return (0);
1221 }
1222 
1223 /*
1224  * Get linkid for the given dev.
1225  */
1226 int
1227 dls_devnet_dev2linkid(dev_t dev, datalink_id_t *linkidp)
1228 {
1229 	char	macname[MAXNAMELEN];
1230 	char	*drv;
1231 
1232 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1233 		return (EINVAL);
1234 
1235 	(void) snprintf(macname, sizeof (macname), "%s%d", drv,
1236 	    DLS_MINOR2INST(getminor(dev)));
1237 	return (dls_devnet_macname2linkid(macname, linkidp));
1238 }
1239 
1240 /*
1241  * Get the link's physical dev_t. It this is a VLAN, get the dev_t of the
1242  * link this VLAN is created on.
1243  */
1244 int
1245 dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp)
1246 {
1247 	dls_devnet_t	*ddp;
1248 	int		err;
1249 
1250 	if ((err = dls_devnet_hold_tmp(vlanid, &ddp)) != 0)
1251 		return (err);
1252 
1253 	err = dls_mgmt_get_phydev(ddp->dd_linkid, devp);
1254 	dls_devnet_rele_tmp(ddp);
1255 	return (err);
1256 }
1257 
1258 /*
1259  * Handle the renaming requests.  There are two rename cases:
1260  *
1261  * 1. Request to rename a valid link (id1) to an non-existent link name
1262  *    (id2). In this case id2 is DATALINK_INVALID_LINKID.  Just check whether
1263  *    id1 is held by any applications.
1264  *
1265  *    In this case, the link's kstats need to be updated using the given name.
1266  *
1267  * 2. Request to rename a valid link (id1) to the name of a REMOVED
1268  *    physical link (id2). In this case, check that id1 and its associated
1269  *    mac is not held by any application, and update the link's linkid to id2.
1270  *
1271  *    This case does not change the <link name, linkid> mapping, so the link's
1272  *    kstats need to be updated with using name associated the given id2.
1273  */
1274 int
1275 dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
1276 {
1277 	dls_dev_handle_t	ddh = NULL;
1278 	int			err = 0;
1279 	dev_t			phydev = 0;
1280 	dls_devnet_t		*ddp;
1281 	mac_perim_handle_t	mph = NULL;
1282 	mac_handle_t		mh;
1283 	mod_hash_val_t		val;
1284 
1285 	/*
1286 	 * In the second case, id2 must be a REMOVED physical link.
1287 	 */
1288 	if ((id2 != DATALINK_INVALID_LINKID) &&
1289 	    (dls_mgmt_get_phydev(id2, &phydev) == 0) &&
1290 	    softmac_hold_device(phydev, &ddh) == 0) {
1291 		softmac_rele_device(ddh);
1292 		return (EEXIST);
1293 	}
1294 
1295 	/*
1296 	 * Hold id1 to prevent it from being detached (if a physical link).
1297 	 */
1298 	if (dls_mgmt_get_phydev(id1, &phydev) == 0)
1299 		(void) softmac_hold_device(phydev, &ddh);
1300 
1301 	/*
1302 	 * The framework does not hold hold locks across calls to the
1303 	 * mac perimeter, hence enter the perimeter first. This also waits
1304 	 * for the property loading to finish.
1305 	 */
1306 	if ((err = mac_perim_enter_by_linkid(id1, &mph)) != 0) {
1307 		softmac_rele_device(ddh);
1308 		return (err);
1309 	}
1310 
1311 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
1312 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1313 	    (mod_hash_key_t)(uintptr_t)id1, (mod_hash_val_t *)&ddp)) != 0) {
1314 		ASSERT(err == MH_ERR_NOTFOUND);
1315 		err = ENOENT;
1316 		goto done;
1317 	}
1318 
1319 	mutex_enter(&ddp->dd_mutex);
1320 	if (ddp->dd_ref > 1) {
1321 		mutex_exit(&ddp->dd_mutex);
1322 		err = EBUSY;
1323 		goto done;
1324 	}
1325 	mutex_exit(&ddp->dd_mutex);
1326 
1327 	if (id2 == DATALINK_INVALID_LINKID) {
1328 		(void) strlcpy(ddp->dd_linkname, link,
1329 		    sizeof (ddp->dd_linkname));
1330 
1331 		/* rename mac client name and its flow if exists */
1332 		if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1333 			goto done;
1334 		(void) mac_rename_primary(mh, link);
1335 		mac_close(mh);
1336 		goto done;
1337 	}
1338 
1339 	/*
1340 	 * The second case, check whether the MAC is used by any MAC
1341 	 * user.  This must be a physical link so ddh must not be NULL.
1342 	 */
1343 	if (ddh == NULL) {
1344 		err = EINVAL;
1345 		goto done;
1346 	}
1347 
1348 	if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1349 		goto done;
1350 
1351 	/*
1352 	 * We release the reference of the MAC which mac_open() is
1353 	 * holding. Note that this mac will not be unregistered
1354 	 * because the physical device is held.
1355 	 */
1356 	mac_close(mh);
1357 
1358 	/*
1359 	 * Check if there is any other MAC clients, if not, hold this mac
1360 	 * exclusively until we are done.
1361 	 */
1362 	if ((err = mac_mark_exclusive(mh)) != 0)
1363 		goto done;
1364 
1365 	/*
1366 	 * Update the link's linkid.
1367 	 */
1368 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1369 	    (mod_hash_key_t)(uintptr_t)id2, &val)) != MH_ERR_NOTFOUND) {
1370 		mac_unmark_exclusive(mh);
1371 		err = EEXIST;
1372 		goto done;
1373 	}
1374 
1375 	err = dls_mgmt_get_linkinfo(id2, ddp->dd_linkname, NULL, NULL, NULL);
1376 	if (err != 0) {
1377 		mac_unmark_exclusive(mh);
1378 		goto done;
1379 	}
1380 
1381 	(void) mod_hash_remove(i_dls_devnet_id_hash,
1382 	    (mod_hash_key_t)(uintptr_t)id1, &val);
1383 
1384 	ddp->dd_linkid = id2;
1385 	(void) mod_hash_insert(i_dls_devnet_id_hash,
1386 	    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, (mod_hash_val_t)ddp);
1387 
1388 	mac_unmark_exclusive(mh);
1389 
1390 	/* load properties for new id */
1391 	mutex_enter(&ddp->dd_mutex);
1392 	ddp->dd_prop_loaded = B_FALSE;
1393 	ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
1394 	    dls_devnet_prop_task, ddp, TQ_SLEEP);
1395 	mutex_exit(&ddp->dd_mutex);
1396 
1397 done:
1398 	rw_exit(&i_dls_devnet_lock);
1399 
1400 	if (err == 0)
1401 		dls_devnet_stat_rename(ddp);
1402 
1403 	if (mph != NULL)
1404 		mac_perim_exit(mph);
1405 	softmac_rele_device(ddh);
1406 	return (err);
1407 }
1408 
1409 static int
1410 i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop)
1411 {
1412 	int			err;
1413 	mac_perim_handle_t	mph;
1414 	boolean_t		upcall_done = B_FALSE;
1415 	datalink_id_t		linkid = ddp->dd_linkid;
1416 	zoneid_t		old_zoneid = ddp->dd_zid;
1417 	dlmgmt_door_setzoneid_t	setzid;
1418 	dlmgmt_setzoneid_retval_t retval;
1419 
1420 	if (old_zoneid == new_zoneid)
1421 		return (0);
1422 
1423 	if ((err = mac_perim_enter_by_macname(ddp->dd_mac, &mph)) != 0)
1424 		return (err);
1425 
1426 	/*
1427 	 * When changing the zoneid of an existing link, we need to tell
1428 	 * dlmgmtd about it.  dlmgmtd already knows the zoneid associated with
1429 	 * newly created links.
1430 	 */
1431 	if (setprop) {
1432 		setzid.ld_cmd = DLMGMT_CMD_SETZONEID;
1433 		setzid.ld_linkid = linkid;
1434 		setzid.ld_zoneid = new_zoneid;
1435 		err = i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1436 		    sizeof (retval));
1437 		if (err != 0)
1438 			goto done;
1439 		upcall_done = B_TRUE;
1440 	}
1441 	if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
1442 		ddp->dd_zid = new_zoneid;
1443 		devnet_need_rebuild = B_TRUE;
1444 	}
1445 
1446 done:
1447 	if (err != 0 && upcall_done) {
1448 		setzid.ld_zoneid = old_zoneid;
1449 		(void) i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1450 		    sizeof (retval));
1451 	}
1452 	mac_perim_exit(mph);
1453 	return (err);
1454 }
1455 
1456 int
1457 dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
1458 {
1459 	dls_devnet_t	*ddp;
1460 	int		err;
1461 	zoneid_t	old_zid;
1462 	boolean_t	refheld = B_FALSE;
1463 
1464 	old_zid = ddh->dd_zid;
1465 
1466 	if (old_zid == new_zid)
1467 		return (0);
1468 
1469 	/*
1470 	 * Acquire an additional reference to the link if it is being assigned
1471 	 * to a non-global zone from the global zone.
1472 	 */
1473 	if (old_zid == GLOBAL_ZONEID && new_zid != GLOBAL_ZONEID) {
1474 		if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
1475 			return (err);
1476 		refheld = B_TRUE;
1477 	}
1478 
1479 	if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE)) != 0) {
1480 		if (refheld)
1481 			dls_devnet_rele(ddp);
1482 		return (err);
1483 	}
1484 
1485 	/*
1486 	 * Release the additional reference if the link is returning to the
1487 	 * global zone from a non-global zone.
1488 	 */
1489 	if (old_zid != GLOBAL_ZONEID && new_zid == GLOBAL_ZONEID)
1490 		dls_devnet_rele(ddh);
1491 
1492 	/* Re-create kstats in the appropriate zones. */
1493 	if (old_zid != GLOBAL_ZONEID)
1494 		dls_devnet_stat_destroy(ddh, old_zid);
1495 	if (new_zid != GLOBAL_ZONEID)
1496 		dls_devnet_stat_create(ddh, new_zid);
1497 
1498 	return (0);
1499 }
1500 
1501 zoneid_t
1502 dls_devnet_getzid(dls_dl_handle_t ddh)
1503 {
1504 	return (((dls_devnet_t *)ddh)->dd_zid);
1505 }
1506 
1507 zoneid_t
1508 dls_devnet_getownerzid(dls_dl_handle_t ddh)
1509 {
1510 	return (((dls_devnet_t *)ddh)->dd_owner_zid);
1511 }
1512 
1513 /*
1514  * Is linkid visible from zoneid?  A link is visible if it was created in the
1515  * zone, or if it is currently assigned to the zone.
1516  */
1517 boolean_t
1518 dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid)
1519 {
1520 	dls_devnet_t	*ddp;
1521 	boolean_t	result;
1522 
1523 	if (dls_devnet_hold_tmp(linkid, &ddp) != 0)
1524 		return (B_FALSE);
1525 	result = (ddp->dd_owner_zid == zoneid || ddp->dd_zid == zoneid);
1526 	dls_devnet_rele_tmp(ddp);
1527 	return (result);
1528 }
1529 
1530 /*
1531  * Access a vanity naming node.
1532  */
1533 int
1534 dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
1535 {
1536 	dls_devnet_t	*ddp;
1537 	dls_link_t	*dlp;
1538 	zoneid_t	zid = getzoneid();
1539 	int		err;
1540 	mac_perim_handle_t	mph;
1541 
1542 	if ((err = dls_devnet_hold_by_name(link, &ddp)) != 0)
1543 		return (err);
1544 
1545 	dls_devnet_prop_task_wait(ddp);
1546 
1547 	/*
1548 	 * Opening a link that does not belong to the current non-global zone
1549 	 * is not allowed.
1550 	 */
1551 	if (zid != GLOBAL_ZONEID && ddp->dd_zid != zid) {
1552 		dls_devnet_rele(ddp);
1553 		return (ENOENT);
1554 	}
1555 
1556 	err = mac_perim_enter_by_macname(ddp->dd_mac, &mph);
1557 	if (err != 0) {
1558 		dls_devnet_rele(ddp);
1559 		return (err);
1560 	}
1561 
1562 	err = dls_link_hold_create(ddp->dd_mac, &dlp);
1563 	mac_perim_exit(mph);
1564 
1565 	if (err != 0) {
1566 		dls_devnet_rele(ddp);
1567 		return (err);
1568 	}
1569 
1570 	*dhp = ddp;
1571 	*devp = dls_link_dev(dlp);
1572 	return (0);
1573 }
1574 
1575 /*
1576  * Close access to a vanity naming node.
1577  */
1578 void
1579 dls_devnet_close(dls_dl_handle_t dlh)
1580 {
1581 	dls_devnet_t	*ddp = dlh;
1582 	dls_link_t	*dlp;
1583 	mac_perim_handle_t	mph;
1584 
1585 	VERIFY(mac_perim_enter_by_macname(ddp->dd_mac, &mph) == 0);
1586 	VERIFY(dls_link_hold(ddp->dd_mac, &dlp) == 0);
1587 
1588 	/*
1589 	 * One rele for the hold placed in dls_devnet_open, another for
1590 	 * the hold done just above
1591 	 */
1592 	dls_link_rele(dlp);
1593 	dls_link_rele(dlp);
1594 	mac_perim_exit(mph);
1595 
1596 	dls_devnet_rele(ddp);
1597 }
1598 
1599 /*
1600  * This is used by /dev/net to rebuild the nodes for readdir().  It is not
1601  * critical and no protection is needed.
1602  */
1603 boolean_t
1604 dls_devnet_rebuild()
1605 {
1606 	boolean_t updated = devnet_need_rebuild;
1607 
1608 	devnet_need_rebuild = B_FALSE;
1609 	return (updated);
1610 }
1611 
1612 int
1613 dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid)
1614 {
1615 	dls_link_t	*dlp;
1616 	dls_devnet_t	*ddp;
1617 	int		err;
1618 	mac_perim_handle_t mph;
1619 
1620 	/*
1621 	 * Holding the mac perimeter ensures that the downcall from the
1622 	 * dlmgmt daemon which does the property loading does not proceed
1623 	 * until we relinquish the perimeter.
1624 	 */
1625 	mac_perim_enter_by_mh(mh, &mph);
1626 	/*
1627 	 * Make this association before we call dls_link_hold_create as
1628 	 * we need to use the linkid to get the user name for the link
1629 	 * when we create the MAC client.
1630 	 */
1631 	if ((err = dls_devnet_set(mac_name(mh), linkid, zoneid, &ddp)) == 0) {
1632 		if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) {
1633 			mac_perim_exit(mph);
1634 			(void) dls_devnet_unset(mac_name(mh), &linkid, B_TRUE);
1635 			return (err);
1636 		}
1637 	}
1638 	mac_perim_exit(mph);
1639 	return (err);
1640 }
1641 
1642 /*
1643  * Set the linkid of the dls_devnet_t and add it into the i_dls_devnet_id_hash.
1644  * This is called in the case that the dlmgmtd daemon is started later than
1645  * the physical devices get attached, and the linkid is only known after the
1646  * daemon starts.
1647  */
1648 int
1649 dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid)
1650 {
1651 	ASSERT(linkid != DATALINK_INVALID_LINKID);
1652 	return (dls_devnet_set(mac_name(mh), linkid, GLOBAL_ZONEID, NULL));
1653 }
1654 
1655 int
1656 dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait)
1657 {
1658 	int			err;
1659 	mac_perim_handle_t	mph;
1660 
1661 	*idp = DATALINK_INVALID_LINKID;
1662 	err = dls_devnet_unset(mac_name(mh), idp, wait);
1663 	if (err != 0 && err != ENOENT)
1664 		return (err);
1665 
1666 	mac_perim_enter_by_mh(mh, &mph);
1667 	err = dls_link_rele_by_name(mac_name(mh));
1668 	mac_perim_exit(mph);
1669 
1670 	if (err != 0) {
1671 		/*
1672 		 * XXX It is a general GLDv3 bug that dls_devnet_set() has to
1673 		 * be called to re-set the link when destroy fails.  The
1674 		 * zoneid below will be incorrect if this function is ever
1675 		 * called from kernel context or from a zone other than that
1676 		 * which initially created the link.
1677 		 */
1678 		(void) dls_devnet_set(mac_name(mh), *idp, crgetzoneid(CRED()),
1679 		    NULL);
1680 	}
1681 	return (err);
1682 }
1683 
1684 /*
1685  * Implicitly create an IP tunnel link.
1686  */
1687 static int
1688 i_dls_devnet_create_iptun(const char *linkname, const char *drvname,
1689     datalink_id_t *linkid)
1690 {
1691 	int		err;
1692 	iptun_kparams_t	ik;
1693 	uint32_t	media;
1694 	netstack_t	*ns;
1695 	major_t		iptun_major;
1696 	dev_info_t	*iptun_dip;
1697 
1698 	/* First ensure that the iptun device is attached. */
1699 	if ((iptun_major = ddi_name_to_major(IPTUN_DRIVER_NAME)) == (major_t)-1)
1700 		return (EINVAL);
1701 	if ((iptun_dip = ddi_hold_devi_by_instance(iptun_major, 0, 0)) == NULL)
1702 		return (EINVAL);
1703 
1704 	if (IS_IPV4_TUN(drvname)) {
1705 		ik.iptun_kparam_type = IPTUN_TYPE_IPV4;
1706 		media = DL_IPV4;
1707 	} else if (IS_6TO4_TUN(drvname)) {
1708 		ik.iptun_kparam_type = IPTUN_TYPE_6TO4;
1709 		media = DL_6TO4;
1710 	} else if (IS_IPV6_TUN(drvname)) {
1711 		ik.iptun_kparam_type = IPTUN_TYPE_IPV6;
1712 		media = DL_IPV6;
1713 	}
1714 	ik.iptun_kparam_flags = (IPTUN_KPARAM_TYPE | IPTUN_KPARAM_IMPLICIT);
1715 
1716 	/* Obtain a datalink id for this tunnel. */
1717 	err = dls_mgmt_create((char *)linkname, 0, DATALINK_CLASS_IPTUN, media,
1718 	    B_FALSE, &ik.iptun_kparam_linkid);
1719 	if (err != 0) {
1720 		ddi_release_devi(iptun_dip);
1721 		return (err);
1722 	}
1723 
1724 	ns = netstack_get_current();
1725 	err = iptun_create(&ik, CRED());
1726 	netstack_rele(ns);
1727 
1728 	if (err != 0)
1729 		VERIFY(dls_mgmt_destroy(ik.iptun_kparam_linkid, B_FALSE) == 0);
1730 	else
1731 		*linkid = ik.iptun_kparam_linkid;
1732 
1733 	ddi_release_devi(iptun_dip);
1734 	return (err);
1735 }
1736 
1737 static int
1738 i_dls_devnet_destroy_iptun(datalink_id_t linkid)
1739 {
1740 	int err;
1741 
1742 	/*
1743 	 * Note the use of zone_kcred() here as opposed to CRED().  This is
1744 	 * because the process that does the last close of this /dev/net node
1745 	 * may not have necessary privileges to delete this IP tunnel, but the
1746 	 * tunnel must always be implicitly deleted on last close.
1747 	 */
1748 	if ((err = iptun_delete(linkid, zone_kcred())) == 0)
1749 		(void) dls_mgmt_destroy(linkid, B_FALSE);
1750 	return (err);
1751 }
1752 
1753 const char *
1754 dls_devnet_link(dls_dl_handle_t ddh)
1755 {
1756 	return (ddh->dd_linkname);
1757 }
1758 
1759 const char *
1760 dls_devnet_mac(dls_dl_handle_t ddh)
1761 {
1762 	return (ddh->dd_mac);
1763 }
1764 
1765 datalink_id_t
1766 dls_devnet_linkid(dls_dl_handle_t ddh)
1767 {
1768 	return (ddh->dd_linkid);
1769 }
1770