xref: /titanic_51/usr/src/uts/common/io/dls/dls_mgmt.c (revision 7380b00ccf9fc5638da98d756925bc6d6c002a42)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Datalink management routines.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/door.h>
32 #include <sys/zone.h>
33 #include <sys/modctl.h>
34 #include <sys/file.h>
35 #include <sys/modhash.h>
36 #include <sys/kstat.h>
37 #include <sys/vnode.h>
38 #include <sys/cmn_err.h>
39 #include <sys/softmac.h>
40 #include <sys/dls.h>
41 #include <sys/dls_impl.h>
42 #include <sys/stropts.h>
43 #include <sys/netstack.h>
44 #include <inet/iptun/iptun_impl.h>
45 
46 /*
47  * This vanity name management module is treated as part of the GLD framework
48  * and we don't hold any GLD framework lock across a call to any mac
49  * function that needs to acquire the mac perimeter. The hierarchy is
50  * mac perimeter -> framework locks
51  */
52 
53 typedef struct dls_stack {
54 	zoneid_t	dlss_zoneid;
55 } dls_stack_t;
56 
57 static kmem_cache_t	*i_dls_devnet_cachep;
58 static kmutex_t		i_dls_mgmt_lock;
59 static krwlock_t	i_dls_devnet_lock;
60 static mod_hash_t	*i_dls_devnet_id_hash;
61 static mod_hash_t	*i_dls_devnet_hash;
62 
63 boolean_t		devnet_need_rebuild;
64 
65 #define	VLAN_HASHSZ	67	/* prime */
66 
67 
/*
 * Default tunnel interface names kept for backward compatibility with
 * Solaris 10 and earlier.  Opening a /dev/net node whose name starts with
 * one of these prefixes causes a tunnel link to be implicitly created in
 * dls_devnet_hold_by_name().
 *
 * The IS_*_TUN() predicates are prefix matches: only the leading characters
 * covering the default name are compared, so any suffix (e.g. a ppa) is
 * accepted.  Note that each macro evaluates its argument more than once.
 */
#define	IPTUN_IPV4_NAME	"ip.tun"
#define	IPTUN_IPV6_NAME	"ip6.tun"
#define	IPTUN_6TO4_NAME	"ip.6to4tun"

#define	IS_IPV4_TUN(name)						\
	(strncmp((name), IPTUN_IPV4_NAME, sizeof (IPTUN_IPV4_NAME) - 1) == 0)
#define	IS_IPV6_TUN(name)						\
	(strncmp((name), IPTUN_IPV6_NAME, sizeof (IPTUN_IPV6_NAME) - 1) == 0)
#define	IS_6TO4_TUN(name)						\
	(strncmp((name), IPTUN_6TO4_NAME, sizeof (IPTUN_6TO4_NAME) - 1) == 0)
#define	IS_IPTUN_LINK(name)						\
	(IS_IPV4_TUN(name) || IS_IPV6_TUN(name) || IS_6TO4_TUN(name))
86 
87 /* Upcall door handle */
88 static door_handle_t	dls_mgmt_dh = NULL;
89 
90 #define	DD_CONDEMNED		0x1
91 #define	DD_KSTAT_CHANGING	0x2
92 #define	DD_IMPLICIT_IPTUN	0x4 /* Implicitly-created ip*.*tun* tunnel */
93 
94 /*
95  * This structure is used to keep the <linkid, macname> mapping.
96  * This structure itself is not protected by the mac perimeter, but is
97  * protected by the dd_mutex and i_dls_devnet_lock. Thus most of the
98  * functions manipulating this structure such as dls_devnet_set/unset etc.
99  * may be called while not holding the mac perimeter.
100  */
101 typedef struct dls_devnet_s {
102 	datalink_id_t	dd_linkid;
103 	char		dd_linkname[MAXLINKNAMELEN];
104 	char		dd_mac[MAXNAMELEN];
105 	kstat_t		*dd_ksp;	/* kstat in owner_zid */
106 	kstat_t		*dd_zone_ksp;	/* in dd_zid if != owner_zid */
107 	uint32_t	dd_ref;
108 	kmutex_t	dd_mutex;
109 	kcondvar_t	dd_cv;
110 	uint32_t	dd_tref;
111 	uint_t		dd_flags;
112 	zoneid_t	dd_owner_zid;	/* zone where node was created */
113 	zoneid_t	dd_zid;		/* current zone */
114 	boolean_t	dd_prop_loaded;
115 	taskqid_t	dd_prop_taskid;
116 } dls_devnet_t;
117 
118 static int i_dls_devnet_create_iptun(const char *, datalink_id_t *);
119 static int i_dls_devnet_destroy_iptun(datalink_id_t);
120 static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t);
121 static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
122 
123 /*ARGSUSED*/
124 static int
125 i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
126 {
127 	dls_devnet_t	*ddp = buf;
128 
129 	bzero(buf, sizeof (dls_devnet_t));
130 	mutex_init(&ddp->dd_mutex, NULL, MUTEX_DEFAULT, NULL);
131 	cv_init(&ddp->dd_cv, NULL, CV_DEFAULT, NULL);
132 	return (0);
133 }
134 
135 /*ARGSUSED*/
136 static void
137 i_dls_devnet_destructor(void *buf, void *arg)
138 {
139 	dls_devnet_t	*ddp = buf;
140 
141 	ASSERT(ddp->dd_ksp == NULL);
142 	ASSERT(ddp->dd_ref == 0);
143 	ASSERT(ddp->dd_tref == 0);
144 	mutex_destroy(&ddp->dd_mutex);
145 	cv_destroy(&ddp->dd_cv);
146 }
147 
148 /* ARGSUSED */
149 static int
150 dls_zone_remove(datalink_id_t linkid, void *arg)
151 {
152 	dls_devnet_t *ddp;
153 
154 	if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
155 		(void) dls_devnet_setzid(ddp, GLOBAL_ZONEID);
156 		dls_devnet_rele_tmp(ddp);
157 	}
158 	return (0);
159 }
160 
161 /* ARGSUSED */
162 static void *
163 dls_stack_init(netstackid_t stackid, netstack_t *ns)
164 {
165 	dls_stack_t *dlss;
166 
167 	dlss = kmem_zalloc(sizeof (*dlss), KM_SLEEP);
168 	dlss->dlss_zoneid = netstackid_to_zoneid(stackid);
169 	return (dlss);
170 }
171 
172 /* ARGSUSED */
173 static void
174 dls_stack_shutdown(netstackid_t stackid, void *arg)
175 {
176 	dls_stack_t	*dlss = (dls_stack_t *)arg;
177 
178 	/* Move remaining datalinks in this zone back to the global zone. */
179 	(void) zone_datalink_walk(dlss->dlss_zoneid, dls_zone_remove, NULL);
180 }
181 
182 /* ARGSUSED */
183 static void
184 dls_stack_fini(netstackid_t stackid, void *arg)
185 {
186 	dls_stack_t	*dlss = (dls_stack_t *)arg;
187 
188 	kmem_free(dlss, sizeof (*dlss));
189 }
190 
191 /*
192  * Module initialization and finalization functions.
193  */
194 void
195 dls_mgmt_init(void)
196 {
197 	mutex_init(&i_dls_mgmt_lock, NULL, MUTEX_DEFAULT, NULL);
198 	rw_init(&i_dls_devnet_lock, NULL, RW_DEFAULT, NULL);
199 
200 	/*
201 	 * Create a kmem_cache of dls_devnet_t structures.
202 	 */
203 	i_dls_devnet_cachep = kmem_cache_create("dls_devnet_cache",
204 	    sizeof (dls_devnet_t), 0, i_dls_devnet_constructor,
205 	    i_dls_devnet_destructor, NULL, NULL, NULL, 0);
206 	ASSERT(i_dls_devnet_cachep != NULL);
207 
208 	/*
209 	 * Create a hash table, keyed by dd_linkid, of dls_devnet_t.
210 	 */
211 	i_dls_devnet_id_hash = mod_hash_create_idhash("dls_devnet_id_hash",
212 	    VLAN_HASHSZ, mod_hash_null_valdtor);
213 
214 	/*
215 	 * Create a hash table, keyed by dd_mac
216 	 */
217 	i_dls_devnet_hash = mod_hash_create_extended("dls_devnet_hash",
218 	    VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
219 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
220 
221 	devnet_need_rebuild = B_FALSE;
222 
223 	netstack_register(NS_DLS, dls_stack_init, dls_stack_shutdown,
224 	    dls_stack_fini);
225 }
226 
227 void
228 dls_mgmt_fini(void)
229 {
230 	netstack_unregister(NS_DLS);
231 	mod_hash_destroy_hash(i_dls_devnet_hash);
232 	mod_hash_destroy_hash(i_dls_devnet_id_hash);
233 	kmem_cache_destroy(i_dls_devnet_cachep);
234 	rw_destroy(&i_dls_devnet_lock);
235 	mutex_destroy(&i_dls_mgmt_lock);
236 }
237 
238 int
239 dls_mgmt_door_set(boolean_t start)
240 {
241 	int	err;
242 
243 	/* handle daemon restart */
244 	mutex_enter(&i_dls_mgmt_lock);
245 	if (dls_mgmt_dh != NULL) {
246 		door_ki_rele(dls_mgmt_dh);
247 		dls_mgmt_dh = NULL;
248 	}
249 
250 	if (start && ((err = door_ki_open(DLMGMT_DOOR, &dls_mgmt_dh)) != 0)) {
251 		mutex_exit(&i_dls_mgmt_lock);
252 		return (err);
253 	}
254 
255 	mutex_exit(&i_dls_mgmt_lock);
256 
257 	/*
258 	 * Create and associate <link name, linkid> mapping for network devices
259 	 * which are already attached before the daemon is started.
260 	 */
261 	if (start)
262 		softmac_recreate();
263 	return (0);
264 }
265 
266 static boolean_t
267 i_dls_mgmt_door_revoked(door_handle_t dh)
268 {
269 	struct door_info info;
270 	extern int sys_shutdown;
271 
272 	ASSERT(dh != NULL);
273 
274 	if (sys_shutdown) {
275 		cmn_err(CE_NOTE, "dls_mgmt_door: shutdown observed\n");
276 		return (B_TRUE);
277 	}
278 
279 	if (door_ki_info(dh, &info) != 0)
280 		return (B_TRUE);
281 
282 	return ((info.di_attributes & DOOR_REVOKED) != 0);
283 }
284 
285 /*
286  * Upcall to the datalink management daemon (dlmgmtd).
287  */
288 static int
289 i_dls_mgmt_upcall(void *arg, size_t asize, void *rbuf, size_t rsize)
290 {
291 	door_arg_t			darg, save_arg;
292 	door_handle_t			dh;
293 	int				err;
294 	int				retry = 0;
295 
296 #define	MAXRETRYNUM	3
297 
298 	ASSERT(arg);
299 	darg.data_ptr = arg;
300 	darg.data_size = asize;
301 	darg.desc_ptr = NULL;
302 	darg.desc_num = 0;
303 	darg.rbuf = rbuf;
304 	darg.rsize = rsize;
305 	save_arg = darg;
306 
307 retry:
308 	mutex_enter(&i_dls_mgmt_lock);
309 	dh = dls_mgmt_dh;
310 	if ((dh == NULL) || i_dls_mgmt_door_revoked(dh)) {
311 		mutex_exit(&i_dls_mgmt_lock);
312 		return (EBADF);
313 	}
314 	door_ki_hold(dh);
315 	mutex_exit(&i_dls_mgmt_lock);
316 
317 	for (;;) {
318 		retry++;
319 		if ((err = door_ki_upcall_limited(dh, &darg, zone_kcred(),
320 		    SIZE_MAX, 0)) == 0)
321 			break;
322 
323 		/*
324 		 * handle door call errors
325 		 */
326 		darg = save_arg;
327 		switch (err) {
328 		case EINTR:
329 			/*
330 			 * If the operation which caused this door upcall gets
331 			 * interrupted, return directly.
332 			 */
333 			goto done;
334 		case EAGAIN:
335 			/*
336 			 * Repeat upcall if the maximum attempt limit has not
337 			 * been reached.
338 			 */
339 			if (retry < MAXRETRYNUM) {
340 				delay(2 * hz);
341 				break;
342 			}
343 			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
344 			goto done;
345 		default:
346 			/* A fatal door error */
347 			if (i_dls_mgmt_door_revoked(dh)) {
348 				cmn_err(CE_NOTE,
349 				    "dls: dlmgmtd door service revoked\n");
350 
351 				if (retry < MAXRETRYNUM) {
352 					door_ki_rele(dh);
353 					goto retry;
354 				}
355 			}
356 			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
357 			goto done;
358 		}
359 	}
360 
361 	if (darg.rbuf != rbuf) {
362 		/*
363 		 * The size of the input rbuf was not big enough, so the
364 		 * upcall allocated the rbuf itself.  If this happens, assume
365 		 * that this was an invalid door call request.
366 		 */
367 		kmem_free(darg.rbuf, darg.rsize);
368 		err = ENOSPC;
369 		goto done;
370 	}
371 
372 	if (darg.rsize != rsize) {
373 		err = EINVAL;
374 		goto done;
375 	}
376 
377 	err = ((dlmgmt_retval_t *)rbuf)->lr_err;
378 
379 done:
380 	door_ki_rele(dh);
381 	return (err);
382 }
383 
384 /*
385  * Request the datalink management daemon to create a link with the attributes
386  * below.  Upon success, zero is returned and linkidp contains the linkid for
387  * the new link; otherwise, an errno is returned.
388  *
389  *     - dev		physical dev_t.  required for all physical links,
390  *		        including GLDv3 links.  It will be used to force the
391  *		        attachment of a physical device, hence the
392  *		        registration of its mac
393  *     - class		datalink class
394  *     - media type	media type; DL_OTHER means unknown
395  *     - persist	whether to persist the datalink
396  */
397 int
398 dls_mgmt_create(const char *devname, dev_t dev, datalink_class_t class,
399     uint32_t media, boolean_t persist, datalink_id_t *linkidp)
400 {
401 	dlmgmt_upcall_arg_create_t	create;
402 	dlmgmt_create_retval_t		retval;
403 	int				err;
404 
405 	create.ld_cmd = DLMGMT_CMD_DLS_CREATE;
406 	create.ld_class = class;
407 	create.ld_media = media;
408 	create.ld_phymaj = getmajor(dev);
409 	create.ld_phyinst = getminor(dev);
410 	create.ld_persist = persist;
411 	if (strlcpy(create.ld_devname, devname, sizeof (create.ld_devname)) >=
412 	    sizeof (create.ld_devname))
413 		return (EINVAL);
414 
415 	if ((err = i_dls_mgmt_upcall(&create, sizeof (create), &retval,
416 	    sizeof (retval))) == 0) {
417 		*linkidp = retval.lr_linkid;
418 	}
419 	return (err);
420 }
421 
422 /*
423  * Request the datalink management daemon to destroy the specified link.
424  * Returns zero upon success, or an errno upon failure.
425  */
426 int
427 dls_mgmt_destroy(datalink_id_t linkid, boolean_t persist)
428 {
429 	dlmgmt_upcall_arg_destroy_t	destroy;
430 	dlmgmt_destroy_retval_t		retval;
431 
432 	destroy.ld_cmd = DLMGMT_CMD_DLS_DESTROY;
433 	destroy.ld_linkid = linkid;
434 	destroy.ld_persist = persist;
435 
436 	return (i_dls_mgmt_upcall(&destroy, sizeof (destroy),
437 	    &retval, sizeof (retval)));
438 }
439 
440 /*
441  * Request the datalink management daemon to verify/update the information
442  * for a physical link.  Upon success, get its linkid.
443  *
444  *     - media type	media type
445  *     - novanity	whether this physical datalink supports vanity naming.
446  *			physical links that do not use the GLDv3 MAC plugin
447  *			cannot suport vanity naming
448  *
449  * This function could fail with ENOENT or EEXIST.  Two cases return EEXIST:
450  *
451  * 1. A link with devname already exists, but the media type does not match.
452  *    In this case, mediap will bee set to the media type of the existing link.
453  * 2. A link with devname already exists, but its link name does not match
454  *    the device name, although this link does not support vanity naming.
455  */
456 int
457 dls_mgmt_update(const char *devname, uint32_t media, boolean_t novanity,
458     uint32_t *mediap, datalink_id_t *linkidp)
459 {
460 	dlmgmt_upcall_arg_update_t	update;
461 	dlmgmt_update_retval_t		retval;
462 	int				err;
463 
464 	update.ld_cmd = DLMGMT_CMD_DLS_UPDATE;
465 
466 	if (strlcpy(update.ld_devname, devname, sizeof (update.ld_devname)) >=
467 	    sizeof (update.ld_devname))
468 		return (EINVAL);
469 
470 	update.ld_media = media;
471 	update.ld_novanity = novanity;
472 
473 	if ((err = i_dls_mgmt_upcall(&update, sizeof (update), &retval,
474 	    sizeof (retval))) == EEXIST) {
475 		*linkidp = retval.lr_linkid;
476 		*mediap = retval.lr_media;
477 	} else if (err == 0) {
478 		*linkidp = retval.lr_linkid;
479 	}
480 
481 	return (err);
482 }
483 
484 /*
485  * Request the datalink management daemon to get the information for a link.
486  * Returns zero upon success, or an errno upon failure.
487  *
488  * Only fills in information for argument pointers that are non-NULL.
489  * Note that the link argument is expected to be MAXLINKNAMELEN bytes.
490  */
491 int
492 dls_mgmt_get_linkinfo(datalink_id_t linkid, char *link,
493     datalink_class_t *classp, uint32_t *mediap, uint32_t *flagsp)
494 {
495 	dlmgmt_door_getname_t	getname;
496 	dlmgmt_getname_retval_t	retval;
497 	int			err, len;
498 
499 	getname.ld_cmd = DLMGMT_CMD_GETNAME;
500 	getname.ld_linkid = linkid;
501 
502 	if ((err = i_dls_mgmt_upcall(&getname, sizeof (getname), &retval,
503 	    sizeof (retval))) != 0) {
504 		return (err);
505 	}
506 
507 	len = strlen(retval.lr_link);
508 	if (len <= 1 || len >= MAXLINKNAMELEN)
509 		return (EINVAL);
510 
511 	if (link != NULL)
512 		(void) strlcpy(link, retval.lr_link, MAXLINKNAMELEN);
513 	if (classp != NULL)
514 		*classp = retval.lr_class;
515 	if (mediap != NULL)
516 		*mediap = retval.lr_media;
517 	if (flagsp != NULL)
518 		*flagsp = retval.lr_flags;
519 	return (0);
520 }
521 
522 /*
523  * Request the datalink management daemon to get the linkid for a link.
524  * Returns a non-zero error code on failure.  The linkid argument is only
525  * set on success (when zero is returned.)
526  */
527 int
528 dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
529 {
530 	dlmgmt_door_getlinkid_t		getlinkid;
531 	dlmgmt_getlinkid_retval_t	retval;
532 	int				err;
533 
534 	getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
535 	(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
536 
537 	if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
538 	    sizeof (retval))) == 0) {
539 		*linkid = retval.lr_linkid;
540 	}
541 	return (err);
542 }
543 
544 datalink_id_t
545 dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class,
546     datalink_media_t dmedia, uint32_t flags)
547 {
548 	dlmgmt_door_getnext_t	getnext;
549 	dlmgmt_getnext_retval_t	retval;
550 
551 	getnext.ld_cmd = DLMGMT_CMD_GETNEXT;
552 	getnext.ld_class = class;
553 	getnext.ld_dmedia = dmedia;
554 	getnext.ld_flags = flags;
555 	getnext.ld_linkid = linkid;
556 
557 	if (i_dls_mgmt_upcall(&getnext, sizeof (getnext), &retval,
558 	    sizeof (retval)) != 0) {
559 		return (DATALINK_INVALID_LINKID);
560 	}
561 
562 	return (retval.lr_linkid);
563 }
564 
565 static int
566 i_dls_mgmt_get_linkattr(const datalink_id_t linkid, const char *attr,
567     void *attrval, size_t *attrszp)
568 {
569 	dlmgmt_upcall_arg_getattr_t	getattr;
570 	dlmgmt_getattr_retval_t		retval;
571 	int				err;
572 
573 	getattr.ld_cmd = DLMGMT_CMD_DLS_GETATTR;
574 	getattr.ld_linkid = linkid;
575 	(void) strlcpy(getattr.ld_attr, attr, MAXLINKATTRLEN);
576 
577 	if ((err = i_dls_mgmt_upcall(&getattr, sizeof (getattr), &retval,
578 	    sizeof (retval))) == 0) {
579 		if (*attrszp < retval.lr_attrsz)
580 			return (EINVAL);
581 		*attrszp = retval.lr_attrsz;
582 		bcopy(retval.lr_attrval, attrval, retval.lr_attrsz);
583 	}
584 
585 	return (err);
586 }
587 
588 /*
589  * Note that this function can only get devp successfully for non-VLAN link.
590  */
591 int
592 dls_mgmt_get_phydev(datalink_id_t linkid, dev_t *devp)
593 {
594 	uint64_t	maj, inst;
595 	size_t		attrsz = sizeof (uint64_t);
596 
597 	if (i_dls_mgmt_get_linkattr(linkid, FPHYMAJ, &maj, &attrsz) != 0 ||
598 	    attrsz != sizeof (uint64_t) ||
599 	    i_dls_mgmt_get_linkattr(linkid, FPHYINST, &inst, &attrsz) != 0 ||
600 	    attrsz != sizeof (uint64_t)) {
601 		return (EINVAL);
602 	}
603 
604 	*devp = makedevice((major_t)maj, (minor_t)inst);
605 	return (0);
606 }
607 
608 /*
609  * Request the datalink management daemon to push in
610  * all properties associated with the link.
611  * Returns a non-zero error code on failure.
612  */
613 int
614 dls_mgmt_linkprop_init(datalink_id_t linkid)
615 {
616 	dlmgmt_door_linkprop_init_t	li;
617 	dlmgmt_linkprop_init_retval_t	retval;
618 	int				err;
619 
620 	li.ld_cmd = DLMGMT_CMD_LINKPROP_INIT;
621 	li.ld_linkid = linkid;
622 
623 	err = i_dls_mgmt_upcall(&li, sizeof (li), &retval, sizeof (retval));
624 	return (err);
625 }
626 
627 static void
628 dls_devnet_prop_task(void *arg)
629 {
630 	dls_devnet_t		*ddp = arg;
631 
632 	(void) dls_mgmt_linkprop_init(ddp->dd_linkid);
633 
634 	mutex_enter(&ddp->dd_mutex);
635 	ddp->dd_prop_loaded = B_TRUE;
636 	ddp->dd_prop_taskid = NULL;
637 	cv_broadcast(&ddp->dd_cv);
638 	mutex_exit(&ddp->dd_mutex);
639 }
640 
641 /*
642  * Ensure property loading task is completed.
643  */
644 void
645 dls_devnet_prop_task_wait(dls_dl_handle_t ddp)
646 {
647 	mutex_enter(&ddp->dd_mutex);
648 	while (ddp->dd_prop_taskid != NULL)
649 		cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
650 	mutex_exit(&ddp->dd_mutex);
651 }
652 
653 void
654 dls_devnet_rele_tmp(dls_dl_handle_t dlh)
655 {
656 	dls_devnet_t		*ddp = dlh;
657 
658 	mutex_enter(&ddp->dd_mutex);
659 	ASSERT(ddp->dd_tref != 0);
660 	if (--ddp->dd_tref == 0)
661 		cv_signal(&ddp->dd_cv);
662 	mutex_exit(&ddp->dd_mutex);
663 }
664 
665 int
666 dls_devnet_hold_link(datalink_id_t linkid, dls_dl_handle_t *ddhp,
667     dls_link_t **dlpp)
668 {
669 	dls_dl_handle_t	dlh;
670 	dls_link_t	*dlp;
671 	int		err;
672 
673 	if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
674 		return (err);
675 
676 	if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) {
677 		dls_devnet_rele_tmp(dlh);
678 		return (err);
679 	}
680 
681 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
682 
683 	*ddhp = dlh;
684 	*dlpp = dlp;
685 	return (0);
686 }
687 
688 void
689 dls_devnet_rele_link(dls_dl_handle_t dlh, dls_link_t *dlp)
690 {
691 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
692 
693 	dls_link_rele(dlp);
694 	dls_devnet_rele_tmp(dlh);
695 }
696 
697 /*
698  * "link" kstats related functions.
699  */
700 
701 /*
702  * Query the "link" kstats.
703  *
704  * We may be called from the kstat subsystem in an arbitrary context.
705  * If the caller is the stack, the context could be an upcall data
706  * thread. Hence we can't acquire the mac perimeter in this function
707  * for fear of deadlock.
708  */
709 static int
710 dls_devnet_stat_update(kstat_t *ksp, int rw)
711 {
712 	dls_devnet_t	*ddp = ksp->ks_private;
713 	dls_link_t	*dlp;
714 	int		err;
715 
716 	/*
717 	 * Check the link is being renamed or if the link is going away
718 	 * before incrementing dd_tref which in turn prevents the link
719 	 * from being renamed or deleted until we finish.
720 	 */
721 	mutex_enter(&ddp->dd_mutex);
722 	if (ddp->dd_flags & (DD_CONDEMNED | DD_KSTAT_CHANGING)) {
723 		mutex_exit(&ddp->dd_mutex);
724 		return (ENOENT);
725 	}
726 	ddp->dd_tref++;
727 	mutex_exit(&ddp->dd_mutex);
728 
729 	/*
730 	 * If a device detach happens at this time, it will block in
731 	 * dls_devnet_unset since the dd_tref has been bumped up above. So the
732 	 * access to 'dlp' is safe even though we don't hold the mac perimeter.
733 	 */
734 	if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)ddp->dd_mac,
735 	    (mod_hash_val_t *)&dlp) != 0) {
736 		dls_devnet_rele_tmp(ddp);
737 		return (ENOENT);
738 	}
739 
740 	err = dls_stat_update(ksp, dlp, rw);
741 
742 	dls_devnet_rele_tmp(ddp);
743 	return (err);
744 }
745 
746 /*
747  * Create the "link" kstats.
748  */
749 static void
750 dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid)
751 {
752 	kstat_t	*ksp;
753 
754 	if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid,
755 	    dls_devnet_stat_update, ddp, &ksp) == 0) {
756 		ASSERT(ksp != NULL);
757 		if (zoneid == ddp->dd_owner_zid) {
758 			ASSERT(ddp->dd_ksp == NULL);
759 			ddp->dd_ksp = ksp;
760 		} else {
761 			ASSERT(ddp->dd_zone_ksp == NULL);
762 			ddp->dd_zone_ksp = ksp;
763 		}
764 	}
765 }
766 
767 /*
768  * Destroy the "link" kstats.
769  */
770 static void
771 dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
772 {
773 	if (zoneid == ddp->dd_owner_zid) {
774 		if (ddp->dd_ksp != NULL) {
775 			kstat_delete(ddp->dd_ksp);
776 			ddp->dd_ksp = NULL;
777 		}
778 	} else {
779 		if (ddp->dd_zone_ksp != NULL) {
780 			kstat_delete(ddp->dd_zone_ksp);
781 			ddp->dd_zone_ksp = NULL;
782 		}
783 	}
784 }
785 
786 /*
787  * The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
788  * and create the new set using the new name.
789  */
790 static void
791 dls_devnet_stat_rename(dls_devnet_t *ddp)
792 {
793 	if (ddp->dd_ksp != NULL) {
794 		kstat_delete(ddp->dd_ksp);
795 		ddp->dd_ksp = NULL;
796 	}
797 	/* We can't rename a link while it's assigned to a non-global zone. */
798 	ASSERT(ddp->dd_zone_ksp == NULL);
799 	dls_devnet_stat_create(ddp, ddp->dd_owner_zid);
800 }
801 
802 /*
803  * Associate a linkid with a given link (identified by macname)
804  */
805 static int
806 dls_devnet_set(const char *macname, datalink_id_t linkid, zoneid_t zoneid,
807     dls_devnet_t **ddpp)
808 {
809 	dls_devnet_t		*ddp = NULL;
810 	datalink_class_t	class;
811 	int			err;
812 	boolean_t		stat_create = B_FALSE;
813 	char			linkname[MAXLINKNAMELEN];
814 
815 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
816 
817 	/*
818 	 * Don't allow callers to set a link name with a linkid that already
819 	 * has a name association (that's what rename is for).
820 	 */
821 	if (linkid != DATALINK_INVALID_LINKID) {
822 		if (mod_hash_find(i_dls_devnet_id_hash,
823 		    (mod_hash_key_t)(uintptr_t)linkid,
824 		    (mod_hash_val_t *)&ddp) == 0) {
825 			err = EEXIST;
826 			goto done;
827 		}
828 		if ((err = dls_mgmt_get_linkinfo(linkid, linkname, &class,
829 		    NULL, NULL)) != 0)
830 			goto done;
831 	}
832 
833 	if ((err = mod_hash_find(i_dls_devnet_hash,
834 	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) == 0) {
835 		if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
836 			err = EEXIST;
837 			goto done;
838 		}
839 
840 		/*
841 		 * This might be a physical link that has already
842 		 * been created, but which does not have a linkid
843 		 * because dlmgmtd was not running when it was created.
844 		 */
845 		if (linkid == DATALINK_INVALID_LINKID ||
846 		    class != DATALINK_CLASS_PHYS) {
847 			err = EINVAL;
848 			goto done;
849 		}
850 	} else {
851 		ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP);
852 		ddp->dd_tref = 0;
853 		ddp->dd_ref++;
854 		ddp->dd_owner_zid = zoneid;
855 		(void) strlcpy(ddp->dd_mac, macname, sizeof (ddp->dd_mac));
856 		VERIFY(mod_hash_insert(i_dls_devnet_hash,
857 		    (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0);
858 	}
859 
860 	if (linkid != DATALINK_INVALID_LINKID) {
861 		ddp->dd_linkid = linkid;
862 		(void) strlcpy(ddp->dd_linkname, linkname,
863 		    sizeof (ddp->dd_linkname));
864 		VERIFY(mod_hash_insert(i_dls_devnet_id_hash,
865 		    (mod_hash_key_t)(uintptr_t)linkid,
866 		    (mod_hash_val_t)ddp) == 0);
867 		devnet_need_rebuild = B_TRUE;
868 		stat_create = B_TRUE;
869 		mutex_enter(&ddp->dd_mutex);
870 		if (!ddp->dd_prop_loaded && (ddp->dd_prop_taskid == NULL)) {
871 			ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
872 			    dls_devnet_prop_task, ddp, TQ_SLEEP);
873 		}
874 		mutex_exit(&ddp->dd_mutex);
875 	}
876 	err = 0;
877 done:
878 	/*
879 	 * It is safe to drop the i_dls_devnet_lock at this point. In the case
880 	 * of physical devices, the softmac framework will fail the device
881 	 * detach based on the smac_state or smac_hold_cnt. Other cases like
882 	 * vnic and aggr use their own scheme to serialize creates and deletes
883 	 * and ensure that *ddp is valid.
884 	 */
885 	rw_exit(&i_dls_devnet_lock);
886 	if (err == 0) {
887 		if (zoneid != GLOBAL_ZONEID &&
888 		    (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE)) != 0)
889 			(void) dls_devnet_unset(macname, &linkid, B_TRUE);
890 		/*
891 		 * The kstat subsystem holds its own locks (rather perimeter)
892 		 * before calling the ks_update (dls_devnet_stat_update) entry
893 		 * point which in turn grabs the i_dls_devnet_lock. So the
894 		 * lock hierarchy is kstat locks -> i_dls_devnet_lock.
895 		 */
896 		if (stat_create)
897 			dls_devnet_stat_create(ddp, zoneid);
898 		if (ddpp != NULL)
899 			*ddpp = ddp;
900 	}
901 	return (err);
902 }
903 
904 /*
905  * Disassociate a linkid with a given link (identified by macname)
906  * This waits until temporary references to the dls_devnet_t are gone.
907  */
908 static int
909 dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait)
910 {
911 	dls_devnet_t	*ddp;
912 	int		err;
913 	mod_hash_val_t	val;
914 
915 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
916 	if ((err = mod_hash_find(i_dls_devnet_hash,
917 	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) != 0) {
918 		ASSERT(err == MH_ERR_NOTFOUND);
919 		rw_exit(&i_dls_devnet_lock);
920 		return (ENOENT);
921 	}
922 
923 	mutex_enter(&ddp->dd_mutex);
924 
925 	/*
926 	 * Make sure downcalls into softmac_create or softmac_destroy from
927 	 * devfs don't cv_wait on any devfs related condition for fear of
928 	 * deadlock. Return EBUSY if the asynchronous thread started for
929 	 * property loading as part of the post attach hasn't yet completed.
930 	 */
931 	ASSERT(ddp->dd_ref != 0);
932 	if ((ddp->dd_ref != 1) || (!wait &&
933 	    (ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) {
934 		mutex_exit(&ddp->dd_mutex);
935 		rw_exit(&i_dls_devnet_lock);
936 		return (EBUSY);
937 	}
938 
939 	ddp->dd_flags |= DD_CONDEMNED;
940 	ddp->dd_ref--;
941 	*id = ddp->dd_linkid;
942 
943 	if (ddp->dd_zid != GLOBAL_ZONEID)
944 		(void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
945 
946 	/*
947 	 * Remove this dls_devnet_t from the hash table.
948 	 */
949 	VERIFY(mod_hash_remove(i_dls_devnet_hash,
950 	    (mod_hash_key_t)ddp->dd_mac, &val) == 0);
951 
952 	if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
953 		VERIFY(mod_hash_remove(i_dls_devnet_id_hash,
954 		    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, &val) == 0);
955 
956 		devnet_need_rebuild = B_TRUE;
957 	}
958 	rw_exit(&i_dls_devnet_lock);
959 
960 	if (wait) {
961 		/*
962 		 * Wait until all temporary references are released.
963 		 */
964 		while ((ddp->dd_tref != 0) || (ddp->dd_prop_taskid != NULL))
965 			cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
966 	} else {
967 		ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL);
968 	}
969 
970 	if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
971 		dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
972 
973 	ddp->dd_prop_loaded = B_FALSE;
974 	ddp->dd_linkid = DATALINK_INVALID_LINKID;
975 	ddp->dd_flags = 0;
976 	mutex_exit(&ddp->dd_mutex);
977 	kmem_cache_free(i_dls_devnet_cachep, ddp);
978 
979 	return (0);
980 }
981 
982 static int
983 dls_devnet_hold_common(datalink_id_t linkid, dls_devnet_t **ddpp,
984     boolean_t tmp_hold)
985 {
986 	dls_devnet_t		*ddp;
987 	dev_t			phydev = 0;
988 	dls_dev_handle_t	ddh = NULL;
989 	int			err;
990 
991 	/*
992 	 * Hold this link to prevent it being detached in case of a
993 	 * physical link.
994 	 */
995 	if (dls_mgmt_get_phydev(linkid, &phydev) == 0)
996 		(void) softmac_hold_device(phydev, &ddh);
997 
998 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
999 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1000 	    (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&ddp)) != 0) {
1001 		ASSERT(err == MH_ERR_NOTFOUND);
1002 		rw_exit(&i_dls_devnet_lock);
1003 		softmac_rele_device(ddh);
1004 		return (ENOENT);
1005 	}
1006 
1007 	mutex_enter(&ddp->dd_mutex);
1008 	ASSERT(ddp->dd_ref > 0);
1009 	if (ddp->dd_flags & DD_CONDEMNED) {
1010 		mutex_exit(&ddp->dd_mutex);
1011 		rw_exit(&i_dls_devnet_lock);
1012 		softmac_rele_device(ddh);
1013 		return (ENOENT);
1014 	}
1015 	if (tmp_hold)
1016 		ddp->dd_tref++;
1017 	else
1018 		ddp->dd_ref++;
1019 	mutex_exit(&ddp->dd_mutex);
1020 	rw_exit(&i_dls_devnet_lock);
1021 
1022 	softmac_rele_device(ddh);
1023 
1024 	*ddpp = ddp;
1025 	return (0);
1026 }
1027 
1028 int
1029 dls_devnet_hold(datalink_id_t linkid, dls_devnet_t **ddpp)
1030 {
1031 	return (dls_devnet_hold_common(linkid, ddpp, B_FALSE));
1032 }
1033 
1034 /*
1035  * Hold the vanity naming structure (dls_devnet_t) temporarily.  The request to
1036  * delete the dls_devnet_t will wait until the temporary reference is released.
1037  */
1038 int
1039 dls_devnet_hold_tmp(datalink_id_t linkid, dls_devnet_t **ddpp)
1040 {
1041 	return (dls_devnet_hold_common(linkid, ddpp, B_TRUE));
1042 }
1043 
1044 /*
1045  * This funtion is called when a DLS client tries to open a device node.
1046  * This dev_t could a result of a /dev/net node access (returned by
1047  * devnet_create_rvp->dls_devnet_open()) or a direct /dev node access.
1048  * In both cases, this function bumps up the reference count of the
1049  * dls_devnet_t structure. The reference is held as long as the device node
1050  * is open. In the case of /dev/net while it is true that the initial reference
1051  * is held when the devnet_create_rvp->dls_devnet_open call happens, this
1052  * initial reference is released immediately in devnet_inactive_callback ->
1053  * dls_devnet_close(). (Note that devnet_inactive_callback() is called right
1054  * after dld_open completes, not when the /dev/net node is being closed).
1055  * To undo this function, call dls_devnet_rele()
1056  */
1057 int
1058 dls_devnet_hold_by_dev(dev_t dev, dls_dl_handle_t *ddhp)
1059 {
1060 	char			name[MAXNAMELEN];
1061 	char			*drv;
1062 	dls_dev_handle_t	ddh = NULL;
1063 	dls_devnet_t		*ddp;
1064 	int			err;
1065 
1066 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1067 		return (EINVAL);
1068 
1069 	(void) snprintf(name, sizeof (name), "%s%d", drv, getminor(dev) - 1);
1070 
1071 	/*
1072 	 * Hold this link to prevent it being detached in case of a
1073 	 * GLDv3 physical link.
1074 	 */
1075 	if (getminor(dev) - 1 < MAC_MAX_MINOR)
1076 		(void) softmac_hold_device(dev, &ddh);
1077 
1078 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
1079 	if ((err = mod_hash_find(i_dls_devnet_hash,
1080 	    (mod_hash_key_t)name, (mod_hash_val_t *)&ddp)) != 0) {
1081 		ASSERT(err == MH_ERR_NOTFOUND);
1082 		rw_exit(&i_dls_devnet_lock);
1083 		softmac_rele_device(ddh);
1084 		return (ENOENT);
1085 	}
1086 	mutex_enter(&ddp->dd_mutex);
1087 	ASSERT(ddp->dd_ref > 0);
1088 	if (ddp->dd_flags & DD_CONDEMNED) {
1089 		mutex_exit(&ddp->dd_mutex);
1090 		rw_exit(&i_dls_devnet_lock);
1091 		softmac_rele_device(ddh);
1092 		return (ENOENT);
1093 	}
1094 	ddp->dd_ref++;
1095 	mutex_exit(&ddp->dd_mutex);
1096 	rw_exit(&i_dls_devnet_lock);
1097 
1098 	softmac_rele_device(ddh);
1099 
1100 	*ddhp = ddp;
1101 	return (0);
1102 }
1103 
1104 void
1105 dls_devnet_rele(dls_devnet_t *ddp)
1106 {
1107 	mutex_enter(&ddp->dd_mutex);
1108 	ASSERT(ddp->dd_ref > 1);
1109 	ddp->dd_ref--;
1110 	if ((ddp->dd_flags & DD_IMPLICIT_IPTUN) && ddp->dd_ref == 1) {
1111 		mutex_exit(&ddp->dd_mutex);
1112 		if (i_dls_devnet_destroy_iptun(ddp->dd_linkid) != 0)
1113 			ddp->dd_flags |= DD_IMPLICIT_IPTUN;
1114 		return;
1115 	}
1116 	mutex_exit(&ddp->dd_mutex);
1117 }
1118 
1119 static int
1120 dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp)
1121 {
1122 	char			drv[MAXLINKNAMELEN];
1123 	uint_t			ppa;
1124 	major_t			major;
1125 	dev_t			phy_dev, tmp_dev;
1126 	datalink_id_t		linkid;
1127 	dls_dev_handle_t	ddh;
1128 	int			err;
1129 
1130 	if ((err = dls_mgmt_get_linkid(link, &linkid)) == 0)
1131 		return (dls_devnet_hold(linkid, ddpp));
1132 
1133 	/*
1134 	 * If we failed to get the link's linkid because the dlmgmtd daemon
1135 	 * has not been started, return ENOENT so that the application can
1136 	 * fallback to open the /dev node.
1137 	 */
1138 	if (err == EBADF)
1139 		return (ENOENT);
1140 
1141 	if (err != ENOENT)
1142 		return (err);
1143 
1144 	if (IS_IPTUN_LINK(link)) {
1145 		if ((err = i_dls_devnet_create_iptun(link, &linkid)) != 0)
1146 			return (err);
1147 		/*
1148 		 * At this point, an IP tunnel MAC has registered, which
1149 		 * resulted in a link being created.
1150 		 */
1151 		err = dls_devnet_hold(linkid, ddpp);
1152 		ASSERT(err == 0);
1153 		if (err != 0) {
1154 			VERIFY(i_dls_devnet_destroy_iptun(linkid) == 0);
1155 			return (err);
1156 		}
1157 		/*
1158 		 * dls_devnet_rele() will know to destroy the implicit IP
1159 		 * tunnel on last reference release if DD_IMPLICIT_IPTUN is
1160 		 * set.
1161 		 */
1162 		(*ddpp)->dd_flags |= DD_IMPLICIT_IPTUN;
1163 		return (0);
1164 	}
1165 
1166 	if (ddi_parse(link, drv, &ppa) != DDI_SUCCESS)
1167 		return (ENOENT);
1168 
1169 	/*
1170 	 * If this link:
1171 	 * (a) is a physical device, (b) this is the first boot, (c) the MAC
1172 	 * is not registered yet, and (d) we cannot find its linkid, then the
1173 	 * linkname is the same as the devname.
1174 	 *
1175 	 * First filter out invalid names.
1176 	 */
1177 	if ((major = ddi_name_to_major(drv)) == (major_t)-1)
1178 		return (ENOENT);
1179 
1180 	phy_dev = makedevice(major, (minor_t)ppa + 1);
1181 	if (softmac_hold_device(phy_dev, &ddh) != 0)
1182 		return (ENOENT);
1183 
1184 	/*
1185 	 * At this time, the MAC should be registered, check its phy_dev using
1186 	 * the given name.
1187 	 */
1188 	if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0 ||
1189 	    (err = dls_mgmt_get_phydev(linkid, &tmp_dev)) != 0) {
1190 		softmac_rele_device(ddh);
1191 		return (err);
1192 	}
1193 	if (tmp_dev != phy_dev) {
1194 		softmac_rele_device(ddh);
1195 		return (ENOENT);
1196 	}
1197 
1198 	err = dls_devnet_hold(linkid, ddpp);
1199 	softmac_rele_device(ddh);
1200 	return (err);
1201 }
1202 
1203 int
1204 dls_devnet_macname2linkid(const char *macname, datalink_id_t *linkidp)
1205 {
1206 	dls_devnet_t	*ddp;
1207 
1208 	rw_enter(&i_dls_devnet_lock, RW_READER);
1209 	if (mod_hash_find(i_dls_devnet_hash, (mod_hash_key_t)macname,
1210 	    (mod_hash_val_t *)&ddp) != 0) {
1211 		rw_exit(&i_dls_devnet_lock);
1212 		return (ENOENT);
1213 	}
1214 
1215 	*linkidp = ddp->dd_linkid;
1216 	rw_exit(&i_dls_devnet_lock);
1217 	return (0);
1218 }
1219 
1220 
1221 /*
1222  * Get linkid for the given dev.
1223  */
1224 int
1225 dls_devnet_dev2linkid(dev_t dev, datalink_id_t *linkidp)
1226 {
1227 	char	macname[MAXNAMELEN];
1228 	char	*drv;
1229 
1230 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1231 		return (EINVAL);
1232 
1233 	(void) snprintf(macname, sizeof (macname), "%s%d", drv,
1234 	    getminor(dev) - 1);
1235 	return (dls_devnet_macname2linkid(macname, linkidp));
1236 }
1237 
1238 /*
1239  * Get the link's physical dev_t. It this is a VLAN, get the dev_t of the
1240  * link this VLAN is created on.
1241  */
1242 int
1243 dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp)
1244 {
1245 	dls_devnet_t	*ddp;
1246 	int		err;
1247 
1248 	if ((err = dls_devnet_hold_tmp(vlanid, &ddp)) != 0)
1249 		return (err);
1250 
1251 	err = dls_mgmt_get_phydev(ddp->dd_linkid, devp);
1252 	dls_devnet_rele_tmp(ddp);
1253 	return (err);
1254 }
1255 
/*
 * Handle the renaming requests.  There are two rename cases:
 *
 * 1. Request to rename a valid link (id1) to a non-existent link name
 *    (id2). In this case id2 is DATALINK_INVALID_LINKID.  Just check whether
 *    id1 is held by any applications.
 *
 *    In this case, the link's kstats need to be updated using the given name.
 *
 * 2. Request to rename a valid link (id1) to the name of a REMOVED
 *    physical link (id2). In this case, check that id1 and its associated
 *    mac is not held by any application, and update the link's linkid to id2.
 *
 *    This case does not change the <link name, linkid> mapping, so the link's
 *    kstats need to be updated using the name associated with the given id2.
 *
 * Returns 0 on success.  Errors generated here are EEXIST (id2's physical
 * device can still be held, or id2 is already in the linkid hash), ENOENT
 * (id1 is not in the linkid hash), EBUSY (id1 has references beyond the
 * base one) and EINVAL (case 2 on a link whose physical device could not be
 * held).  Errors from mac_perim_enter_by_linkid(), mac_open(),
 * mac_mark_exclusive() and dls_mgmt_get_linkinfo() are passed through.
 */
int
dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
{
	dls_dev_handle_t	ddh = NULL;
	int			err = 0;
	dev_t			phydev = 0;
	dls_devnet_t		*ddp;
	mac_perim_handle_t	mph = NULL;
	mac_handle_t		mh;
	mod_hash_val_t		val;
	boolean_t		clear_dd_flag = B_FALSE;

	/*
	 * In the second case, id2 must be a REMOVED physical link.  If its
	 * device can still be held it is not removed, so refuse the rename.
	 */
	if ((id2 != DATALINK_INVALID_LINKID) &&
	    (dls_mgmt_get_phydev(id2, &phydev) == 0) &&
	    softmac_hold_device(phydev, &ddh) == 0) {
		softmac_rele_device(ddh);
		return (EEXIST);
	}

	/*
	 * Hold id1 to prevent it from being detached (if a physical link).
	 * The result is deliberately ignored; ddh is checked later, and only
	 * in the second case.
	 */
	if (dls_mgmt_get_phydev(id1, &phydev) == 0)
		(void) softmac_hold_device(phydev, &ddh);

	/*
	 * The framework does not hold locks across calls to the
	 * mac perimeter, hence enter the perimeter first. This also waits
	 * for the property loading to finish.
	 */
	if ((err = mac_perim_enter_by_linkid(id1, &mph)) != 0) {
		softmac_rele_device(ddh);
		return (err);
	}

	rw_enter(&i_dls_devnet_lock, RW_WRITER);
	if ((err = mod_hash_find(i_dls_devnet_id_hash,
	    (mod_hash_key_t)(uintptr_t)id1, (mod_hash_val_t *)&ddp)) != 0) {
		ASSERT(err == MH_ERR_NOTFOUND);
		err = ENOENT;
		goto done;
	}

	/*
	 * Return EBUSY if any applications have this link open, if any thread
	 * is currently accessing the link kstats, or if the link is on-loan
	 * to a non-global zone. Then set the DD_KSTAT_CHANGING flag to
	 * prevent any access to the kstats while we delete and recreate
	 * kstats below.
	 */
	mutex_enter(&ddp->dd_mutex);
	if (ddp->dd_ref > 1) {
		mutex_exit(&ddp->dd_mutex);
		err = EBUSY;
		goto done;
	}

	ddp->dd_flags |= DD_KSTAT_CHANGING;
	/* Remember to clear DD_KSTAT_CHANGING again on the way out. */
	clear_dd_flag = B_TRUE;
	mutex_exit(&ddp->dd_mutex);

	if (id2 == DATALINK_INVALID_LINKID) {
		/*
		 * First case: only the name changes.
		 *
		 * NOTE(review): dd_linkname is overwritten before mac_open()
		 * is attempted, so a mac_open() failure returns an error with
		 * the new name already stored and no kstat rename -- confirm
		 * this is intended.
		 */
		(void) strlcpy(ddp->dd_linkname, link,
		    sizeof (ddp->dd_linkname));

		/* rename mac client name and its flow if exists */
		if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
			goto done;
		(void) mac_rename_primary(mh, link);
		mac_close(mh);
		goto done;
	}

	/*
	 * The second case, check whether the MAC is used by any MAC
	 * user.  This must be a physical link so ddh must not be NULL.
	 */
	if (ddh == NULL) {
		err = EINVAL;
		goto done;
	}

	if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
		goto done;

	/*
	 * We release the reference of the MAC which mac_open() is
	 * holding. Note that this mac will not be unregistered
	 * because the physical device is held.  mh therefore continues to
	 * be used below after mac_close().
	 */
	mac_close(mh);

	/*
	 * Check if there is any other MAC clients, if not, hold this mac
	 * exclusively until we are done.
	 */
	if ((err = mac_mark_exclusive(mh)) != 0)
		goto done;

	/*
	 * Update the link's linkid.  id2 must not already be present in the
	 * linkid hash; any result other than "not found" is treated as a
	 * conflict.
	 */
	if ((err = mod_hash_find(i_dls_devnet_id_hash,
	    (mod_hash_key_t)(uintptr_t)id2, &val)) != MH_ERR_NOTFOUND) {
		mac_unmark_exclusive(mh);
		err = EEXIST;
		goto done;
	}

	/* Fetch the link name associated with id2 into dd_linkname. */
	err = dls_mgmt_get_linkinfo(id2, ddp->dd_linkname, NULL, NULL, NULL);
	if (err != 0) {
		mac_unmark_exclusive(mh);
		goto done;
	}

	/* Re-key the entry in the linkid hash from id1 to id2. */
	(void) mod_hash_remove(i_dls_devnet_id_hash,
	    (mod_hash_key_t)(uintptr_t)id1, &val);

	ddp->dd_linkid = id2;
	(void) mod_hash_insert(i_dls_devnet_id_hash,
	    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, (mod_hash_val_t)ddp);

	mac_unmark_exclusive(mh);

	/* load properties for new id */
	mutex_enter(&ddp->dd_mutex);
	ddp->dd_prop_loaded = B_FALSE;
	ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
	    dls_devnet_prop_task, ddp, TQ_SLEEP);
	mutex_exit(&ddp->dd_mutex);

done:
	/*
	 * Change the name of the kstat based on the new link name.
	 * We can't hold the i_dls_devnet_lock across calls to the kstat
	 * subsystem. Instead the DD_KSTAT_CHANGING flag set above in this
	 * function prevents any access to the dd_ksp while we delete and
	 * recreate it below.
	 *
	 * ddp is only dereferenced below when err is 0 or clear_dd_flag is
	 * set; both can only be true after the id1 lookup succeeded.
	 */
	rw_exit(&i_dls_devnet_lock);
	if (err == 0)
		dls_devnet_stat_rename(ddp);

	if (clear_dd_flag) {
		mutex_enter(&ddp->dd_mutex);
		ddp->dd_flags &= ~DD_KSTAT_CHANGING;
		mutex_exit(&ddp->dd_mutex);
	}

	if (mph != NULL)
		mac_perim_exit(mph);
	softmac_rele_device(ddh);
	return (err);
}
1429 
1430 static int
1431 i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop)
1432 {
1433 	int			err;
1434 	mac_perim_handle_t	mph;
1435 	boolean_t		upcall_done = B_FALSE;
1436 	datalink_id_t		linkid = ddp->dd_linkid;
1437 	zoneid_t		old_zoneid = ddp->dd_zid;
1438 	dlmgmt_door_setzoneid_t	setzid;
1439 	dlmgmt_setzoneid_retval_t retval;
1440 
1441 	if (old_zoneid == new_zoneid)
1442 		return (0);
1443 
1444 	if ((err = mac_perim_enter_by_macname(ddp->dd_mac, &mph)) != 0)
1445 		return (err);
1446 
1447 	/*
1448 	 * When changing the zoneid of an existing link, we need to tell
1449 	 * dlmgmtd about it.  dlmgmtd already knows the zoneid associated with
1450 	 * newly created links.
1451 	 */
1452 	if (setprop) {
1453 		setzid.ld_cmd = DLMGMT_CMD_SETZONEID;
1454 		setzid.ld_linkid = linkid;
1455 		setzid.ld_zoneid = new_zoneid;
1456 		err = i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1457 		    sizeof (retval));
1458 		if (err != 0)
1459 			goto done;
1460 		upcall_done = B_TRUE;
1461 	}
1462 	if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
1463 		ddp->dd_zid = new_zoneid;
1464 		devnet_need_rebuild = B_TRUE;
1465 	}
1466 
1467 done:
1468 	if (err != 0 && upcall_done) {
1469 		setzid.ld_zoneid = old_zoneid;
1470 		(void) i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1471 		    sizeof (retval));
1472 	}
1473 	mac_perim_exit(mph);
1474 	return (err);
1475 }
1476 
1477 int
1478 dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
1479 {
1480 	dls_devnet_t	*ddp;
1481 	int		err;
1482 	zoneid_t	old_zid;
1483 	boolean_t	refheld = B_FALSE;
1484 
1485 	old_zid = ddh->dd_zid;
1486 
1487 	if (old_zid == new_zid)
1488 		return (0);
1489 
1490 	/*
1491 	 * Acquire an additional reference to the link if it is being assigned
1492 	 * to a non-global zone from the global zone.
1493 	 */
1494 	if (old_zid == GLOBAL_ZONEID && new_zid != GLOBAL_ZONEID) {
1495 		if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
1496 			return (err);
1497 		refheld = B_TRUE;
1498 	}
1499 
1500 	if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE)) != 0) {
1501 		if (refheld)
1502 			dls_devnet_rele(ddp);
1503 		return (err);
1504 	}
1505 
1506 	/*
1507 	 * Release the additional reference if the link is returning to the
1508 	 * global zone from a non-global zone.
1509 	 */
1510 	if (old_zid != GLOBAL_ZONEID && new_zid == GLOBAL_ZONEID)
1511 		dls_devnet_rele(ddh);
1512 
1513 	/* Re-create kstats in the appropriate zones. */
1514 	if (old_zid != GLOBAL_ZONEID)
1515 		dls_devnet_stat_destroy(ddh, old_zid);
1516 	if (new_zid != GLOBAL_ZONEID)
1517 		dls_devnet_stat_create(ddh, new_zid);
1518 
1519 	return (0);
1520 }
1521 
1522 zoneid_t
1523 dls_devnet_getzid(dls_dl_handle_t ddh)
1524 {
1525 	return (((dls_devnet_t *)ddh)->dd_zid);
1526 }
1527 
1528 zoneid_t
1529 dls_devnet_getownerzid(dls_dl_handle_t ddh)
1530 {
1531 	return (((dls_devnet_t *)ddh)->dd_owner_zid);
1532 }
1533 
1534 /*
1535  * Is linkid visible from zoneid?  A link is visible if it was created in the
1536  * zone, or if it is currently assigned to the zone.
1537  */
1538 boolean_t
1539 dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid)
1540 {
1541 	dls_devnet_t	*ddp;
1542 	boolean_t	result;
1543 
1544 	if (dls_devnet_hold_tmp(linkid, &ddp) != 0)
1545 		return (B_FALSE);
1546 	result = (ddp->dd_owner_zid == zoneid || ddp->dd_zid == zoneid);
1547 	dls_devnet_rele_tmp(ddp);
1548 	return (result);
1549 }
1550 
1551 /*
1552  * Access a vanity naming node.
1553  */
1554 int
1555 dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
1556 {
1557 	dls_devnet_t	*ddp;
1558 	dls_link_t	*dlp;
1559 	zoneid_t	zid = getzoneid();
1560 	int		err;
1561 	mac_perim_handle_t	mph;
1562 
1563 	if ((err = dls_devnet_hold_by_name(link, &ddp)) != 0)
1564 		return (err);
1565 
1566 	dls_devnet_prop_task_wait(ddp);
1567 
1568 	/*
1569 	 * Opening a link that does not belong to the current non-global zone
1570 	 * is not allowed.
1571 	 */
1572 	if (zid != GLOBAL_ZONEID && ddp->dd_zid != zid) {
1573 		dls_devnet_rele(ddp);
1574 		return (ENOENT);
1575 	}
1576 
1577 	err = mac_perim_enter_by_macname(ddp->dd_mac, &mph);
1578 	if (err != 0) {
1579 		dls_devnet_rele(ddp);
1580 		return (err);
1581 	}
1582 
1583 	err = dls_link_hold_create(ddp->dd_mac, &dlp);
1584 	mac_perim_exit(mph);
1585 
1586 	if (err != 0) {
1587 		dls_devnet_rele(ddp);
1588 		return (err);
1589 	}
1590 
1591 	*dhp = ddp;
1592 	*devp = dls_link_dev(dlp);
1593 	return (0);
1594 }
1595 
1596 /*
1597  * Close access to a vanity naming node.
1598  */
1599 void
1600 dls_devnet_close(dls_dl_handle_t dlh)
1601 {
1602 	dls_devnet_t	*ddp = dlh;
1603 	dls_link_t	*dlp;
1604 	mac_perim_handle_t	mph;
1605 
1606 	VERIFY(mac_perim_enter_by_macname(ddp->dd_mac, &mph) == 0);
1607 	VERIFY(dls_link_hold(ddp->dd_mac, &dlp) == 0);
1608 
1609 	/*
1610 	 * One rele for the hold placed in dls_devnet_open, another for
1611 	 * the hold done just above
1612 	 */
1613 	dls_link_rele(dlp);
1614 	dls_link_rele(dlp);
1615 	mac_perim_exit(mph);
1616 
1617 	dls_devnet_rele(ddp);
1618 }
1619 
1620 /*
1621  * This is used by /dev/net to rebuild the nodes for readdir().  It is not
1622  * critical and no protection is needed.
1623  */
1624 boolean_t
1625 dls_devnet_rebuild()
1626 {
1627 	boolean_t updated = devnet_need_rebuild;
1628 
1629 	devnet_need_rebuild = B_FALSE;
1630 	return (updated);
1631 }
1632 
1633 int
1634 dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid)
1635 {
1636 	dls_link_t	*dlp;
1637 	dls_devnet_t	*ddp;
1638 	int		err;
1639 	mac_perim_handle_t mph;
1640 
1641 	/*
1642 	 * Holding the mac perimeter ensures that the downcall from the
1643 	 * dlmgmt daemon which does the property loading does not proceed
1644 	 * until we relinquish the perimeter.
1645 	 */
1646 	mac_perim_enter_by_mh(mh, &mph);
1647 	/*
1648 	 * Make this association before we call dls_link_hold_create as
1649 	 * we need to use the linkid to get the user name for the link
1650 	 * when we create the MAC client.
1651 	 */
1652 	if ((err = dls_devnet_set(mac_name(mh), linkid, zoneid, &ddp)) == 0) {
1653 		if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) {
1654 			mac_perim_exit(mph);
1655 			(void) dls_devnet_unset(mac_name(mh), &linkid, B_TRUE);
1656 			return (err);
1657 		}
1658 	}
1659 	mac_perim_exit(mph);
1660 	return (err);
1661 }
1662 
1663 /*
1664  * Set the linkid of the dls_devnet_t and add it into the i_dls_devnet_id_hash.
1665  * This is called in the case that the dlmgmtd daemon is started later than
1666  * the physical devices get attached, and the linkid is only known after the
1667  * daemon starts.
1668  */
1669 int
1670 dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid)
1671 {
1672 	ASSERT(linkid != DATALINK_INVALID_LINKID);
1673 	return (dls_devnet_set(mac_name(mh), linkid, GLOBAL_ZONEID, NULL));
1674 }
1675 
1676 int
1677 dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait)
1678 {
1679 	int			err;
1680 	mac_perim_handle_t	mph;
1681 
1682 	*idp = DATALINK_INVALID_LINKID;
1683 	err = dls_devnet_unset(mac_name(mh), idp, wait);
1684 	if (err != 0 && err != ENOENT)
1685 		return (err);
1686 
1687 	mac_perim_enter_by_mh(mh, &mph);
1688 	err = dls_link_rele_by_name(mac_name(mh));
1689 	mac_perim_exit(mph);
1690 
1691 	if (err != 0) {
1692 		/*
1693 		 * XXX It is a general GLDv3 bug that dls_devnet_set() has to
1694 		 * be called to re-set the link when destroy fails.  The
1695 		 * zoneid below will be incorrect if this function is ever
1696 		 * called from kernel context or from a zone other than that
1697 		 * which initially created the link.
1698 		 */
1699 		(void) dls_devnet_set(mac_name(mh), *idp, crgetzoneid(CRED()),
1700 		    NULL);
1701 	}
1702 	return (err);
1703 }
1704 
1705 /*
1706  * Implicitly create an IP tunnel link.
1707  */
1708 static int
1709 i_dls_devnet_create_iptun(const char *name, datalink_id_t *linkid)
1710 {
1711 	int		err;
1712 	iptun_kparams_t	ik;
1713 	uint32_t	media;
1714 	netstack_t	*ns;
1715 	major_t		iptun_major;
1716 	dev_info_t	*iptun_dip;
1717 
1718 	/* First ensure that the iptun device is attached. */
1719 	if ((iptun_major = ddi_name_to_major(IPTUN_DRIVER_NAME)) == (major_t)-1)
1720 		return (EINVAL);
1721 	if ((iptun_dip = ddi_hold_devi_by_instance(iptun_major, 0, 0)) == NULL)
1722 		return (EINVAL);
1723 
1724 	if (IS_IPV4_TUN(name)) {
1725 		ik.iptun_kparam_type = IPTUN_TYPE_IPV4;
1726 		media = DL_IPV4;
1727 	} else if (IS_6TO4_TUN(name)) {
1728 		ik.iptun_kparam_type = IPTUN_TYPE_6TO4;
1729 		media = DL_6TO4;
1730 	} else if (IS_IPV6_TUN(name)) {
1731 		ik.iptun_kparam_type = IPTUN_TYPE_IPV6;
1732 		media = DL_IPV6;
1733 	}
1734 	ik.iptun_kparam_flags = (IPTUN_KPARAM_TYPE | IPTUN_KPARAM_IMPLICIT);
1735 
1736 	/* Obtain a datalink id for this tunnel. */
1737 	err = dls_mgmt_create((char *)name, 0, DATALINK_CLASS_IPTUN, media,
1738 	    B_FALSE, &ik.iptun_kparam_linkid);
1739 	if (err != 0) {
1740 		ddi_release_devi(iptun_dip);
1741 		return (err);
1742 	}
1743 
1744 	ns = netstack_get_current();
1745 	err = iptun_create(&ik, CRED());
1746 	netstack_rele(ns);
1747 
1748 	if (err != 0)
1749 		VERIFY(dls_mgmt_destroy(ik.iptun_kparam_linkid, B_FALSE) == 0);
1750 	else
1751 		*linkid = ik.iptun_kparam_linkid;
1752 
1753 	ddi_release_devi(iptun_dip);
1754 	return (err);
1755 }
1756 
1757 static int
1758 i_dls_devnet_destroy_iptun(datalink_id_t linkid)
1759 {
1760 	int err;
1761 
1762 	/*
1763 	 * Note the use of zone_kcred() here as opposed to CRED().  This is
1764 	 * because the process that does the last close of this /dev/net node
1765 	 * may not have necessary privileges to delete this IP tunnel, but the
1766 	 * tunnel must always be implicitly deleted on last close.
1767 	 */
1768 	if ((err = iptun_delete(linkid, zone_kcred())) == 0)
1769 		(void) dls_mgmt_destroy(linkid, B_FALSE);
1770 	return (err);
1771 }
1772 
1773 const char *
1774 dls_devnet_mac(dls_dl_handle_t ddh)
1775 {
1776 	return (ddh->dd_mac);
1777 }
1778 
1779 datalink_id_t
1780 dls_devnet_linkid(dls_dl_handle_t ddh)
1781 {
1782 	return (ddh->dd_linkid);
1783 }
1784