xref: /titanic_52/usr/src/uts/common/io/dls/dls_mgmt.c (revision f936286c99fb83153e4bfd870eb2830a990a82c1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Datalink management routines.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/door.h>
32 #include <sys/zone.h>
33 #include <sys/modctl.h>
34 #include <sys/file.h>
35 #include <sys/modhash.h>
36 #include <sys/kstat.h>
37 #include <sys/vnode.h>
38 #include <sys/cmn_err.h>
39 #include <sys/softmac.h>
40 #include <sys/dls.h>
41 #include <sys/dls_impl.h>
42 #include <sys/stropts.h>
43 #include <sys/netstack.h>
44 #include <inet/iptun/iptun_impl.h>
45 
46 /*
47  * This vanity name management module is treated as part of the GLD framework
48  * and we don't hold any GLD framework lock across a call to any mac
49  * function that needs to acquire the mac perimeter. The hierarchy is
50  * mac perimeter -> framework locks
51  */
52 
53 typedef struct dls_stack {
54 	zoneid_t	dlss_zoneid;
55 } dls_stack_t;
56 
57 static kmem_cache_t	*i_dls_devnet_cachep;
58 static kmutex_t		i_dls_mgmt_lock;
59 static krwlock_t	i_dls_devnet_lock;
60 static mod_hash_t	*i_dls_devnet_id_hash;
61 static mod_hash_t	*i_dls_devnet_hash;
62 
63 boolean_t		devnet_need_rebuild;
64 
65 #define	VLAN_HASHSZ	67	/* prime */
66 
/*
 * Link-name predicates.  Each macro expects a link name with the trailing
 * PPA already removed.  Opening a /dev/net node whose name matches one of
 * them causes a tunnel link to be implicitly created in
 * dls_devnet_hold_by_name(), for backward compatibility with Solaris 10
 * and prior.
 */
#define	IS_IPV4_TUN(name)	(strcmp((name), "ip.tun") == 0)
#define	IS_IPV6_TUN(name)	(strcmp((name), "ip6.tun") == 0)
#define	IS_6TO4_TUN(name)	(strcmp((name), "ip.6to4tun") == 0)

/* True if the name matches any of the three implicit tunnel names. */
#define	IS_IPTUN_LINK(name)						\
	(IS_IPV4_TUN(name) ||						\
	IS_IPV6_TUN(name) ||						\
	IS_6TO4_TUN(name))
78 
79 /* Upcall door handle */
80 static door_handle_t	dls_mgmt_dh = NULL;
81 
/* Bits stored in dls_devnet_t's dd_flags. */
#define	DD_CONDEMNED		0x01	/* being torn down; refuse new holds */
#define	DD_KSTAT_CHANGING	0x02	/* kstat updates are refused */
#define	DD_IMPLICIT_IPTUN	0x04	/* Implicitly-created ip*.*tun* tunnel */
85 
86 /*
87  * This structure is used to keep the <linkid, macname> mapping.
88  * This structure itself is not protected by the mac perimeter, but is
89  * protected by the dd_mutex and i_dls_devnet_lock. Thus most of the
90  * functions manipulating this structure such as dls_devnet_set/unset etc.
91  * may be called while not holding the mac perimeter.
92  */
93 typedef struct dls_devnet_s {
94 	datalink_id_t	dd_linkid;
95 	char		dd_linkname[MAXLINKNAMELEN];
96 	char		dd_mac[MAXNAMELEN];
97 	kstat_t		*dd_ksp;	/* kstat in owner_zid */
98 	kstat_t		*dd_zone_ksp;	/* in dd_zid if != owner_zid */
99 	uint32_t	dd_ref;
100 	kmutex_t	dd_mutex;
101 	kcondvar_t	dd_cv;
102 	uint32_t	dd_tref;
103 	uint_t		dd_flags;
104 	zoneid_t	dd_owner_zid;	/* zone where node was created */
105 	zoneid_t	dd_zid;		/* current zone */
106 	boolean_t	dd_prop_loaded;
107 	taskqid_t	dd_prop_taskid;
108 } dls_devnet_t;
109 
110 static int i_dls_devnet_create_iptun(const char *, const char *,
111     datalink_id_t *);
112 static int i_dls_devnet_destroy_iptun(datalink_id_t);
113 static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t);
114 static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
115 
116 /*ARGSUSED*/
117 static int
118 i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
119 {
120 	dls_devnet_t	*ddp = buf;
121 
122 	bzero(buf, sizeof (dls_devnet_t));
123 	mutex_init(&ddp->dd_mutex, NULL, MUTEX_DEFAULT, NULL);
124 	cv_init(&ddp->dd_cv, NULL, CV_DEFAULT, NULL);
125 	return (0);
126 }
127 
128 /*ARGSUSED*/
129 static void
130 i_dls_devnet_destructor(void *buf, void *arg)
131 {
132 	dls_devnet_t	*ddp = buf;
133 
134 	ASSERT(ddp->dd_ksp == NULL);
135 	ASSERT(ddp->dd_ref == 0);
136 	ASSERT(ddp->dd_tref == 0);
137 	mutex_destroy(&ddp->dd_mutex);
138 	cv_destroy(&ddp->dd_cv);
139 }
140 
141 /* ARGSUSED */
142 static int
143 dls_zone_remove(datalink_id_t linkid, void *arg)
144 {
145 	dls_devnet_t *ddp;
146 
147 	if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
148 		(void) dls_devnet_setzid(ddp, GLOBAL_ZONEID);
149 		dls_devnet_rele_tmp(ddp);
150 	}
151 	return (0);
152 }
153 
154 /* ARGSUSED */
155 static void *
156 dls_stack_init(netstackid_t stackid, netstack_t *ns)
157 {
158 	dls_stack_t *dlss;
159 
160 	dlss = kmem_zalloc(sizeof (*dlss), KM_SLEEP);
161 	dlss->dlss_zoneid = netstackid_to_zoneid(stackid);
162 	return (dlss);
163 }
164 
165 /* ARGSUSED */
166 static void
167 dls_stack_shutdown(netstackid_t stackid, void *arg)
168 {
169 	dls_stack_t	*dlss = (dls_stack_t *)arg;
170 
171 	/* Move remaining datalinks in this zone back to the global zone. */
172 	(void) zone_datalink_walk(dlss->dlss_zoneid, dls_zone_remove, NULL);
173 }
174 
175 /* ARGSUSED */
176 static void
177 dls_stack_fini(netstackid_t stackid, void *arg)
178 {
179 	dls_stack_t	*dlss = (dls_stack_t *)arg;
180 
181 	kmem_free(dlss, sizeof (*dlss));
182 }
183 
184 /*
185  * Module initialization and finalization functions.
186  */
187 void
188 dls_mgmt_init(void)
189 {
190 	mutex_init(&i_dls_mgmt_lock, NULL, MUTEX_DEFAULT, NULL);
191 	rw_init(&i_dls_devnet_lock, NULL, RW_DEFAULT, NULL);
192 
193 	/*
194 	 * Create a kmem_cache of dls_devnet_t structures.
195 	 */
196 	i_dls_devnet_cachep = kmem_cache_create("dls_devnet_cache",
197 	    sizeof (dls_devnet_t), 0, i_dls_devnet_constructor,
198 	    i_dls_devnet_destructor, NULL, NULL, NULL, 0);
199 	ASSERT(i_dls_devnet_cachep != NULL);
200 
201 	/*
202 	 * Create a hash table, keyed by dd_linkid, of dls_devnet_t.
203 	 */
204 	i_dls_devnet_id_hash = mod_hash_create_idhash("dls_devnet_id_hash",
205 	    VLAN_HASHSZ, mod_hash_null_valdtor);
206 
207 	/*
208 	 * Create a hash table, keyed by dd_mac
209 	 */
210 	i_dls_devnet_hash = mod_hash_create_extended("dls_devnet_hash",
211 	    VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
212 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
213 
214 	devnet_need_rebuild = B_FALSE;
215 
216 	netstack_register(NS_DLS, dls_stack_init, dls_stack_shutdown,
217 	    dls_stack_fini);
218 }
219 
220 void
221 dls_mgmt_fini(void)
222 {
223 	netstack_unregister(NS_DLS);
224 	mod_hash_destroy_hash(i_dls_devnet_hash);
225 	mod_hash_destroy_hash(i_dls_devnet_id_hash);
226 	kmem_cache_destroy(i_dls_devnet_cachep);
227 	rw_destroy(&i_dls_devnet_lock);
228 	mutex_destroy(&i_dls_mgmt_lock);
229 }
230 
231 int
232 dls_mgmt_door_set(boolean_t start)
233 {
234 	int	err;
235 
236 	/* handle daemon restart */
237 	mutex_enter(&i_dls_mgmt_lock);
238 	if (dls_mgmt_dh != NULL) {
239 		door_ki_rele(dls_mgmt_dh);
240 		dls_mgmt_dh = NULL;
241 	}
242 
243 	if (start && ((err = door_ki_open(DLMGMT_DOOR, &dls_mgmt_dh)) != 0)) {
244 		mutex_exit(&i_dls_mgmt_lock);
245 		return (err);
246 	}
247 
248 	mutex_exit(&i_dls_mgmt_lock);
249 
250 	/*
251 	 * Create and associate <link name, linkid> mapping for network devices
252 	 * which are already attached before the daemon is started.
253 	 */
254 	if (start)
255 		softmac_recreate();
256 	return (0);
257 }
258 
259 static boolean_t
260 i_dls_mgmt_door_revoked(door_handle_t dh)
261 {
262 	struct door_info info;
263 	extern int sys_shutdown;
264 
265 	ASSERT(dh != NULL);
266 
267 	if (sys_shutdown) {
268 		cmn_err(CE_NOTE, "dls_mgmt_door: shutdown observed\n");
269 		return (B_TRUE);
270 	}
271 
272 	if (door_ki_info(dh, &info) != 0)
273 		return (B_TRUE);
274 
275 	return ((info.di_attributes & DOOR_REVOKED) != 0);
276 }
277 
278 /*
279  * Upcall to the datalink management daemon (dlmgmtd).
280  */
281 static int
282 i_dls_mgmt_upcall(void *arg, size_t asize, void *rbuf, size_t rsize)
283 {
284 	door_arg_t			darg, save_arg;
285 	door_handle_t			dh;
286 	int				err;
287 	int				retry = 0;
288 
289 #define	MAXRETRYNUM	3
290 
291 	ASSERT(arg);
292 	darg.data_ptr = arg;
293 	darg.data_size = asize;
294 	darg.desc_ptr = NULL;
295 	darg.desc_num = 0;
296 	darg.rbuf = rbuf;
297 	darg.rsize = rsize;
298 	save_arg = darg;
299 
300 retry:
301 	mutex_enter(&i_dls_mgmt_lock);
302 	dh = dls_mgmt_dh;
303 	if ((dh == NULL) || i_dls_mgmt_door_revoked(dh)) {
304 		mutex_exit(&i_dls_mgmt_lock);
305 		return (EBADF);
306 	}
307 	door_ki_hold(dh);
308 	mutex_exit(&i_dls_mgmt_lock);
309 
310 	for (;;) {
311 		retry++;
312 		if ((err = door_ki_upcall_limited(dh, &darg, zone_kcred(),
313 		    SIZE_MAX, 0)) == 0)
314 			break;
315 
316 		/*
317 		 * handle door call errors
318 		 */
319 		darg = save_arg;
320 		switch (err) {
321 		case EINTR:
322 			/*
323 			 * If the operation which caused this door upcall gets
324 			 * interrupted, return directly.
325 			 */
326 			goto done;
327 		case EAGAIN:
328 			/*
329 			 * Repeat upcall if the maximum attempt limit has not
330 			 * been reached.
331 			 */
332 			if (retry < MAXRETRYNUM) {
333 				delay(2 * hz);
334 				break;
335 			}
336 			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
337 			goto done;
338 		default:
339 			/* A fatal door error */
340 			if (i_dls_mgmt_door_revoked(dh)) {
341 				cmn_err(CE_NOTE,
342 				    "dls: dlmgmtd door service revoked\n");
343 
344 				if (retry < MAXRETRYNUM) {
345 					door_ki_rele(dh);
346 					goto retry;
347 				}
348 			}
349 			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
350 			goto done;
351 		}
352 	}
353 
354 	if (darg.rbuf != rbuf) {
355 		/*
356 		 * The size of the input rbuf was not big enough, so the
357 		 * upcall allocated the rbuf itself.  If this happens, assume
358 		 * that this was an invalid door call request.
359 		 */
360 		kmem_free(darg.rbuf, darg.rsize);
361 		err = ENOSPC;
362 		goto done;
363 	}
364 
365 	if (darg.rsize != rsize) {
366 		err = EINVAL;
367 		goto done;
368 	}
369 
370 	err = ((dlmgmt_retval_t *)rbuf)->lr_err;
371 
372 done:
373 	door_ki_rele(dh);
374 	return (err);
375 }
376 
377 /*
378  * Request the datalink management daemon to create a link with the attributes
379  * below.  Upon success, zero is returned and linkidp contains the linkid for
380  * the new link; otherwise, an errno is returned.
381  *
382  *     - dev		physical dev_t.  required for all physical links,
383  *		        including GLDv3 links.  It will be used to force the
384  *		        attachment of a physical device, hence the
385  *		        registration of its mac
386  *     - class		datalink class
387  *     - media type	media type; DL_OTHER means unknown
388  *     - persist	whether to persist the datalink
389  */
390 int
391 dls_mgmt_create(const char *devname, dev_t dev, datalink_class_t class,
392     uint32_t media, boolean_t persist, datalink_id_t *linkidp)
393 {
394 	dlmgmt_upcall_arg_create_t	create;
395 	dlmgmt_create_retval_t		retval;
396 	int				err;
397 
398 	create.ld_cmd = DLMGMT_CMD_DLS_CREATE;
399 	create.ld_class = class;
400 	create.ld_media = media;
401 	create.ld_phymaj = getmajor(dev);
402 	create.ld_phyinst = getminor(dev);
403 	create.ld_persist = persist;
404 	if (strlcpy(create.ld_devname, devname, sizeof (create.ld_devname)) >=
405 	    sizeof (create.ld_devname))
406 		return (EINVAL);
407 
408 	if ((err = i_dls_mgmt_upcall(&create, sizeof (create), &retval,
409 	    sizeof (retval))) == 0) {
410 		*linkidp = retval.lr_linkid;
411 	}
412 	return (err);
413 }
414 
415 /*
416  * Request the datalink management daemon to destroy the specified link.
417  * Returns zero upon success, or an errno upon failure.
418  */
419 int
420 dls_mgmt_destroy(datalink_id_t linkid, boolean_t persist)
421 {
422 	dlmgmt_upcall_arg_destroy_t	destroy;
423 	dlmgmt_destroy_retval_t		retval;
424 
425 	destroy.ld_cmd = DLMGMT_CMD_DLS_DESTROY;
426 	destroy.ld_linkid = linkid;
427 	destroy.ld_persist = persist;
428 
429 	return (i_dls_mgmt_upcall(&destroy, sizeof (destroy),
430 	    &retval, sizeof (retval)));
431 }
432 
433 /*
434  * Request the datalink management daemon to verify/update the information
435  * for a physical link.  Upon success, get its linkid.
436  *
437  *     - media type	media type
438  *     - novanity	whether this physical datalink supports vanity naming.
439  *			physical links that do not use the GLDv3 MAC plugin
440  *			cannot suport vanity naming
441  *
442  * This function could fail with ENOENT or EEXIST.  Two cases return EEXIST:
443  *
444  * 1. A link with devname already exists, but the media type does not match.
445  *    In this case, mediap will bee set to the media type of the existing link.
446  * 2. A link with devname already exists, but its link name does not match
447  *    the device name, although this link does not support vanity naming.
448  */
449 int
450 dls_mgmt_update(const char *devname, uint32_t media, boolean_t novanity,
451     uint32_t *mediap, datalink_id_t *linkidp)
452 {
453 	dlmgmt_upcall_arg_update_t	update;
454 	dlmgmt_update_retval_t		retval;
455 	int				err;
456 
457 	update.ld_cmd = DLMGMT_CMD_DLS_UPDATE;
458 
459 	if (strlcpy(update.ld_devname, devname, sizeof (update.ld_devname)) >=
460 	    sizeof (update.ld_devname))
461 		return (EINVAL);
462 
463 	update.ld_media = media;
464 	update.ld_novanity = novanity;
465 
466 	if ((err = i_dls_mgmt_upcall(&update, sizeof (update), &retval,
467 	    sizeof (retval))) == EEXIST) {
468 		*linkidp = retval.lr_linkid;
469 		*mediap = retval.lr_media;
470 	} else if (err == 0) {
471 		*linkidp = retval.lr_linkid;
472 	}
473 
474 	return (err);
475 }
476 
477 /*
478  * Request the datalink management daemon to get the information for a link.
479  * Returns zero upon success, or an errno upon failure.
480  *
481  * Only fills in information for argument pointers that are non-NULL.
482  * Note that the link argument is expected to be MAXLINKNAMELEN bytes.
483  */
484 int
485 dls_mgmt_get_linkinfo(datalink_id_t linkid, char *link,
486     datalink_class_t *classp, uint32_t *mediap, uint32_t *flagsp)
487 {
488 	dlmgmt_door_getname_t	getname;
489 	dlmgmt_getname_retval_t	retval;
490 	int			err, len;
491 
492 	getname.ld_cmd = DLMGMT_CMD_GETNAME;
493 	getname.ld_linkid = linkid;
494 
495 	if ((err = i_dls_mgmt_upcall(&getname, sizeof (getname), &retval,
496 	    sizeof (retval))) != 0) {
497 		return (err);
498 	}
499 
500 	len = strlen(retval.lr_link);
501 	if (len <= 1 || len >= MAXLINKNAMELEN)
502 		return (EINVAL);
503 
504 	if (link != NULL)
505 		(void) strlcpy(link, retval.lr_link, MAXLINKNAMELEN);
506 	if (classp != NULL)
507 		*classp = retval.lr_class;
508 	if (mediap != NULL)
509 		*mediap = retval.lr_media;
510 	if (flagsp != NULL)
511 		*flagsp = retval.lr_flags;
512 	return (0);
513 }
514 
515 /*
516  * Request the datalink management daemon to get the linkid for a link.
517  * Returns a non-zero error code on failure.  The linkid argument is only
518  * set on success (when zero is returned.)
519  */
520 int
521 dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
522 {
523 	dlmgmt_door_getlinkid_t		getlinkid;
524 	dlmgmt_getlinkid_retval_t	retval;
525 	int				err;
526 
527 	getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
528 	(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
529 
530 	if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
531 	    sizeof (retval))) == 0) {
532 		*linkid = retval.lr_linkid;
533 	}
534 	return (err);
535 }
536 
537 datalink_id_t
538 dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class,
539     datalink_media_t dmedia, uint32_t flags)
540 {
541 	dlmgmt_door_getnext_t	getnext;
542 	dlmgmt_getnext_retval_t	retval;
543 
544 	getnext.ld_cmd = DLMGMT_CMD_GETNEXT;
545 	getnext.ld_class = class;
546 	getnext.ld_dmedia = dmedia;
547 	getnext.ld_flags = flags;
548 	getnext.ld_linkid = linkid;
549 
550 	if (i_dls_mgmt_upcall(&getnext, sizeof (getnext), &retval,
551 	    sizeof (retval)) != 0) {
552 		return (DATALINK_INVALID_LINKID);
553 	}
554 
555 	return (retval.lr_linkid);
556 }
557 
558 static int
559 i_dls_mgmt_get_linkattr(const datalink_id_t linkid, const char *attr,
560     void *attrval, size_t *attrszp)
561 {
562 	dlmgmt_upcall_arg_getattr_t	getattr;
563 	dlmgmt_getattr_retval_t		retval;
564 	int				err;
565 
566 	getattr.ld_cmd = DLMGMT_CMD_DLS_GETATTR;
567 	getattr.ld_linkid = linkid;
568 	(void) strlcpy(getattr.ld_attr, attr, MAXLINKATTRLEN);
569 
570 	if ((err = i_dls_mgmt_upcall(&getattr, sizeof (getattr), &retval,
571 	    sizeof (retval))) == 0) {
572 		if (*attrszp < retval.lr_attrsz)
573 			return (EINVAL);
574 		*attrszp = retval.lr_attrsz;
575 		bcopy(retval.lr_attrval, attrval, retval.lr_attrsz);
576 	}
577 
578 	return (err);
579 }
580 
581 /*
582  * Note that this function can only get devp successfully for non-VLAN link.
583  */
584 int
585 dls_mgmt_get_phydev(datalink_id_t linkid, dev_t *devp)
586 {
587 	uint64_t	maj, inst;
588 	size_t		attrsz = sizeof (uint64_t);
589 
590 	if (i_dls_mgmt_get_linkattr(linkid, FPHYMAJ, &maj, &attrsz) != 0 ||
591 	    attrsz != sizeof (uint64_t) ||
592 	    i_dls_mgmt_get_linkattr(linkid, FPHYINST, &inst, &attrsz) != 0 ||
593 	    attrsz != sizeof (uint64_t)) {
594 		return (EINVAL);
595 	}
596 
597 	*devp = makedevice((major_t)maj, (minor_t)inst);
598 	return (0);
599 }
600 
601 /*
602  * Request the datalink management daemon to push in
603  * all properties associated with the link.
604  * Returns a non-zero error code on failure.
605  */
606 int
607 dls_mgmt_linkprop_init(datalink_id_t linkid)
608 {
609 	dlmgmt_door_linkprop_init_t	li;
610 	dlmgmt_linkprop_init_retval_t	retval;
611 	int				err;
612 
613 	li.ld_cmd = DLMGMT_CMD_LINKPROP_INIT;
614 	li.ld_linkid = linkid;
615 
616 	err = i_dls_mgmt_upcall(&li, sizeof (li), &retval, sizeof (retval));
617 	return (err);
618 }
619 
620 static void
621 dls_devnet_prop_task(void *arg)
622 {
623 	dls_devnet_t		*ddp = arg;
624 
625 	(void) dls_mgmt_linkprop_init(ddp->dd_linkid);
626 
627 	mutex_enter(&ddp->dd_mutex);
628 	ddp->dd_prop_loaded = B_TRUE;
629 	ddp->dd_prop_taskid = NULL;
630 	cv_broadcast(&ddp->dd_cv);
631 	mutex_exit(&ddp->dd_mutex);
632 }
633 
634 /*
635  * Ensure property loading task is completed.
636  */
637 void
638 dls_devnet_prop_task_wait(dls_dl_handle_t ddp)
639 {
640 	mutex_enter(&ddp->dd_mutex);
641 	while (ddp->dd_prop_taskid != NULL)
642 		cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
643 	mutex_exit(&ddp->dd_mutex);
644 }
645 
646 void
647 dls_devnet_rele_tmp(dls_dl_handle_t dlh)
648 {
649 	dls_devnet_t		*ddp = dlh;
650 
651 	mutex_enter(&ddp->dd_mutex);
652 	ASSERT(ddp->dd_tref != 0);
653 	if (--ddp->dd_tref == 0)
654 		cv_signal(&ddp->dd_cv);
655 	mutex_exit(&ddp->dd_mutex);
656 }
657 
658 int
659 dls_devnet_hold_link(datalink_id_t linkid, dls_dl_handle_t *ddhp,
660     dls_link_t **dlpp)
661 {
662 	dls_dl_handle_t	dlh;
663 	dls_link_t	*dlp;
664 	int		err;
665 
666 	if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
667 		return (err);
668 
669 	if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) {
670 		dls_devnet_rele_tmp(dlh);
671 		return (err);
672 	}
673 
674 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
675 
676 	*ddhp = dlh;
677 	*dlpp = dlp;
678 	return (0);
679 }
680 
681 void
682 dls_devnet_rele_link(dls_dl_handle_t dlh, dls_link_t *dlp)
683 {
684 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
685 
686 	dls_link_rele(dlp);
687 	dls_devnet_rele_tmp(dlh);
688 }
689 
690 /*
691  * "link" kstats related functions.
692  */
693 
694 /*
695  * Query the "link" kstats.
696  *
697  * We may be called from the kstat subsystem in an arbitrary context.
698  * If the caller is the stack, the context could be an upcall data
699  * thread. Hence we can't acquire the mac perimeter in this function
700  * for fear of deadlock.
701  */
702 static int
703 dls_devnet_stat_update(kstat_t *ksp, int rw)
704 {
705 	dls_devnet_t	*ddp = ksp->ks_private;
706 	dls_link_t	*dlp;
707 	int		err;
708 
709 	/*
710 	 * Check the link is being renamed or if the link is going away
711 	 * before incrementing dd_tref which in turn prevents the link
712 	 * from being renamed or deleted until we finish.
713 	 */
714 	mutex_enter(&ddp->dd_mutex);
715 	if (ddp->dd_flags & (DD_CONDEMNED | DD_KSTAT_CHANGING)) {
716 		mutex_exit(&ddp->dd_mutex);
717 		return (ENOENT);
718 	}
719 	ddp->dd_tref++;
720 	mutex_exit(&ddp->dd_mutex);
721 
722 	/*
723 	 * If a device detach happens at this time, it will block in
724 	 * dls_devnet_unset since the dd_tref has been bumped up above. So the
725 	 * access to 'dlp' is safe even though we don't hold the mac perimeter.
726 	 */
727 	if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)ddp->dd_mac,
728 	    (mod_hash_val_t *)&dlp) != 0) {
729 		dls_devnet_rele_tmp(ddp);
730 		return (ENOENT);
731 	}
732 
733 	err = dls_stat_update(ksp, dlp, rw);
734 
735 	dls_devnet_rele_tmp(ddp);
736 	return (err);
737 }
738 
739 /*
740  * Create the "link" kstats.
741  */
742 static void
743 dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid)
744 {
745 	kstat_t	*ksp;
746 
747 	if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid,
748 	    dls_devnet_stat_update, ddp, &ksp) == 0) {
749 		ASSERT(ksp != NULL);
750 		if (zoneid == ddp->dd_owner_zid) {
751 			ASSERT(ddp->dd_ksp == NULL);
752 			ddp->dd_ksp = ksp;
753 		} else {
754 			ASSERT(ddp->dd_zone_ksp == NULL);
755 			ddp->dd_zone_ksp = ksp;
756 		}
757 	}
758 }
759 
760 /*
761  * Destroy the "link" kstats.
762  */
763 static void
764 dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
765 {
766 	if (zoneid == ddp->dd_owner_zid) {
767 		if (ddp->dd_ksp != NULL) {
768 			kstat_delete(ddp->dd_ksp);
769 			ddp->dd_ksp = NULL;
770 		}
771 	} else {
772 		if (ddp->dd_zone_ksp != NULL) {
773 			kstat_delete(ddp->dd_zone_ksp);
774 			ddp->dd_zone_ksp = NULL;
775 		}
776 	}
777 }
778 
779 /*
780  * The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
781  * and create the new set using the new name.
782  */
783 static void
784 dls_devnet_stat_rename(dls_devnet_t *ddp)
785 {
786 	if (ddp->dd_ksp != NULL) {
787 		kstat_delete(ddp->dd_ksp);
788 		ddp->dd_ksp = NULL;
789 	}
790 	/* We can't rename a link while it's assigned to a non-global zone. */
791 	ASSERT(ddp->dd_zone_ksp == NULL);
792 	dls_devnet_stat_create(ddp, ddp->dd_owner_zid);
793 }
794 
795 /*
796  * Associate a linkid with a given link (identified by macname)
797  */
798 static int
799 dls_devnet_set(const char *macname, datalink_id_t linkid, zoneid_t zoneid,
800     dls_devnet_t **ddpp)
801 {
802 	dls_devnet_t		*ddp = NULL;
803 	datalink_class_t	class;
804 	int			err;
805 	boolean_t		stat_create = B_FALSE;
806 	char			linkname[MAXLINKNAMELEN];
807 
808 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
809 
810 	/*
811 	 * Don't allow callers to set a link name with a linkid that already
812 	 * has a name association (that's what rename is for).
813 	 */
814 	if (linkid != DATALINK_INVALID_LINKID) {
815 		if (mod_hash_find(i_dls_devnet_id_hash,
816 		    (mod_hash_key_t)(uintptr_t)linkid,
817 		    (mod_hash_val_t *)&ddp) == 0) {
818 			err = EEXIST;
819 			goto done;
820 		}
821 		if ((err = dls_mgmt_get_linkinfo(linkid, linkname, &class,
822 		    NULL, NULL)) != 0)
823 			goto done;
824 	}
825 
826 	if ((err = mod_hash_find(i_dls_devnet_hash,
827 	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) == 0) {
828 		if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
829 			err = EEXIST;
830 			goto done;
831 		}
832 
833 		/*
834 		 * This might be a physical link that has already
835 		 * been created, but which does not have a linkid
836 		 * because dlmgmtd was not running when it was created.
837 		 */
838 		if (linkid == DATALINK_INVALID_LINKID ||
839 		    class != DATALINK_CLASS_PHYS) {
840 			err = EINVAL;
841 			goto done;
842 		}
843 	} else {
844 		ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP);
845 		ddp->dd_tref = 0;
846 		ddp->dd_ref++;
847 		ddp->dd_owner_zid = zoneid;
848 		(void) strlcpy(ddp->dd_mac, macname, sizeof (ddp->dd_mac));
849 		VERIFY(mod_hash_insert(i_dls_devnet_hash,
850 		    (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0);
851 	}
852 
853 	if (linkid != DATALINK_INVALID_LINKID) {
854 		ddp->dd_linkid = linkid;
855 		(void) strlcpy(ddp->dd_linkname, linkname,
856 		    sizeof (ddp->dd_linkname));
857 		VERIFY(mod_hash_insert(i_dls_devnet_id_hash,
858 		    (mod_hash_key_t)(uintptr_t)linkid,
859 		    (mod_hash_val_t)ddp) == 0);
860 		devnet_need_rebuild = B_TRUE;
861 		stat_create = B_TRUE;
862 		mutex_enter(&ddp->dd_mutex);
863 		if (!ddp->dd_prop_loaded && (ddp->dd_prop_taskid == NULL)) {
864 			ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
865 			    dls_devnet_prop_task, ddp, TQ_SLEEP);
866 		}
867 		mutex_exit(&ddp->dd_mutex);
868 	}
869 	err = 0;
870 done:
871 	/*
872 	 * It is safe to drop the i_dls_devnet_lock at this point. In the case
873 	 * of physical devices, the softmac framework will fail the device
874 	 * detach based on the smac_state or smac_hold_cnt. Other cases like
875 	 * vnic and aggr use their own scheme to serialize creates and deletes
876 	 * and ensure that *ddp is valid.
877 	 */
878 	rw_exit(&i_dls_devnet_lock);
879 	if (err == 0) {
880 		if (zoneid != GLOBAL_ZONEID &&
881 		    (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE)) != 0)
882 			(void) dls_devnet_unset(macname, &linkid, B_TRUE);
883 		/*
884 		 * The kstat subsystem holds its own locks (rather perimeter)
885 		 * before calling the ks_update (dls_devnet_stat_update) entry
886 		 * point which in turn grabs the i_dls_devnet_lock. So the
887 		 * lock hierarchy is kstat locks -> i_dls_devnet_lock.
888 		 */
889 		if (stat_create)
890 			dls_devnet_stat_create(ddp, zoneid);
891 		if (ddpp != NULL)
892 			*ddpp = ddp;
893 	}
894 	return (err);
895 }
896 
897 /*
898  * Disassociate a linkid with a given link (identified by macname)
899  * This waits until temporary references to the dls_devnet_t are gone.
900  */
901 static int
902 dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait)
903 {
904 	dls_devnet_t	*ddp;
905 	int		err;
906 	mod_hash_val_t	val;
907 
908 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
909 	if ((err = mod_hash_find(i_dls_devnet_hash,
910 	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) != 0) {
911 		ASSERT(err == MH_ERR_NOTFOUND);
912 		rw_exit(&i_dls_devnet_lock);
913 		return (ENOENT);
914 	}
915 
916 	mutex_enter(&ddp->dd_mutex);
917 
918 	/*
919 	 * Make sure downcalls into softmac_create or softmac_destroy from
920 	 * devfs don't cv_wait on any devfs related condition for fear of
921 	 * deadlock. Return EBUSY if the asynchronous thread started for
922 	 * property loading as part of the post attach hasn't yet completed.
923 	 */
924 	ASSERT(ddp->dd_ref != 0);
925 	if ((ddp->dd_ref != 1) || (!wait &&
926 	    (ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) {
927 		mutex_exit(&ddp->dd_mutex);
928 		rw_exit(&i_dls_devnet_lock);
929 		return (EBUSY);
930 	}
931 
932 	ddp->dd_flags |= DD_CONDEMNED;
933 	ddp->dd_ref--;
934 	*id = ddp->dd_linkid;
935 
936 	if (ddp->dd_zid != GLOBAL_ZONEID)
937 		(void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
938 
939 	/*
940 	 * Remove this dls_devnet_t from the hash table.
941 	 */
942 	VERIFY(mod_hash_remove(i_dls_devnet_hash,
943 	    (mod_hash_key_t)ddp->dd_mac, &val) == 0);
944 
945 	if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
946 		VERIFY(mod_hash_remove(i_dls_devnet_id_hash,
947 		    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, &val) == 0);
948 
949 		devnet_need_rebuild = B_TRUE;
950 	}
951 	rw_exit(&i_dls_devnet_lock);
952 
953 	if (wait) {
954 		/*
955 		 * Wait until all temporary references are released.
956 		 */
957 		while ((ddp->dd_tref != 0) || (ddp->dd_prop_taskid != NULL))
958 			cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
959 	} else {
960 		ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL);
961 	}
962 
963 	if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
964 		dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
965 
966 	ddp->dd_prop_loaded = B_FALSE;
967 	ddp->dd_linkid = DATALINK_INVALID_LINKID;
968 	ddp->dd_flags = 0;
969 	mutex_exit(&ddp->dd_mutex);
970 	kmem_cache_free(i_dls_devnet_cachep, ddp);
971 
972 	return (0);
973 }
974 
975 static int
976 dls_devnet_hold_common(datalink_id_t linkid, dls_devnet_t **ddpp,
977     boolean_t tmp_hold)
978 {
979 	dls_devnet_t		*ddp;
980 	dev_t			phydev = 0;
981 	dls_dev_handle_t	ddh = NULL;
982 	int			err;
983 
984 	/*
985 	 * Hold this link to prevent it being detached in case of a
986 	 * physical link.
987 	 */
988 	if (dls_mgmt_get_phydev(linkid, &phydev) == 0)
989 		(void) softmac_hold_device(phydev, &ddh);
990 
991 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
992 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
993 	    (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&ddp)) != 0) {
994 		ASSERT(err == MH_ERR_NOTFOUND);
995 		rw_exit(&i_dls_devnet_lock);
996 		softmac_rele_device(ddh);
997 		return (ENOENT);
998 	}
999 
1000 	mutex_enter(&ddp->dd_mutex);
1001 	ASSERT(ddp->dd_ref > 0);
1002 	if (ddp->dd_flags & DD_CONDEMNED) {
1003 		mutex_exit(&ddp->dd_mutex);
1004 		rw_exit(&i_dls_devnet_lock);
1005 		softmac_rele_device(ddh);
1006 		return (ENOENT);
1007 	}
1008 	if (tmp_hold)
1009 		ddp->dd_tref++;
1010 	else
1011 		ddp->dd_ref++;
1012 	mutex_exit(&ddp->dd_mutex);
1013 	rw_exit(&i_dls_devnet_lock);
1014 
1015 	softmac_rele_device(ddh);
1016 
1017 	*ddpp = ddp;
1018 	return (0);
1019 }
1020 
1021 int
1022 dls_devnet_hold(datalink_id_t linkid, dls_devnet_t **ddpp)
1023 {
1024 	return (dls_devnet_hold_common(linkid, ddpp, B_FALSE));
1025 }
1026 
1027 /*
1028  * Hold the vanity naming structure (dls_devnet_t) temporarily.  The request to
1029  * delete the dls_devnet_t will wait until the temporary reference is released.
1030  */
1031 int
1032 dls_devnet_hold_tmp(datalink_id_t linkid, dls_devnet_t **ddpp)
1033 {
1034 	return (dls_devnet_hold_common(linkid, ddpp, B_TRUE));
1035 }
1036 
1037 /*
1038  * This funtion is called when a DLS client tries to open a device node.
1039  * This dev_t could a result of a /dev/net node access (returned by
1040  * devnet_create_rvp->dls_devnet_open()) or a direct /dev node access.
1041  * In both cases, this function bumps up the reference count of the
1042  * dls_devnet_t structure. The reference is held as long as the device node
1043  * is open. In the case of /dev/net while it is true that the initial reference
1044  * is held when the devnet_create_rvp->dls_devnet_open call happens, this
1045  * initial reference is released immediately in devnet_inactive_callback ->
1046  * dls_devnet_close(). (Note that devnet_inactive_callback() is called right
1047  * after dld_open completes, not when the /dev/net node is being closed).
1048  * To undo this function, call dls_devnet_rele()
1049  */
1050 int
1051 dls_devnet_hold_by_dev(dev_t dev, dls_dl_handle_t *ddhp)
1052 {
1053 	char			name[MAXNAMELEN];
1054 	char			*drv;
1055 	dls_dev_handle_t	ddh = NULL;
1056 	dls_devnet_t		*ddp;
1057 	int			err;
1058 
1059 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1060 		return (EINVAL);
1061 
1062 	(void) snprintf(name, sizeof (name), "%s%d", drv,
1063 	    DLS_MINOR2INST(getminor(dev)));
1064 
1065 	/*
1066 	 * Hold this link to prevent it being detached in case of a
1067 	 * GLDv3 physical link.
1068 	 */
1069 	if (DLS_MINOR2INST(getminor(dev)) <= DLS_MAX_PPA)
1070 		(void) softmac_hold_device(dev, &ddh);
1071 
1072 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
1073 	if ((err = mod_hash_find(i_dls_devnet_hash,
1074 	    (mod_hash_key_t)name, (mod_hash_val_t *)&ddp)) != 0) {
1075 		ASSERT(err == MH_ERR_NOTFOUND);
1076 		rw_exit(&i_dls_devnet_lock);
1077 		softmac_rele_device(ddh);
1078 		return (ENOENT);
1079 	}
1080 	mutex_enter(&ddp->dd_mutex);
1081 	ASSERT(ddp->dd_ref > 0);
1082 	if (ddp->dd_flags & DD_CONDEMNED) {
1083 		mutex_exit(&ddp->dd_mutex);
1084 		rw_exit(&i_dls_devnet_lock);
1085 		softmac_rele_device(ddh);
1086 		return (ENOENT);
1087 	}
1088 	ddp->dd_ref++;
1089 	mutex_exit(&ddp->dd_mutex);
1090 	rw_exit(&i_dls_devnet_lock);
1091 
1092 	softmac_rele_device(ddh);
1093 
1094 	*ddhp = ddp;
1095 	return (0);
1096 }
1097 
1098 void
1099 dls_devnet_rele(dls_devnet_t *ddp)
1100 {
1101 	mutex_enter(&ddp->dd_mutex);
1102 	ASSERT(ddp->dd_ref > 1);
1103 	ddp->dd_ref--;
1104 	if ((ddp->dd_flags & DD_IMPLICIT_IPTUN) && ddp->dd_ref == 1) {
1105 		mutex_exit(&ddp->dd_mutex);
1106 		if (i_dls_devnet_destroy_iptun(ddp->dd_linkid) != 0)
1107 			ddp->dd_flags |= DD_IMPLICIT_IPTUN;
1108 		return;
1109 	}
1110 	mutex_exit(&ddp->dd_mutex);
1111 }
1112 
1113 static int
1114 dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp)
1115 {
1116 	char			drv[MAXLINKNAMELEN];
1117 	uint_t			ppa;
1118 	major_t			major;
1119 	dev_t			phy_dev, tmp_dev;
1120 	datalink_id_t		linkid;
1121 	dls_dev_handle_t	ddh;
1122 	int			err;
1123 
1124 	if ((err = dls_mgmt_get_linkid(link, &linkid)) == 0)
1125 		return (dls_devnet_hold(linkid, ddpp));
1126 
1127 	/*
1128 	 * If we failed to get the link's linkid because the dlmgmtd daemon
1129 	 * has not been started, return ENOENT so that the application can
1130 	 * fallback to open the /dev node.
1131 	 */
1132 	if (err == EBADF)
1133 		return (ENOENT);
1134 
1135 	if (err != ENOENT)
1136 		return (err);
1137 
1138 	if (ddi_parse(link, drv, &ppa) != DDI_SUCCESS)
1139 		return (ENOENT);
1140 
1141 	if (IS_IPTUN_LINK(drv)) {
1142 		if ((err = i_dls_devnet_create_iptun(link, drv, &linkid)) != 0)
1143 			return (err);
1144 		/*
1145 		 * At this point, an IP tunnel MAC has registered, which
1146 		 * resulted in a link being created.
1147 		 */
1148 		err = dls_devnet_hold(linkid, ddpp);
1149 		ASSERT(err == 0);
1150 		if (err != 0) {
1151 			VERIFY(i_dls_devnet_destroy_iptun(linkid) == 0);
1152 			return (err);
1153 		}
1154 		/*
1155 		 * dls_devnet_rele() will know to destroy the implicit IP
1156 		 * tunnel on last reference release if DD_IMPLICIT_IPTUN is
1157 		 * set.
1158 		 */
1159 		(*ddpp)->dd_flags |= DD_IMPLICIT_IPTUN;
1160 		return (0);
1161 	}
1162 
1163 	/*
1164 	 * If this link:
1165 	 * (a) is a physical device, (b) this is the first boot, (c) the MAC
1166 	 * is not registered yet, and (d) we cannot find its linkid, then the
1167 	 * linkname is the same as the devname.
1168 	 *
1169 	 * First filter out invalid names.
1170 	 */
1171 	if ((major = ddi_name_to_major(drv)) == (major_t)-1)
1172 		return (ENOENT);
1173 
1174 	phy_dev = makedevice(major, DLS_PPA2MINOR(ppa));
1175 	if (softmac_hold_device(phy_dev, &ddh) != 0)
1176 		return (ENOENT);
1177 
1178 	/*
1179 	 * At this time, the MAC should be registered, check its phy_dev using
1180 	 * the given name.
1181 	 */
1182 	if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0 ||
1183 	    (err = dls_mgmt_get_phydev(linkid, &tmp_dev)) != 0) {
1184 		softmac_rele_device(ddh);
1185 		return (err);
1186 	}
1187 	if (tmp_dev != phy_dev) {
1188 		softmac_rele_device(ddh);
1189 		return (ENOENT);
1190 	}
1191 
1192 	err = dls_devnet_hold(linkid, ddpp);
1193 	softmac_rele_device(ddh);
1194 	return (err);
1195 }
1196 
1197 int
1198 dls_devnet_macname2linkid(const char *macname, datalink_id_t *linkidp)
1199 {
1200 	dls_devnet_t	*ddp;
1201 
1202 	rw_enter(&i_dls_devnet_lock, RW_READER);
1203 	if (mod_hash_find(i_dls_devnet_hash, (mod_hash_key_t)macname,
1204 	    (mod_hash_val_t *)&ddp) != 0) {
1205 		rw_exit(&i_dls_devnet_lock);
1206 		return (ENOENT);
1207 	}
1208 
1209 	*linkidp = ddp->dd_linkid;
1210 	rw_exit(&i_dls_devnet_lock);
1211 	return (0);
1212 }
1213 
1214 /*
1215  * Get linkid for the given dev.
1216  */
1217 int
1218 dls_devnet_dev2linkid(dev_t dev, datalink_id_t *linkidp)
1219 {
1220 	char	macname[MAXNAMELEN];
1221 	char	*drv;
1222 
1223 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1224 		return (EINVAL);
1225 
1226 	(void) snprintf(macname, sizeof (macname), "%s%d", drv,
1227 	    DLS_MINOR2INST(getminor(dev)));
1228 	return (dls_devnet_macname2linkid(macname, linkidp));
1229 }
1230 
1231 /*
1232  * Get the link's physical dev_t. It this is a VLAN, get the dev_t of the
1233  * link this VLAN is created on.
1234  */
1235 int
1236 dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp)
1237 {
1238 	dls_devnet_t	*ddp;
1239 	int		err;
1240 
1241 	if ((err = dls_devnet_hold_tmp(vlanid, &ddp)) != 0)
1242 		return (err);
1243 
1244 	err = dls_mgmt_get_phydev(ddp->dd_linkid, devp);
1245 	dls_devnet_rele_tmp(ddp);
1246 	return (err);
1247 }
1248 
1249 /*
1250  * Handle the renaming requests.  There are two rename cases:
1251  *
1252  * 1. Request to rename a valid link (id1) to an non-existent link name
1253  *    (id2). In this case id2 is DATALINK_INVALID_LINKID.  Just check whether
1254  *    id1 is held by any applications.
1255  *
1256  *    In this case, the link's kstats need to be updated using the given name.
1257  *
1258  * 2. Request to rename a valid link (id1) to the name of a REMOVED
1259  *    physical link (id2). In this case, check that id1 and its associated
1260  *    mac is not held by any application, and update the link's linkid to id2.
1261  *
1262  *    This case does not change the <link name, linkid> mapping, so the link's
1263  *    kstats need to be updated with using name associated the given id2.
1264  */
1265 int
1266 dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
1267 {
1268 	dls_dev_handle_t	ddh = NULL;
1269 	int			err = 0;
1270 	dev_t			phydev = 0;
1271 	dls_devnet_t		*ddp;
1272 	mac_perim_handle_t	mph = NULL;
1273 	mac_handle_t		mh;
1274 	mod_hash_val_t		val;
1275 	boolean_t		clear_dd_flag = B_FALSE;
1276 
1277 	/*
1278 	 * In the second case, id2 must be a REMOVED physical link.
1279 	 */
1280 	if ((id2 != DATALINK_INVALID_LINKID) &&
1281 	    (dls_mgmt_get_phydev(id2, &phydev) == 0) &&
1282 	    softmac_hold_device(phydev, &ddh) == 0) {
1283 		softmac_rele_device(ddh);
1284 		return (EEXIST);
1285 	}
1286 
1287 	/*
1288 	 * Hold id1 to prevent it from being detached (if a physical link).
1289 	 */
1290 	if (dls_mgmt_get_phydev(id1, &phydev) == 0)
1291 		(void) softmac_hold_device(phydev, &ddh);
1292 
1293 	/*
1294 	 * The framework does not hold hold locks across calls to the
1295 	 * mac perimeter, hence enter the perimeter first. This also waits
1296 	 * for the property loading to finish.
1297 	 */
1298 	if ((err = mac_perim_enter_by_linkid(id1, &mph)) != 0) {
1299 		softmac_rele_device(ddh);
1300 		return (err);
1301 	}
1302 
1303 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
1304 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1305 	    (mod_hash_key_t)(uintptr_t)id1, (mod_hash_val_t *)&ddp)) != 0) {
1306 		ASSERT(err == MH_ERR_NOTFOUND);
1307 		err = ENOENT;
1308 		goto done;
1309 	}
1310 
1311 	/*
1312 	 * Return EBUSY if any applications have this link open, if any thread
1313 	 * is currently accessing the link kstats, or if the link is on-loan
1314 	 * to a non-global zone. Then set the DD_KSTAT_CHANGING flag to
1315 	 * prevent any access to the kstats while we delete and recreate
1316 	 * kstats below.
1317 	 */
1318 	mutex_enter(&ddp->dd_mutex);
1319 	if (ddp->dd_ref > 1) {
1320 		mutex_exit(&ddp->dd_mutex);
1321 		err = EBUSY;
1322 		goto done;
1323 	}
1324 
1325 	ddp->dd_flags |= DD_KSTAT_CHANGING;
1326 	clear_dd_flag = B_TRUE;
1327 	mutex_exit(&ddp->dd_mutex);
1328 
1329 	if (id2 == DATALINK_INVALID_LINKID) {
1330 		(void) strlcpy(ddp->dd_linkname, link,
1331 		    sizeof (ddp->dd_linkname));
1332 
1333 		/* rename mac client name and its flow if exists */
1334 		if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1335 			goto done;
1336 		(void) mac_rename_primary(mh, link);
1337 		mac_close(mh);
1338 		goto done;
1339 	}
1340 
1341 	/*
1342 	 * The second case, check whether the MAC is used by any MAC
1343 	 * user.  This must be a physical link so ddh must not be NULL.
1344 	 */
1345 	if (ddh == NULL) {
1346 		err = EINVAL;
1347 		goto done;
1348 	}
1349 
1350 	if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1351 		goto done;
1352 
1353 	/*
1354 	 * We release the reference of the MAC which mac_open() is
1355 	 * holding. Note that this mac will not be unregistered
1356 	 * because the physical device is held.
1357 	 */
1358 	mac_close(mh);
1359 
1360 	/*
1361 	 * Check if there is any other MAC clients, if not, hold this mac
1362 	 * exclusively until we are done.
1363 	 */
1364 	if ((err = mac_mark_exclusive(mh)) != 0)
1365 		goto done;
1366 
1367 	/*
1368 	 * Update the link's linkid.
1369 	 */
1370 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1371 	    (mod_hash_key_t)(uintptr_t)id2, &val)) != MH_ERR_NOTFOUND) {
1372 		mac_unmark_exclusive(mh);
1373 		err = EEXIST;
1374 		goto done;
1375 	}
1376 
1377 	err = dls_mgmt_get_linkinfo(id2, ddp->dd_linkname, NULL, NULL, NULL);
1378 	if (err != 0) {
1379 		mac_unmark_exclusive(mh);
1380 		goto done;
1381 	}
1382 
1383 	(void) mod_hash_remove(i_dls_devnet_id_hash,
1384 	    (mod_hash_key_t)(uintptr_t)id1, &val);
1385 
1386 	ddp->dd_linkid = id2;
1387 	(void) mod_hash_insert(i_dls_devnet_id_hash,
1388 	    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, (mod_hash_val_t)ddp);
1389 
1390 	mac_unmark_exclusive(mh);
1391 
1392 	/* load properties for new id */
1393 	mutex_enter(&ddp->dd_mutex);
1394 	ddp->dd_prop_loaded = B_FALSE;
1395 	ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
1396 	    dls_devnet_prop_task, ddp, TQ_SLEEP);
1397 	mutex_exit(&ddp->dd_mutex);
1398 
1399 done:
1400 	/*
1401 	 * Change the name of the kstat based on the new link name.
1402 	 * We can't hold the i_dls_devnet_lock across calls to the kstat
1403 	 * subsystem. Instead the DD_KSTAT_CHANGING flag set above in this
1404 	 * function prevents any access to the dd_ksp while we delete and
1405 	 * recreate it below.
1406 	 */
1407 	rw_exit(&i_dls_devnet_lock);
1408 	if (err == 0)
1409 		dls_devnet_stat_rename(ddp);
1410 
1411 	if (clear_dd_flag) {
1412 		mutex_enter(&ddp->dd_mutex);
1413 		ddp->dd_flags &= ~DD_KSTAT_CHANGING;
1414 		mutex_exit(&ddp->dd_mutex);
1415 	}
1416 
1417 	if (mph != NULL)
1418 		mac_perim_exit(mph);
1419 	softmac_rele_device(ddh);
1420 	return (err);
1421 }
1422 
1423 static int
1424 i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop)
1425 {
1426 	int			err;
1427 	mac_perim_handle_t	mph;
1428 	boolean_t		upcall_done = B_FALSE;
1429 	datalink_id_t		linkid = ddp->dd_linkid;
1430 	zoneid_t		old_zoneid = ddp->dd_zid;
1431 	dlmgmt_door_setzoneid_t	setzid;
1432 	dlmgmt_setzoneid_retval_t retval;
1433 
1434 	if (old_zoneid == new_zoneid)
1435 		return (0);
1436 
1437 	if ((err = mac_perim_enter_by_macname(ddp->dd_mac, &mph)) != 0)
1438 		return (err);
1439 
1440 	/*
1441 	 * When changing the zoneid of an existing link, we need to tell
1442 	 * dlmgmtd about it.  dlmgmtd already knows the zoneid associated with
1443 	 * newly created links.
1444 	 */
1445 	if (setprop) {
1446 		setzid.ld_cmd = DLMGMT_CMD_SETZONEID;
1447 		setzid.ld_linkid = linkid;
1448 		setzid.ld_zoneid = new_zoneid;
1449 		err = i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1450 		    sizeof (retval));
1451 		if (err != 0)
1452 			goto done;
1453 		upcall_done = B_TRUE;
1454 	}
1455 	if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
1456 		ddp->dd_zid = new_zoneid;
1457 		devnet_need_rebuild = B_TRUE;
1458 	}
1459 
1460 done:
1461 	if (err != 0 && upcall_done) {
1462 		setzid.ld_zoneid = old_zoneid;
1463 		(void) i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1464 		    sizeof (retval));
1465 	}
1466 	mac_perim_exit(mph);
1467 	return (err);
1468 }
1469 
1470 int
1471 dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
1472 {
1473 	dls_devnet_t	*ddp;
1474 	int		err;
1475 	zoneid_t	old_zid;
1476 	boolean_t	refheld = B_FALSE;
1477 
1478 	old_zid = ddh->dd_zid;
1479 
1480 	if (old_zid == new_zid)
1481 		return (0);
1482 
1483 	/*
1484 	 * Acquire an additional reference to the link if it is being assigned
1485 	 * to a non-global zone from the global zone.
1486 	 */
1487 	if (old_zid == GLOBAL_ZONEID && new_zid != GLOBAL_ZONEID) {
1488 		if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
1489 			return (err);
1490 		refheld = B_TRUE;
1491 	}
1492 
1493 	if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE)) != 0) {
1494 		if (refheld)
1495 			dls_devnet_rele(ddp);
1496 		return (err);
1497 	}
1498 
1499 	/*
1500 	 * Release the additional reference if the link is returning to the
1501 	 * global zone from a non-global zone.
1502 	 */
1503 	if (old_zid != GLOBAL_ZONEID && new_zid == GLOBAL_ZONEID)
1504 		dls_devnet_rele(ddh);
1505 
1506 	/* Re-create kstats in the appropriate zones. */
1507 	if (old_zid != GLOBAL_ZONEID)
1508 		dls_devnet_stat_destroy(ddh, old_zid);
1509 	if (new_zid != GLOBAL_ZONEID)
1510 		dls_devnet_stat_create(ddh, new_zid);
1511 
1512 	return (0);
1513 }
1514 
1515 zoneid_t
1516 dls_devnet_getzid(dls_dl_handle_t ddh)
1517 {
1518 	return (((dls_devnet_t *)ddh)->dd_zid);
1519 }
1520 
1521 zoneid_t
1522 dls_devnet_getownerzid(dls_dl_handle_t ddh)
1523 {
1524 	return (((dls_devnet_t *)ddh)->dd_owner_zid);
1525 }
1526 
1527 /*
1528  * Is linkid visible from zoneid?  A link is visible if it was created in the
1529  * zone, or if it is currently assigned to the zone.
1530  */
1531 boolean_t
1532 dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid)
1533 {
1534 	dls_devnet_t	*ddp;
1535 	boolean_t	result;
1536 
1537 	if (dls_devnet_hold_tmp(linkid, &ddp) != 0)
1538 		return (B_FALSE);
1539 	result = (ddp->dd_owner_zid == zoneid || ddp->dd_zid == zoneid);
1540 	dls_devnet_rele_tmp(ddp);
1541 	return (result);
1542 }
1543 
1544 /*
1545  * Access a vanity naming node.
1546  */
1547 int
1548 dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
1549 {
1550 	dls_devnet_t	*ddp;
1551 	dls_link_t	*dlp;
1552 	zoneid_t	zid = getzoneid();
1553 	int		err;
1554 	mac_perim_handle_t	mph;
1555 
1556 	if ((err = dls_devnet_hold_by_name(link, &ddp)) != 0)
1557 		return (err);
1558 
1559 	dls_devnet_prop_task_wait(ddp);
1560 
1561 	/*
1562 	 * Opening a link that does not belong to the current non-global zone
1563 	 * is not allowed.
1564 	 */
1565 	if (zid != GLOBAL_ZONEID && ddp->dd_zid != zid) {
1566 		dls_devnet_rele(ddp);
1567 		return (ENOENT);
1568 	}
1569 
1570 	err = mac_perim_enter_by_macname(ddp->dd_mac, &mph);
1571 	if (err != 0) {
1572 		dls_devnet_rele(ddp);
1573 		return (err);
1574 	}
1575 
1576 	err = dls_link_hold_create(ddp->dd_mac, &dlp);
1577 	mac_perim_exit(mph);
1578 
1579 	if (err != 0) {
1580 		dls_devnet_rele(ddp);
1581 		return (err);
1582 	}
1583 
1584 	*dhp = ddp;
1585 	*devp = dls_link_dev(dlp);
1586 	return (0);
1587 }
1588 
1589 /*
1590  * Close access to a vanity naming node.
1591  */
1592 void
1593 dls_devnet_close(dls_dl_handle_t dlh)
1594 {
1595 	dls_devnet_t	*ddp = dlh;
1596 	dls_link_t	*dlp;
1597 	mac_perim_handle_t	mph;
1598 
1599 	VERIFY(mac_perim_enter_by_macname(ddp->dd_mac, &mph) == 0);
1600 	VERIFY(dls_link_hold(ddp->dd_mac, &dlp) == 0);
1601 
1602 	/*
1603 	 * One rele for the hold placed in dls_devnet_open, another for
1604 	 * the hold done just above
1605 	 */
1606 	dls_link_rele(dlp);
1607 	dls_link_rele(dlp);
1608 	mac_perim_exit(mph);
1609 
1610 	dls_devnet_rele(ddp);
1611 }
1612 
1613 /*
1614  * This is used by /dev/net to rebuild the nodes for readdir().  It is not
1615  * critical and no protection is needed.
1616  */
1617 boolean_t
1618 dls_devnet_rebuild()
1619 {
1620 	boolean_t updated = devnet_need_rebuild;
1621 
1622 	devnet_need_rebuild = B_FALSE;
1623 	return (updated);
1624 }
1625 
1626 int
1627 dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid)
1628 {
1629 	dls_link_t	*dlp;
1630 	dls_devnet_t	*ddp;
1631 	int		err;
1632 	mac_perim_handle_t mph;
1633 
1634 	/*
1635 	 * Holding the mac perimeter ensures that the downcall from the
1636 	 * dlmgmt daemon which does the property loading does not proceed
1637 	 * until we relinquish the perimeter.
1638 	 */
1639 	mac_perim_enter_by_mh(mh, &mph);
1640 	/*
1641 	 * Make this association before we call dls_link_hold_create as
1642 	 * we need to use the linkid to get the user name for the link
1643 	 * when we create the MAC client.
1644 	 */
1645 	if ((err = dls_devnet_set(mac_name(mh), linkid, zoneid, &ddp)) == 0) {
1646 		if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) {
1647 			mac_perim_exit(mph);
1648 			(void) dls_devnet_unset(mac_name(mh), &linkid, B_TRUE);
1649 			return (err);
1650 		}
1651 	}
1652 	mac_perim_exit(mph);
1653 	return (err);
1654 }
1655 
1656 /*
1657  * Set the linkid of the dls_devnet_t and add it into the i_dls_devnet_id_hash.
1658  * This is called in the case that the dlmgmtd daemon is started later than
1659  * the physical devices get attached, and the linkid is only known after the
1660  * daemon starts.
1661  */
1662 int
1663 dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid)
1664 {
1665 	ASSERT(linkid != DATALINK_INVALID_LINKID);
1666 	return (dls_devnet_set(mac_name(mh), linkid, GLOBAL_ZONEID, NULL));
1667 }
1668 
1669 int
1670 dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait)
1671 {
1672 	int			err;
1673 	mac_perim_handle_t	mph;
1674 
1675 	*idp = DATALINK_INVALID_LINKID;
1676 	err = dls_devnet_unset(mac_name(mh), idp, wait);
1677 	if (err != 0 && err != ENOENT)
1678 		return (err);
1679 
1680 	mac_perim_enter_by_mh(mh, &mph);
1681 	err = dls_link_rele_by_name(mac_name(mh));
1682 	mac_perim_exit(mph);
1683 
1684 	if (err != 0) {
1685 		/*
1686 		 * XXX It is a general GLDv3 bug that dls_devnet_set() has to
1687 		 * be called to re-set the link when destroy fails.  The
1688 		 * zoneid below will be incorrect if this function is ever
1689 		 * called from kernel context or from a zone other than that
1690 		 * which initially created the link.
1691 		 */
1692 		(void) dls_devnet_set(mac_name(mh), *idp, crgetzoneid(CRED()),
1693 		    NULL);
1694 	}
1695 	return (err);
1696 }
1697 
1698 /*
1699  * Implicitly create an IP tunnel link.
1700  */
1701 static int
1702 i_dls_devnet_create_iptun(const char *linkname, const char *drvname,
1703     datalink_id_t *linkid)
1704 {
1705 	int		err;
1706 	iptun_kparams_t	ik;
1707 	uint32_t	media;
1708 	netstack_t	*ns;
1709 	major_t		iptun_major;
1710 	dev_info_t	*iptun_dip;
1711 
1712 	/* First ensure that the iptun device is attached. */
1713 	if ((iptun_major = ddi_name_to_major(IPTUN_DRIVER_NAME)) == (major_t)-1)
1714 		return (EINVAL);
1715 	if ((iptun_dip = ddi_hold_devi_by_instance(iptun_major, 0, 0)) == NULL)
1716 		return (EINVAL);
1717 
1718 	if (IS_IPV4_TUN(drvname)) {
1719 		ik.iptun_kparam_type = IPTUN_TYPE_IPV4;
1720 		media = DL_IPV4;
1721 	} else if (IS_6TO4_TUN(drvname)) {
1722 		ik.iptun_kparam_type = IPTUN_TYPE_6TO4;
1723 		media = DL_6TO4;
1724 	} else if (IS_IPV6_TUN(drvname)) {
1725 		ik.iptun_kparam_type = IPTUN_TYPE_IPV6;
1726 		media = DL_IPV6;
1727 	}
1728 	ik.iptun_kparam_flags = (IPTUN_KPARAM_TYPE | IPTUN_KPARAM_IMPLICIT);
1729 
1730 	/* Obtain a datalink id for this tunnel. */
1731 	err = dls_mgmt_create((char *)linkname, 0, DATALINK_CLASS_IPTUN, media,
1732 	    B_FALSE, &ik.iptun_kparam_linkid);
1733 	if (err != 0) {
1734 		ddi_release_devi(iptun_dip);
1735 		return (err);
1736 	}
1737 
1738 	ns = netstack_get_current();
1739 	err = iptun_create(&ik, CRED());
1740 	netstack_rele(ns);
1741 
1742 	if (err != 0)
1743 		VERIFY(dls_mgmt_destroy(ik.iptun_kparam_linkid, B_FALSE) == 0);
1744 	else
1745 		*linkid = ik.iptun_kparam_linkid;
1746 
1747 	ddi_release_devi(iptun_dip);
1748 	return (err);
1749 }
1750 
1751 static int
1752 i_dls_devnet_destroy_iptun(datalink_id_t linkid)
1753 {
1754 	int err;
1755 
1756 	/*
1757 	 * Note the use of zone_kcred() here as opposed to CRED().  This is
1758 	 * because the process that does the last close of this /dev/net node
1759 	 * may not have necessary privileges to delete this IP tunnel, but the
1760 	 * tunnel must always be implicitly deleted on last close.
1761 	 */
1762 	if ((err = iptun_delete(linkid, zone_kcred())) == 0)
1763 		(void) dls_mgmt_destroy(linkid, B_FALSE);
1764 	return (err);
1765 }
1766 
1767 const char *
1768 dls_devnet_mac(dls_dl_handle_t ddh)
1769 {
1770 	return (ddh->dd_mac);
1771 }
1772 
1773 datalink_id_t
1774 dls_devnet_linkid(dls_dl_handle_t ddh)
1775 {
1776 	return (ddh->dd_linkid);
1777 }
1778