xref: /titanic_50/usr/src/uts/common/io/dls/dls_mgmt.c (revision b51e13bf985efd1ff98249cad2824f2952f13ecb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Datalink management routines.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/door.h>
32 #include <sys/zone.h>
33 #include <sys/modctl.h>
34 #include <sys/file.h>
35 #include <sys/modhash.h>
36 #include <sys/kstat.h>
37 #include <sys/vnode.h>
38 #include <sys/cmn_err.h>
39 #include <sys/softmac.h>
40 #include <sys/dls.h>
41 #include <sys/dls_impl.h>
42 #include <sys/stropts.h>
43 #include <sys/netstack.h>
44 #include <inet/iptun/iptun_impl.h>
45 
46 /*
47  * This vanity name management module is treated as part of the GLD framework
48  * and we don't hold any GLD framework lock across a call to any mac
49  * function that needs to acquire the mac perimeter. The hierarchy is
50  * mac perimeter -> framework locks
51  */
52 
/*
 * Per-netstack DLS state.  One instance is allocated by dls_stack_init()
 * and released by dls_stack_fini().
 */
typedef struct dls_stack {
	zoneid_t	dlss_zoneid;	/* zone ID derived from the stack ID */
} dls_stack_t;
56 
/* kmem cache from which dls_devnet_t structures are allocated */
static kmem_cache_t	*i_dls_devnet_cachep;
/* Held while reading or replacing the dlmgmtd door handle */
static kmutex_t		i_dls_mgmt_lock;
/* Held while looking up or modifying the two hash tables below */
static krwlock_t	i_dls_devnet_lock;
/* dls_devnet_t entries keyed by dd_linkid */
static mod_hash_t	*i_dls_devnet_id_hash;
/* dls_devnet_t entries keyed by dd_mac */
static mod_hash_t	*i_dls_devnet_hash;

bpf_attach_fn_t		dls_bpfattach_fn = NULL;
bpf_detach_fn_t		dls_bpfdetach_fn = NULL;
/* Set to B_TRUE whenever a linkid mapping is added or removed in this file */
boolean_t		devnet_need_rebuild;
66 
/* Size passed when creating both dls_devnet_t hash tables in dls_mgmt_init() */
#define	VLAN_HASHSZ	67	/* prime */
68 
/*
 * The following macros take a link name without the trailing PPA as input.
 * Opening a /dev/net node with one of these names causes a tunnel link to be
 * implicitly created in dls_devnet_hold_by_name() for backward compatibility
 * with Solaris 10 and prior.
 *
 * Each macro is true only on an exact string match.
 */
#define	IS_IPV4_TUN(name)	(strcmp((name), "ip.tun") == 0)
#define	IS_IPV6_TUN(name)	(strcmp((name), "ip6.tun") == 0)
#define	IS_6TO4_TUN(name)	(strcmp((name), "ip.6to4tun") == 0)
/* True if name is any one of the three legacy tunnel names above */
#define	IS_IPTUN_LINK(name)	(					\
    IS_IPV4_TUN(name) || IS_IPV6_TUN(name) || IS_6TO4_TUN(name))
80 
/*
 * Upcall door handle.  NULL when no door is open; read and replaced under
 * i_dls_mgmt_lock.
 */
static door_handle_t	dls_mgmt_dh = NULL;

/* Values for dd_flags in dls_devnet_t */
#define	DD_CONDEMNED		0x1 /* being removed; new holds get ENOENT */
#define	DD_KSTAT_CHANGING	0x2 /* kstat updates are refused with ENOENT */
#define	DD_IMPLICIT_IPTUN	0x4 /* Implicitly-created ip*.*tun* tunnel */
87 
88 /*
89  * This structure is used to keep the <linkid, macname> mapping.
90  * This structure itself is not protected by the mac perimeter, but is
91  * protected by the dd_mutex and i_dls_devnet_lock. Thus most of the
92  * functions manipulating this structure such as dls_devnet_set/unset etc.
93  * may be called while not holding the mac perimeter.
94  */
typedef struct dls_devnet_s {
	datalink_id_t	dd_linkid;	/* key in i_dls_devnet_id_hash */
	char		dd_linkname[MAXLINKNAMELEN];	/* name used for kstats */
	char		dd_mac[MAXNAMELEN];	/* key in i_dls_devnet_hash */
	kstat_t		*dd_ksp;	/* kstat in owner_zid */
	kstat_t		*dd_zone_ksp;	/* in dd_zid if != owner_zid */
	uint32_t	dd_ref;		/* long-lived holds; 1 when only set */
	kmutex_t	dd_mutex;	/* held around ref, tref, flags updates */
	kcondvar_t	dd_cv;		/* tref reaching 0 / prop task done */
	uint32_t	dd_tref;	/* temporary holds */
	uint_t		dd_flags;	/* DD_* flags */
	zoneid_t	dd_owner_zid;	/* zone where node was created */
	zoneid_t	dd_zid;		/* current zone */
	boolean_t	dd_prop_loaded;	/* set once dls_devnet_prop_task ran */
	taskqid_t	dd_prop_taskid;	/* pending prop task, NULL if none */
} dls_devnet_t;
111 
/*
 * Forward declarations of static helpers that are used before their
 * definitions in this file.
 */
static int i_dls_devnet_create_iptun(const char *, const char *,
    datalink_id_t *);
static int i_dls_devnet_destroy_iptun(datalink_id_t);
static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t);
static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
117 
118 /*ARGSUSED*/
119 static int
120 i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
121 {
122 	dls_devnet_t	*ddp = buf;
123 
124 	bzero(buf, sizeof (dls_devnet_t));
125 	mutex_init(&ddp->dd_mutex, NULL, MUTEX_DEFAULT, NULL);
126 	cv_init(&ddp->dd_cv, NULL, CV_DEFAULT, NULL);
127 	return (0);
128 }
129 
130 /*ARGSUSED*/
131 static void
132 i_dls_devnet_destructor(void *buf, void *arg)
133 {
134 	dls_devnet_t	*ddp = buf;
135 
136 	ASSERT(ddp->dd_ksp == NULL);
137 	ASSERT(ddp->dd_ref == 0);
138 	ASSERT(ddp->dd_tref == 0);
139 	mutex_destroy(&ddp->dd_mutex);
140 	cv_destroy(&ddp->dd_cv);
141 }
142 
143 /* ARGSUSED */
144 static int
145 dls_zone_remove(datalink_id_t linkid, void *arg)
146 {
147 	dls_devnet_t *ddp;
148 
149 	if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
150 		(void) dls_devnet_setzid(ddp, GLOBAL_ZONEID);
151 		dls_devnet_rele_tmp(ddp);
152 	}
153 	return (0);
154 }
155 
156 /* ARGSUSED */
157 static void *
158 dls_stack_init(netstackid_t stackid, netstack_t *ns)
159 {
160 	dls_stack_t *dlss;
161 
162 	dlss = kmem_zalloc(sizeof (*dlss), KM_SLEEP);
163 	dlss->dlss_zoneid = netstackid_to_zoneid(stackid);
164 	return (dlss);
165 }
166 
167 /* ARGSUSED */
168 static void
169 dls_stack_shutdown(netstackid_t stackid, void *arg)
170 {
171 	dls_stack_t	*dlss = (dls_stack_t *)arg;
172 
173 	/* Move remaining datalinks in this zone back to the global zone. */
174 	(void) zone_datalink_walk(dlss->dlss_zoneid, dls_zone_remove, NULL);
175 }
176 
177 /* ARGSUSED */
178 static void
179 dls_stack_fini(netstackid_t stackid, void *arg)
180 {
181 	dls_stack_t	*dlss = (dls_stack_t *)arg;
182 
183 	kmem_free(dlss, sizeof (*dlss));
184 }
185 
186 /*
187  * Module initialization and finalization functions.
188  */
189 void
190 dls_mgmt_init(void)
191 {
192 	mutex_init(&i_dls_mgmt_lock, NULL, MUTEX_DEFAULT, NULL);
193 	rw_init(&i_dls_devnet_lock, NULL, RW_DEFAULT, NULL);
194 
195 	/*
196 	 * Create a kmem_cache of dls_devnet_t structures.
197 	 */
198 	i_dls_devnet_cachep = kmem_cache_create("dls_devnet_cache",
199 	    sizeof (dls_devnet_t), 0, i_dls_devnet_constructor,
200 	    i_dls_devnet_destructor, NULL, NULL, NULL, 0);
201 	ASSERT(i_dls_devnet_cachep != NULL);
202 
203 	/*
204 	 * Create a hash table, keyed by dd_linkid, of dls_devnet_t.
205 	 */
206 	i_dls_devnet_id_hash = mod_hash_create_idhash("dls_devnet_id_hash",
207 	    VLAN_HASHSZ, mod_hash_null_valdtor);
208 
209 	/*
210 	 * Create a hash table, keyed by dd_mac
211 	 */
212 	i_dls_devnet_hash = mod_hash_create_extended("dls_devnet_hash",
213 	    VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
214 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
215 
216 	devnet_need_rebuild = B_FALSE;
217 
218 	netstack_register(NS_DLS, dls_stack_init, dls_stack_shutdown,
219 	    dls_stack_fini);
220 }
221 
/*
 * Undo dls_mgmt_init(): unregister the netstack callbacks, then destroy the
 * hash tables, the dls_devnet_t cache and the two locks.
 */
void
dls_mgmt_fini(void)
{
	netstack_unregister(NS_DLS);
	mod_hash_destroy_hash(i_dls_devnet_hash);
	mod_hash_destroy_hash(i_dls_devnet_id_hash);
	kmem_cache_destroy(i_dls_devnet_cachep);
	rw_destroy(&i_dls_devnet_lock);
	mutex_destroy(&i_dls_mgmt_lock);
}
232 
233 int
234 dls_mgmt_door_set(boolean_t start)
235 {
236 	int	err;
237 
238 	/* handle daemon restart */
239 	mutex_enter(&i_dls_mgmt_lock);
240 	if (dls_mgmt_dh != NULL) {
241 		door_ki_rele(dls_mgmt_dh);
242 		dls_mgmt_dh = NULL;
243 	}
244 
245 	if (start && ((err = door_ki_open(DLMGMT_DOOR, &dls_mgmt_dh)) != 0)) {
246 		mutex_exit(&i_dls_mgmt_lock);
247 		return (err);
248 	}
249 
250 	mutex_exit(&i_dls_mgmt_lock);
251 
252 	/*
253 	 * Create and associate <link name, linkid> mapping for network devices
254 	 * which are already attached before the daemon is started.
255 	 */
256 	if (start)
257 		softmac_recreate();
258 	return (0);
259 }
260 
261 static boolean_t
262 i_dls_mgmt_door_revoked(door_handle_t dh)
263 {
264 	struct door_info info;
265 	extern int sys_shutdown;
266 
267 	ASSERT(dh != NULL);
268 
269 	if (sys_shutdown) {
270 		cmn_err(CE_NOTE, "dls_mgmt_door: shutdown observed\n");
271 		return (B_TRUE);
272 	}
273 
274 	if (door_ki_info(dh, &info) != 0)
275 		return (B_TRUE);
276 
277 	return ((info.di_attributes & DOOR_REVOKED) != 0);
278 }
279 
/*
 * Upcall to the datalink management daemon (dlmgmtd).
 *
 *     - arg, asize	request buffer and its size; arg must be non-NULL
 *     - rbuf, rsize	caller-supplied reply buffer and its size
 *
 * Returns EBADF if no door is installed or the door is revoked, the door
 * call error if the upcall itself fails, ENOSPC if the reply did not fit in
 * rbuf, EINVAL if the reply size is not exactly rsize, and otherwise the
 * lr_err field of the reply (rbuf is read as a dlmgmt_retval_t).
 */
static int
i_dls_mgmt_upcall(void *arg, size_t asize, void *rbuf, size_t rsize)
{
	door_arg_t			darg, save_arg;
	door_handle_t			dh;
	int				err;
	int				retry = 0;

#define	MAXRETRYNUM	3

	ASSERT(arg);
	darg.data_ptr = arg;
	darg.data_size = asize;
	darg.desc_ptr = NULL;
	darg.desc_num = 0;
	darg.rbuf = rbuf;
	darg.rsize = rsize;
	/* Pristine copy used to reset darg after a failed upcall. */
	save_arg = darg;

retry:
	/* Take our own hold on the current handle under the lock. */
	mutex_enter(&i_dls_mgmt_lock);
	dh = dls_mgmt_dh;
	if ((dh == NULL) || i_dls_mgmt_door_revoked(dh)) {
		mutex_exit(&i_dls_mgmt_lock);
		return (EBADF);
	}
	door_ki_hold(dh);
	mutex_exit(&i_dls_mgmt_lock);

	for (;;) {
		/* Counts attempts across both the loop and the retry label. */
		retry++;
		if ((err = door_ki_upcall_limited(dh, &darg, zone_kcred(),
		    SIZE_MAX, 0)) == 0)
			break;

		/*
		 * handle door call errors
		 */
		darg = save_arg;
		switch (err) {
		case EINTR:
			/*
			 * If the operation which caused this door upcall gets
			 * interrupted, return directly.
			 */
			goto done;
		case EAGAIN:
			/*
			 * Repeat upcall if the maximum attempt limit has not
			 * been reached.
			 */
			if (retry < MAXRETRYNUM) {
				delay(2 * hz);
				/* Leaves the switch; the for loop repeats. */
				break;
			}
			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
			goto done;
		default:
			/* A fatal door error */
			if (i_dls_mgmt_door_revoked(dh)) {
				cmn_err(CE_NOTE,
				    "dls: dlmgmtd door service revoked\n");

				if (retry < MAXRETRYNUM) {
					/*
					 * Drop this hold and start over with
					 * whatever handle is installed now.
					 */
					door_ki_rele(dh);
					goto retry;
				}
			}
			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
			goto done;
		}
	}

	if (darg.rbuf != rbuf) {
		/*
		 * The size of the input rbuf was not big enough, so the
		 * upcall allocated the rbuf itself.  If this happens, assume
		 * that this was an invalid door call request.
		 */
		kmem_free(darg.rbuf, darg.rsize);
		err = ENOSPC;
		goto done;
	}

	/* A reply of any size other than the expected one is rejected. */
	if (darg.rsize != rsize) {
		err = EINVAL;
		goto done;
	}

	/* The daemon's own status becomes the return value. */
	err = ((dlmgmt_retval_t *)rbuf)->lr_err;

done:
	door_ki_rele(dh);
	return (err);
}
378 
379 /*
380  * Request the datalink management daemon to create a link with the attributes
381  * below.  Upon success, zero is returned and linkidp contains the linkid for
382  * the new link; otherwise, an errno is returned.
383  *
384  *     - dev		physical dev_t.  required for all physical links,
385  *		        including GLDv3 links.  It will be used to force the
386  *		        attachment of a physical device, hence the
387  *		        registration of its mac
388  *     - class		datalink class
389  *     - media type	media type; DL_OTHER means unknown
390  *     - persist	whether to persist the datalink
391  */
392 int
393 dls_mgmt_create(const char *devname, dev_t dev, datalink_class_t class,
394     uint32_t media, boolean_t persist, datalink_id_t *linkidp)
395 {
396 	dlmgmt_upcall_arg_create_t	create;
397 	dlmgmt_create_retval_t		retval;
398 	int				err;
399 
400 	create.ld_cmd = DLMGMT_CMD_DLS_CREATE;
401 	create.ld_class = class;
402 	create.ld_media = media;
403 	create.ld_phymaj = getmajor(dev);
404 	create.ld_phyinst = getminor(dev);
405 	create.ld_persist = persist;
406 	if (strlcpy(create.ld_devname, devname, sizeof (create.ld_devname)) >=
407 	    sizeof (create.ld_devname))
408 		return (EINVAL);
409 
410 	if ((err = i_dls_mgmt_upcall(&create, sizeof (create), &retval,
411 	    sizeof (retval))) == 0) {
412 		*linkidp = retval.lr_linkid;
413 	}
414 	return (err);
415 }
416 
417 /*
418  * Request the datalink management daemon to destroy the specified link.
419  * Returns zero upon success, or an errno upon failure.
420  */
421 int
422 dls_mgmt_destroy(datalink_id_t linkid, boolean_t persist)
423 {
424 	dlmgmt_upcall_arg_destroy_t	destroy;
425 	dlmgmt_destroy_retval_t		retval;
426 
427 	destroy.ld_cmd = DLMGMT_CMD_DLS_DESTROY;
428 	destroy.ld_linkid = linkid;
429 	destroy.ld_persist = persist;
430 
431 	return (i_dls_mgmt_upcall(&destroy, sizeof (destroy),
432 	    &retval, sizeof (retval)));
433 }
434 
435 /*
436  * Request the datalink management daemon to verify/update the information
437  * for a physical link.  Upon success, get its linkid.
438  *
439  *     - media type	media type
440  *     - novanity	whether this physical datalink supports vanity naming.
441  *			physical links that do not use the GLDv3 MAC plugin
442  *			cannot suport vanity naming
443  *
444  * This function could fail with ENOENT or EEXIST.  Two cases return EEXIST:
445  *
446  * 1. A link with devname already exists, but the media type does not match.
447  *    In this case, mediap will bee set to the media type of the existing link.
448  * 2. A link with devname already exists, but its link name does not match
449  *    the device name, although this link does not support vanity naming.
450  */
451 int
452 dls_mgmt_update(const char *devname, uint32_t media, boolean_t novanity,
453     uint32_t *mediap, datalink_id_t *linkidp)
454 {
455 	dlmgmt_upcall_arg_update_t	update;
456 	dlmgmt_update_retval_t		retval;
457 	int				err;
458 
459 	update.ld_cmd = DLMGMT_CMD_DLS_UPDATE;
460 
461 	if (strlcpy(update.ld_devname, devname, sizeof (update.ld_devname)) >=
462 	    sizeof (update.ld_devname))
463 		return (EINVAL);
464 
465 	update.ld_media = media;
466 	update.ld_novanity = novanity;
467 
468 	if ((err = i_dls_mgmt_upcall(&update, sizeof (update), &retval,
469 	    sizeof (retval))) == EEXIST) {
470 		*linkidp = retval.lr_linkid;
471 		*mediap = retval.lr_media;
472 	} else if (err == 0) {
473 		*linkidp = retval.lr_linkid;
474 	}
475 
476 	return (err);
477 }
478 
479 /*
480  * Request the datalink management daemon to get the information for a link.
481  * Returns zero upon success, or an errno upon failure.
482  *
483  * Only fills in information for argument pointers that are non-NULL.
484  * Note that the link argument is expected to be MAXLINKNAMELEN bytes.
485  */
486 int
487 dls_mgmt_get_linkinfo(datalink_id_t linkid, char *link,
488     datalink_class_t *classp, uint32_t *mediap, uint32_t *flagsp)
489 {
490 	dlmgmt_door_getname_t	getname;
491 	dlmgmt_getname_retval_t	retval;
492 	int			err, len;
493 
494 	getname.ld_cmd = DLMGMT_CMD_GETNAME;
495 	getname.ld_linkid = linkid;
496 
497 	if ((err = i_dls_mgmt_upcall(&getname, sizeof (getname), &retval,
498 	    sizeof (retval))) != 0) {
499 		return (err);
500 	}
501 
502 	len = strlen(retval.lr_link);
503 	if (len <= 1 || len >= MAXLINKNAMELEN)
504 		return (EINVAL);
505 
506 	if (link != NULL)
507 		(void) strlcpy(link, retval.lr_link, MAXLINKNAMELEN);
508 	if (classp != NULL)
509 		*classp = retval.lr_class;
510 	if (mediap != NULL)
511 		*mediap = retval.lr_media;
512 	if (flagsp != NULL)
513 		*flagsp = retval.lr_flags;
514 	return (0);
515 }
516 
517 /*
518  * Request the datalink management daemon to get the linkid for a link.
519  * Returns a non-zero error code on failure.  The linkid argument is only
520  * set on success (when zero is returned.)
521  */
522 int
523 dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
524 {
525 	dlmgmt_door_getlinkid_t		getlinkid;
526 	dlmgmt_getlinkid_retval_t	retval;
527 	int				err;
528 
529 	getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
530 	(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
531 
532 	if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
533 	    sizeof (retval))) == 0) {
534 		*linkid = retval.lr_linkid;
535 	}
536 	return (err);
537 }
538 
539 datalink_id_t
540 dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class,
541     datalink_media_t dmedia, uint32_t flags)
542 {
543 	dlmgmt_door_getnext_t	getnext;
544 	dlmgmt_getnext_retval_t	retval;
545 
546 	getnext.ld_cmd = DLMGMT_CMD_GETNEXT;
547 	getnext.ld_class = class;
548 	getnext.ld_dmedia = dmedia;
549 	getnext.ld_flags = flags;
550 	getnext.ld_linkid = linkid;
551 
552 	if (i_dls_mgmt_upcall(&getnext, sizeof (getnext), &retval,
553 	    sizeof (retval)) != 0) {
554 		return (DATALINK_INVALID_LINKID);
555 	}
556 
557 	return (retval.lr_linkid);
558 }
559 
560 static int
561 i_dls_mgmt_get_linkattr(const datalink_id_t linkid, const char *attr,
562     void *attrval, size_t *attrszp)
563 {
564 	dlmgmt_upcall_arg_getattr_t	getattr;
565 	dlmgmt_getattr_retval_t		retval;
566 	int				err;
567 
568 	getattr.ld_cmd = DLMGMT_CMD_DLS_GETATTR;
569 	getattr.ld_linkid = linkid;
570 	(void) strlcpy(getattr.ld_attr, attr, MAXLINKATTRLEN);
571 
572 	if ((err = i_dls_mgmt_upcall(&getattr, sizeof (getattr), &retval,
573 	    sizeof (retval))) == 0) {
574 		if (*attrszp < retval.lr_attrsz)
575 			return (EINVAL);
576 		*attrszp = retval.lr_attrsz;
577 		bcopy(retval.lr_attrval, attrval, retval.lr_attrsz);
578 	}
579 
580 	return (err);
581 }
582 
583 /*
584  * Note that this function can only get devp successfully for non-VLAN link.
585  */
586 int
587 dls_mgmt_get_phydev(datalink_id_t linkid, dev_t *devp)
588 {
589 	uint64_t	maj, inst;
590 	size_t		attrsz = sizeof (uint64_t);
591 
592 	if (i_dls_mgmt_get_linkattr(linkid, FPHYMAJ, &maj, &attrsz) != 0 ||
593 	    attrsz != sizeof (uint64_t) ||
594 	    i_dls_mgmt_get_linkattr(linkid, FPHYINST, &inst, &attrsz) != 0 ||
595 	    attrsz != sizeof (uint64_t)) {
596 		return (EINVAL);
597 	}
598 
599 	*devp = makedevice((major_t)maj, (minor_t)inst);
600 	return (0);
601 }
602 
603 /*
604  * Request the datalink management daemon to push in
605  * all properties associated with the link.
606  * Returns a non-zero error code on failure.
607  */
608 int
609 dls_mgmt_linkprop_init(datalink_id_t linkid)
610 {
611 	dlmgmt_door_linkprop_init_t	li;
612 	dlmgmt_linkprop_init_retval_t	retval;
613 	int				err;
614 
615 	li.ld_cmd = DLMGMT_CMD_LINKPROP_INIT;
616 	li.ld_linkid = linkid;
617 
618 	err = i_dls_mgmt_upcall(&li, sizeof (li), &retval, sizeof (retval));
619 	return (err);
620 }
621 
622 static void
623 dls_devnet_prop_task(void *arg)
624 {
625 	dls_devnet_t		*ddp = arg;
626 
627 	(void) dls_mgmt_linkprop_init(ddp->dd_linkid);
628 
629 	mutex_enter(&ddp->dd_mutex);
630 	ddp->dd_prop_loaded = B_TRUE;
631 	ddp->dd_prop_taskid = NULL;
632 	cv_broadcast(&ddp->dd_cv);
633 	mutex_exit(&ddp->dd_mutex);
634 }
635 
636 /*
637  * Ensure property loading task is completed.
638  */
639 void
640 dls_devnet_prop_task_wait(dls_dl_handle_t ddp)
641 {
642 	mutex_enter(&ddp->dd_mutex);
643 	while (ddp->dd_prop_taskid != NULL)
644 		cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
645 	mutex_exit(&ddp->dd_mutex);
646 }
647 
648 void
649 dls_devnet_rele_tmp(dls_dl_handle_t dlh)
650 {
651 	dls_devnet_t		*ddp = dlh;
652 
653 	mutex_enter(&ddp->dd_mutex);
654 	ASSERT(ddp->dd_tref != 0);
655 	if (--ddp->dd_tref == 0)
656 		cv_signal(&ddp->dd_cv);
657 	mutex_exit(&ddp->dd_mutex);
658 }
659 
660 int
661 dls_devnet_hold_link(datalink_id_t linkid, dls_dl_handle_t *ddhp,
662     dls_link_t **dlpp)
663 {
664 	dls_dl_handle_t	dlh;
665 	dls_link_t	*dlp;
666 	int		err;
667 
668 	if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
669 		return (err);
670 
671 	if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) {
672 		dls_devnet_rele_tmp(dlh);
673 		return (err);
674 	}
675 
676 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
677 
678 	*ddhp = dlh;
679 	*dlpp = dlp;
680 	return (0);
681 }
682 
/*
 * Undo dls_devnet_hold_link(): release the dls_link_t first, then the
 * temporary devnet hold.  The mac perimeter of the link must be held.
 */
void
dls_devnet_rele_link(dls_dl_handle_t dlh, dls_link_t *dlp)
{
	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));

	dls_link_rele(dlp);
	dls_devnet_rele_tmp(dlh);
}
691 
692 /*
693  * "link" kstats related functions.
694  */
695 
696 /*
697  * Query the "link" kstats.
698  *
699  * We may be called from the kstat subsystem in an arbitrary context.
700  * If the caller is the stack, the context could be an upcall data
701  * thread. Hence we can't acquire the mac perimeter in this function
702  * for fear of deadlock.
703  */
704 static int
705 dls_devnet_stat_update(kstat_t *ksp, int rw)
706 {
707 	dls_devnet_t	*ddp = ksp->ks_private;
708 	dls_link_t	*dlp;
709 	int		err;
710 
711 	/*
712 	 * Check the link is being renamed or if the link is going away
713 	 * before incrementing dd_tref which in turn prevents the link
714 	 * from being renamed or deleted until we finish.
715 	 */
716 	mutex_enter(&ddp->dd_mutex);
717 	if (ddp->dd_flags & (DD_CONDEMNED | DD_KSTAT_CHANGING)) {
718 		mutex_exit(&ddp->dd_mutex);
719 		return (ENOENT);
720 	}
721 	ddp->dd_tref++;
722 	mutex_exit(&ddp->dd_mutex);
723 
724 	/*
725 	 * If a device detach happens at this time, it will block in
726 	 * dls_devnet_unset since the dd_tref has been bumped up above. So the
727 	 * access to 'dlp' is safe even though we don't hold the mac perimeter.
728 	 */
729 	if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)ddp->dd_mac,
730 	    (mod_hash_val_t *)&dlp) != 0) {
731 		dls_devnet_rele_tmp(ddp);
732 		return (ENOENT);
733 	}
734 
735 	err = dls_stat_update(ksp, dlp, rw);
736 
737 	dls_devnet_rele_tmp(ddp);
738 	return (err);
739 }
740 
741 /*
742  * Create the "link" kstats.
743  */
744 static void
745 dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid)
746 {
747 	kstat_t	*ksp;
748 
749 	if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid,
750 	    dls_devnet_stat_update, ddp, &ksp) == 0) {
751 		ASSERT(ksp != NULL);
752 		if (zoneid == ddp->dd_owner_zid) {
753 			ASSERT(ddp->dd_ksp == NULL);
754 			ddp->dd_ksp = ksp;
755 		} else {
756 			ASSERT(ddp->dd_zone_ksp == NULL);
757 			ddp->dd_zone_ksp = ksp;
758 		}
759 	}
760 }
761 
762 /*
763  * Destroy the "link" kstats.
764  */
765 static void
766 dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
767 {
768 	if (zoneid == ddp->dd_owner_zid) {
769 		if (ddp->dd_ksp != NULL) {
770 			kstat_delete(ddp->dd_ksp);
771 			ddp->dd_ksp = NULL;
772 		}
773 	} else {
774 		if (ddp->dd_zone_ksp != NULL) {
775 			kstat_delete(ddp->dd_zone_ksp);
776 			ddp->dd_zone_ksp = NULL;
777 		}
778 	}
779 }
780 
781 /*
782  * The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
783  * and create the new set using the new name.
784  */
785 static void
786 dls_devnet_stat_rename(dls_devnet_t *ddp)
787 {
788 	if (ddp->dd_ksp != NULL) {
789 		kstat_delete(ddp->dd_ksp);
790 		ddp->dd_ksp = NULL;
791 	}
792 	/* We can't rename a link while it's assigned to a non-global zone. */
793 	ASSERT(ddp->dd_zone_ksp == NULL);
794 	dls_devnet_stat_create(ddp, ddp->dd_owner_zid);
795 }
796 
797 /*
798  * Associate a linkid with a given link (identified by macname)
799  */
800 static int
801 dls_devnet_set(const char *macname, datalink_id_t linkid, zoneid_t zoneid,
802     dls_devnet_t **ddpp)
803 {
804 	dls_devnet_t		*ddp = NULL;
805 	datalink_class_t	class;
806 	int			err;
807 	boolean_t		stat_create = B_FALSE;
808 	char			linkname[MAXLINKNAMELEN];
809 
810 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
811 
812 	/*
813 	 * Don't allow callers to set a link name with a linkid that already
814 	 * has a name association (that's what rename is for).
815 	 */
816 	if (linkid != DATALINK_INVALID_LINKID) {
817 		if (mod_hash_find(i_dls_devnet_id_hash,
818 		    (mod_hash_key_t)(uintptr_t)linkid,
819 		    (mod_hash_val_t *)&ddp) == 0) {
820 			err = EEXIST;
821 			goto done;
822 		}
823 		if ((err = dls_mgmt_get_linkinfo(linkid, linkname, &class,
824 		    NULL, NULL)) != 0)
825 			goto done;
826 	}
827 
828 	if ((err = mod_hash_find(i_dls_devnet_hash,
829 	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) == 0) {
830 		if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
831 			err = EEXIST;
832 			goto done;
833 		}
834 
835 		/*
836 		 * This might be a physical link that has already
837 		 * been created, but which does not have a linkid
838 		 * because dlmgmtd was not running when it was created.
839 		 */
840 		if (linkid == DATALINK_INVALID_LINKID ||
841 		    class != DATALINK_CLASS_PHYS) {
842 			err = EINVAL;
843 			goto done;
844 		}
845 	} else {
846 		ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP);
847 		ddp->dd_tref = 0;
848 		ddp->dd_ref++;
849 		ddp->dd_owner_zid = zoneid;
850 		(void) strlcpy(ddp->dd_mac, macname, sizeof (ddp->dd_mac));
851 		VERIFY(mod_hash_insert(i_dls_devnet_hash,
852 		    (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0);
853 	}
854 
855 	if (linkid != DATALINK_INVALID_LINKID) {
856 		ddp->dd_linkid = linkid;
857 		(void) strlcpy(ddp->dd_linkname, linkname,
858 		    sizeof (ddp->dd_linkname));
859 		VERIFY(mod_hash_insert(i_dls_devnet_id_hash,
860 		    (mod_hash_key_t)(uintptr_t)linkid,
861 		    (mod_hash_val_t)ddp) == 0);
862 		devnet_need_rebuild = B_TRUE;
863 		stat_create = B_TRUE;
864 		mutex_enter(&ddp->dd_mutex);
865 		if (!ddp->dd_prop_loaded && (ddp->dd_prop_taskid == NULL)) {
866 			ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
867 			    dls_devnet_prop_task, ddp, TQ_SLEEP);
868 		}
869 		mutex_exit(&ddp->dd_mutex);
870 	}
871 	err = 0;
872 done:
873 	/*
874 	 * It is safe to drop the i_dls_devnet_lock at this point. In the case
875 	 * of physical devices, the softmac framework will fail the device
876 	 * detach based on the smac_state or smac_hold_cnt. Other cases like
877 	 * vnic and aggr use their own scheme to serialize creates and deletes
878 	 * and ensure that *ddp is valid.
879 	 */
880 	rw_exit(&i_dls_devnet_lock);
881 	if (err == 0) {
882 		if (zoneid != GLOBAL_ZONEID &&
883 		    (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE)) != 0)
884 			(void) dls_devnet_unset(macname, &linkid, B_TRUE);
885 		/*
886 		 * The kstat subsystem holds its own locks (rather perimeter)
887 		 * before calling the ks_update (dls_devnet_stat_update) entry
888 		 * point which in turn grabs the i_dls_devnet_lock. So the
889 		 * lock hierarchy is kstat locks -> i_dls_devnet_lock.
890 		 */
891 		if (stat_create)
892 			dls_devnet_stat_create(ddp, zoneid);
893 		if (ddpp != NULL)
894 			*ddpp = ddp;
895 	}
896 	return (err);
897 }
898 
/*
 * Disassociate a linkid with a given link (identified by macname)
 * This waits until temporary references to the dls_devnet_t are gone.
 *
 *     - macname	mac name of the entry to remove
 *     - id		receives the linkid the entry had before removal
 *     - wait		B_TRUE to block until temporary holds and the
 *			property task are gone; B_FALSE to fail with EBUSY
 *			instead
 *
 * Returns 0 on success (the dls_devnet_t has been freed), ENOENT if no
 * entry exists for macname, or EBUSY if the entry is still referenced.
 */
static int
dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait)
{
	dls_devnet_t	*ddp;
	int		err;
	mod_hash_val_t	val;

	rw_enter(&i_dls_devnet_lock, RW_WRITER);
	if ((err = mod_hash_find(i_dls_devnet_hash,
	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) != 0) {
		ASSERT(err == MH_ERR_NOTFOUND);
		rw_exit(&i_dls_devnet_lock);
		return (ENOENT);
	}

	mutex_enter(&ddp->dd_mutex);

	/*
	 * Make sure downcalls into softmac_create or softmac_destroy from
	 * devfs don't cv_wait on any devfs related condition for fear of
	 * deadlock. Return EBUSY if the asynchronous thread started for
	 * property loading as part of the post attach hasn't yet completed.
	 */
	ASSERT(ddp->dd_ref != 0);
	if ((ddp->dd_ref != 1) || (!wait &&
	    (ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) {
		mutex_exit(&ddp->dd_mutex);
		rw_exit(&i_dls_devnet_lock);
		return (EBUSY);
	}

	/* From here on new holds fail with ENOENT (see the hold functions). */
	ddp->dd_flags |= DD_CONDEMNED;
	ddp->dd_ref--;
	*id = ddp->dd_linkid;

	/* Note: called with both dd_mutex and i_dls_devnet_lock held. */
	if (ddp->dd_zid != GLOBAL_ZONEID)
		(void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);

	/*
	 * Remove this dls_devnet_t from the hash table.
	 */
	VERIFY(mod_hash_remove(i_dls_devnet_hash,
	    (mod_hash_key_t)ddp->dd_mac, &val) == 0);

	if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
		VERIFY(mod_hash_remove(i_dls_devnet_id_hash,
		    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, &val) == 0);

		devnet_need_rebuild = B_TRUE;
	}
	/* The entry is unreachable now; dd_mutex stays held below. */
	rw_exit(&i_dls_devnet_lock);

	if (wait) {
		/*
		 * Wait until all temporary references are released.
		 */
		while ((ddp->dd_tref != 0) || (ddp->dd_prop_taskid != NULL))
			cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
	} else {
		ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL);
	}

	if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
		dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);

	/* Reset fields before handing the object back to the cache. */
	ddp->dd_prop_loaded = B_FALSE;
	ddp->dd_linkid = DATALINK_INVALID_LINKID;
	ddp->dd_flags = 0;
	mutex_exit(&ddp->dd_mutex);
	kmem_cache_free(i_dls_devnet_cachep, ddp);

	return (0);
}
976 
977 static int
978 dls_devnet_hold_common(datalink_id_t linkid, dls_devnet_t **ddpp,
979     boolean_t tmp_hold)
980 {
981 	dls_devnet_t		*ddp;
982 	dev_t			phydev = 0;
983 	dls_dev_handle_t	ddh = NULL;
984 	int			err;
985 
986 	/*
987 	 * Hold this link to prevent it being detached in case of a
988 	 * physical link.
989 	 */
990 	if (dls_mgmt_get_phydev(linkid, &phydev) == 0)
991 		(void) softmac_hold_device(phydev, &ddh);
992 
993 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
994 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
995 	    (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&ddp)) != 0) {
996 		ASSERT(err == MH_ERR_NOTFOUND);
997 		rw_exit(&i_dls_devnet_lock);
998 		softmac_rele_device(ddh);
999 		return (ENOENT);
1000 	}
1001 
1002 	mutex_enter(&ddp->dd_mutex);
1003 	ASSERT(ddp->dd_ref > 0);
1004 	if (ddp->dd_flags & DD_CONDEMNED) {
1005 		mutex_exit(&ddp->dd_mutex);
1006 		rw_exit(&i_dls_devnet_lock);
1007 		softmac_rele_device(ddh);
1008 		return (ENOENT);
1009 	}
1010 	if (tmp_hold)
1011 		ddp->dd_tref++;
1012 	else
1013 		ddp->dd_ref++;
1014 	mutex_exit(&ddp->dd_mutex);
1015 	rw_exit(&i_dls_devnet_lock);
1016 
1017 	softmac_rele_device(ddh);
1018 
1019 	*ddpp = ddp;
1020 	return (0);
1021 }
1022 
/*
 * Take a regular (dd_ref) hold on the dls_devnet_t for linkid.  Returns 0
 * and sets *ddpp on success, or ENOENT if it cannot be held.
 */
int
dls_devnet_hold(datalink_id_t linkid, dls_devnet_t **ddpp)
{
	return (dls_devnet_hold_common(linkid, ddpp, B_FALSE));
}
1028 
/*
 * Hold the vanity naming structure (dls_devnet_t) temporarily.  The request to
 * delete the dls_devnet_t will wait until the temporary reference is released.
 * Returns 0 and sets *ddpp on success, or ENOENT if it cannot be held.
 * Release with dls_devnet_rele_tmp().
 */
int
dls_devnet_hold_tmp(datalink_id_t linkid, dls_devnet_t **ddpp)
{
	return (dls_devnet_hold_common(linkid, ddpp, B_TRUE));
}
1038 
1039 /*
1040  * This funtion is called when a DLS client tries to open a device node.
1041  * This dev_t could a result of a /dev/net node access (returned by
1042  * devnet_create_rvp->dls_devnet_open()) or a direct /dev node access.
1043  * In both cases, this function bumps up the reference count of the
1044  * dls_devnet_t structure. The reference is held as long as the device node
1045  * is open. In the case of /dev/net while it is true that the initial reference
1046  * is held when the devnet_create_rvp->dls_devnet_open call happens, this
1047  * initial reference is released immediately in devnet_inactive_callback ->
1048  * dls_devnet_close(). (Note that devnet_inactive_callback() is called right
1049  * after dld_open completes, not when the /dev/net node is being closed).
1050  * To undo this function, call dls_devnet_rele()
1051  */
1052 int
1053 dls_devnet_hold_by_dev(dev_t dev, dls_dl_handle_t *ddhp)
1054 {
1055 	char			name[MAXNAMELEN];
1056 	char			*drv;
1057 	dls_dev_handle_t	ddh = NULL;
1058 	dls_devnet_t		*ddp;
1059 	int			err;
1060 
1061 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1062 		return (EINVAL);
1063 
1064 	(void) snprintf(name, sizeof (name), "%s%d", drv,
1065 	    DLS_MINOR2INST(getminor(dev)));
1066 
1067 	/*
1068 	 * Hold this link to prevent it being detached in case of a
1069 	 * GLDv3 physical link.
1070 	 */
1071 	if (DLS_MINOR2INST(getminor(dev)) <= DLS_MAX_PPA)
1072 		(void) softmac_hold_device(dev, &ddh);
1073 
1074 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
1075 	if ((err = mod_hash_find(i_dls_devnet_hash,
1076 	    (mod_hash_key_t)name, (mod_hash_val_t *)&ddp)) != 0) {
1077 		ASSERT(err == MH_ERR_NOTFOUND);
1078 		rw_exit(&i_dls_devnet_lock);
1079 		softmac_rele_device(ddh);
1080 		return (ENOENT);
1081 	}
1082 	mutex_enter(&ddp->dd_mutex);
1083 	ASSERT(ddp->dd_ref > 0);
1084 	if (ddp->dd_flags & DD_CONDEMNED) {
1085 		mutex_exit(&ddp->dd_mutex);
1086 		rw_exit(&i_dls_devnet_lock);
1087 		softmac_rele_device(ddh);
1088 		return (ENOENT);
1089 	}
1090 	ddp->dd_ref++;
1091 	mutex_exit(&ddp->dd_mutex);
1092 	rw_exit(&i_dls_devnet_lock);
1093 
1094 	softmac_rele_device(ddh);
1095 
1096 	*ddhp = ddp;
1097 	return (0);
1098 }
1099 
1100 void
1101 dls_devnet_rele(dls_devnet_t *ddp)
1102 {
1103 	mutex_enter(&ddp->dd_mutex);
1104 	ASSERT(ddp->dd_ref > 1);
1105 	ddp->dd_ref--;
1106 	if ((ddp->dd_flags & DD_IMPLICIT_IPTUN) && ddp->dd_ref == 1) {
1107 		mutex_exit(&ddp->dd_mutex);
1108 		if (i_dls_devnet_destroy_iptun(ddp->dd_linkid) != 0)
1109 			ddp->dd_flags |= DD_IMPLICIT_IPTUN;
1110 		return;
1111 	}
1112 	mutex_exit(&ddp->dd_mutex);
1113 }
1114 
1115 static int
1116 dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp)
1117 {
1118 	char			drv[MAXLINKNAMELEN];
1119 	uint_t			ppa;
1120 	major_t			major;
1121 	dev_t			phy_dev, tmp_dev;
1122 	datalink_id_t		linkid;
1123 	dls_dev_handle_t	ddh;
1124 	int			err;
1125 
1126 	if ((err = dls_mgmt_get_linkid(link, &linkid)) == 0)
1127 		return (dls_devnet_hold(linkid, ddpp));
1128 
1129 	/*
1130 	 * If we failed to get the link's linkid because the dlmgmtd daemon
1131 	 * has not been started, return ENOENT so that the application can
1132 	 * fallback to open the /dev node.
1133 	 */
1134 	if (err == EBADF)
1135 		return (ENOENT);
1136 
1137 	if (err != ENOENT)
1138 		return (err);
1139 
1140 	if (ddi_parse(link, drv, &ppa) != DDI_SUCCESS)
1141 		return (ENOENT);
1142 
1143 	if (IS_IPTUN_LINK(drv)) {
1144 		if ((err = i_dls_devnet_create_iptun(link, drv, &linkid)) != 0)
1145 			return (err);
1146 		/*
1147 		 * At this point, an IP tunnel MAC has registered, which
1148 		 * resulted in a link being created.
1149 		 */
1150 		err = dls_devnet_hold(linkid, ddpp);
1151 		ASSERT(err == 0);
1152 		if (err != 0) {
1153 			VERIFY(i_dls_devnet_destroy_iptun(linkid) == 0);
1154 			return (err);
1155 		}
1156 		/*
1157 		 * dls_devnet_rele() will know to destroy the implicit IP
1158 		 * tunnel on last reference release if DD_IMPLICIT_IPTUN is
1159 		 * set.
1160 		 */
1161 		(*ddpp)->dd_flags |= DD_IMPLICIT_IPTUN;
1162 		return (0);
1163 	}
1164 
1165 	/*
1166 	 * If this link:
1167 	 * (a) is a physical device, (b) this is the first boot, (c) the MAC
1168 	 * is not registered yet, and (d) we cannot find its linkid, then the
1169 	 * linkname is the same as the devname.
1170 	 *
1171 	 * First filter out invalid names.
1172 	 */
1173 	if ((major = ddi_name_to_major(drv)) == (major_t)-1)
1174 		return (ENOENT);
1175 
1176 	phy_dev = makedevice(major, DLS_PPA2MINOR(ppa));
1177 	if (softmac_hold_device(phy_dev, &ddh) != 0)
1178 		return (ENOENT);
1179 
1180 	/*
1181 	 * At this time, the MAC should be registered, check its phy_dev using
1182 	 * the given name.
1183 	 */
1184 	if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0 ||
1185 	    (err = dls_mgmt_get_phydev(linkid, &tmp_dev)) != 0) {
1186 		softmac_rele_device(ddh);
1187 		return (err);
1188 	}
1189 	if (tmp_dev != phy_dev) {
1190 		softmac_rele_device(ddh);
1191 		return (ENOENT);
1192 	}
1193 
1194 	err = dls_devnet_hold(linkid, ddpp);
1195 	softmac_rele_device(ddh);
1196 	return (err);
1197 }
1198 
1199 int
1200 dls_devnet_macname2linkid(const char *macname, datalink_id_t *linkidp)
1201 {
1202 	dls_devnet_t	*ddp;
1203 
1204 	rw_enter(&i_dls_devnet_lock, RW_READER);
1205 	if (mod_hash_find(i_dls_devnet_hash, (mod_hash_key_t)macname,
1206 	    (mod_hash_val_t *)&ddp) != 0) {
1207 		rw_exit(&i_dls_devnet_lock);
1208 		return (ENOENT);
1209 	}
1210 
1211 	*linkidp = ddp->dd_linkid;
1212 	rw_exit(&i_dls_devnet_lock);
1213 	return (0);
1214 }
1215 
1216 /*
1217  * Get linkid for the given dev.
1218  */
1219 int
1220 dls_devnet_dev2linkid(dev_t dev, datalink_id_t *linkidp)
1221 {
1222 	char	macname[MAXNAMELEN];
1223 	char	*drv;
1224 
1225 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1226 		return (EINVAL);
1227 
1228 	(void) snprintf(macname, sizeof (macname), "%s%d", drv,
1229 	    DLS_MINOR2INST(getminor(dev)));
1230 	return (dls_devnet_macname2linkid(macname, linkidp));
1231 }
1232 
1233 /*
1234  * Get the link's physical dev_t. It this is a VLAN, get the dev_t of the
1235  * link this VLAN is created on.
1236  */
1237 int
1238 dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp)
1239 {
1240 	dls_devnet_t	*ddp;
1241 	int		err;
1242 
1243 	if ((err = dls_devnet_hold_tmp(vlanid, &ddp)) != 0)
1244 		return (err);
1245 
1246 	err = dls_mgmt_get_phydev(ddp->dd_linkid, devp);
1247 	dls_devnet_rele_tmp(ddp);
1248 	return (err);
1249 }
1250 
1251 /*
1252  * Handle the renaming requests.  There are two rename cases:
1253  *
1254  * 1. Request to rename a valid link (id1) to an non-existent link name
1255  *    (id2). In this case id2 is DATALINK_INVALID_LINKID.  Just check whether
1256  *    id1 is held by any applications.
1257  *
1258  *    In this case, the link's kstats need to be updated using the given name.
1259  *
1260  * 2. Request to rename a valid link (id1) to the name of a REMOVED
1261  *    physical link (id2). In this case, check that id1 and its associated
1262  *    mac is not held by any application, and update the link's linkid to id2.
1263  *
1264  *    This case does not change the <link name, linkid> mapping, so the link's
1265  *    kstats need to be updated with using name associated the given id2.
1266  */
1267 int
1268 dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
1269 {
1270 	dls_dev_handle_t	ddh = NULL;
1271 	int			err = 0;
1272 	dev_t			phydev = 0;
1273 	dls_devnet_t		*ddp;
1274 	mac_perim_handle_t	mph = NULL;
1275 	mac_handle_t		mh;
1276 	mod_hash_val_t		val;
1277 	boolean_t		clear_dd_flag = B_FALSE;
1278 
1279 	/*
1280 	 * In the second case, id2 must be a REMOVED physical link.
1281 	 */
1282 	if ((id2 != DATALINK_INVALID_LINKID) &&
1283 	    (dls_mgmt_get_phydev(id2, &phydev) == 0) &&
1284 	    softmac_hold_device(phydev, &ddh) == 0) {
1285 		softmac_rele_device(ddh);
1286 		return (EEXIST);
1287 	}
1288 
1289 	/*
1290 	 * Hold id1 to prevent it from being detached (if a physical link).
1291 	 */
1292 	if (dls_mgmt_get_phydev(id1, &phydev) == 0)
1293 		(void) softmac_hold_device(phydev, &ddh);
1294 
1295 	/*
1296 	 * The framework does not hold hold locks across calls to the
1297 	 * mac perimeter, hence enter the perimeter first. This also waits
1298 	 * for the property loading to finish.
1299 	 */
1300 	if ((err = mac_perim_enter_by_linkid(id1, &mph)) != 0) {
1301 		softmac_rele_device(ddh);
1302 		return (err);
1303 	}
1304 
1305 	rw_enter(&i_dls_devnet_lock, RW_WRITER);
1306 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1307 	    (mod_hash_key_t)(uintptr_t)id1, (mod_hash_val_t *)&ddp)) != 0) {
1308 		ASSERT(err == MH_ERR_NOTFOUND);
1309 		err = ENOENT;
1310 		goto done;
1311 	}
1312 
1313 	/*
1314 	 * Return EBUSY if any applications have this link open, if any thread
1315 	 * is currently accessing the link kstats, or if the link is on-loan
1316 	 * to a non-global zone. Then set the DD_KSTAT_CHANGING flag to
1317 	 * prevent any access to the kstats while we delete and recreate
1318 	 * kstats below.
1319 	 */
1320 	mutex_enter(&ddp->dd_mutex);
1321 	if (ddp->dd_ref > 1) {
1322 		mutex_exit(&ddp->dd_mutex);
1323 		err = EBUSY;
1324 		goto done;
1325 	}
1326 
1327 	ddp->dd_flags |= DD_KSTAT_CHANGING;
1328 	clear_dd_flag = B_TRUE;
1329 	mutex_exit(&ddp->dd_mutex);
1330 
1331 	if (id2 == DATALINK_INVALID_LINKID) {
1332 		(void) strlcpy(ddp->dd_linkname, link,
1333 		    sizeof (ddp->dd_linkname));
1334 
1335 		/* rename mac client name and its flow if exists */
1336 		if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1337 			goto done;
1338 		(void) mac_rename_primary(mh, link);
1339 		mac_close(mh);
1340 		goto done;
1341 	}
1342 
1343 	/*
1344 	 * The second case, check whether the MAC is used by any MAC
1345 	 * user.  This must be a physical link so ddh must not be NULL.
1346 	 */
1347 	if (ddh == NULL) {
1348 		err = EINVAL;
1349 		goto done;
1350 	}
1351 
1352 	if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1353 		goto done;
1354 
1355 	/*
1356 	 * We release the reference of the MAC which mac_open() is
1357 	 * holding. Note that this mac will not be unregistered
1358 	 * because the physical device is held.
1359 	 */
1360 	mac_close(mh);
1361 
1362 	/*
1363 	 * Check if there is any other MAC clients, if not, hold this mac
1364 	 * exclusively until we are done.
1365 	 */
1366 	if ((err = mac_mark_exclusive(mh)) != 0)
1367 		goto done;
1368 
1369 	/*
1370 	 * Update the link's linkid.
1371 	 */
1372 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1373 	    (mod_hash_key_t)(uintptr_t)id2, &val)) != MH_ERR_NOTFOUND) {
1374 		mac_unmark_exclusive(mh);
1375 		err = EEXIST;
1376 		goto done;
1377 	}
1378 
1379 	err = dls_mgmt_get_linkinfo(id2, ddp->dd_linkname, NULL, NULL, NULL);
1380 	if (err != 0) {
1381 		mac_unmark_exclusive(mh);
1382 		goto done;
1383 	}
1384 
1385 	(void) mod_hash_remove(i_dls_devnet_id_hash,
1386 	    (mod_hash_key_t)(uintptr_t)id1, &val);
1387 
1388 	ddp->dd_linkid = id2;
1389 	(void) mod_hash_insert(i_dls_devnet_id_hash,
1390 	    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, (mod_hash_val_t)ddp);
1391 
1392 	mac_unmark_exclusive(mh);
1393 
1394 	/* load properties for new id */
1395 	mutex_enter(&ddp->dd_mutex);
1396 	ddp->dd_prop_loaded = B_FALSE;
1397 	ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
1398 	    dls_devnet_prop_task, ddp, TQ_SLEEP);
1399 	mutex_exit(&ddp->dd_mutex);
1400 
1401 done:
1402 	/*
1403 	 * Change the name of the kstat based on the new link name.
1404 	 * We can't hold the i_dls_devnet_lock across calls to the kstat
1405 	 * subsystem. Instead the DD_KSTAT_CHANGING flag set above in this
1406 	 * function prevents any access to the dd_ksp while we delete and
1407 	 * recreate it below.
1408 	 */
1409 	rw_exit(&i_dls_devnet_lock);
1410 	if (err == 0)
1411 		dls_devnet_stat_rename(ddp);
1412 
1413 	if (clear_dd_flag) {
1414 		mutex_enter(&ddp->dd_mutex);
1415 		ddp->dd_flags &= ~DD_KSTAT_CHANGING;
1416 		mutex_exit(&ddp->dd_mutex);
1417 	}
1418 
1419 	if (mph != NULL)
1420 		mac_perim_exit(mph);
1421 	softmac_rele_device(ddh);
1422 	return (err);
1423 }
1424 
1425 static int
1426 i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop)
1427 {
1428 	int			err;
1429 	mac_perim_handle_t	mph;
1430 	boolean_t		upcall_done = B_FALSE;
1431 	datalink_id_t		linkid = ddp->dd_linkid;
1432 	zoneid_t		old_zoneid = ddp->dd_zid;
1433 	dlmgmt_door_setzoneid_t	setzid;
1434 	dlmgmt_setzoneid_retval_t retval;
1435 
1436 	if (old_zoneid == new_zoneid)
1437 		return (0);
1438 
1439 	if ((err = mac_perim_enter_by_macname(ddp->dd_mac, &mph)) != 0)
1440 		return (err);
1441 
1442 	/*
1443 	 * When changing the zoneid of an existing link, we need to tell
1444 	 * dlmgmtd about it.  dlmgmtd already knows the zoneid associated with
1445 	 * newly created links.
1446 	 */
1447 	if (setprop) {
1448 		setzid.ld_cmd = DLMGMT_CMD_SETZONEID;
1449 		setzid.ld_linkid = linkid;
1450 		setzid.ld_zoneid = new_zoneid;
1451 		err = i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1452 		    sizeof (retval));
1453 		if (err != 0)
1454 			goto done;
1455 		upcall_done = B_TRUE;
1456 	}
1457 	if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
1458 		ddp->dd_zid = new_zoneid;
1459 		devnet_need_rebuild = B_TRUE;
1460 	}
1461 
1462 done:
1463 	if (err != 0 && upcall_done) {
1464 		setzid.ld_zoneid = old_zoneid;
1465 		(void) i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1466 		    sizeof (retval));
1467 	}
1468 	mac_perim_exit(mph);
1469 	return (err);
1470 }
1471 
1472 int
1473 dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
1474 {
1475 	dls_devnet_t	*ddp;
1476 	int		err;
1477 	zoneid_t	old_zid;
1478 	boolean_t	refheld = B_FALSE;
1479 
1480 	old_zid = ddh->dd_zid;
1481 
1482 	if (old_zid == new_zid)
1483 		return (0);
1484 
1485 	/*
1486 	 * Acquire an additional reference to the link if it is being assigned
1487 	 * to a non-global zone from the global zone.
1488 	 */
1489 	if (old_zid == GLOBAL_ZONEID && new_zid != GLOBAL_ZONEID) {
1490 		if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
1491 			return (err);
1492 		refheld = B_TRUE;
1493 	}
1494 
1495 	if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE)) != 0) {
1496 		if (refheld)
1497 			dls_devnet_rele(ddp);
1498 		return (err);
1499 	}
1500 
1501 	/*
1502 	 * Release the additional reference if the link is returning to the
1503 	 * global zone from a non-global zone.
1504 	 */
1505 	if (old_zid != GLOBAL_ZONEID && new_zid == GLOBAL_ZONEID)
1506 		dls_devnet_rele(ddh);
1507 
1508 	/* Re-create kstats in the appropriate zones. */
1509 	if (old_zid != GLOBAL_ZONEID)
1510 		dls_devnet_stat_destroy(ddh, old_zid);
1511 	if (new_zid != GLOBAL_ZONEID)
1512 		dls_devnet_stat_create(ddh, new_zid);
1513 
1514 	return (0);
1515 }
1516 
1517 zoneid_t
1518 dls_devnet_getzid(dls_dl_handle_t ddh)
1519 {
1520 	return (((dls_devnet_t *)ddh)->dd_zid);
1521 }
1522 
1523 zoneid_t
1524 dls_devnet_getownerzid(dls_dl_handle_t ddh)
1525 {
1526 	return (((dls_devnet_t *)ddh)->dd_owner_zid);
1527 }
1528 
1529 /*
1530  * Is linkid visible from zoneid?  A link is visible if it was created in the
1531  * zone, or if it is currently assigned to the zone.
1532  */
1533 boolean_t
1534 dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid)
1535 {
1536 	dls_devnet_t	*ddp;
1537 	boolean_t	result;
1538 
1539 	if (dls_devnet_hold_tmp(linkid, &ddp) != 0)
1540 		return (B_FALSE);
1541 	result = (ddp->dd_owner_zid == zoneid || ddp->dd_zid == zoneid);
1542 	dls_devnet_rele_tmp(ddp);
1543 	return (result);
1544 }
1545 
1546 /*
1547  * Access a vanity naming node.
1548  */
1549 int
1550 dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
1551 {
1552 	dls_devnet_t	*ddp;
1553 	dls_link_t	*dlp;
1554 	zoneid_t	zid = getzoneid();
1555 	int		err;
1556 	mac_perim_handle_t	mph;
1557 
1558 	if ((err = dls_devnet_hold_by_name(link, &ddp)) != 0)
1559 		return (err);
1560 
1561 	dls_devnet_prop_task_wait(ddp);
1562 
1563 	/*
1564 	 * Opening a link that does not belong to the current non-global zone
1565 	 * is not allowed.
1566 	 */
1567 	if (zid != GLOBAL_ZONEID && ddp->dd_zid != zid) {
1568 		dls_devnet_rele(ddp);
1569 		return (ENOENT);
1570 	}
1571 
1572 	err = mac_perim_enter_by_macname(ddp->dd_mac, &mph);
1573 	if (err != 0) {
1574 		dls_devnet_rele(ddp);
1575 		return (err);
1576 	}
1577 
1578 	err = dls_link_hold_create(ddp->dd_mac, &dlp);
1579 	mac_perim_exit(mph);
1580 
1581 	if (err != 0) {
1582 		dls_devnet_rele(ddp);
1583 		return (err);
1584 	}
1585 
1586 	*dhp = ddp;
1587 	*devp = dls_link_dev(dlp);
1588 	return (0);
1589 }
1590 
1591 /*
1592  * Close access to a vanity naming node.
1593  */
1594 void
1595 dls_devnet_close(dls_dl_handle_t dlh)
1596 {
1597 	dls_devnet_t	*ddp = dlh;
1598 	dls_link_t	*dlp;
1599 	mac_perim_handle_t	mph;
1600 
1601 	VERIFY(mac_perim_enter_by_macname(ddp->dd_mac, &mph) == 0);
1602 	VERIFY(dls_link_hold(ddp->dd_mac, &dlp) == 0);
1603 
1604 	/*
1605 	 * One rele for the hold placed in dls_devnet_open, another for
1606 	 * the hold done just above
1607 	 */
1608 	dls_link_rele(dlp);
1609 	dls_link_rele(dlp);
1610 	mac_perim_exit(mph);
1611 
1612 	dls_devnet_rele(ddp);
1613 }
1614 
1615 /*
1616  * This is used by /dev/net to rebuild the nodes for readdir().  It is not
1617  * critical and no protection is needed.
1618  */
1619 boolean_t
1620 dls_devnet_rebuild()
1621 {
1622 	boolean_t updated = devnet_need_rebuild;
1623 
1624 	devnet_need_rebuild = B_FALSE;
1625 	return (updated);
1626 }
1627 
1628 int
1629 dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid)
1630 {
1631 	dls_link_t	*dlp;
1632 	dls_devnet_t	*ddp;
1633 	int		err;
1634 	mac_perim_handle_t mph;
1635 
1636 	/*
1637 	 * Holding the mac perimeter ensures that the downcall from the
1638 	 * dlmgmt daemon which does the property loading does not proceed
1639 	 * until we relinquish the perimeter.
1640 	 */
1641 	mac_perim_enter_by_mh(mh, &mph);
1642 	/*
1643 	 * Make this association before we call dls_link_hold_create as
1644 	 * we need to use the linkid to get the user name for the link
1645 	 * when we create the MAC client.
1646 	 */
1647 	if ((err = dls_devnet_set(mac_name(mh), linkid, zoneid, &ddp)) == 0) {
1648 		if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) {
1649 			mac_perim_exit(mph);
1650 			(void) dls_devnet_unset(mac_name(mh), &linkid, B_TRUE);
1651 			return (err);
1652 		}
1653 	}
1654 	/*
1655 	 * Tell BPF it is here, if BPF is there
1656 	 */
1657 	if (dls_bpfattach_fn != NULL) {
1658 		/*
1659 		 * The zoneid is passed in explicitly to prevent the need to
1660 		 * do a lookup in dls using the linkid. Such a lookup would need
1661 		 * to use the same hash table that gets used for walking when
1662 		 * dls_set_bpfattach() is called.
1663 		 */
1664 		dls_bpfattach_fn((uintptr_t)mh, mac_type(mh),
1665 		    dlp->dl_zid, BPR_MAC);
1666 	}
1667 	mac_perim_exit(mph);
1668 	return (err);
1669 }
1670 
1671 /*
1672  * Set the linkid of the dls_devnet_t and add it into the i_dls_devnet_id_hash.
1673  * This is called in the case that the dlmgmtd daemon is started later than
1674  * the physical devices get attached, and the linkid is only known after the
1675  * daemon starts.
1676  */
1677 int
1678 dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid)
1679 {
1680 	ASSERT(linkid != DATALINK_INVALID_LINKID);
1681 	return (dls_devnet_set(mac_name(mh), linkid, GLOBAL_ZONEID, NULL));
1682 }
1683 
1684 int
1685 dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait)
1686 {
1687 	int			err;
1688 	mac_perim_handle_t	mph;
1689 
1690 	*idp = DATALINK_INVALID_LINKID;
1691 	err = dls_devnet_unset(mac_name(mh), idp, wait);
1692 	if (err != 0 && err != ENOENT)
1693 		return (err);
1694 
1695 	/*
1696 	 * Tell BPF that the link is going away, if BPF is there.
1697 	 */
1698 	if (dls_bpfdetach_fn != NULL)
1699 		dls_bpfdetach_fn((uintptr_t)mh);
1700 
1701 	mac_perim_enter_by_mh(mh, &mph);
1702 	err = dls_link_rele_by_name(mac_name(mh));
1703 	mac_perim_exit(mph);
1704 
1705 	if (err != 0) {
1706 		/*
1707 		 * XXX It is a general GLDv3 bug that dls_devnet_set() has to
1708 		 * be called to re-set the link when destroy fails.  The
1709 		 * zoneid below will be incorrect if this function is ever
1710 		 * called from kernel context or from a zone other than that
1711 		 * which initially created the link.
1712 		 */
1713 		(void) dls_devnet_set(mac_name(mh), *idp, crgetzoneid(CRED()),
1714 		    NULL);
1715 	}
1716 	return (err);
1717 }
1718 
1719 /*
1720  * Implicitly create an IP tunnel link.
1721  */
1722 static int
1723 i_dls_devnet_create_iptun(const char *linkname, const char *drvname,
1724     datalink_id_t *linkid)
1725 {
1726 	int		err;
1727 	iptun_kparams_t	ik;
1728 	uint32_t	media;
1729 	netstack_t	*ns;
1730 	major_t		iptun_major;
1731 	dev_info_t	*iptun_dip;
1732 
1733 	/* First ensure that the iptun device is attached. */
1734 	if ((iptun_major = ddi_name_to_major(IPTUN_DRIVER_NAME)) == (major_t)-1)
1735 		return (EINVAL);
1736 	if ((iptun_dip = ddi_hold_devi_by_instance(iptun_major, 0, 0)) == NULL)
1737 		return (EINVAL);
1738 
1739 	if (IS_IPV4_TUN(drvname)) {
1740 		ik.iptun_kparam_type = IPTUN_TYPE_IPV4;
1741 		media = DL_IPV4;
1742 	} else if (IS_6TO4_TUN(drvname)) {
1743 		ik.iptun_kparam_type = IPTUN_TYPE_6TO4;
1744 		media = DL_6TO4;
1745 	} else if (IS_IPV6_TUN(drvname)) {
1746 		ik.iptun_kparam_type = IPTUN_TYPE_IPV6;
1747 		media = DL_IPV6;
1748 	}
1749 	ik.iptun_kparam_flags = (IPTUN_KPARAM_TYPE | IPTUN_KPARAM_IMPLICIT);
1750 
1751 	/* Obtain a datalink id for this tunnel. */
1752 	err = dls_mgmt_create((char *)linkname, 0, DATALINK_CLASS_IPTUN, media,
1753 	    B_FALSE, &ik.iptun_kparam_linkid);
1754 	if (err != 0) {
1755 		ddi_release_devi(iptun_dip);
1756 		return (err);
1757 	}
1758 
1759 	ns = netstack_get_current();
1760 	err = iptun_create(&ik, CRED());
1761 	netstack_rele(ns);
1762 
1763 	if (err != 0)
1764 		VERIFY(dls_mgmt_destroy(ik.iptun_kparam_linkid, B_FALSE) == 0);
1765 	else
1766 		*linkid = ik.iptun_kparam_linkid;
1767 
1768 	ddi_release_devi(iptun_dip);
1769 	return (err);
1770 }
1771 
1772 static int
1773 i_dls_devnet_destroy_iptun(datalink_id_t linkid)
1774 {
1775 	int err;
1776 
1777 	/*
1778 	 * Note the use of zone_kcred() here as opposed to CRED().  This is
1779 	 * because the process that does the last close of this /dev/net node
1780 	 * may not have necessary privileges to delete this IP tunnel, but the
1781 	 * tunnel must always be implicitly deleted on last close.
1782 	 */
1783 	if ((err = iptun_delete(linkid, zone_kcred())) == 0)
1784 		(void) dls_mgmt_destroy(linkid, B_FALSE);
1785 	return (err);
1786 }
1787 
1788 const char *
1789 dls_devnet_mac(dls_dl_handle_t ddh)
1790 {
1791 	return (ddh->dd_mac);
1792 }
1793 
1794 datalink_id_t
1795 dls_devnet_linkid(dls_dl_handle_t ddh)
1796 {
1797 	return (ddh->dd_linkid);
1798 }
1799 
1800 /*ARGSUSED*/
1801 static uint_t
1802 i_dls_bpfattach_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
1803 {
1804 	dls_link_t		*dlp = (dls_link_t *)val;
1805 
1806 	dls_bpfattach_fn((uintptr_t)dlp->dl_mh, mac_type(dlp->dl_mh),
1807 	    dlp->dl_zid, BPR_MAC);
1808 
1809 	return (MH_WALK_CONTINUE);
1810 }
1811 
1812 /*
1813  * Set the functions to call back to when adding or removing a mac so that
1814  * BPF can keep its internal list of these up to date.
1815  */
1816 void
1817 dls_set_bpfattach(bpf_attach_fn_t attach, bpf_detach_fn_t detach)
1818 {
1819 	bpf_attach_fn_t		old = dls_bpfattach_fn;
1820 
1821 	dls_bpfattach_fn = attach;
1822 	dls_bpfdetach_fn = detach;
1823 
1824 	/*
1825 	 * If we're setting a new attach function, call it for every
1826 	 * mac that has already been attached.
1827 	 */
1828 	if (attach != NULL && old == NULL) {
1829 		mod_hash_walk(i_dls_link_hash, i_dls_bpfattach_walker, NULL);
1830 	}
1831 }
1832