xref: /illumos-gate/usr/src/uts/common/io/dls/dls_mgmt.c (revision 9971fad5069e233a38fe553fcb414ddb0667adb5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright (c) 2017 Joyent, Inc.
25  * Copyright 2025 Oxide Computer Company.
26  */
27 /*
28  * Copyright (c) 2016 by Delphix. All rights reserved.
29  */
30 
31 /*
32  * Datalink management routines.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/door.h>
37 #include <sys/zone.h>
38 #include <sys/modctl.h>
39 #include <sys/file.h>
40 #include <sys/modhash.h>
41 #include <sys/kstat.h>
42 #include <sys/vnode.h>
43 #include <sys/cmn_err.h>
44 #include <sys/softmac.h>
45 #include <sys/dls.h>
46 #include <sys/dls_impl.h>
47 #include <sys/stropts.h>
48 #include <sys/netstack.h>
49 #include <inet/iptun/iptun_impl.h>
50 
51 /*
52  * This vanity name management module is treated as part of the GLD framework
53  * and we don't hold any GLD framework lock across a call to any mac
54  * function that needs to acquire the mac perimeter. The hierarchy is
55  * mac perimeter -> framework locks
56  */
57 
/*
 * Per-netstack DLS state. dls_stack_init() fills in dlss_zoneid from the
 * netstack id, and dls_stack_shutdown() uses it to walk that zone's datalinks.
 */
58 typedef struct dls_stack {
59 	zoneid_t	dlss_zoneid;
60 } dls_stack_t;
61 
/* kmem cache from which every dls_devnet_t is allocated. */
62 static kmem_cache_t	*i_dls_devnet_cachep;
63 
64 /* Upcall door handle and its lock. */
/* dls_mgmt_dh is NULL whenever no door to the daemon is currently open. */
65 static kmutex_t		i_dls_mgmt_lock;
66 static door_handle_t	dls_mgmt_dh = NULL;
67 
68 /*
69  * Any association of <macname, linkid> (set, rename) can require an upcall to
70  * the daemon for the link vanity name. We want set/rename/unset to be mutually
71  * exclusive from start to finish, but it is unsafe to hold a write on
72  * i_dls_devnet_hash_lock during an upcall. Enforce their exclusion using a
73  * separate lock from the hash tables.
74  *
75  * i_dls_devnet_hash_lock protects the hash tables themselves. Taking a write on
76  * it requires we first hold i_dls_devnet_lock. Thus, we can safely drop,
77  * reacquire, and upgrade/downgrade it so long as all table updates occur in a
78  * single write. If a write is intended i_dls_devnet_lock must be acquired
79  * before i_dls_devnet_hash_lock, leaving the valid lock patterns:
80  *  - i_dls_devnet_lock_enter -> i_dls_devnet_hashmap_write
81  *  - i_dls_devnet_lock_enter -> i_dls_devnet_hashmap_read
82  *  - i_dls_devnet_hashmap_read
83  * i_dls_devnet_hashmap_write enforces the first invariant.
84  */
/*
 * Set/rename/unset exclusion: the mutex, the condvar waited on while another
 * thread owns the lock across an upcall, and that owning thread (NULL when
 * no upcall is in progress).
 */
85 static kmutex_t		i_dls_devnet_lock;
86 static kcondvar_t	i_dls_devnet_cv;
87 static kthread_t	*i_dls_devnet_own;
88 
/*
 * The hash tables and the rwlock that protects them. i_dls_devnet_id_hash is
 * keyed by dd_linkid, i_dls_devnet_hash by dd_mac.
 */
89 static krwlock_t	i_dls_devnet_hash_lock;
90 static mod_hash_t	*i_dls_devnet_id_hash;
91 static mod_hash_t	*i_dls_devnet_hash;
92 
93 static void
i_dls_devnet_lock_enter(void)94 i_dls_devnet_lock_enter(void)
95 {
96 	mutex_enter(&i_dls_devnet_lock);
97 	while (i_dls_devnet_own != NULL) {
98 		cv_wait(&i_dls_devnet_cv, &i_dls_devnet_lock);
99 	}
100 }
101 
102 static void
i_dls_devnet_lock_exit(void)103 i_dls_devnet_lock_exit(void)
104 {
105 	VERIFY3P(i_dls_devnet_own, ==, NULL);
106 	cv_broadcast(&i_dls_devnet_cv);
107 	mutex_exit(&i_dls_devnet_lock);
108 }
109 
110 static void
i_dls_devnet_lock_upcall_start(void)111 i_dls_devnet_lock_upcall_start(void)
112 {
113 	VERIFY(MUTEX_HELD(&i_dls_devnet_lock));
114 	VERIFY3P(i_dls_devnet_own, ==, NULL);
115 	i_dls_devnet_own = curthread;
116 	mutex_exit(&i_dls_devnet_lock);
117 }
118 
119 static void
i_dls_devnet_lock_upcall_end(void)120 i_dls_devnet_lock_upcall_end(void)
121 {
122 	mutex_enter(&i_dls_devnet_lock);
123 	VERIFY3P(i_dls_devnet_own, ==, curthread);
124 	i_dls_devnet_own = NULL;
125 }
126 
127 static void
i_dls_devnet_hashmap_write(void)128 i_dls_devnet_hashmap_write(void)
129 {
130 	VERIFY(MUTEX_HELD(&i_dls_devnet_lock));
131 	rw_enter(&i_dls_devnet_hash_lock, RW_WRITER);
132 }
133 
134 static void
i_dls_devnet_hashmap_read(void)135 i_dls_devnet_hashmap_read(void)
136 {
137 	rw_enter(&i_dls_devnet_hash_lock, RW_READER);
138 }
139 
140 static void
i_dls_devnet_hashmap_exit(void)141 i_dls_devnet_hashmap_exit(void)
142 {
143 	rw_exit(&i_dls_devnet_hash_lock);
144 }
145 
/*
 * Cleared in dls_mgmt_init() and set to B_TRUE when a new linkid is inserted
 * into i_dls_devnet_id_hash. The consumer of this flag is outside this file
 * section -- NOTE(review): confirm who reads and resets it.
 */
146 boolean_t		devnet_need_rebuild;
147 
/* Bucket count used for both devnet hash tables created in dls_mgmt_init(). */
148 #define	VLAN_HASHSZ	67	/* prime */
149 
/*
 * Legacy tunnel link-name predicates. Each macro expects the link name with
 * the trailing PPA already removed. Opening a /dev/net node under one of
 * these names makes dls_devnet_hold_by_name() implicitly create a tunnel
 * link, for backward compatibility with Solaris 10 and earlier.
 */
#define	IS_IPV4_TUN(name)	(strcmp((name), "ip.tun") == 0)
#define	IS_IPV6_TUN(name)	(strcmp((name), "ip6.tun") == 0)
#define	IS_6TO4_TUN(name)	(strcmp((name), "ip.6to4tun") == 0)

/* True if the name matches any of the three legacy tunnel names. */
#define	IS_IPTUN_LINK(name)						\
	(IS_IPV4_TUN(name) || IS_IPV6_TUN(name) || IS_6TO4_TUN(name))
161 
/* dls_devnet_t dd_flags */
#define	DD_CONDEMNED		0x1
#define	DD_IMPLICIT_IPTUN	0x2 /* Implicitly-created ip*.*tun* tunnel */
#define	DD_INITIALIZING		0x4

/*
 * If the link is marked as initializing or condemned then it should
 * not be visible outside of the DLS framework.
 *
 * The argument is parenthesized so that an expression argument (for example
 * "a | b") is masked as a whole rather than being split by the higher
 * precedence of '&'.
 */
#define	DD_NOT_VISIBLE(flags)	(					\
	((flags) & (DD_CONDEMNED | DD_INITIALIZING)) != 0)
173 
174 /*
175  * This structure is used to keep the <linkid, macname> mapping.
176  * This structure itself is not protected by the mac perimeter, but is
177  * protected by the dd_mutex and i_dls_devnet_hash_lock. Thus most of the
178  * functions manipulating this structure such as dls_devnet_set/unset etc.
179  * may be called while not holding the mac perimeter.
180  */
/*
 * One entry per <linkid, macname> association. Entries are allocated from
 * i_dls_devnet_cachep and indexed by dd_mac in i_dls_devnet_hash and, once a
 * linkid has been assigned, by dd_linkid in i_dls_devnet_id_hash.
 */
181 typedef struct dls_devnet_s {
182 	datalink_id_t	dd_linkid;	/* key in i_dls_devnet_id_hash */
183 	char		dd_linkname[MAXLINKNAMELEN];	/* name from dlmgmtd */
184 	char		dd_mac[MAXNAMELEN];	/* key in i_dls_devnet_hash */
185 	kstat_t		*dd_ksp;	/* kstat in owner_zid */
186 	kstat_t		*dd_zone_ksp;	/* in dd_zid if != owner_zid */
187 	uint32_t	dd_ref;		/* reference count */
188 	kmutex_t	dd_mutex;	/* held around dd_tref/prop updates */
189 	kcondvar_t	dd_cv;		/* signalled on tref/prop changes */
190 	uint32_t	dd_tref;	/* temporary reference count */
191 	uint_t		dd_flags;	/* DD_* flags */
192 	zoneid_t	dd_owner_zid;	/* zone where node was created */
193 	zoneid_t	dd_zid;		/* current zone */
194 	boolean_t	dd_prop_loaded;	/* set by dls_devnet_prop_task() */
195 	taskqid_t	dd_prop_taskid;	/* 0 when no prop task is pending */
196 	boolean_t	dd_transient;	/* link goes away when zone does */
197 } dls_devnet_t;
198 
/* Forward declarations of file-local helpers used before their definitions. */
199 static int i_dls_devnet_create_iptun(const char *, const char *,
200     datalink_id_t *);
201 static int i_dls_devnet_destroy_iptun(datalink_id_t);
202 static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t);
203 static int dls_devnet_unset(mac_handle_t, datalink_id_t *, boolean_t);
204 
205 /*ARGSUSED*/
206 static int
i_dls_devnet_constructor(void * buf,void * arg,int kmflag)207 i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
208 {
209 	dls_devnet_t	*ddp = buf;
210 
211 	bzero(buf, sizeof (dls_devnet_t));
212 	mutex_init(&ddp->dd_mutex, NULL, MUTEX_DEFAULT, NULL);
213 	cv_init(&ddp->dd_cv, NULL, CV_DEFAULT, NULL);
214 	return (0);
215 }
216 
217 /*ARGSUSED*/
218 static void
i_dls_devnet_destructor(void * buf,void * arg)219 i_dls_devnet_destructor(void *buf, void *arg)
220 {
221 	dls_devnet_t	*ddp = buf;
222 
223 	VERIFY(ddp->dd_ksp == NULL);
224 	VERIFY(ddp->dd_ref == 0);
225 	VERIFY(ddp->dd_tref == 0);
226 	mutex_destroy(&ddp->dd_mutex);
227 	cv_destroy(&ddp->dd_cv);
228 }
229 
230 /* ARGSUSED */
231 static int
dls_zone_remove(datalink_id_t linkid,void * arg)232 dls_zone_remove(datalink_id_t linkid, void *arg)
233 {
234 	dls_devnet_t *ddp;
235 
236 	if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
237 		/*
238 		 * Don't bother moving transient links back to the global zone
239 		 * since we will simply delete them in dls_devnet_unset.
240 		 */
241 		if (!ddp->dd_transient)
242 			(void) dls_devnet_setzid(ddp, GLOBAL_ZONEID);
243 		dls_devnet_rele_tmp(ddp);
244 	}
245 	return (0);
246 }
247 
248 /* ARGSUSED */
249 static void *
dls_stack_init(netstackid_t stackid,netstack_t * ns)250 dls_stack_init(netstackid_t stackid, netstack_t *ns)
251 {
252 	dls_stack_t *dlss;
253 
254 	dlss = kmem_zalloc(sizeof (*dlss), KM_SLEEP);
255 	dlss->dlss_zoneid = netstackid_to_zoneid(stackid);
256 	return (dlss);
257 }
258 
259 /* ARGSUSED */
260 static void
dls_stack_shutdown(netstackid_t stackid,void * arg)261 dls_stack_shutdown(netstackid_t stackid, void *arg)
262 {
263 	dls_stack_t	*dlss = (dls_stack_t *)arg;
264 
265 	/* Move remaining datalinks in this zone back to the global zone. */
266 	(void) zone_datalink_walk(dlss->dlss_zoneid, dls_zone_remove, NULL);
267 }
268 
269 /* ARGSUSED */
270 static void
dls_stack_fini(netstackid_t stackid,void * arg)271 dls_stack_fini(netstackid_t stackid, void *arg)
272 {
273 	dls_stack_t	*dlss = (dls_stack_t *)arg;
274 
275 	kmem_free(dlss, sizeof (*dlss));
276 }
277 
278 /*
279  * Module initialization and finalization functions.
280  */
281 void
dls_mgmt_init(void)282 dls_mgmt_init(void)
283 {
284 	mutex_init(&i_dls_mgmt_lock, NULL, MUTEX_DEFAULT, NULL);
285 	mutex_init(&i_dls_devnet_lock, NULL, MUTEX_DEFAULT, NULL);
286 	cv_init(&i_dls_devnet_cv, NULL, CV_DEFAULT, NULL);
287 	i_dls_devnet_own = NULL;
288 	rw_init(&i_dls_devnet_hash_lock, NULL, RW_DEFAULT, NULL);
289 
290 	/*
291 	 * Create a kmem_cache of dls_devnet_t structures.
292 	 */
293 	i_dls_devnet_cachep = kmem_cache_create("dls_devnet_cache",
294 	    sizeof (dls_devnet_t), 0, i_dls_devnet_constructor,
295 	    i_dls_devnet_destructor, NULL, NULL, NULL, 0);
296 	ASSERT(i_dls_devnet_cachep != NULL);
297 
298 	/*
299 	 * Create a hash table, keyed by dd_linkid, of dls_devnet_t.
300 	 */
301 	i_dls_devnet_id_hash = mod_hash_create_idhash("dls_devnet_id_hash",
302 	    VLAN_HASHSZ, mod_hash_null_valdtor);
303 
304 	/*
305 	 * Create a hash table, keyed by dd_mac
306 	 */
307 	i_dls_devnet_hash = mod_hash_create_extended("dls_devnet_hash",
308 	    VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
309 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
310 
311 	devnet_need_rebuild = B_FALSE;
312 
313 	netstack_register(NS_DLS, dls_stack_init, dls_stack_shutdown,
314 	    dls_stack_fini);
315 }
316 
317 void
dls_mgmt_fini(void)318 dls_mgmt_fini(void)
319 {
320 	netstack_unregister(NS_DLS);
321 	mod_hash_destroy_hash(i_dls_devnet_hash);
322 	mod_hash_destroy_hash(i_dls_devnet_id_hash);
323 	kmem_cache_destroy(i_dls_devnet_cachep);
324 	rw_destroy(&i_dls_devnet_hash_lock);
325 	cv_destroy(&i_dls_devnet_cv);
326 	mutex_destroy(&i_dls_devnet_lock);
327 	mutex_destroy(&i_dls_mgmt_lock);
328 }
329 
330 int
dls_mgmt_door_set(boolean_t start)331 dls_mgmt_door_set(boolean_t start)
332 {
333 	int	err;
334 
335 	/* handle daemon restart */
336 	mutex_enter(&i_dls_mgmt_lock);
337 	if (dls_mgmt_dh != NULL) {
338 		door_ki_rele(dls_mgmt_dh);
339 		dls_mgmt_dh = NULL;
340 	}
341 
342 	if (start && ((err = door_ki_open(DLMGMT_DOOR, &dls_mgmt_dh)) != 0)) {
343 		mutex_exit(&i_dls_mgmt_lock);
344 		return (err);
345 	}
346 
347 	mutex_exit(&i_dls_mgmt_lock);
348 
349 	/*
350 	 * Create and associate <link name, linkid> mapping for network devices
351 	 * which are already attached before the daemon is started.
352 	 */
353 	if (start)
354 		softmac_recreate();
355 	return (0);
356 }
357 
358 static boolean_t
i_dls_mgmt_door_revoked(door_handle_t dh)359 i_dls_mgmt_door_revoked(door_handle_t dh)
360 {
361 	struct door_info info;
362 	extern int sys_shutdown;
363 
364 	ASSERT(dh != NULL);
365 
366 	if (sys_shutdown) {
367 		cmn_err(CE_NOTE, "dls_mgmt_door: shutdown observed\n");
368 		return (B_TRUE);
369 	}
370 
371 	if (door_ki_info(dh, &info) != 0)
372 		return (B_TRUE);
373 
374 	return ((info.di_attributes & DOOR_REVOKED) != 0);
375 }
376 
377 /*
378  * Upcall to the datalink management daemon (dlmgmtd).
379  */
380 static int
i_dls_mgmt_upcall(void * arg,size_t asize,void * rbuf,size_t rsize)381 i_dls_mgmt_upcall(void *arg, size_t asize, void *rbuf, size_t rsize)
382 {
383 	door_arg_t			darg, save_arg;
384 	door_handle_t			dh;
385 	int				err;
386 	int				retry = 0;
387 
388 #define	MAXRETRYNUM	3
389 
390 	ASSERT(arg);
391 	darg.data_ptr = arg;
392 	darg.data_size = asize;
393 	darg.desc_ptr = NULL;
394 	darg.desc_num = 0;
395 	darg.rbuf = rbuf;
396 	darg.rsize = rsize;
397 	save_arg = darg;
398 
399 retry:
400 	mutex_enter(&i_dls_mgmt_lock);
401 	dh = dls_mgmt_dh;
402 	if ((dh == NULL) || i_dls_mgmt_door_revoked(dh)) {
403 		mutex_exit(&i_dls_mgmt_lock);
404 		return (EBADF);
405 	}
406 	door_ki_hold(dh);
407 	mutex_exit(&i_dls_mgmt_lock);
408 
409 	for (;;) {
410 		retry++;
411 		if ((err = door_ki_upcall_limited(dh, &darg, zone_kcred(),
412 		    SIZE_MAX, 0)) == 0)
413 			break;
414 
415 		/*
416 		 * handle door call errors
417 		 */
418 		darg = save_arg;
419 		switch (err) {
420 		case EINTR:
421 			/*
422 			 * If the operation which caused this door upcall gets
423 			 * interrupted, return directly.
424 			 */
425 			goto done;
426 		case EAGAIN:
427 			/*
428 			 * Repeat upcall if the maximum attempt limit has not
429 			 * been reached.
430 			 */
431 			if (retry < MAXRETRYNUM) {
432 				delay(2 * hz);
433 				break;
434 			}
435 			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
436 			goto done;
437 		default:
438 			/* A fatal door error */
439 			if (i_dls_mgmt_door_revoked(dh)) {
440 				cmn_err(CE_NOTE,
441 				    "dls: dlmgmtd door service revoked\n");
442 
443 				if (retry < MAXRETRYNUM) {
444 					door_ki_rele(dh);
445 					goto retry;
446 				}
447 			}
448 			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
449 			goto done;
450 		}
451 	}
452 
453 	if (darg.rbuf != rbuf) {
454 		/*
455 		 * The size of the input rbuf was not big enough, so the
456 		 * upcall allocated the rbuf itself.  If this happens, assume
457 		 * that this was an invalid door call request.
458 		 */
459 		kmem_free(darg.rbuf, darg.rsize);
460 		err = ENOSPC;
461 		goto done;
462 	}
463 
464 	if (darg.rsize != rsize) {
465 		err = EINVAL;
466 		goto done;
467 	}
468 
469 	err = ((dlmgmt_retval_t *)rbuf)->lr_err;
470 
471 done:
472 	door_ki_rele(dh);
473 	return (err);
474 }
475 
476 /*
477  * Request the datalink management daemon to create a link with the attributes
478  * below.  Upon success, zero is returned and linkidp contains the linkid for
479  * the new link; otherwise, an errno is returned.
480  *
481  *     - dev		physical dev_t.  required for all physical links,
482  *		        including GLDv3 links.  It will be used to force the
483  *		        attachment of a physical device, hence the
484  *		        registration of its mac
485  *     - class		datalink class
486  *     - media type	media type; DL_OTHER means unknown
487  *     - persist	whether to persist the datalink
488  */
489 int
dls_mgmt_create(const char * devname,dev_t dev,datalink_class_t class,uint32_t media,boolean_t persist,datalink_id_t * linkidp)490 dls_mgmt_create(const char *devname, dev_t dev, datalink_class_t class,
491     uint32_t media, boolean_t persist, datalink_id_t *linkidp)
492 {
493 	dlmgmt_upcall_arg_create_t	create;
494 	dlmgmt_create_retval_t		retval;
495 	int				err;
496 
497 	create.ld_cmd = DLMGMT_CMD_DLS_CREATE;
498 	create.ld_class = class;
499 	create.ld_media = media;
500 	create.ld_phymaj = getmajor(dev);
501 	create.ld_phyinst = getminor(dev);
502 	create.ld_persist = persist;
503 	if (strlcpy(create.ld_devname, devname, sizeof (create.ld_devname)) >=
504 	    sizeof (create.ld_devname))
505 		return (EINVAL);
506 
507 	if ((err = i_dls_mgmt_upcall(&create, sizeof (create), &retval,
508 	    sizeof (retval))) == 0) {
509 		*linkidp = retval.lr_linkid;
510 	}
511 	return (err);
512 }
513 
514 /*
515  * Request the datalink management daemon to destroy the specified link.
516  * Returns zero upon success, or an errno upon failure.
517  */
518 int
dls_mgmt_destroy(datalink_id_t linkid,boolean_t persist)519 dls_mgmt_destroy(datalink_id_t linkid, boolean_t persist)
520 {
521 	dlmgmt_upcall_arg_destroy_t	destroy;
522 	dlmgmt_destroy_retval_t		retval;
523 
524 	destroy.ld_cmd = DLMGMT_CMD_DLS_DESTROY;
525 	destroy.ld_linkid = linkid;
526 	destroy.ld_persist = persist;
527 
528 	return (i_dls_mgmt_upcall(&destroy, sizeof (destroy),
529 	    &retval, sizeof (retval)));
530 }
531 
532 /*
533  * Request the datalink management daemon to verify/update the information
534  * for a physical link.  Upon success, get its linkid.
535  *
536  *     - media type	media type
537  *     - novanity	whether this physical datalink supports vanity naming.
538  *			physical links that do not use the GLDv3 MAC plugin
539  *			cannot suport vanity naming
540  *
541  * This function could fail with ENOENT or EEXIST.  Two cases return EEXIST:
542  *
543  * 1. A link with devname already exists, but the media type does not match.
544  *    In this case, mediap will bee set to the media type of the existing link.
545  * 2. A link with devname already exists, but its link name does not match
546  *    the device name, although this link does not support vanity naming.
547  */
548 int
dls_mgmt_update(const char * devname,uint32_t media,boolean_t novanity,uint32_t * mediap,datalink_id_t * linkidp)549 dls_mgmt_update(const char *devname, uint32_t media, boolean_t novanity,
550     uint32_t *mediap, datalink_id_t *linkidp)
551 {
552 	dlmgmt_upcall_arg_update_t	update;
553 	dlmgmt_update_retval_t		retval;
554 	int				err;
555 
556 	update.ld_cmd = DLMGMT_CMD_DLS_UPDATE;
557 
558 	if (strlcpy(update.ld_devname, devname, sizeof (update.ld_devname)) >=
559 	    sizeof (update.ld_devname))
560 		return (EINVAL);
561 
562 	update.ld_media = media;
563 	update.ld_novanity = novanity;
564 
565 	if ((err = i_dls_mgmt_upcall(&update, sizeof (update), &retval,
566 	    sizeof (retval))) == EEXIST) {
567 		*linkidp = retval.lr_linkid;
568 		*mediap = retval.lr_media;
569 	} else if (err == 0) {
570 		*linkidp = retval.lr_linkid;
571 	}
572 
573 	return (err);
574 }
575 
576 /*
577  * Request the datalink management daemon to get the information for a link.
578  * Returns zero upon success, or an errno upon failure.
579  *
580  * Only fills in information for argument pointers that are non-NULL.
581  * Note that the link argument is expected to be MAXLINKNAMELEN bytes.
582  */
583 int
dls_mgmt_get_linkinfo(datalink_id_t linkid,char * link,datalink_class_t * classp,uint32_t * mediap,uint32_t * flagsp)584 dls_mgmt_get_linkinfo(datalink_id_t linkid, char *link,
585     datalink_class_t *classp, uint32_t *mediap, uint32_t *flagsp)
586 {
587 	dlmgmt_door_getname_t	getname;
588 	dlmgmt_getname_retval_t	retval;
589 	int			err, len;
590 
591 	getname.ld_cmd = DLMGMT_CMD_GETNAME;
592 	getname.ld_linkid = linkid;
593 
594 	if ((err = i_dls_mgmt_upcall(&getname, sizeof (getname), &retval,
595 	    sizeof (retval))) != 0) {
596 		return (err);
597 	}
598 
599 	len = strlen(retval.lr_link);
600 	if (len <= 1 || len >= MAXLINKNAMELEN)
601 		return (EINVAL);
602 
603 	if (link != NULL)
604 		(void) strlcpy(link, retval.lr_link, MAXLINKNAMELEN);
605 	if (classp != NULL)
606 		*classp = retval.lr_class;
607 	if (mediap != NULL)
608 		*mediap = retval.lr_media;
609 	if (flagsp != NULL)
610 		*flagsp = retval.lr_flags;
611 	return (0);
612 }
613 
614 /*
615  * Request the datalink management daemon to get the linkid for a link.
616  * Returns a non-zero error code on failure.  The linkid argument is only
617  * set on success (when zero is returned.)
618  */
619 int
dls_mgmt_get_linkid(const char * link,datalink_id_t * linkid)620 dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
621 {
622 	dlmgmt_door_getlinkid_t		getlinkid;
623 	dlmgmt_getlinkid_retval_t	retval;
624 	int				err;
625 
626 	getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
627 	(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
628 
629 	if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
630 	    sizeof (retval))) == 0) {
631 		*linkid = retval.lr_linkid;
632 	}
633 	return (err);
634 }
635 
636 datalink_id_t
dls_mgmt_get_next(datalink_id_t linkid,datalink_class_t class,datalink_media_t dmedia,uint32_t flags)637 dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class,
638     datalink_media_t dmedia, uint32_t flags)
639 {
640 	dlmgmt_door_getnext_t	getnext;
641 	dlmgmt_getnext_retval_t	retval;
642 
643 	getnext.ld_cmd = DLMGMT_CMD_GETNEXT;
644 	getnext.ld_class = class;
645 	getnext.ld_dmedia = dmedia;
646 	getnext.ld_flags = flags;
647 	getnext.ld_linkid = linkid;
648 
649 	if (i_dls_mgmt_upcall(&getnext, sizeof (getnext), &retval,
650 	    sizeof (retval)) != 0) {
651 		return (DATALINK_INVALID_LINKID);
652 	}
653 
654 	return (retval.lr_linkid);
655 }
656 
657 static int
i_dls_mgmt_get_linkattr(const datalink_id_t linkid,const char * attr,void * attrval,size_t * attrszp)658 i_dls_mgmt_get_linkattr(const datalink_id_t linkid, const char *attr,
659     void *attrval, size_t *attrszp)
660 {
661 	dlmgmt_upcall_arg_getattr_t	getattr;
662 	dlmgmt_getattr_retval_t		retval;
663 	int				err;
664 
665 	getattr.ld_cmd = DLMGMT_CMD_DLS_GETATTR;
666 	getattr.ld_linkid = linkid;
667 	(void) strlcpy(getattr.ld_attr, attr, MAXLINKATTRLEN);
668 
669 	if ((err = i_dls_mgmt_upcall(&getattr, sizeof (getattr), &retval,
670 	    sizeof (retval))) == 0) {
671 		if (*attrszp < retval.lr_attrsz)
672 			return (EINVAL);
673 		*attrszp = retval.lr_attrsz;
674 		bcopy(retval.lr_attrval, attrval, retval.lr_attrsz);
675 	}
676 
677 	return (err);
678 }
679 
680 /*
681  * Note that this function can only get devp successfully for non-VLAN link.
682  */
683 int
dls_mgmt_get_phydev(datalink_id_t linkid,dev_t * devp)684 dls_mgmt_get_phydev(datalink_id_t linkid, dev_t *devp)
685 {
686 	uint64_t	maj, inst;
687 	size_t		attrsz = sizeof (uint64_t);
688 
689 	if (i_dls_mgmt_get_linkattr(linkid, FPHYMAJ, &maj, &attrsz) != 0 ||
690 	    attrsz != sizeof (uint64_t) ||
691 	    i_dls_mgmt_get_linkattr(linkid, FPHYINST, &inst, &attrsz) != 0 ||
692 	    attrsz != sizeof (uint64_t)) {
693 		return (EINVAL);
694 	}
695 
696 	*devp = makedevice((major_t)maj, (minor_t)inst);
697 	return (0);
698 }
699 
700 /*
701  * Request the datalink management daemon to push in
702  * all properties associated with the link.
703  * Returns a non-zero error code on failure.
704  */
705 int
dls_mgmt_linkprop_init(datalink_id_t linkid)706 dls_mgmt_linkprop_init(datalink_id_t linkid)
707 {
708 	dlmgmt_door_linkprop_init_t	li;
709 	dlmgmt_linkprop_init_retval_t	retval;
710 	int				err;
711 
712 	li.ld_cmd = DLMGMT_CMD_LINKPROP_INIT;
713 	li.ld_linkid = linkid;
714 
715 	err = i_dls_mgmt_upcall(&li, sizeof (li), &retval, sizeof (retval));
716 	return (err);
717 }
718 
719 static void
dls_devnet_prop_task(void * arg)720 dls_devnet_prop_task(void *arg)
721 {
722 	dls_devnet_t		*ddp = arg;
723 
724 	(void) dls_mgmt_linkprop_init(ddp->dd_linkid);
725 
726 	mutex_enter(&ddp->dd_mutex);
727 	ddp->dd_prop_loaded = B_TRUE;
728 	ddp->dd_prop_taskid = 0;
729 	cv_broadcast(&ddp->dd_cv);
730 	mutex_exit(&ddp->dd_mutex);
731 }
732 
733 /*
734  * Ensure property loading task is completed.
735  */
736 void
dls_devnet_prop_task_wait(dls_dl_handle_t ddp)737 dls_devnet_prop_task_wait(dls_dl_handle_t ddp)
738 {
739 	mutex_enter(&ddp->dd_mutex);
740 	while (ddp->dd_prop_taskid != 0)
741 		cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
742 	mutex_exit(&ddp->dd_mutex);
743 }
744 
745 void
dls_devnet_rele_tmp(dls_dl_handle_t dlh)746 dls_devnet_rele_tmp(dls_dl_handle_t dlh)
747 {
748 	dls_devnet_t		*ddp = dlh;
749 
750 	mutex_enter(&ddp->dd_mutex);
751 	ASSERT(ddp->dd_tref != 0);
752 	if (--ddp->dd_tref == 0)
753 		cv_signal(&ddp->dd_cv);
754 	mutex_exit(&ddp->dd_mutex);
755 }
756 
757 int
dls_devnet_hold_link(datalink_id_t linkid,dls_dl_handle_t * ddhp,dls_link_t ** dlpp)758 dls_devnet_hold_link(datalink_id_t linkid, dls_dl_handle_t *ddhp,
759     dls_link_t **dlpp)
760 {
761 	dls_dl_handle_t	dlh;
762 	dls_link_t	*dlp;
763 	int		err;
764 
765 	if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
766 		return (err);
767 
768 	if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) {
769 		dls_devnet_rele_tmp(dlh);
770 		return (err);
771 	}
772 
773 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
774 
775 	*ddhp = dlh;
776 	*dlpp = dlp;
777 	return (0);
778 }
779 
780 void
dls_devnet_rele_link(dls_dl_handle_t dlh,dls_link_t * dlp)781 dls_devnet_rele_link(dls_dl_handle_t dlh, dls_link_t *dlp)
782 {
783 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
784 
785 	dls_link_rele(dlp);
786 	dls_devnet_rele_tmp(dlh);
787 }
788 
789 /*
790  * "link" kstats related functions.
791  */
792 
793 /*
794  * Query the "link" kstats.
795  *
796  * We may be called from the kstat subsystem in an arbitrary context.
797  * If the caller is the stack, the context could be an upcall data
798  * thread. Hence we can't acquire the mac perimeter in this function
799  * for fear of deadlock.
800  */
801 static int
dls_devnet_stat_update(kstat_t * ksp,int rw)802 dls_devnet_stat_update(kstat_t *ksp, int rw)
803 {
804 	datalink_id_t	linkid = (datalink_id_t)(uintptr_t)ksp->ks_private;
805 	dls_devnet_t	*ddp;
806 	dls_link_t	*dlp;
807 	int		err;
808 
809 	if ((err = dls_devnet_hold_tmp(linkid, &ddp)) != 0) {
810 		return (err);
811 	}
812 
813 	/*
814 	 * If a device detach happens at this time, it will block in
815 	 * dls_devnet_unset since the dd_tref has been bumped in
816 	 * dls_devnet_hold_tmp(). So the access to 'dlp' is safe even though
817 	 * we don't hold the mac perimeter.
818 	 */
819 	if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)ddp->dd_mac,
820 	    (mod_hash_val_t *)&dlp) != 0) {
821 		dls_devnet_rele_tmp(ddp);
822 		return (ENOENT);
823 	}
824 
825 	err = dls_stat_update(ksp, dlp, rw);
826 
827 	dls_devnet_rele_tmp(ddp);
828 	return (err);
829 }
830 
831 /*
832  * Create the "link" kstats.
833  */
834 static void
dls_devnet_stat_create(dls_devnet_t * ddp,zoneid_t zoneid)835 dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid)
836 {
837 	kstat_t	*ksp;
838 
839 	if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid,
840 	    dls_devnet_stat_update, (void *)(uintptr_t)ddp->dd_linkid,
841 	    &ksp) == 0) {
842 		ASSERT(ksp != NULL);
843 		if (zoneid == ddp->dd_owner_zid) {
844 			ASSERT(ddp->dd_ksp == NULL);
845 			ddp->dd_ksp = ksp;
846 		} else {
847 			ASSERT(ddp->dd_zone_ksp == NULL);
848 			ddp->dd_zone_ksp = ksp;
849 		}
850 	}
851 }
852 
853 /*
854  * Destroy the "link" kstats.
855  */
856 static void
dls_devnet_stat_destroy(dls_devnet_t * ddp,zoneid_t zoneid)857 dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
858 {
859 	if (zoneid == ddp->dd_owner_zid) {
860 		if (ddp->dd_ksp != NULL) {
861 			kstat_delete(ddp->dd_ksp);
862 			ddp->dd_ksp = NULL;
863 		}
864 	} else {
865 		if (ddp->dd_zone_ksp != NULL) {
866 			kstat_delete(ddp->dd_zone_ksp);
867 			ddp->dd_zone_ksp = NULL;
868 		}
869 	}
870 }
871 
872 /*
873  * The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
874  * and create the new set using the new name.
875  */
876 static void
dls_devnet_stat_rename(dls_devnet_t * ddp)877 dls_devnet_stat_rename(dls_devnet_t *ddp)
878 {
879 	if (ddp->dd_ksp != NULL) {
880 		kstat_delete(ddp->dd_ksp);
881 		ddp->dd_ksp = NULL;
882 	}
883 	/* We can't rename a link while it's assigned to a non-global zone. */
884 	ASSERT(ddp->dd_zone_ksp == NULL);
885 	dls_devnet_stat_create(ddp, ddp->dd_owner_zid);
886 }
887 
888 /*
889  * Associate the linkid with the link identified by macname. If this
890  * is called on behalf of a physical link then linkid may be
891  * DATALINK_INVALID_LINKID. Otherwise, if called on behalf of a
892  * virtual link, linkid must have a value.
893  */
894 static int
dls_devnet_set(mac_handle_t mh,datalink_id_t linkid,zoneid_t zoneid,dls_devnet_t ** ddpp)895 dls_devnet_set(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid,
896     dls_devnet_t **ddpp)
897 {
898 	const char		*macname = mac_name(mh);
899 	dls_devnet_t		*ddp = NULL;
900 	datalink_class_t	class;
901 	int			err;
902 	boolean_t		stat_create = B_FALSE;
903 	char			linkname[MAXLINKNAMELEN];
904 
905 	i_dls_devnet_lock_enter();
906 
907 	/*
908 	 * Don't allow callers to set a link name with a linkid that already
909 	 * has a name association (that's what rename is for).
910 	 */
911 	if (linkid != DATALINK_INVALID_LINKID) {
912 		/*
913 		 * This temporary read access is valid, as no other set/rename
914 		 * operation can attempt an insert on the same linkid while
915 		 * i_dls_devnet_lock is held.
916 		 */
917 		i_dls_devnet_hashmap_read();
918 		if (mod_hash_find(i_dls_devnet_id_hash,
919 		    (mod_hash_key_t)(uintptr_t)linkid,
920 		    (mod_hash_val_t *)&ddp) == 0) {
921 			err = EEXIST;
922 			goto done;
923 		}
924 		i_dls_devnet_hashmap_exit();
925 
926 		i_dls_devnet_lock_upcall_start();
927 		err = dls_mgmt_get_linkinfo(linkid, linkname, &class,
928 		    NULL, NULL);
929 		i_dls_devnet_lock_upcall_end();
930 
931 		if (err != 0)
932 			goto done_rw_unlocked;
933 	}
934 
935 	i_dls_devnet_hashmap_write();
936 
937 	if ((err = mod_hash_find(i_dls_devnet_hash,
938 	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) == 0) {
939 		if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
940 			err = EEXIST;
941 			goto done;
942 		}
943 
944 		/*
945 		 * If we arrive here we know we are attempting to set
946 		 * the linkid on a physical link. A virtual link
947 		 * should never arrive here because it should never
948 		 * call this function without a linkid. Virtual links
949 		 * are created through dlmgmtd and thus we know
950 		 * dlmgmtd is alive to assign it a linkid (search for
951 		 * uses of dladm_create_datalink_id() to prove this to
952 		 * yourself); we don't have the same guarantee for a
953 		 * physical link which may perform an upcall for a
954 		 * linkid while dlmgmtd is down but will continue
955 		 * creating a devnet without the linkid (see
956 		 * softmac_create_datalink() to see how physical link
957 		 * creation works). That is why there is no entry in
958 		 * the id hash but there is one in the macname hash --
959 		 * softmac couldn't acquire a linkid the first time it
960 		 * called this function.
961 		 *
962 		 * Because of the check above, we also know that
963 		 * ddp->dd_linkid is not set. Following this, the link
964 		 * must still be in the DD_INITIALIZING state because
965 		 * that flag is removed IFF dd_linkid is set. This is
966 		 * why we can ASSERT the DD_INITIALIZING flag below if
967 		 * the call to i_dls_devnet_setzid() fails.
968 		 */
969 		if (linkid == DATALINK_INVALID_LINKID ||
970 		    class != DATALINK_CLASS_PHYS) {
971 			err = EINVAL;
972 			goto done;
973 		}
974 
975 		ASSERT(ddp->dd_flags & DD_INITIALIZING);
976 	} else {
977 		ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP);
978 		ddp->dd_flags = DD_INITIALIZING;
979 		ddp->dd_tref = 0;
980 		ddp->dd_ref++;
981 		ddp->dd_owner_zid = zoneid;
982 		/*
983 		 * If we are creating a new devnet which will be owned by a NGZ
984 		 * then mark it as transient. This link has never been in the
985 		 * GZ, the GZ will not have a hold on its reference, and we do
986 		 * not want to return it to the GZ when the zone halts.
987 		 */
988 		if (zoneid != GLOBAL_ZONEID)
989 			ddp->dd_transient = B_TRUE;
990 		(void) strlcpy(ddp->dd_mac, macname, sizeof (ddp->dd_mac));
991 		VERIFY(mod_hash_insert(i_dls_devnet_hash,
992 		    (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0);
993 	}
994 
995 	if (linkid != DATALINK_INVALID_LINKID) {
996 		ddp->dd_linkid = linkid;
997 		(void) strlcpy(ddp->dd_linkname, linkname,
998 		    sizeof (ddp->dd_linkname));
999 		VERIFY(mod_hash_insert(i_dls_devnet_id_hash,
1000 		    (mod_hash_key_t)(uintptr_t)linkid,
1001 		    (mod_hash_val_t)ddp) == 0);
1002 		devnet_need_rebuild = B_TRUE;
1003 		stat_create = B_TRUE;
1004 	}
1005 	err = 0;
1006 done:
1007 	/*
1008 	 * It is safe to drop the i_dls_devnet_hash_lock at this point. In the
1009 	 * case of physical devices, the softmac framework will fail the device
1010 	 * detach based on the smac_state or smac_hold_cnt. Other cases like
1011 	 * vnic and aggr use their own scheme to serialize creates and deletes
1012 	 * and ensure that *ddp is valid.
1013 	 */
1014 	i_dls_devnet_hashmap_exit();
1015 done_rw_unlocked:
1016 	i_dls_devnet_lock_exit();
1017 
1018 	if (err == 0 && zoneid != GLOBAL_ZONEID) {
1019 		/*
1020 		 * If this link is being created directly within a non-global
1021 		 * zone, then flag it as transient so that it will be cleaned
1022 		 * up when the zone is shut down.
1023 		 */
1024 		err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE, B_TRUE);
1025 		if (err != 0) {
1026 			/*
1027 			 * At this point the link is marked as
1028 			 * DD_INITIALIZING -- there can be no
1029 			 * outstanding temp refs and therefore no need
1030 			 * to wait for them.
1031 			 */
1032 			ASSERT(ddp->dd_flags & DD_INITIALIZING);
1033 			(void) dls_devnet_unset(mh, &linkid, B_FALSE);
1034 			return (err);
1035 		}
1036 	}
1037 
1038 	if (err == 0) {
1039 		/*
1040 		 * The kstat subsystem holds its own locks (rather perimeter)
1041 		 * before calling the ks_update (dls_devnet_stat_update) entry
1042 		 * point which in turn grabs the i_dls_devnet_hash_lock. So the
1043 		 * lock hierarchy is kstat locks -> i_dls_devnet_hash_lock.
1044 		 */
1045 		if (stat_create)
1046 			dls_devnet_stat_create(ddp, zoneid);
1047 		if (ddpp != NULL)
1048 			*ddpp = ddp;
1049 
1050 		mutex_enter(&ddp->dd_mutex);
1051 		if (linkid != DATALINK_INVALID_LINKID &&
1052 		    !ddp->dd_prop_loaded && ddp->dd_prop_taskid == 0) {
1053 			ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
1054 			    dls_devnet_prop_task, ddp, TQ_SLEEP);
1055 		}
1056 		mutex_exit(&ddp->dd_mutex);
1057 
1058 	}
1059 	return (err);
1060 }
1061 
1062 /*
1063  * Disassociate the linkid from the link identified by macname. If
1064  * wait is B_TRUE, wait until all temporary refs are released and the
1065  * prop task is finished.
1066  *
1067  * If waiting then you SHOULD NOT call this from inside the MAC perim
1068  * as deadlock will ensue. Otherwise, this function is safe to call
1069  * from inside or outside the MAC perim.
1070  */
1071 static int
dls_devnet_unset(mac_handle_t mh,datalink_id_t * id,boolean_t wait)1072 dls_devnet_unset(mac_handle_t mh, datalink_id_t *id, boolean_t wait)
1073 {
1074 	const char	*macname = mac_name(mh);
1075 	dls_devnet_t	*ddp;
1076 	int		err;
1077 	mod_hash_val_t	val;
1078 
1079 	i_dls_devnet_lock_enter();
1080 	i_dls_devnet_hashmap_write();
1081 
1082 	if ((err = mod_hash_find(i_dls_devnet_hash,
1083 	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) != 0) {
1084 		ASSERT(err == MH_ERR_NOTFOUND);
1085 		i_dls_devnet_hashmap_exit();
1086 		return (ENOENT);
1087 	}
1088 
1089 	mutex_enter(&ddp->dd_mutex);
1090 
1091 	/*
1092 	 * Make sure downcalls into softmac_create or softmac_destroy from
1093 	 * devfs don't cv_wait on any devfs related condition for fear of
1094 	 * deadlock. Return EBUSY if the asynchronous thread started for
1095 	 * property loading as part of the post attach hasn't yet completed.
1096 	 */
1097 	VERIFY(ddp->dd_ref != 0);
1098 	if ((ddp->dd_ref != 1) || (!wait &&
1099 	    (ddp->dd_tref != 0 || ddp->dd_prop_taskid != 0))) {
1100 		int zstatus = 0;
1101 
1102 		/*
1103 		 * There are a couple of alternatives that might be going on
1104 		 * here; a) the zone is shutting down and it has a transient
1105 		 * link assigned, in which case we want to clean it up instead
1106 		 * of moving it back to the global zone, or b) its possible
1107 		 * that we're trying to clean up an orphaned vnic that was
1108 		 * delegated to a zone and which wasn't cleaned up properly
1109 		 * when the zone went away.  Check for either of these cases
1110 		 * before we simply return EBUSY.
1111 		 *
1112 		 * zstatus indicates which situation we are dealing with:
1113 		 *	 0 - means return EBUSY
1114 		 *	 1 - means case (a), cleanup transient link
1115 		 *	-1 - means case (b), orphaned VNIC
1116 		 */
1117 		if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) {
1118 			zone_t	*zp;
1119 
1120 			if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) {
1121 				zstatus = -1;
1122 			} else {
1123 				if (ddp->dd_transient) {
1124 					zone_status_t s = zone_status_get(zp);
1125 
1126 					if (s >= ZONE_IS_SHUTTING_DOWN)
1127 						zstatus = 1;
1128 				}
1129 				zone_rele(zp);
1130 			}
1131 		}
1132 
1133 		if (zstatus == 0) {
1134 			mutex_exit(&ddp->dd_mutex);
1135 			i_dls_devnet_hashmap_exit();
1136 			i_dls_devnet_lock_exit();
1137 			return (EBUSY);
1138 		}
1139 
1140 		/*
1141 		 * We want to delete the link, reset ref to 1;
1142 		 */
1143 		if (zstatus == -1) {
1144 			/* Log a warning, but continue in this case */
1145 			cmn_err(CE_WARN, "clear orphaned datalink: %s\n",
1146 			    ddp->dd_linkname);
1147 		}
1148 		ddp->dd_ref = 1;
1149 	}
1150 
1151 	ddp->dd_flags |= DD_CONDEMNED;
1152 	ddp->dd_ref--;
1153 	*id = ddp->dd_linkid;
1154 
1155 	/*
1156 	 * Remove this dls_devnet_t from the hash table.
1157 	 */
1158 	VERIFY(mod_hash_remove(i_dls_devnet_hash,
1159 	    (mod_hash_key_t)ddp->dd_mac, &val) == 0);
1160 
1161 	if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
1162 		VERIFY(mod_hash_remove(i_dls_devnet_id_hash,
1163 		    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, &val) == 0);
1164 
1165 		devnet_need_rebuild = B_TRUE;
1166 	}
1167 
1168 	i_dls_devnet_hashmap_exit();
1169 	i_dls_devnet_lock_exit();
1170 
1171 	/*
1172 	 * It is important to call i_dls_devnet_setzid() WITHOUT the
1173 	 * i_dls_devnet_hash_lock held. The setzid call grabs the MAC
1174 	 * perim; thus causing DLS -> MAC lock ordering if performed
1175 	 * with the i_dls_devnet_hash_lock held. This forces consumers to
1176 	 * grab the MAC perim before calling dls_devnet_unset() (the
1177 	 * locking rules state MAC -> DLS order). By performing the
1178 	 * setzid outside of the i_dls_devnet_hash_lock consumers can
1179 	 * safely call dls_devnet_unset() outside the MAC perim.
1180 	 */
1181 	if (ddp->dd_zid != GLOBAL_ZONEID) {
1182 		/*
1183 		 * We need to release the dd_mutex before we try and destroy the
1184 		 * stat. When we destroy it, we'll need to grab the lock for the
1185 		 * kstat but if there's a concurrent reader of the kstat, we'll
1186 		 * be blocked on it. This will lead to deadlock because these
1187 		 * kstats employ a ks_update function (dls_devnet_stat_update)
1188 		 * which needs the dd_mutex that we currently hold.
1189 		 *
1190 		 * Because we've already flagged the dls_devnet_t as
1191 		 * DD_CONDEMNED and we still have a write lock on
1192 		 * i_dls_devnet_hash_lock, we should be able to release the
1193 		 * dd_mutex.
1194 		 */
1195 		mutex_exit(&ddp->dd_mutex);
1196 		dls_devnet_stat_destroy(ddp, ddp->dd_zid);
1197 		mutex_enter(&ddp->dd_mutex);
1198 		(void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE,
1199 		    B_FALSE);
1200 	}
1201 
1202 	if (wait) {
1203 		/*
1204 		 * Wait until all temporary references are released.
1205 		 * The holders of the tref need the MAC perim to
1206 		 * perform their work and release the tref. To avoid
1207 		 * deadlock, assert that the perim is never held here.
1208 		 */
1209 		ASSERT0(MAC_PERIM_HELD(mh));
1210 		while ((ddp->dd_tref != 0) || (ddp->dd_prop_taskid != 0))
1211 			cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
1212 	} else {
1213 		VERIFY(ddp->dd_tref == 0);
1214 		VERIFY(ddp->dd_prop_taskid == 0);
1215 	}
1216 
1217 	if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
1218 		dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
1219 
1220 	ddp->dd_prop_loaded = B_FALSE;
1221 	ddp->dd_linkid = DATALINK_INVALID_LINKID;
1222 	ddp->dd_flags = 0;
1223 	mutex_exit(&ddp->dd_mutex);
1224 	kmem_cache_free(i_dls_devnet_cachep, ddp);
1225 
1226 	return (0);
1227 }
1228 
1229 /*
1230  * This is a private hold routine used when we already have the dls_link_t, thus
1231  * we know that it cannot go away.
1232  */
1233 int
dls_devnet_hold_tmp_by_link(dls_link_t * dlp,dls_dl_handle_t * ddhp)1234 dls_devnet_hold_tmp_by_link(dls_link_t *dlp, dls_dl_handle_t *ddhp)
1235 {
1236 	int err;
1237 	dls_devnet_t *ddp = NULL;
1238 
1239 	i_dls_devnet_hashmap_read();
1240 	if ((err = mod_hash_find(i_dls_devnet_hash,
1241 	    (mod_hash_key_t)dlp->dl_name, (mod_hash_val_t *)&ddp)) != 0) {
1242 		ASSERT(err == MH_ERR_NOTFOUND);
1243 		i_dls_devnet_hashmap_exit();
1244 		return (ENOENT);
1245 	}
1246 
1247 	mutex_enter(&ddp->dd_mutex);
1248 	VERIFY(ddp->dd_ref > 0);
1249 	if (DD_NOT_VISIBLE(ddp->dd_flags)) {
1250 		mutex_exit(&ddp->dd_mutex);
1251 		i_dls_devnet_hashmap_exit();
1252 		return (ENOENT);
1253 	}
1254 	ddp->dd_tref++;
1255 	mutex_exit(&ddp->dd_mutex);
1256 	i_dls_devnet_hashmap_exit();
1257 
1258 	*ddhp = ddp;
1259 	return (0);
1260 }
1261 
1262 static int
dls_devnet_hold_common(datalink_id_t linkid,dls_devnet_t ** ddpp,boolean_t tmp_hold)1263 dls_devnet_hold_common(datalink_id_t linkid, dls_devnet_t **ddpp,
1264     boolean_t tmp_hold)
1265 {
1266 	dls_devnet_t		*ddp;
1267 	int			err;
1268 
1269 	i_dls_devnet_hashmap_read();
1270 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1271 	    (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&ddp)) != 0) {
1272 		ASSERT(err == MH_ERR_NOTFOUND);
1273 		i_dls_devnet_hashmap_exit();
1274 		return (ENOENT);
1275 	}
1276 
1277 	mutex_enter(&ddp->dd_mutex);
1278 	VERIFY(ddp->dd_ref > 0);
1279 	if (DD_NOT_VISIBLE(ddp->dd_flags)) {
1280 		mutex_exit(&ddp->dd_mutex);
1281 		i_dls_devnet_hashmap_exit();
1282 		return (ENOENT);
1283 	}
1284 	if (tmp_hold)
1285 		ddp->dd_tref++;
1286 	else
1287 		ddp->dd_ref++;
1288 	mutex_exit(&ddp->dd_mutex);
1289 	i_dls_devnet_hashmap_exit();
1290 
1291 	*ddpp = ddp;
1292 	return (0);
1293 }
1294 
1295 int
dls_devnet_hold(datalink_id_t linkid,dls_devnet_t ** ddpp)1296 dls_devnet_hold(datalink_id_t linkid, dls_devnet_t **ddpp)
1297 {
1298 	return (dls_devnet_hold_common(linkid, ddpp, B_FALSE));
1299 }
1300 
1301 /*
1302  * Hold the vanity naming structure (dls_devnet_t) temporarily.  The request to
1303  * delete the dls_devnet_t will wait until the temporary reference is released.
1304  */
1305 int
dls_devnet_hold_tmp(datalink_id_t linkid,dls_devnet_t ** ddpp)1306 dls_devnet_hold_tmp(datalink_id_t linkid, dls_devnet_t **ddpp)
1307 {
1308 	return (dls_devnet_hold_common(linkid, ddpp, B_TRUE));
1309 }
1310 
1311 /*
1312  * This funtion is called when a DLS client tries to open a device node.
1313  * This dev_t could be a result of a /dev/net node access (returned by
1314  * devnet_create_rvp->dls_devnet_open()) or a direct /dev node access.
1315  * In both cases, this function bumps up the reference count of the
1316  * dls_devnet_t structure. The reference is held as long as the device node
1317  * is open. In the case of /dev/net while it is true that the initial reference
1318  * is held when the devnet_create_rvp->dls_devnet_open call happens, this
1319  * initial reference is released immediately in devnet_inactive_callback ->
1320  * dls_devnet_close(). (Note that devnet_inactive_callback() is called right
1321  * after dld_open completes, not when the /dev/net node is being closed).
1322  * To undo this function, call dls_devnet_rele()
1323  */
1324 int
dls_devnet_hold_by_dev(dev_t dev,dls_dl_handle_t * ddhp)1325 dls_devnet_hold_by_dev(dev_t dev, dls_dl_handle_t *ddhp)
1326 {
1327 	char			name[MAXNAMELEN];
1328 	char			*drv;
1329 	dls_devnet_t		*ddp;
1330 	int			err;
1331 
1332 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1333 		return (EINVAL);
1334 
1335 	(void) snprintf(name, sizeof (name), "%s%d", drv,
1336 	    DLS_MINOR2INST(getminor(dev)));
1337 
1338 	i_dls_devnet_hashmap_read();
1339 	if ((err = mod_hash_find(i_dls_devnet_hash,
1340 	    (mod_hash_key_t)name, (mod_hash_val_t *)&ddp)) != 0) {
1341 		ASSERT(err == MH_ERR_NOTFOUND);
1342 		i_dls_devnet_hashmap_exit();
1343 		return (ENOENT);
1344 	}
1345 	mutex_enter(&ddp->dd_mutex);
1346 	VERIFY(ddp->dd_ref > 0);
1347 	if (DD_NOT_VISIBLE(ddp->dd_flags)) {
1348 		mutex_exit(&ddp->dd_mutex);
1349 		i_dls_devnet_hashmap_exit();
1350 		return (ENOENT);
1351 	}
1352 	ddp->dd_ref++;
1353 	mutex_exit(&ddp->dd_mutex);
1354 	i_dls_devnet_hashmap_exit();
1355 
1356 	*ddhp = ddp;
1357 	return (0);
1358 }
1359 
1360 void
dls_devnet_rele(dls_devnet_t * ddp)1361 dls_devnet_rele(dls_devnet_t *ddp)
1362 {
1363 	mutex_enter(&ddp->dd_mutex);
1364 	VERIFY(ddp->dd_ref > 1);
1365 	ddp->dd_ref--;
1366 	if ((ddp->dd_flags & DD_IMPLICIT_IPTUN) && ddp->dd_ref == 1) {
1367 		mutex_exit(&ddp->dd_mutex);
1368 		if (i_dls_devnet_destroy_iptun(ddp->dd_linkid) != 0)
1369 			ddp->dd_flags |= DD_IMPLICIT_IPTUN;
1370 		return;
1371 	}
1372 	mutex_exit(&ddp->dd_mutex);
1373 }
1374 
1375 static int
dls_devnet_hold_by_name(const char * link,dls_devnet_t ** ddpp)1376 dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp)
1377 {
1378 	char			drv[MAXLINKNAMELEN];
1379 	uint_t			ppa;
1380 	major_t			major;
1381 	dev_t			phy_dev, tmp_dev;
1382 	datalink_id_t		linkid;
1383 	dls_dev_handle_t	ddh;
1384 	int			err;
1385 
1386 	if ((err = dls_mgmt_get_linkid(link, &linkid)) == 0)
1387 		return (dls_devnet_hold(linkid, ddpp));
1388 
1389 	/*
1390 	 * If we failed to get the link's linkid because the dlmgmtd daemon
1391 	 * has not been started, return ENOENT so that the application can
1392 	 * fallback to open the /dev node.
1393 	 */
1394 	if (err == EBADF)
1395 		return (ENOENT);
1396 
1397 	if (err != ENOENT)
1398 		return (err);
1399 
1400 	/*
1401 	 * If we reach this point it means dlmgmtd is up but has no
1402 	 * mapping for the link name.
1403 	 */
1404 	if (ddi_parse_dlen(link, drv, MAXLINKNAMELEN, &ppa) != DDI_SUCCESS)
1405 		return (ENOENT);
1406 
1407 	if (IS_IPTUN_LINK(drv)) {
1408 		if ((err = i_dls_devnet_create_iptun(link, drv, &linkid)) != 0)
1409 			return (err);
1410 		/*
1411 		 * At this point, an IP tunnel MAC has registered, which
1412 		 * resulted in a link being created.
1413 		 */
1414 		err = dls_devnet_hold(linkid, ddpp);
1415 		if (err != 0) {
1416 			VERIFY(i_dls_devnet_destroy_iptun(linkid) == 0);
1417 			return (err);
1418 		}
1419 		/*
1420 		 * dls_devnet_rele() will know to destroy the implicit IP
1421 		 * tunnel on last reference release if DD_IMPLICIT_IPTUN is
1422 		 * set.
1423 		 */
1424 		(*ddpp)->dd_flags |= DD_IMPLICIT_IPTUN;
1425 		return (0);
1426 	}
1427 
1428 	/*
1429 	 * If this link:
1430 	 * (a) is a physical device, (b) this is the first boot, (c) the MAC
1431 	 * is not registered yet, and (d) we cannot find its linkid, then the
1432 	 * linkname is the same as the devname.
1433 	 *
1434 	 * First filter out invalid names.
1435 	 */
1436 	if ((major = ddi_name_to_major(drv)) == (major_t)-1)
1437 		return (ENOENT);
1438 
1439 	phy_dev = makedevice(major, DLS_PPA2MINOR(ppa));
1440 	if (softmac_hold_device(phy_dev, &ddh) != 0)
1441 		return (ENOENT);
1442 
1443 	/*
1444 	 * At this time, the MAC should be registered, check its phy_dev using
1445 	 * the given name.
1446 	 */
1447 	if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0 ||
1448 	    (err = dls_mgmt_get_phydev(linkid, &tmp_dev)) != 0) {
1449 		softmac_rele_device(ddh);
1450 		return (err);
1451 	}
1452 	if (tmp_dev != phy_dev) {
1453 		softmac_rele_device(ddh);
1454 		return (ENOENT);
1455 	}
1456 
1457 	err = dls_devnet_hold(linkid, ddpp);
1458 	softmac_rele_device(ddh);
1459 	return (err);
1460 }
1461 
1462 int
dls_devnet_macname2linkid(const char * macname,datalink_id_t * linkidp)1463 dls_devnet_macname2linkid(const char *macname, datalink_id_t *linkidp)
1464 {
1465 	dls_devnet_t	*ddp;
1466 
1467 	i_dls_devnet_hashmap_read();
1468 	if (mod_hash_find(i_dls_devnet_hash, (mod_hash_key_t)macname,
1469 	    (mod_hash_val_t *)&ddp) != 0) {
1470 		i_dls_devnet_hashmap_exit();
1471 		return (ENOENT);
1472 	}
1473 
1474 	*linkidp = ddp->dd_linkid;
1475 	i_dls_devnet_hashmap_exit();
1476 	return (0);
1477 }
1478 
1479 /*
1480  * Get linkid for the given dev.
1481  */
1482 int
dls_devnet_dev2linkid(dev_t dev,datalink_id_t * linkidp)1483 dls_devnet_dev2linkid(dev_t dev, datalink_id_t *linkidp)
1484 {
1485 	char	macname[MAXNAMELEN];
1486 	char	*drv;
1487 
1488 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1489 		return (EINVAL);
1490 
1491 	(void) snprintf(macname, sizeof (macname), "%s%d", drv,
1492 	    DLS_MINOR2INST(getminor(dev)));
1493 	return (dls_devnet_macname2linkid(macname, linkidp));
1494 }
1495 
1496 /*
1497  * Get the link's physical dev_t. It this is a VLAN, get the dev_t of the
1498  * link this VLAN is created on.
1499  */
1500 int
dls_devnet_phydev(datalink_id_t vlanid,dev_t * devp)1501 dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp)
1502 {
1503 	dls_devnet_t	*ddp;
1504 	int		err;
1505 
1506 	if ((err = dls_devnet_hold_tmp(vlanid, &ddp)) != 0)
1507 		return (err);
1508 
1509 	err = dls_mgmt_get_phydev(ddp->dd_linkid, devp);
1510 	dls_devnet_rele_tmp(ddp);
1511 	return (err);
1512 }
1513 
1514 /*
1515  * Handle the renaming requests.  There are two rename cases:
1516  *
1517  * 1. Request to rename a valid link (id1) to an non-existent link name
1518  *    (id2). In this case id2 is DATALINK_INVALID_LINKID.  Just check whether
1519  *    id1 is held by any applications.
1520  *
1521  *    In this case, the link's kstats need to be updated using the given name.
1522  *
1523  * 2. Request to rename a valid link (id1) to the name of a REMOVED
1524  *    physical link (id2). In this case, check that id1 and its associated
1525  *    mac is not held by any application, and update the link's linkid to id2.
1526  *
1527  *    This case does not change the <link name, linkid> mapping, so the link's
1528  *    kstats need to be updated with using name associated the given id2.
1529  */
1530 int
dls_devnet_rename(datalink_id_t id1,datalink_id_t id2,const char * link)1531 dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
1532 {
1533 	dls_dev_handle_t	ddh = NULL;
1534 	int			err = 0;
1535 	dev_t			phydev = 0;
1536 	dls_devnet_t		*ddp;
1537 	mac_perim_handle_t	mph = NULL;
1538 	mac_handle_t		mh;
1539 	mod_hash_val_t		val;
1540 
1541 	/*
1542 	 * In the second case, id2 must be a REMOVED physical link.
1543 	 */
1544 	if ((id2 != DATALINK_INVALID_LINKID) &&
1545 	    (dls_mgmt_get_phydev(id2, &phydev) == 0) &&
1546 	    softmac_hold_device(phydev, &ddh) == 0) {
1547 		softmac_rele_device(ddh);
1548 		return (EEXIST);
1549 	}
1550 
1551 	/*
1552 	 * Hold id1 to prevent it from being detached (if a physical link).
1553 	 */
1554 	if (dls_mgmt_get_phydev(id1, &phydev) == 0)
1555 		(void) softmac_hold_device(phydev, &ddh);
1556 
1557 	/*
1558 	 * The framework does not hold hold locks across calls to the
1559 	 * mac perimeter, hence enter the perimeter first. This also waits
1560 	 * for the property loading to finish.
1561 	 */
1562 	if ((err = mac_perim_enter_by_linkid(id1, &mph)) != 0) {
1563 		softmac_rele_device(ddh);
1564 		return (err);
1565 	}
1566 
1567 	i_dls_devnet_lock_enter();
1568 	i_dls_devnet_hashmap_read();
1569 
1570 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1571 	    (mod_hash_key_t)(uintptr_t)id1, (mod_hash_val_t *)&ddp)) != 0) {
1572 		ASSERT(err == MH_ERR_NOTFOUND);
1573 		err = ENOENT;
1574 		goto done;
1575 	}
1576 
1577 	mutex_enter(&ddp->dd_mutex);
1578 	if (ddp->dd_ref > 1) {
1579 		mutex_exit(&ddp->dd_mutex);
1580 		err = EBUSY;
1581 		goto done;
1582 	}
1583 	mutex_exit(&ddp->dd_mutex);
1584 
1585 	if (id2 == DATALINK_INVALID_LINKID) {
1586 		(void) strlcpy(ddp->dd_linkname, link,
1587 		    sizeof (ddp->dd_linkname));
1588 
1589 		/* rename mac client name and its flow if exists */
1590 		if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1591 			goto done;
1592 		(void) mac_rename_primary(mh, link);
1593 		mac_close(mh);
1594 		goto done;
1595 	}
1596 
1597 	/*
1598 	 * The second case, check whether the MAC is used by any MAC
1599 	 * user.  This must be a physical link so ddh must not be NULL.
1600 	 */
1601 	if (ddh == NULL) {
1602 		err = EINVAL;
1603 		goto done;
1604 	}
1605 
1606 	if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1607 		goto done;
1608 
1609 	/*
1610 	 * We release the reference of the MAC which mac_open() is
1611 	 * holding. Note that this mac will not be unregistered
1612 	 * because the physical device is held.
1613 	 */
1614 	mac_close(mh);
1615 
1616 	/*
1617 	 * Check if there is any other MAC clients, if not, hold this mac
1618 	 * exclusively until we are done.
1619 	 */
1620 	if ((err = mac_mark_exclusive(mh)) != 0)
1621 		goto done;
1622 
1623 	/*
1624 	 * Update the link's linkid.
1625 	 */
1626 	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1627 	    (mod_hash_key_t)(uintptr_t)id2, &val)) != MH_ERR_NOTFOUND) {
1628 		mac_unmark_exclusive(mh);
1629 		err = EEXIST;
1630 		goto done;
1631 	}
1632 
1633 	/*
1634 	 * Temporarily drop the hashmap lock for the upcall -- ddp will remain
1635 	 * valid because we hold i_dls_devnet_lock. Taking this is a
1636 	 * prerequisite for dls_devnet_unset to proceed, and it is the only
1637 	 * pathway through which ddp can be freed.
1638 	 */
1639 	i_dls_devnet_hashmap_exit();
1640 	i_dls_devnet_lock_upcall_start();
1641 	err = dls_mgmt_get_linkinfo(id2, ddp->dd_linkname, NULL, NULL, NULL);
1642 	i_dls_devnet_lock_upcall_end();
1643 
1644 	if (err != 0) {
1645 		mac_unmark_exclusive(mh);
1646 		goto done_rw_unlocked;
1647 	}
1648 
1649 	i_dls_devnet_hashmap_write();
1650 
1651 	(void) mod_hash_remove(i_dls_devnet_id_hash,
1652 	    (mod_hash_key_t)(uintptr_t)id1, &val);
1653 
1654 	ddp->dd_linkid = id2;
1655 	(void) mod_hash_insert(i_dls_devnet_id_hash,
1656 	    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, (mod_hash_val_t)ddp);
1657 
1658 	mac_unmark_exclusive(mh);
1659 
1660 	/* load properties for new id */
1661 	mutex_enter(&ddp->dd_mutex);
1662 	ddp->dd_prop_loaded = B_FALSE;
1663 	ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
1664 	    dls_devnet_prop_task, ddp, TQ_SLEEP);
1665 	mutex_exit(&ddp->dd_mutex);
1666 
1667 done:
1668 	i_dls_devnet_hashmap_exit();
1669 done_rw_unlocked:
1670 	i_dls_devnet_lock_exit();
1671 
1672 	if (err == 0)
1673 		dls_devnet_stat_rename(ddp);
1674 
1675 	if (mph != NULL)
1676 		mac_perim_exit(mph);
1677 	softmac_rele_device(ddh);
1678 	return (err);
1679 }
1680 
1681 static int
i_dls_devnet_setzid(dls_devnet_t * ddp,zoneid_t new_zoneid,boolean_t setprop,boolean_t transient)1682 i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop,
1683     boolean_t transient)
1684 {
1685 	int			err;
1686 	mac_perim_handle_t	mph;
1687 	boolean_t		upcall_done = B_FALSE;
1688 	datalink_id_t		linkid = ddp->dd_linkid;
1689 	zoneid_t		old_zoneid = ddp->dd_zid;
1690 	dlmgmt_door_setzoneid_t	setzid;
1691 	dlmgmt_setzoneid_retval_t retval;
1692 
1693 	if (old_zoneid == new_zoneid)
1694 		return (0);
1695 
1696 	if ((err = mac_perim_enter_by_macname(ddp->dd_mac, &mph)) != 0)
1697 		return (err);
1698 
1699 	/*
1700 	 * When changing the zoneid of an existing link, we need to tell
1701 	 * dlmgmtd about it.  dlmgmtd already knows the zoneid associated with
1702 	 * newly created links.
1703 	 */
1704 	if (setprop) {
1705 		setzid.ld_cmd = DLMGMT_CMD_SETZONEID;
1706 		setzid.ld_linkid = linkid;
1707 		setzid.ld_zoneid = new_zoneid;
1708 		err = i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1709 		    sizeof (retval));
1710 		if (err != 0)
1711 			goto done;
1712 
1713 		/*
1714 		 * We set upcall_done only if the upcall is
1715 		 * successful. This way, if dls_link_setzid() fails,
1716 		 * we know another upcall must be done to reset the
1717 		 * dlmgmtd state.
1718 		 */
1719 		upcall_done = B_TRUE;
1720 	}
1721 	if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
1722 		ddp->dd_zid = new_zoneid;
1723 		ddp->dd_transient = transient;
1724 		devnet_need_rebuild = B_TRUE;
1725 	}
1726 
1727 done:
1728 	if (err != 0 && upcall_done) {
1729 		setzid.ld_zoneid = old_zoneid;
1730 		(void) i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1731 		    sizeof (retval));
1732 	}
1733 	mac_perim_exit(mph);
1734 	return (err);
1735 }
1736 
1737 int
dls_devnet_setzid(dls_dl_handle_t ddh,zoneid_t new_zid)1738 dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
1739 {
1740 	dls_devnet_t	*ddp;
1741 	int		err;
1742 	zoneid_t	old_zid;
1743 	boolean_t	refheld = B_FALSE;
1744 
1745 	old_zid = ddh->dd_zid;
1746 
1747 	if (old_zid == new_zid)
1748 		return (0);
1749 
1750 	/*
1751 	 * Acquire an additional reference to the link if it is being assigned
1752 	 * to a non-global zone from the global zone.
1753 	 */
1754 	if (old_zid == GLOBAL_ZONEID && new_zid != GLOBAL_ZONEID) {
1755 		if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
1756 			return (err);
1757 		refheld = B_TRUE;
1758 	}
1759 
1760 	if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE, B_FALSE)) != 0) {
1761 		if (refheld)
1762 			dls_devnet_rele(ddp);
1763 		return (err);
1764 	}
1765 
1766 	/*
1767 	 * Release the additional reference if the link is returning to the
1768 	 * global zone from a non-global zone.
1769 	 */
1770 	if (old_zid != GLOBAL_ZONEID && new_zid == GLOBAL_ZONEID)
1771 		dls_devnet_rele(ddh);
1772 
1773 	/* Re-create kstats in the appropriate zones. */
1774 	if (old_zid != GLOBAL_ZONEID)
1775 		dls_devnet_stat_destroy(ddh, old_zid);
1776 	if (new_zid != GLOBAL_ZONEID)
1777 		dls_devnet_stat_create(ddh, new_zid);
1778 
1779 	return (0);
1780 }
1781 
1782 zoneid_t
dls_devnet_getzid(dls_dl_handle_t ddh)1783 dls_devnet_getzid(dls_dl_handle_t ddh)
1784 {
1785 	return (((dls_devnet_t *)ddh)->dd_zid);
1786 }
1787 
1788 zoneid_t
dls_devnet_getownerzid(dls_dl_handle_t ddh)1789 dls_devnet_getownerzid(dls_dl_handle_t ddh)
1790 {
1791 	return (((dls_devnet_t *)ddh)->dd_owner_zid);
1792 }
1793 
1794 /*
1795  * Is linkid visible from zoneid?  A link is visible if it was created in the
1796  * zone, or if it is currently assigned to the zone.
1797  */
1798 boolean_t
dls_devnet_islinkvisible(datalink_id_t linkid,zoneid_t zoneid)1799 dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid)
1800 {
1801 	dls_devnet_t	*ddp;
1802 	boolean_t	result;
1803 
1804 	if (dls_devnet_hold_tmp(linkid, &ddp) != 0)
1805 		return (B_FALSE);
1806 	result = (ddp->dd_owner_zid == zoneid || ddp->dd_zid == zoneid);
1807 	dls_devnet_rele_tmp(ddp);
1808 	return (result);
1809 }
1810 
1811 /*
1812  * Access a vanity naming node.
1813  */
1814 int
dls_devnet_open(const char * link,dls_dl_handle_t * dhp,dev_t * devp)1815 dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
1816 {
1817 	dls_devnet_t	*ddp;
1818 	dls_link_t	*dlp;
1819 	zoneid_t	zid = getzoneid();
1820 	int		err;
1821 	mac_perim_handle_t	mph;
1822 
1823 	if ((err = dls_devnet_hold_by_name(link, &ddp)) != 0)
1824 		return (err);
1825 
1826 	dls_devnet_prop_task_wait(ddp);
1827 
1828 	/*
1829 	 * Opening a link that does not belong to the current non-global zone
1830 	 * is not allowed.
1831 	 */
1832 	if (zid != GLOBAL_ZONEID && ddp->dd_zid != zid) {
1833 		dls_devnet_rele(ddp);
1834 		return (ENOENT);
1835 	}
1836 
1837 	err = mac_perim_enter_by_macname(ddp->dd_mac, &mph);
1838 	if (err != 0) {
1839 		dls_devnet_rele(ddp);
1840 		return (err);
1841 	}
1842 
1843 	err = dls_link_hold_create(ddp->dd_mac, &dlp);
1844 	mac_perim_exit(mph);
1845 
1846 	if (err != 0) {
1847 		dls_devnet_rele(ddp);
1848 		return (err);
1849 	}
1850 
1851 	*dhp = ddp;
1852 	*devp = dls_link_dev(dlp);
1853 	return (0);
1854 }
1855 
1856 /*
1857  * Close access to a vanity naming node.
1858  */
1859 void
dls_devnet_close(dls_dl_handle_t dlh)1860 dls_devnet_close(dls_dl_handle_t dlh)
1861 {
1862 	dls_devnet_t	*ddp = dlh;
1863 	dls_link_t	*dlp;
1864 	mac_perim_handle_t	mph;
1865 
1866 	VERIFY(mac_perim_enter_by_macname(ddp->dd_mac, &mph) == 0);
1867 	VERIFY(dls_link_hold(ddp->dd_mac, &dlp) == 0);
1868 
1869 	/*
1870 	 * One rele for the hold placed in dls_devnet_open, another for
1871 	 * the hold done just above
1872 	 */
1873 	dls_link_rele(dlp);
1874 	dls_link_rele(dlp);
1875 	mac_perim_exit(mph);
1876 
1877 	dls_devnet_rele(ddp);
1878 }
1879 
1880 /*
1881  * This is used by /dev/net to rebuild the nodes for readdir().  It is not
1882  * critical and no protection is needed.
1883  */
1884 boolean_t
dls_devnet_rebuild()1885 dls_devnet_rebuild()
1886 {
1887 	boolean_t updated = devnet_need_rebuild;
1888 
1889 	devnet_need_rebuild = B_FALSE;
1890 	return (updated);
1891 }
1892 
1893 int
dls_devnet_create(mac_handle_t mh,datalink_id_t linkid,zoneid_t zoneid)1894 dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid)
1895 {
1896 	dls_link_t	*dlp;
1897 	dls_devnet_t	*ddp;
1898 	int		err;
1899 	mac_perim_handle_t mph;
1900 
1901 	/*
1902 	 * Holding the mac perimeter ensures that the downcall from the
1903 	 * dlmgmt daemon which does the property loading does not proceed
1904 	 * until we relinquish the perimeter.
1905 	 */
1906 	mac_perim_enter_by_mh(mh, &mph);
1907 	/*
1908 	 * Make this association before we call dls_link_hold_create as
1909 	 * we need to use the linkid to get the user name for the link
1910 	 * when we create the MAC client.
1911 	 */
1912 	if ((err = dls_devnet_set(mh, linkid, zoneid, &ddp)) == 0) {
1913 		if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) {
1914 			mac_perim_exit(mph);
1915 			(void) dls_devnet_unset(mh, &linkid, B_FALSE);
1916 			return (err);
1917 		}
1918 
1919 		/*
1920 		 * If dd_linkid is set then the link was successfully
1921 		 * initialized. In this case we can remove the
1922 		 * initializing flag and make the link visible to the
1923 		 * rest of the system.
1924 		 *
1925 		 * If not set then we were called by softmac and it
1926 		 * was unable to obtain a linkid for the physical link
1927 		 * because dlmgmtd is down. In that case softmac will
1928 		 * eventually obtain a linkid and call
1929 		 * dls_devnet_recreate() to complete initialization.
1930 		 */
1931 		mutex_enter(&ddp->dd_mutex);
1932 		if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
1933 			ddp->dd_flags &= ~DD_INITIALIZING;
1934 		mutex_exit(&ddp->dd_mutex);
1935 
1936 	}
1937 
1938 	mac_perim_exit(mph);
1939 	return (err);
1940 }
1941 
1942 /*
1943  * Set the linkid of the dls_devnet_t and add it into the i_dls_devnet_id_hash.
1944  * This is called in the case that the dlmgmtd daemon is started later than
1945  * the physical devices get attached, and the linkid is only known after the
1946  * daemon starts.
1947  */
1948 int
dls_devnet_recreate(mac_handle_t mh,datalink_id_t linkid)1949 dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid)
1950 {
1951 	dls_devnet_t	*ddp;
1952 	int		err;
1953 
1954 	VERIFY(linkid != DATALINK_INVALID_LINKID);
1955 	if ((err = dls_devnet_set(mh, linkid, GLOBAL_ZONEID, &ddp)) == 0) {
1956 		mutex_enter(&ddp->dd_mutex);
1957 		if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
1958 			ddp->dd_flags &= ~DD_INITIALIZING;
1959 		mutex_exit(&ddp->dd_mutex);
1960 	}
1961 
1962 	return (err);
1963 
1964 }
1965 
1966 int
dls_devnet_destroy(mac_handle_t mh,datalink_id_t * idp,boolean_t wait)1967 dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait)
1968 {
1969 	int			err;
1970 	mac_perim_handle_t	mph;
1971 
1972 	*idp = DATALINK_INVALID_LINKID;
1973 	err = dls_devnet_unset(mh, idp, wait);
1974 
1975 	/*
1976 	 * We continue on in the face of ENOENT because the devnet
1977 	 * unset and DLS link release are not atomic and we may have a
1978 	 * scenario where there is no entry in i_dls_devnet_hash for
1979 	 * the MAC name but there is an entry in i_dls_link_hash. For
1980 	 * example, if the following occurred:
1981 	 *
1982 	 * 1. dls_devnet_unset() returns success, and
1983 	 *
1984 	 * 2. dls_link_rele_by_name() fails with ENOTEMPTY because
1985 	 *    flows still exist, and
1986 	 *
1987 	 * 3. dls_devnet_set() fails to set the zone id and calls
1988 	 *    dls_devnet_unset() -- leaving an entry in
1989 	 *    i_dls_link_hash but no corresponding entry in
1990 	 *    i_dls_devnet_hash.
1991 	 *
1992 	 * Even if #3 wasn't true the dls_devnet_set() may fail for
1993 	 * different reasons in the future; the point is that it _can_
1994 	 * fail as part of its contract. We can't rely on it working
1995 	 * so we must assume that these two pieces of state (devnet
1996 	 * and link hashes), which should always be in sync, can get
1997 	 * out of sync and thus even if we get ENOENT from the devnet
1998 	 * hash we should still try to delete from the link hash just
1999 	 * in case.
2000 	 *
2001 	 * We could prevent the ENOTEMPTY from dls_link_rele_by_name()
2002 	 * by calling mac_disable() before calling
2003 	 * dls_devnet_destroy() but that's not currently possible due
2004 	 * to a long-standing bug. OpenSolaris 6791335: The semantics
2005 	 * of mac_disable() were modified by Crossbow such that
2006 	 * dls_devnet_destroy() needs to be called before
2007 	 * mac_disable() can succeed. This is because of the implicit
2008 	 * reference that dls has on the mac_impl_t.
2009 	 */
2010 	if (err != 0 && err != ENOENT)
2011 		return (err);
2012 
2013 	mac_perim_enter_by_mh(mh, &mph);
2014 	err = dls_link_rele_by_name(mac_name(mh));
2015 	mac_perim_exit(mph);
2016 
2017 	if (err != 0) {
2018 		dls_devnet_t	*ddp;
2019 
2020 		/*
2021 		 * XXX It is a general GLDv3 bug that dls_devnet_set() has to
2022 		 * be called to re-set the link when destroy fails.  The
2023 		 * zoneid below will be incorrect if this function is ever
2024 		 * called from kernel context or from a zone other than that
2025 		 * which initially created the link.
2026 		 */
2027 		(void) dls_devnet_set(mh, *idp, crgetzoneid(CRED()), &ddp);
2028 
2029 		/*
2030 		 * You might think dd_linkid should always be set
2031 		 * here, but in the case where dls_devnet_unset()
2032 		 * returns ENOENT it will be DATALINK_INVALID_LINKID.
2033 		 * Stay consistent with the rest of DLS and only
2034 		 * remove the initializing flag if linkid is set.
2035 		 */
2036 		mutex_enter(&ddp->dd_mutex);
2037 		if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
2038 			ddp->dd_flags &= ~DD_INITIALIZING;
2039 		mutex_exit(&ddp->dd_mutex);
2040 	}
2041 	return (err);
2042 }
2043 
2044 /*
2045  * Implicitly create an IP tunnel link.
2046  */
2047 static int
i_dls_devnet_create_iptun(const char * linkname,const char * drvname,datalink_id_t * linkid)2048 i_dls_devnet_create_iptun(const char *linkname, const char *drvname,
2049     datalink_id_t *linkid)
2050 {
2051 	int		err;
2052 	iptun_kparams_t	ik;
2053 	uint32_t	media;
2054 	netstack_t	*ns;
2055 	major_t		iptun_major;
2056 	dev_info_t	*iptun_dip;
2057 
2058 	/* First ensure that the iptun device is attached. */
2059 	if ((iptun_major = ddi_name_to_major(IPTUN_DRIVER_NAME)) == (major_t)-1)
2060 		return (EINVAL);
2061 	if ((iptun_dip = ddi_hold_devi_by_instance(iptun_major, 0, 0)) == NULL)
2062 		return (EINVAL);
2063 
2064 	if (IS_IPV4_TUN(drvname)) {
2065 		ik.iptun_kparam_type = IPTUN_TYPE_IPV4;
2066 		media = DL_IPV4;
2067 	} else if (IS_6TO4_TUN(drvname)) {
2068 		ik.iptun_kparam_type = IPTUN_TYPE_6TO4;
2069 		media = DL_6TO4;
2070 	} else if (IS_IPV6_TUN(drvname)) {
2071 		ik.iptun_kparam_type = IPTUN_TYPE_IPV6;
2072 		media = DL_IPV6;
2073 	}
2074 	ik.iptun_kparam_flags = (IPTUN_KPARAM_TYPE | IPTUN_KPARAM_IMPLICIT);
2075 
2076 	/* Obtain a datalink id for this tunnel. */
2077 	err = dls_mgmt_create((char *)linkname, 0, DATALINK_CLASS_IPTUN, media,
2078 	    B_FALSE, &ik.iptun_kparam_linkid);
2079 	if (err != 0) {
2080 		ddi_release_devi(iptun_dip);
2081 		return (err);
2082 	}
2083 
2084 	ns = netstack_get_current();
2085 	err = iptun_create(&ik, CRED());
2086 	netstack_rele(ns);
2087 
2088 	if (err != 0)
2089 		VERIFY(dls_mgmt_destroy(ik.iptun_kparam_linkid, B_FALSE) == 0);
2090 	else
2091 		*linkid = ik.iptun_kparam_linkid;
2092 
2093 	ddi_release_devi(iptun_dip);
2094 	return (err);
2095 }
2096 
2097 static int
i_dls_devnet_destroy_iptun(datalink_id_t linkid)2098 i_dls_devnet_destroy_iptun(datalink_id_t linkid)
2099 {
2100 	int err;
2101 
2102 	/*
2103 	 * Note the use of zone_kcred() here as opposed to CRED().  This is
2104 	 * because the process that does the last close of this /dev/net node
2105 	 * may not have necessary privileges to delete this IP tunnel, but the
2106 	 * tunnel must always be implicitly deleted on last close.
2107 	 */
2108 	if ((err = iptun_delete(linkid, zone_kcred())) == 0)
2109 		(void) dls_mgmt_destroy(linkid, B_FALSE);
2110 	return (err);
2111 }
2112 
2113 const char *
dls_devnet_link(dls_dl_handle_t ddh)2114 dls_devnet_link(dls_dl_handle_t ddh)
2115 {
2116 	return (ddh->dd_linkname);
2117 }
2118 
2119 const char *
dls_devnet_mac(dls_dl_handle_t ddh)2120 dls_devnet_mac(dls_dl_handle_t ddh)
2121 {
2122 	return (ddh->dd_mac);
2123 }
2124 
2125 datalink_id_t
dls_devnet_linkid(dls_dl_handle_t ddh)2126 dls_devnet_linkid(dls_dl_handle_t ddh)
2127 {
2128 	return (ddh->dd_linkid);
2129 }
2130