1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright (c) 2017 Joyent, Inc. 25 * Copyright 2025 Oxide Computer Company. 26 */ 27 /* 28 * Copyright (c) 2016 by Delphix. All rights reserved. 29 */ 30 31 /* 32 * Datalink management routines. 33 */ 34 35 #include <sys/types.h> 36 #include <sys/door.h> 37 #include <sys/zone.h> 38 #include <sys/modctl.h> 39 #include <sys/file.h> 40 #include <sys/modhash.h> 41 #include <sys/kstat.h> 42 #include <sys/vnode.h> 43 #include <sys/cmn_err.h> 44 #include <sys/softmac.h> 45 #include <sys/dls.h> 46 #include <sys/dls_impl.h> 47 #include <sys/stropts.h> 48 #include <sys/netstack.h> 49 #include <inet/iptun/iptun_impl.h> 50 51 /* 52 * This vanity name management module is treated as part of the GLD framework 53 * and we don't hold any GLD framework lock across a call to any mac 54 * function that needs to acquire the mac perimeter. The hierarchy is 55 * mac perimeter -> framework locks 56 */ 57 58 typedef struct dls_stack { 59 zoneid_t dlss_zoneid; 60 } dls_stack_t; 61 62 static kmem_cache_t *i_dls_devnet_cachep; 63 64 /* Upcall door handle and its lock. */ 65 static kmutex_t i_dls_mgmt_lock; 66 static door_handle_t dls_mgmt_dh = NULL; 67 68 /* 69 * Any association of <macname, linkid> (set, rename) can require an upcall to 70 * the daemon for the link vanity name. We want set/rename/unset to be mutually 71 * exclusive from start to finish, but it is unsafe to hold a write on 72 * i_dls_devnet_hash_lock during an upcall. Enforce their exclusion using a 73 * separate lock from the hash tables. 74 * 75 * i_dls_devnet_hash_lock protects the hash tables themselves. Taking a write on 76 * it requires we first hold i_dls_devnet_lock. Thus, we can safely drop, 77 * reacquire, and upgrade/downgrade it so long as all table updates occur in a 78 * single write. If a write is intended i_dls_devnet_lock must be acquired 79 * before i_dls_devnet_hash_lock, leaving the valid lock patterns: 80 * - i_dls_devnet_lock_enter -> i_dls_devnet_hashmap_write 81 * - i_dls_devnet_lock_enter -> i_dls_devnet_hashmap_read 82 * - i_dls_devnet_hashmap_read 83 * i_dls_devnet_hashmap_write enforces the first invariant. 84 */ 85 static kmutex_t i_dls_devnet_lock; 86 static kcondvar_t i_dls_devnet_cv; 87 static kthread_t *i_dls_devnet_own; 88 89 static krwlock_t i_dls_devnet_hash_lock; 90 static mod_hash_t *i_dls_devnet_id_hash; 91 static mod_hash_t *i_dls_devnet_hash; 92 93 static void 94 i_dls_devnet_lock_enter(void) 95 { 96 mutex_enter(&i_dls_devnet_lock); 97 while (i_dls_devnet_own != NULL) { 98 cv_wait(&i_dls_devnet_cv, &i_dls_devnet_lock); 99 } 100 } 101 102 static void 103 i_dls_devnet_lock_exit(void) 104 { 105 VERIFY3P(i_dls_devnet_own, ==, NULL); 106 cv_broadcast(&i_dls_devnet_cv); 107 mutex_exit(&i_dls_devnet_lock); 108 } 109 110 static void 111 i_dls_devnet_lock_upcall_start(void) 112 { 113 VERIFY(MUTEX_HELD(&i_dls_devnet_lock)); 114 VERIFY3P(i_dls_devnet_own, ==, NULL); 115 i_dls_devnet_own = curthread; 116 mutex_exit(&i_dls_devnet_lock); 117 } 118 119 static void 120 i_dls_devnet_lock_upcall_end(void) 121 { 122 mutex_enter(&i_dls_devnet_lock); 123 VERIFY3P(i_dls_devnet_own, ==, curthread); 124 i_dls_devnet_own = NULL; 125 } 126 127 static void 128 i_dls_devnet_hashmap_write(void) 129 { 130 VERIFY(MUTEX_HELD(&i_dls_devnet_lock)); 131 rw_enter(&i_dls_devnet_hash_lock, RW_WRITER); 132 } 133 134 static void 135 i_dls_devnet_hashmap_read(void) 136 { 137 rw_enter(&i_dls_devnet_hash_lock, RW_READER); 138 } 139 140 static void 141 i_dls_devnet_hashmap_exit(void) 142 { 143 rw_exit(&i_dls_devnet_hash_lock); 144 } 145 146 boolean_t devnet_need_rebuild; 147 148 #define VLAN_HASHSZ 67 /* prime */ 149 150 /* 151 * The following macros take a link name without the trailing PPA as input. 152 * Opening a /dev/net node with one of these names causes a tunnel link to be 153 * implicitly created in dls_devnet_hold_by_name() for backward compatibility 154 * with Solaris 10 and prior. 155 */ 156 #define IS_IPV4_TUN(name) (strcmp((name), "ip.tun") == 0) 157 #define IS_IPV6_TUN(name) (strcmp((name), "ip6.tun") == 0) 158 #define IS_6TO4_TUN(name) (strcmp((name), "ip.6to4tun") == 0) 159 #define IS_IPTUN_LINK(name) ( \ 160 IS_IPV4_TUN(name) || IS_IPV6_TUN(name) || IS_6TO4_TUN(name)) 161 162 /* dls_devnet_t dd_flags */ 163 #define DD_CONDEMNED 0x1 164 #define DD_IMPLICIT_IPTUN 0x2 /* Implicitly-created ip*.*tun* tunnel */ 165 #define DD_INITIALIZING 0x4 166 167 /* 168 * If the link is marked as initializing or condemned then it should 169 * not be visible outside of the DLS framework. 170 */ 171 #define DD_NOT_VISIBLE(flags) ( \ 172 (flags & (DD_CONDEMNED | DD_INITIALIZING)) != 0) 173 174 /* 175 * This structure is used to keep the <linkid, macname> mapping. 176 * This structure itself is not protected by the mac perimeter, but is 177 * protected by the dd_mutex and i_dls_devnet_hash_lock. Thus most of the 178 * functions manipulating this structure such as dls_devnet_set/unset etc. 179 * may be called while not holding the mac perimeter. 180 */ 181 typedef struct dls_devnet_s { 182 datalink_id_t dd_linkid; 183 char dd_linkname[MAXLINKNAMELEN]; 184 char dd_mac[MAXNAMELEN]; 185 kstat_t *dd_ksp; /* kstat in owner_zid */ 186 kstat_t *dd_zone_ksp; /* in dd_zid if != owner_zid */ 187 uint32_t dd_ref; 188 kmutex_t dd_mutex; 189 kcondvar_t dd_cv; 190 uint32_t dd_tref; 191 uint_t dd_flags; 192 zoneid_t dd_owner_zid; /* zone where node was created */ 193 zoneid_t dd_zid; /* current zone */ 194 boolean_t dd_prop_loaded; 195 taskqid_t dd_prop_taskid; 196 boolean_t dd_transient; /* link goes away when zone does */ 197 } dls_devnet_t; 198 199 static int i_dls_devnet_create_iptun(const char *, const char *, 200 datalink_id_t *); 201 static int i_dls_devnet_destroy_iptun(datalink_id_t); 202 static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t); 203 static int dls_devnet_unset(mac_handle_t, datalink_id_t *, boolean_t); 204 205 /*ARGSUSED*/ 206 static int 207 i_dls_devnet_constructor(void *buf, void *arg, int kmflag) 208 { 209 dls_devnet_t *ddp = buf; 210 211 bzero(buf, sizeof (dls_devnet_t)); 212 mutex_init(&ddp->dd_mutex, NULL, MUTEX_DEFAULT, NULL); 213 cv_init(&ddp->dd_cv, NULL, CV_DEFAULT, NULL); 214 return (0); 215 } 216 217 /*ARGSUSED*/ 218 static void 219 i_dls_devnet_destructor(void *buf, void *arg) 220 { 221 dls_devnet_t *ddp = buf; 222 223 VERIFY(ddp->dd_ksp == NULL); 224 VERIFY(ddp->dd_ref == 0); 225 VERIFY(ddp->dd_tref == 0); 226 mutex_destroy(&ddp->dd_mutex); 227 cv_destroy(&ddp->dd_cv); 228 } 229 230 /* ARGSUSED */ 231 static int 232 dls_zone_remove(datalink_id_t linkid, void *arg) 233 { 234 dls_devnet_t *ddp; 235 236 if (dls_devnet_hold_tmp(linkid, &ddp) == 0) { 237 /* 238 * Don't bother moving transient links back to the global zone 239 * since we will simply delete them in dls_devnet_unset. 240 */ 241 if (!ddp->dd_transient) 242 (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID); 243 dls_devnet_rele_tmp(ddp); 244 } 245 return (0); 246 } 247 248 /* ARGSUSED */ 249 static void * 250 dls_stack_init(netstackid_t stackid, netstack_t *ns) 251 { 252 dls_stack_t *dlss; 253 254 dlss = kmem_zalloc(sizeof (*dlss), KM_SLEEP); 255 dlss->dlss_zoneid = netstackid_to_zoneid(stackid); 256 return (dlss); 257 } 258 259 /* ARGSUSED */ 260 static void 261 dls_stack_shutdown(netstackid_t stackid, void *arg) 262 { 263 dls_stack_t *dlss = (dls_stack_t *)arg; 264 265 /* Move remaining datalinks in this zone back to the global zone. */ 266 (void) zone_datalink_walk(dlss->dlss_zoneid, dls_zone_remove, NULL); 267 } 268 269 /* ARGSUSED */ 270 static void 271 dls_stack_fini(netstackid_t stackid, void *arg) 272 { 273 dls_stack_t *dlss = (dls_stack_t *)arg; 274 275 kmem_free(dlss, sizeof (*dlss)); 276 } 277 278 /* 279 * Module initialization and finalization functions. 280 */ 281 void 282 dls_mgmt_init(void) 283 { 284 mutex_init(&i_dls_mgmt_lock, NULL, MUTEX_DEFAULT, NULL); 285 mutex_init(&i_dls_devnet_lock, NULL, MUTEX_DEFAULT, NULL); 286 cv_init(&i_dls_devnet_cv, NULL, CV_DEFAULT, NULL); 287 i_dls_devnet_own = NULL; 288 rw_init(&i_dls_devnet_hash_lock, NULL, RW_DEFAULT, NULL); 289 290 /* 291 * Create a kmem_cache of dls_devnet_t structures. 292 */ 293 i_dls_devnet_cachep = kmem_cache_create("dls_devnet_cache", 294 sizeof (dls_devnet_t), 0, i_dls_devnet_constructor, 295 i_dls_devnet_destructor, NULL, NULL, NULL, 0); 296 ASSERT(i_dls_devnet_cachep != NULL); 297 298 /* 299 * Create a hash table, keyed by dd_linkid, of dls_devnet_t. 300 */ 301 i_dls_devnet_id_hash = mod_hash_create_idhash("dls_devnet_id_hash", 302 VLAN_HASHSZ, mod_hash_null_valdtor); 303 304 /* 305 * Create a hash table, keyed by dd_mac 306 */ 307 i_dls_devnet_hash = mod_hash_create_extended("dls_devnet_hash", 308 VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor, 309 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 310 311 devnet_need_rebuild = B_FALSE; 312 313 netstack_register(NS_DLS, dls_stack_init, dls_stack_shutdown, 314 dls_stack_fini); 315 } 316 317 void 318 dls_mgmt_fini(void) 319 { 320 netstack_unregister(NS_DLS); 321 mod_hash_destroy_hash(i_dls_devnet_hash); 322 mod_hash_destroy_hash(i_dls_devnet_id_hash); 323 kmem_cache_destroy(i_dls_devnet_cachep); 324 rw_destroy(&i_dls_devnet_hash_lock); 325 cv_destroy(&i_dls_devnet_cv); 326 mutex_destroy(&i_dls_devnet_lock); 327 mutex_destroy(&i_dls_mgmt_lock); 328 } 329 330 int 331 dls_mgmt_door_set(boolean_t start) 332 { 333 int err; 334 335 /* handle daemon restart */ 336 mutex_enter(&i_dls_mgmt_lock); 337 if (dls_mgmt_dh != NULL) { 338 door_ki_rele(dls_mgmt_dh); 339 dls_mgmt_dh = NULL; 340 } 341 342 if (start && ((err = door_ki_open(DLMGMT_DOOR, &dls_mgmt_dh)) != 0)) { 343 mutex_exit(&i_dls_mgmt_lock); 344 return (err); 345 } 346 347 mutex_exit(&i_dls_mgmt_lock); 348 349 /* 350 * Create and associate <link name, linkid> mapping for network devices 351 * which are already attached before the daemon is started. 352 */ 353 if (start) 354 softmac_recreate(); 355 return (0); 356 } 357 358 static boolean_t 359 i_dls_mgmt_door_revoked(door_handle_t dh) 360 { 361 struct door_info info; 362 extern int sys_shutdown; 363 364 ASSERT(dh != NULL); 365 366 if (sys_shutdown) { 367 cmn_err(CE_NOTE, "dls_mgmt_door: shutdown observed\n"); 368 return (B_TRUE); 369 } 370 371 if (door_ki_info(dh, &info) != 0) 372 return (B_TRUE); 373 374 return ((info.di_attributes & DOOR_REVOKED) != 0); 375 } 376 377 /* 378 * Upcall to the datalink management daemon (dlmgmtd). 379 */ 380 static int 381 i_dls_mgmt_upcall(void *arg, size_t asize, void *rbuf, size_t rsize) 382 { 383 door_arg_t darg, save_arg; 384 door_handle_t dh; 385 int err; 386 int retry = 0; 387 388 #define MAXRETRYNUM 3 389 390 ASSERT(arg); 391 darg.data_ptr = arg; 392 darg.data_size = asize; 393 darg.desc_ptr = NULL; 394 darg.desc_num = 0; 395 darg.rbuf = rbuf; 396 darg.rsize = rsize; 397 save_arg = darg; 398 399 retry: 400 mutex_enter(&i_dls_mgmt_lock); 401 dh = dls_mgmt_dh; 402 if ((dh == NULL) || i_dls_mgmt_door_revoked(dh)) { 403 mutex_exit(&i_dls_mgmt_lock); 404 return (EBADF); 405 } 406 door_ki_hold(dh); 407 mutex_exit(&i_dls_mgmt_lock); 408 409 for (;;) { 410 retry++; 411 if ((err = door_ki_upcall_limited(dh, &darg, zone_kcred(), 412 SIZE_MAX, 0)) == 0) 413 break; 414 415 /* 416 * handle door call errors 417 */ 418 darg = save_arg; 419 switch (err) { 420 case EINTR: 421 /* 422 * If the operation which caused this door upcall gets 423 * interrupted, return directly. 424 */ 425 goto done; 426 case EAGAIN: 427 /* 428 * Repeat upcall if the maximum attempt limit has not 429 * been reached. 430 */ 431 if (retry < MAXRETRYNUM) { 432 delay(2 * hz); 433 break; 434 } 435 cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err); 436 goto done; 437 default: 438 /* A fatal door error */ 439 if (i_dls_mgmt_door_revoked(dh)) { 440 cmn_err(CE_NOTE, 441 "dls: dlmgmtd door service revoked\n"); 442 443 if (retry < MAXRETRYNUM) { 444 door_ki_rele(dh); 445 goto retry; 446 } 447 } 448 cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err); 449 goto done; 450 } 451 } 452 453 if (darg.rbuf != rbuf) { 454 /* 455 * The size of the input rbuf was not big enough, so the 456 * upcall allocated the rbuf itself. If this happens, assume 457 * that this was an invalid door call request. 458 */ 459 kmem_free(darg.rbuf, darg.rsize); 460 err = ENOSPC; 461 goto done; 462 } 463 464 if (darg.rsize != rsize) { 465 err = EINVAL; 466 goto done; 467 } 468 469 err = ((dlmgmt_retval_t *)rbuf)->lr_err; 470 471 done: 472 door_ki_rele(dh); 473 return (err); 474 } 475 476 /* 477 * Request the datalink management daemon to create a link with the attributes 478 * below. Upon success, zero is returned and linkidp contains the linkid for 479 * the new link; otherwise, an errno is returned. 480 * 481 * - dev physical dev_t. required for all physical links, 482 * including GLDv3 links. It will be used to force the 483 * attachment of a physical device, hence the 484 * registration of its mac 485 * - class datalink class 486 * - media type media type; DL_OTHER means unknown 487 * - persist whether to persist the datalink 488 */ 489 int 490 dls_mgmt_create(const char *devname, dev_t dev, datalink_class_t class, 491 uint32_t media, boolean_t persist, datalink_id_t *linkidp) 492 { 493 dlmgmt_upcall_arg_create_t create; 494 dlmgmt_create_retval_t retval; 495 int err; 496 497 create.ld_cmd = DLMGMT_CMD_DLS_CREATE; 498 create.ld_class = class; 499 create.ld_media = media; 500 create.ld_phymaj = getmajor(dev); 501 create.ld_phyinst = getminor(dev); 502 create.ld_persist = persist; 503 if (strlcpy(create.ld_devname, devname, sizeof (create.ld_devname)) >= 504 sizeof (create.ld_devname)) 505 return (EINVAL); 506 507 if ((err = i_dls_mgmt_upcall(&create, sizeof (create), &retval, 508 sizeof (retval))) == 0) { 509 *linkidp = retval.lr_linkid; 510 } 511 return (err); 512 } 513 514 /* 515 * Request the datalink management daemon to destroy the specified link. 516 * Returns zero upon success, or an errno upon failure. 517 */ 518 int 519 dls_mgmt_destroy(datalink_id_t linkid, boolean_t persist) 520 { 521 dlmgmt_upcall_arg_destroy_t destroy; 522 dlmgmt_destroy_retval_t retval; 523 524 destroy.ld_cmd = DLMGMT_CMD_DLS_DESTROY; 525 destroy.ld_linkid = linkid; 526 destroy.ld_persist = persist; 527 528 return (i_dls_mgmt_upcall(&destroy, sizeof (destroy), 529 &retval, sizeof (retval))); 530 } 531 532 /* 533 * Request the datalink management daemon to verify/update the information 534 * for a physical link. Upon success, get its linkid. 535 * 536 * - media type media type 537 * - novanity whether this physical datalink supports vanity naming. 538 * physical links that do not use the GLDv3 MAC plugin 539 * cannot suport vanity naming 540 * 541 * This function could fail with ENOENT or EEXIST. Two cases return EEXIST: 542 * 543 * 1. A link with devname already exists, but the media type does not match. 544 * In this case, mediap will bee set to the media type of the existing link. 545 * 2. A link with devname already exists, but its link name does not match 546 * the device name, although this link does not support vanity naming. 547 */ 548 int 549 dls_mgmt_update(const char *devname, uint32_t media, boolean_t novanity, 550 uint32_t *mediap, datalink_id_t *linkidp) 551 { 552 dlmgmt_upcall_arg_update_t update; 553 dlmgmt_update_retval_t retval; 554 int err; 555 556 update.ld_cmd = DLMGMT_CMD_DLS_UPDATE; 557 558 if (strlcpy(update.ld_devname, devname, sizeof (update.ld_devname)) >= 559 sizeof (update.ld_devname)) 560 return (EINVAL); 561 562 update.ld_media = media; 563 update.ld_novanity = novanity; 564 565 if ((err = i_dls_mgmt_upcall(&update, sizeof (update), &retval, 566 sizeof (retval))) == EEXIST) { 567 *linkidp = retval.lr_linkid; 568 *mediap = retval.lr_media; 569 } else if (err == 0) { 570 *linkidp = retval.lr_linkid; 571 } 572 573 return (err); 574 } 575 576 /* 577 * Request the datalink management daemon to get the information for a link. 578 * Returns zero upon success, or an errno upon failure. 579 * 580 * Only fills in information for argument pointers that are non-NULL. 581 * Note that the link argument is expected to be MAXLINKNAMELEN bytes. 582 */ 583 int 584 dls_mgmt_get_linkinfo(datalink_id_t linkid, char *link, 585 datalink_class_t *classp, uint32_t *mediap, uint32_t *flagsp) 586 { 587 dlmgmt_door_getname_t getname; 588 dlmgmt_getname_retval_t retval; 589 int err, len; 590 591 getname.ld_cmd = DLMGMT_CMD_GETNAME; 592 getname.ld_linkid = linkid; 593 594 if ((err = i_dls_mgmt_upcall(&getname, sizeof (getname), &retval, 595 sizeof (retval))) != 0) { 596 return (err); 597 } 598 599 len = strlen(retval.lr_link); 600 if (len <= 1 || len >= MAXLINKNAMELEN) 601 return (EINVAL); 602 603 if (link != NULL) 604 (void) strlcpy(link, retval.lr_link, MAXLINKNAMELEN); 605 if (classp != NULL) 606 *classp = retval.lr_class; 607 if (mediap != NULL) 608 *mediap = retval.lr_media; 609 if (flagsp != NULL) 610 *flagsp = retval.lr_flags; 611 return (0); 612 } 613 614 /* 615 * Request the datalink management daemon to get the linkid for a link. 616 * Returns a non-zero error code on failure. The linkid argument is only 617 * set on success (when zero is returned.) 618 */ 619 int 620 dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid) 621 { 622 dlmgmt_door_getlinkid_t getlinkid; 623 dlmgmt_getlinkid_retval_t retval; 624 int err; 625 626 getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID; 627 (void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN); 628 629 if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval, 630 sizeof (retval))) == 0) { 631 *linkid = retval.lr_linkid; 632 } 633 return (err); 634 } 635 636 datalink_id_t 637 dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class, 638 datalink_media_t dmedia, uint32_t flags) 639 { 640 dlmgmt_door_getnext_t getnext; 641 dlmgmt_getnext_retval_t retval; 642 643 getnext.ld_cmd = DLMGMT_CMD_GETNEXT; 644 getnext.ld_class = class; 645 getnext.ld_dmedia = dmedia; 646 getnext.ld_flags = flags; 647 getnext.ld_linkid = linkid; 648 649 if (i_dls_mgmt_upcall(&getnext, sizeof (getnext), &retval, 650 sizeof (retval)) != 0) { 651 return (DATALINK_INVALID_LINKID); 652 } 653 654 return (retval.lr_linkid); 655 } 656 657 static int 658 i_dls_mgmt_get_linkattr(const datalink_id_t linkid, const char *attr, 659 void *attrval, size_t *attrszp) 660 { 661 dlmgmt_upcall_arg_getattr_t getattr; 662 dlmgmt_getattr_retval_t retval; 663 int err; 664 665 getattr.ld_cmd = DLMGMT_CMD_DLS_GETATTR; 666 getattr.ld_linkid = linkid; 667 (void) strlcpy(getattr.ld_attr, attr, MAXLINKATTRLEN); 668 669 if ((err = i_dls_mgmt_upcall(&getattr, sizeof (getattr), &retval, 670 sizeof (retval))) == 0) { 671 if (*attrszp < retval.lr_attrsz) 672 return (EINVAL); 673 *attrszp = retval.lr_attrsz; 674 bcopy(retval.lr_attrval, attrval, retval.lr_attrsz); 675 } 676 677 return (err); 678 } 679 680 /* 681 * Note that this function can only get devp successfully for non-VLAN link. 682 */ 683 int 684 dls_mgmt_get_phydev(datalink_id_t linkid, dev_t *devp) 685 { 686 uint64_t maj, inst; 687 size_t attrsz = sizeof (uint64_t); 688 689 if (i_dls_mgmt_get_linkattr(linkid, FPHYMAJ, &maj, &attrsz) != 0 || 690 attrsz != sizeof (uint64_t) || 691 i_dls_mgmt_get_linkattr(linkid, FPHYINST, &inst, &attrsz) != 0 || 692 attrsz != sizeof (uint64_t)) { 693 return (EINVAL); 694 } 695 696 *devp = makedevice((major_t)maj, (minor_t)inst); 697 return (0); 698 } 699 700 /* 701 * Request the datalink management daemon to push in 702 * all properties associated with the link. 703 * Returns a non-zero error code on failure. 704 */ 705 int 706 dls_mgmt_linkprop_init(datalink_id_t linkid) 707 { 708 dlmgmt_door_linkprop_init_t li; 709 dlmgmt_linkprop_init_retval_t retval; 710 int err; 711 712 li.ld_cmd = DLMGMT_CMD_LINKPROP_INIT; 713 li.ld_linkid = linkid; 714 715 err = i_dls_mgmt_upcall(&li, sizeof (li), &retval, sizeof (retval)); 716 return (err); 717 } 718 719 static void 720 dls_devnet_prop_task(void *arg) 721 { 722 dls_devnet_t *ddp = arg; 723 724 (void) dls_mgmt_linkprop_init(ddp->dd_linkid); 725 726 mutex_enter(&ddp->dd_mutex); 727 ddp->dd_prop_loaded = B_TRUE; 728 ddp->dd_prop_taskid = 0; 729 cv_broadcast(&ddp->dd_cv); 730 mutex_exit(&ddp->dd_mutex); 731 } 732 733 /* 734 * Ensure property loading task is completed. 735 */ 736 void 737 dls_devnet_prop_task_wait(dls_dl_handle_t ddp) 738 { 739 mutex_enter(&ddp->dd_mutex); 740 while (ddp->dd_prop_taskid != 0) 741 cv_wait(&ddp->dd_cv, &ddp->dd_mutex); 742 mutex_exit(&ddp->dd_mutex); 743 } 744 745 void 746 dls_devnet_rele_tmp(dls_dl_handle_t dlh) 747 { 748 dls_devnet_t *ddp = dlh; 749 750 mutex_enter(&ddp->dd_mutex); 751 ASSERT(ddp->dd_tref != 0); 752 if (--ddp->dd_tref == 0) 753 cv_signal(&ddp->dd_cv); 754 mutex_exit(&ddp->dd_mutex); 755 } 756 757 int 758 dls_devnet_hold_link(datalink_id_t linkid, dls_dl_handle_t *ddhp, 759 dls_link_t **dlpp) 760 { 761 dls_dl_handle_t dlh; 762 dls_link_t *dlp; 763 int err; 764 765 if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0) 766 return (err); 767 768 if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) { 769 dls_devnet_rele_tmp(dlh); 770 return (err); 771 } 772 773 ASSERT(MAC_PERIM_HELD(dlp->dl_mh)); 774 775 *ddhp = dlh; 776 *dlpp = dlp; 777 return (0); 778 } 779 780 void 781 dls_devnet_rele_link(dls_dl_handle_t dlh, dls_link_t *dlp) 782 { 783 ASSERT(MAC_PERIM_HELD(dlp->dl_mh)); 784 785 dls_link_rele(dlp); 786 dls_devnet_rele_tmp(dlh); 787 } 788 789 /* 790 * "link" kstats related functions. 791 */ 792 793 /* 794 * Query the "link" kstats. 795 * 796 * We may be called from the kstat subsystem in an arbitrary context. 797 * If the caller is the stack, the context could be an upcall data 798 * thread. Hence we can't acquire the mac perimeter in this function 799 * for fear of deadlock. 800 */ 801 static int 802 dls_devnet_stat_update(kstat_t *ksp, int rw) 803 { 804 datalink_id_t linkid = (datalink_id_t)(uintptr_t)ksp->ks_private; 805 dls_devnet_t *ddp; 806 dls_link_t *dlp; 807 int err; 808 809 if ((err = dls_devnet_hold_tmp(linkid, &ddp)) != 0) { 810 return (err); 811 } 812 813 /* 814 * If a device detach happens at this time, it will block in 815 * dls_devnet_unset since the dd_tref has been bumped in 816 * dls_devnet_hold_tmp(). So the access to 'dlp' is safe even though 817 * we don't hold the mac perimeter. 818 */ 819 if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)ddp->dd_mac, 820 (mod_hash_val_t *)&dlp) != 0) { 821 dls_devnet_rele_tmp(ddp); 822 return (ENOENT); 823 } 824 825 err = dls_stat_update(ksp, dlp, rw); 826 827 dls_devnet_rele_tmp(ddp); 828 return (err); 829 } 830 831 /* 832 * Create the "link" kstats. 833 */ 834 static void 835 dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid) 836 { 837 kstat_t *ksp; 838 839 if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid, 840 dls_devnet_stat_update, (void *)(uintptr_t)ddp->dd_linkid, 841 &ksp) == 0) { 842 ASSERT(ksp != NULL); 843 if (zoneid == ddp->dd_owner_zid) { 844 ASSERT(ddp->dd_ksp == NULL); 845 ddp->dd_ksp = ksp; 846 } else { 847 ASSERT(ddp->dd_zone_ksp == NULL); 848 ddp->dd_zone_ksp = ksp; 849 } 850 } 851 } 852 853 /* 854 * Destroy the "link" kstats. 855 */ 856 static void 857 dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid) 858 { 859 if (zoneid == ddp->dd_owner_zid) { 860 if (ddp->dd_ksp != NULL) { 861 kstat_delete(ddp->dd_ksp); 862 ddp->dd_ksp = NULL; 863 } 864 } else { 865 if (ddp->dd_zone_ksp != NULL) { 866 kstat_delete(ddp->dd_zone_ksp); 867 ddp->dd_zone_ksp = NULL; 868 } 869 } 870 } 871 872 /* 873 * The link has been renamed. Destroy the old non-legacy kstats ("link kstats") 874 * and create the new set using the new name. 875 */ 876 static void 877 dls_devnet_stat_rename(dls_devnet_t *ddp) 878 { 879 if (ddp->dd_ksp != NULL) { 880 kstat_delete(ddp->dd_ksp); 881 ddp->dd_ksp = NULL; 882 } 883 /* We can't rename a link while it's assigned to a non-global zone. */ 884 ASSERT(ddp->dd_zone_ksp == NULL); 885 dls_devnet_stat_create(ddp, ddp->dd_owner_zid); 886 } 887 888 /* 889 * Associate the linkid with the link identified by macname. If this 890 * is called on behalf of a physical link then linkid may be 891 * DATALINK_INVALID_LINKID. Otherwise, if called on behalf of a 892 * virtual link, linkid must have a value. 893 */ 894 static int 895 dls_devnet_set(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid, 896 dls_devnet_t **ddpp) 897 { 898 const char *macname = mac_name(mh); 899 dls_devnet_t *ddp = NULL; 900 datalink_class_t class; 901 int err; 902 boolean_t stat_create = B_FALSE; 903 char linkname[MAXLINKNAMELEN]; 904 905 i_dls_devnet_lock_enter(); 906 907 /* 908 * Don't allow callers to set a link name with a linkid that already 909 * has a name association (that's what rename is for). 910 */ 911 if (linkid != DATALINK_INVALID_LINKID) { 912 /* 913 * This temporary read access is valid, as no other set/rename 914 * operation can attempt an insert on the same linkid while 915 * i_dls_devnet_lock is held. 916 */ 917 i_dls_devnet_hashmap_read(); 918 if (mod_hash_find(i_dls_devnet_id_hash, 919 (mod_hash_key_t)(uintptr_t)linkid, 920 (mod_hash_val_t *)&ddp) == 0) { 921 err = EEXIST; 922 goto done; 923 } 924 i_dls_devnet_hashmap_exit(); 925 926 i_dls_devnet_lock_upcall_start(); 927 err = dls_mgmt_get_linkinfo(linkid, linkname, &class, 928 NULL, NULL); 929 i_dls_devnet_lock_upcall_end(); 930 931 if (err != 0) 932 goto done_rw_unlocked; 933 } 934 935 i_dls_devnet_hashmap_write(); 936 937 if ((err = mod_hash_find(i_dls_devnet_hash, 938 (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) == 0) { 939 if (ddp->dd_linkid != DATALINK_INVALID_LINKID) { 940 err = EEXIST; 941 goto done; 942 } 943 944 /* 945 * If we arrive here we know we are attempting to set 946 * the linkid on a physical link. A virtual link 947 * should never arrive here because it should never 948 * call this function without a linkid. Virtual links 949 * are created through dlgmtmd and thus we know 950 * dlmgmtd is alive to assign it a linkid (search for 951 * uses of dladm_create_datalink_id() to prove this to 952 * yourself); we don't have the same guarantee for a 953 * physical link which may perform an upcall for a 954 * linkid while dlmgmtd is down but will continue 955 * creating a devnet without the linkid (see 956 * softmac_create_datalink() to see how physical link 957 * creation works). That is why there is no entry in 958 * the id hash but there is one in the macname hash -- 959 * softmac couldn't acquire a linkid the first time it 960 * called this function. 961 * 962 * Because of the check above, we also know that 963 * ddp->dd_linkid is not set. Following this, the link 964 * must still be in the DD_INITIALIZING state because 965 * that flag is removed IFF dd_linkid is set. This is 966 * why we can ASSERT the DD_INITIALIZING flag below if 967 * the call to i_dls_devnet_setzid() fails. 968 */ 969 if (linkid == DATALINK_INVALID_LINKID || 970 class != DATALINK_CLASS_PHYS) { 971 err = EINVAL; 972 goto done; 973 } 974 975 ASSERT(ddp->dd_flags & DD_INITIALIZING); 976 } else { 977 ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP); 978 ddp->dd_flags = DD_INITIALIZING; 979 ddp->dd_tref = 0; 980 ddp->dd_ref++; 981 ddp->dd_owner_zid = zoneid; 982 /* 983 * If we are creating a new devnet which will be owned by a NGZ 984 * then mark it as transient. This link has never been in the 985 * GZ, the GZ will not have a hold on its reference, and we do 986 * not want to return it to the GZ when the zone halts. 987 */ 988 if (zoneid != GLOBAL_ZONEID) 989 ddp->dd_transient = B_TRUE; 990 (void) strlcpy(ddp->dd_mac, macname, sizeof (ddp->dd_mac)); 991 VERIFY(mod_hash_insert(i_dls_devnet_hash, 992 (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0); 993 } 994 995 if (linkid != DATALINK_INVALID_LINKID) { 996 ddp->dd_linkid = linkid; 997 (void) strlcpy(ddp->dd_linkname, linkname, 998 sizeof (ddp->dd_linkname)); 999 VERIFY(mod_hash_insert(i_dls_devnet_id_hash, 1000 (mod_hash_key_t)(uintptr_t)linkid, 1001 (mod_hash_val_t)ddp) == 0); 1002 devnet_need_rebuild = B_TRUE; 1003 stat_create = B_TRUE; 1004 } 1005 err = 0; 1006 done: 1007 /* 1008 * It is safe to drop the i_dls_devnet_hash_lock at this point. In the 1009 * case of physical devices, the softmac framework will fail the device 1010 * detach based on the smac_state or smac_hold_cnt. Other cases like 1011 * vnic and aggr use their own scheme to serialize creates and deletes 1012 * and ensure that *ddp is valid. 1013 */ 1014 i_dls_devnet_hashmap_exit(); 1015 done_rw_unlocked: 1016 i_dls_devnet_lock_exit(); 1017 1018 if (err == 0 && zoneid != GLOBAL_ZONEID) { 1019 /* 1020 * If this link is being created directly within a non-global 1021 * zone, then flag it as transient so that it will be cleaned 1022 * up when the zone is shut down. 1023 */ 1024 err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE, B_TRUE); 1025 if (err != 0) { 1026 /* 1027 * At this point the link is marked as 1028 * DD_INITIALIZING -- there can be no 1029 * outstanding temp refs and therefore no need 1030 * to wait for them. 1031 */ 1032 ASSERT(ddp->dd_flags & DD_INITIALIZING); 1033 (void) dls_devnet_unset(mh, &linkid, B_FALSE); 1034 return (err); 1035 } 1036 } 1037 1038 if (err == 0) { 1039 /* 1040 * The kstat subsystem holds its own locks (rather perimeter) 1041 * before calling the ks_update (dls_devnet_stat_update) entry 1042 * point which in turn grabs the i_dls_devnet_hash_lock. So the 1043 * lock hierarchy is kstat locks -> i_dls_devnet_hash_lock. 1044 */ 1045 if (stat_create) 1046 dls_devnet_stat_create(ddp, zoneid); 1047 if (ddpp != NULL) 1048 *ddpp = ddp; 1049 1050 mutex_enter(&ddp->dd_mutex); 1051 if (linkid != DATALINK_INVALID_LINKID && 1052 !ddp->dd_prop_loaded && ddp->dd_prop_taskid == 0) { 1053 ddp->dd_prop_taskid = taskq_dispatch(system_taskq, 1054 dls_devnet_prop_task, ddp, TQ_SLEEP); 1055 } 1056 mutex_exit(&ddp->dd_mutex); 1057 1058 } 1059 return (err); 1060 } 1061 1062 /* 1063 * Disassociate the linkid from the link identified by macname. If 1064 * wait is B_TRUE, wait until all temporary refs are released and the 1065 * prop task is finished. 1066 * 1067 * If waiting then you SHOULD NOT call this from inside the MAC perim 1068 * as deadlock will ensue. Otherwise, this function is safe to call 1069 * from inside or outside the MAC perim. 1070 */ 1071 static int 1072 dls_devnet_unset(mac_handle_t mh, datalink_id_t *id, boolean_t wait) 1073 { 1074 const char *macname = mac_name(mh); 1075 dls_devnet_t *ddp; 1076 int err; 1077 mod_hash_val_t val; 1078 1079 i_dls_devnet_lock_enter(); 1080 i_dls_devnet_hashmap_write(); 1081 1082 if ((err = mod_hash_find(i_dls_devnet_hash, 1083 (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) != 0) { 1084 ASSERT(err == MH_ERR_NOTFOUND); 1085 i_dls_devnet_hashmap_exit(); 1086 return (ENOENT); 1087 } 1088 1089 mutex_enter(&ddp->dd_mutex); 1090 1091 /* 1092 * Make sure downcalls into softmac_create or softmac_destroy from 1093 * devfs don't cv_wait on any devfs related condition for fear of 1094 * deadlock. Return EBUSY if the asynchronous thread started for 1095 * property loading as part of the post attach hasn't yet completed. 1096 */ 1097 VERIFY(ddp->dd_ref != 0); 1098 if ((ddp->dd_ref != 1) || (!wait && 1099 (ddp->dd_tref != 0 || ddp->dd_prop_taskid != 0))) { 1100 int zstatus = 0; 1101 1102 /* 1103 * There are a couple of alternatives that might be going on 1104 * here; a) the zone is shutting down and it has a transient 1105 * link assigned, in which case we want to clean it up instead 1106 * of moving it back to the global zone, or b) its possible 1107 * that we're trying to clean up an orphaned vnic that was 1108 * delegated to a zone and which wasn't cleaned up properly 1109 * when the zone went away. Check for either of these cases 1110 * before we simply return EBUSY. 1111 * 1112 * zstatus indicates which situation we are dealing with: 1113 * 0 - means return EBUSY 1114 * 1 - means case (a), cleanup transient link 1115 * -1 - means case (b), orphaned VNIC 1116 */ 1117 if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) { 1118 zone_t *zp; 1119 1120 if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) { 1121 zstatus = -1; 1122 } else { 1123 if (ddp->dd_transient) { 1124 zone_status_t s = zone_status_get(zp); 1125 1126 if (s >= ZONE_IS_SHUTTING_DOWN) 1127 zstatus = 1; 1128 } 1129 zone_rele(zp); 1130 } 1131 } 1132 1133 if (zstatus == 0) { 1134 mutex_exit(&ddp->dd_mutex); 1135 i_dls_devnet_hashmap_exit(); 1136 i_dls_devnet_lock_exit(); 1137 return (EBUSY); 1138 } 1139 1140 /* 1141 * We want to delete the link, reset ref to 1; 1142 */ 1143 if (zstatus == -1) { 1144 /* Log a warning, but continue in this case */ 1145 cmn_err(CE_WARN, "clear orphaned datalink: %s\n", 1146 ddp->dd_linkname); 1147 } 1148 ddp->dd_ref = 1; 1149 } 1150 1151 ddp->dd_flags |= DD_CONDEMNED; 1152 ddp->dd_ref--; 1153 *id = ddp->dd_linkid; 1154 1155 /* 1156 * Remove this dls_devnet_t from the hash table. 1157 */ 1158 VERIFY(mod_hash_remove(i_dls_devnet_hash, 1159 (mod_hash_key_t)ddp->dd_mac, &val) == 0); 1160 1161 if (ddp->dd_linkid != DATALINK_INVALID_LINKID) { 1162 VERIFY(mod_hash_remove(i_dls_devnet_id_hash, 1163 (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, &val) == 0); 1164 1165 devnet_need_rebuild = B_TRUE; 1166 } 1167 1168 i_dls_devnet_hashmap_exit(); 1169 i_dls_devnet_lock_exit(); 1170 1171 /* 1172 * It is important to call i_dls_devnet_setzid() WITHOUT the 1173 * i_dls_devnet_hash_lock held. The setzid call grabs the MAC 1174 * perim; thus causing DLS -> MAC lock ordering if performed 1175 * with the i_dls_devnet_hash_lock held. This forces consumers to 1176 * grab the MAC perim before calling dls_devnet_unset() (the 1177 * locking rules state MAC -> DLS order). By performing the 1178 * setzid outside of the i_dls_devnet_hash_lock consumers can 1179 * safely call dls_devnet_unset() outside the MAC perim. 1180 */ 1181 if (ddp->dd_zid != GLOBAL_ZONEID) { 1182 /* 1183 * We need to release the dd_mutex before we try and destroy the 1184 * stat. When we destroy it, we'll need to grab the lock for the 1185 * kstat but if there's a concurrent reader of the kstat, we'll 1186 * be blocked on it. This will lead to deadlock because these 1187 * kstats employ a ks_update function (dls_devnet_stat_update) 1188 * which needs the dd_mutex that we currently hold. 1189 * 1190 * Because we've already flagged the dls_devnet_t as 1191 * DD_CONDEMNED and we still have a write lock on 1192 * i_dls_devnet_hash_lock, we should be able to release the 1193 * dd_mutex. 1194 */ 1195 mutex_exit(&ddp->dd_mutex); 1196 dls_devnet_stat_destroy(ddp, ddp->dd_zid); 1197 mutex_enter(&ddp->dd_mutex); 1198 (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE, 1199 B_FALSE); 1200 } 1201 1202 if (wait) { 1203 /* 1204 * Wait until all temporary references are released. 1205 * The holders of the tref need the MAC perim to 1206 * perform their work and release the tref. To avoid 1207 * deadlock, assert that the perim is never held here. 1208 */ 1209 ASSERT0(MAC_PERIM_HELD(mh)); 1210 while ((ddp->dd_tref != 0) || (ddp->dd_prop_taskid != 0)) 1211 cv_wait(&ddp->dd_cv, &ddp->dd_mutex); 1212 } else { 1213 VERIFY(ddp->dd_tref == 0); 1214 VERIFY(ddp->dd_prop_taskid == 0); 1215 } 1216 1217 if (ddp->dd_linkid != DATALINK_INVALID_LINKID) 1218 dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid); 1219 1220 ddp->dd_prop_loaded = B_FALSE; 1221 ddp->dd_linkid = DATALINK_INVALID_LINKID; 1222 ddp->dd_flags = 0; 1223 mutex_exit(&ddp->dd_mutex); 1224 kmem_cache_free(i_dls_devnet_cachep, ddp); 1225 1226 return (0); 1227 } 1228 1229 /* 1230 * This is a private hold routine used when we already have the dls_link_t, thus 1231 * we know that it cannot go away. 1232 */ 1233 int 1234 dls_devnet_hold_tmp_by_link(dls_link_t *dlp, dls_dl_handle_t *ddhp) 1235 { 1236 int err; 1237 dls_devnet_t *ddp = NULL; 1238 1239 i_dls_devnet_hashmap_read(); 1240 if ((err = mod_hash_find(i_dls_devnet_hash, 1241 (mod_hash_key_t)dlp->dl_name, (mod_hash_val_t *)&ddp)) != 0) { 1242 ASSERT(err == MH_ERR_NOTFOUND); 1243 i_dls_devnet_hashmap_exit(); 1244 return (ENOENT); 1245 } 1246 1247 mutex_enter(&ddp->dd_mutex); 1248 VERIFY(ddp->dd_ref > 0); 1249 if (DD_NOT_VISIBLE(ddp->dd_flags)) { 1250 mutex_exit(&ddp->dd_mutex); 1251 i_dls_devnet_hashmap_exit(); 1252 return (ENOENT); 1253 } 1254 ddp->dd_tref++; 1255 mutex_exit(&ddp->dd_mutex); 1256 i_dls_devnet_hashmap_exit(); 1257 1258 *ddhp = ddp; 1259 return (0); 1260 } 1261 1262 static int 1263 dls_devnet_hold_common(datalink_id_t linkid, dls_devnet_t **ddpp, 1264 boolean_t tmp_hold) 1265 { 1266 dls_devnet_t *ddp; 1267 int err; 1268 1269 i_dls_devnet_hashmap_read(); 1270 if ((err = mod_hash_find(i_dls_devnet_id_hash, 1271 (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&ddp)) != 0) { 1272 ASSERT(err == MH_ERR_NOTFOUND); 1273 i_dls_devnet_hashmap_exit(); 1274 return (ENOENT); 1275 } 1276 1277 mutex_enter(&ddp->dd_mutex); 1278 VERIFY(ddp->dd_ref > 0); 1279 if (DD_NOT_VISIBLE(ddp->dd_flags)) { 1280 mutex_exit(&ddp->dd_mutex); 1281 i_dls_devnet_hashmap_exit(); 1282 return (ENOENT); 1283 } 1284 if (tmp_hold) 1285 ddp->dd_tref++; 1286 else 1287 ddp->dd_ref++; 1288 mutex_exit(&ddp->dd_mutex); 1289 i_dls_devnet_hashmap_exit(); 1290 1291 *ddpp = ddp; 1292 return (0); 1293 } 1294 1295 int 1296 dls_devnet_hold(datalink_id_t linkid, dls_devnet_t **ddpp) 1297 { 1298 return (dls_devnet_hold_common(linkid, ddpp, B_FALSE)); 1299 } 1300 1301 /* 1302 * Hold the vanity naming structure (dls_devnet_t) temporarily. The request to 1303 * delete the dls_devnet_t will wait until the temporary reference is released. 1304 */ 1305 int 1306 dls_devnet_hold_tmp(datalink_id_t linkid, dls_devnet_t **ddpp) 1307 { 1308 return (dls_devnet_hold_common(linkid, ddpp, B_TRUE)); 1309 } 1310 1311 /* 1312 * This funtion is called when a DLS client tries to open a device node. 1313 * This dev_t could be a result of a /dev/net node access (returned by 1314 * devnet_create_rvp->dls_devnet_open()) or a direct /dev node access. 1315 * In both cases, this function bumps up the reference count of the 1316 * dls_devnet_t structure. The reference is held as long as the device node 1317 * is open. In the case of /dev/net while it is true that the initial reference 1318 * is held when the devnet_create_rvp->dls_devnet_open call happens, this 1319 * initial reference is released immediately in devnet_inactive_callback -> 1320 * dls_devnet_close(). (Note that devnet_inactive_callback() is called right 1321 * after dld_open completes, not when the /dev/net node is being closed). 1322 * To undo this function, call dls_devnet_rele() 1323 */ 1324 int 1325 dls_devnet_hold_by_dev(dev_t dev, dls_dl_handle_t *ddhp) 1326 { 1327 char name[MAXNAMELEN]; 1328 char *drv; 1329 dls_devnet_t *ddp; 1330 int err; 1331 1332 if ((drv = ddi_major_to_name(getmajor(dev))) == NULL) 1333 return (EINVAL); 1334 1335 (void) snprintf(name, sizeof (name), "%s%d", drv, 1336 DLS_MINOR2INST(getminor(dev))); 1337 1338 i_dls_devnet_hashmap_read(); 1339 if ((err = mod_hash_find(i_dls_devnet_hash, 1340 (mod_hash_key_t)name, (mod_hash_val_t *)&ddp)) != 0) { 1341 ASSERT(err == MH_ERR_NOTFOUND); 1342 i_dls_devnet_hashmap_exit(); 1343 return (ENOENT); 1344 } 1345 mutex_enter(&ddp->dd_mutex); 1346 VERIFY(ddp->dd_ref > 0); 1347 if (DD_NOT_VISIBLE(ddp->dd_flags)) { 1348 mutex_exit(&ddp->dd_mutex); 1349 i_dls_devnet_hashmap_exit(); 1350 return (ENOENT); 1351 } 1352 ddp->dd_ref++; 1353 mutex_exit(&ddp->dd_mutex); 1354 i_dls_devnet_hashmap_exit(); 1355 1356 *ddhp = ddp; 1357 return (0); 1358 } 1359 1360 void 1361 dls_devnet_rele(dls_devnet_t *ddp) 1362 { 1363 mutex_enter(&ddp->dd_mutex); 1364 VERIFY(ddp->dd_ref > 1); 1365 ddp->dd_ref--; 1366 if ((ddp->dd_flags & DD_IMPLICIT_IPTUN) && ddp->dd_ref == 1) { 1367 mutex_exit(&ddp->dd_mutex); 1368 if (i_dls_devnet_destroy_iptun(ddp->dd_linkid) != 0) 1369 ddp->dd_flags |= DD_IMPLICIT_IPTUN; 1370 return; 1371 } 1372 mutex_exit(&ddp->dd_mutex); 1373 } 1374 1375 static int 1376 dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp) 1377 { 1378 char drv[MAXLINKNAMELEN]; 1379 uint_t ppa; 1380 major_t major; 1381 dev_t phy_dev, tmp_dev; 1382 datalink_id_t linkid; 1383 dls_dev_handle_t ddh; 1384 int err; 1385 1386 if ((err = dls_mgmt_get_linkid(link, &linkid)) == 0) 1387 return (dls_devnet_hold(linkid, ddpp)); 1388 1389 /* 1390 * If we failed to get the link's linkid because the dlmgmtd daemon 1391 * has not been started, return ENOENT so that the application can 1392 * fallback to open the /dev node. 1393 */ 1394 if (err == EBADF) 1395 return (ENOENT); 1396 1397 if (err != ENOENT) 1398 return (err); 1399 1400 /* 1401 * If we reach this point it means dlmgmtd is up but has no 1402 * mapping for the link name. 1403 */ 1404 if (ddi_parse_dlen(link, drv, MAXLINKNAMELEN, &ppa) != DDI_SUCCESS) 1405 return (ENOENT); 1406 1407 if (IS_IPTUN_LINK(drv)) { 1408 if ((err = i_dls_devnet_create_iptun(link, drv, &linkid)) != 0) 1409 return (err); 1410 /* 1411 * At this point, an IP tunnel MAC has registered, which 1412 * resulted in a link being created. 1413 */ 1414 err = dls_devnet_hold(linkid, ddpp); 1415 if (err != 0) { 1416 VERIFY(i_dls_devnet_destroy_iptun(linkid) == 0); 1417 return (err); 1418 } 1419 /* 1420 * dls_devnet_rele() will know to destroy the implicit IP 1421 * tunnel on last reference release if DD_IMPLICIT_IPTUN is 1422 * set. 1423 */ 1424 (*ddpp)->dd_flags |= DD_IMPLICIT_IPTUN; 1425 return (0); 1426 } 1427 1428 /* 1429 * If this link: 1430 * (a) is a physical device, (b) this is the first boot, (c) the MAC 1431 * is not registered yet, and (d) we cannot find its linkid, then the 1432 * linkname is the same as the devname. 1433 * 1434 * First filter out invalid names. 1435 */ 1436 if ((major = ddi_name_to_major(drv)) == (major_t)-1) 1437 return (ENOENT); 1438 1439 phy_dev = makedevice(major, DLS_PPA2MINOR(ppa)); 1440 if (softmac_hold_device(phy_dev, &ddh) != 0) 1441 return (ENOENT); 1442 1443 /* 1444 * At this time, the MAC should be registered, check its phy_dev using 1445 * the given name. 1446 */ 1447 if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0 || 1448 (err = dls_mgmt_get_phydev(linkid, &tmp_dev)) != 0) { 1449 softmac_rele_device(ddh); 1450 return (err); 1451 } 1452 if (tmp_dev != phy_dev) { 1453 softmac_rele_device(ddh); 1454 return (ENOENT); 1455 } 1456 1457 err = dls_devnet_hold(linkid, ddpp); 1458 softmac_rele_device(ddh); 1459 return (err); 1460 } 1461 1462 int 1463 dls_devnet_macname2linkid(const char *macname, datalink_id_t *linkidp) 1464 { 1465 dls_devnet_t *ddp; 1466 1467 i_dls_devnet_hashmap_read(); 1468 if (mod_hash_find(i_dls_devnet_hash, (mod_hash_key_t)macname, 1469 (mod_hash_val_t *)&ddp) != 0) { 1470 i_dls_devnet_hashmap_exit(); 1471 return (ENOENT); 1472 } 1473 1474 *linkidp = ddp->dd_linkid; 1475 i_dls_devnet_hashmap_exit(); 1476 return (0); 1477 } 1478 1479 /* 1480 * Get linkid for the given dev. 1481 */ 1482 int 1483 dls_devnet_dev2linkid(dev_t dev, datalink_id_t *linkidp) 1484 { 1485 char macname[MAXNAMELEN]; 1486 char *drv; 1487 1488 if ((drv = ddi_major_to_name(getmajor(dev))) == NULL) 1489 return (EINVAL); 1490 1491 (void) snprintf(macname, sizeof (macname), "%s%d", drv, 1492 DLS_MINOR2INST(getminor(dev))); 1493 return (dls_devnet_macname2linkid(macname, linkidp)); 1494 } 1495 1496 /* 1497 * Get the link's physical dev_t. It this is a VLAN, get the dev_t of the 1498 * link this VLAN is created on. 1499 */ 1500 int 1501 dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp) 1502 { 1503 dls_devnet_t *ddp; 1504 int err; 1505 1506 if ((err = dls_devnet_hold_tmp(vlanid, &ddp)) != 0) 1507 return (err); 1508 1509 err = dls_mgmt_get_phydev(ddp->dd_linkid, devp); 1510 dls_devnet_rele_tmp(ddp); 1511 return (err); 1512 } 1513 1514 /* 1515 * Handle the renaming requests. There are two rename cases: 1516 * 1517 * 1. Request to rename a valid link (id1) to an non-existent link name 1518 * (id2). In this case id2 is DATALINK_INVALID_LINKID. Just check whether 1519 * id1 is held by any applications. 1520 * 1521 * In this case, the link's kstats need to be updated using the given name. 1522 * 1523 * 2. Request to rename a valid link (id1) to the name of a REMOVED 1524 * physical link (id2). In this case, check that id1 and its associated 1525 * mac is not held by any application, and update the link's linkid to id2. 1526 * 1527 * This case does not change the <link name, linkid> mapping, so the link's 1528 * kstats need to be updated with using name associated the given id2. 1529 */ 1530 int 1531 dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link) 1532 { 1533 dls_dev_handle_t ddh = NULL; 1534 int err = 0; 1535 dev_t phydev = 0; 1536 dls_devnet_t *ddp; 1537 mac_perim_handle_t mph = NULL; 1538 mac_handle_t mh; 1539 mod_hash_val_t val; 1540 1541 /* 1542 * In the second case, id2 must be a REMOVED physical link. 1543 */ 1544 if ((id2 != DATALINK_INVALID_LINKID) && 1545 (dls_mgmt_get_phydev(id2, &phydev) == 0) && 1546 softmac_hold_device(phydev, &ddh) == 0) { 1547 softmac_rele_device(ddh); 1548 return (EEXIST); 1549 } 1550 1551 /* 1552 * Hold id1 to prevent it from being detached (if a physical link). 1553 */ 1554 if (dls_mgmt_get_phydev(id1, &phydev) == 0) 1555 (void) softmac_hold_device(phydev, &ddh); 1556 1557 /* 1558 * The framework does not hold hold locks across calls to the 1559 * mac perimeter, hence enter the perimeter first. This also waits 1560 * for the property loading to finish. 1561 */ 1562 if ((err = mac_perim_enter_by_linkid(id1, &mph)) != 0) { 1563 softmac_rele_device(ddh); 1564 return (err); 1565 } 1566 1567 i_dls_devnet_lock_enter(); 1568 i_dls_devnet_hashmap_read(); 1569 1570 if ((err = mod_hash_find(i_dls_devnet_id_hash, 1571 (mod_hash_key_t)(uintptr_t)id1, (mod_hash_val_t *)&ddp)) != 0) { 1572 ASSERT(err == MH_ERR_NOTFOUND); 1573 err = ENOENT; 1574 goto done; 1575 } 1576 1577 mutex_enter(&ddp->dd_mutex); 1578 if (ddp->dd_ref > 1) { 1579 mutex_exit(&ddp->dd_mutex); 1580 err = EBUSY; 1581 goto done; 1582 } 1583 mutex_exit(&ddp->dd_mutex); 1584 1585 if (id2 == DATALINK_INVALID_LINKID) { 1586 (void) strlcpy(ddp->dd_linkname, link, 1587 sizeof (ddp->dd_linkname)); 1588 1589 /* rename mac client name and its flow if exists */ 1590 if ((err = mac_open(ddp->dd_mac, &mh)) != 0) 1591 goto done; 1592 (void) mac_rename_primary(mh, link); 1593 mac_close(mh); 1594 goto done; 1595 } 1596 1597 /* 1598 * The second case, check whether the MAC is used by any MAC 1599 * user. This must be a physical link so ddh must not be NULL. 1600 */ 1601 if (ddh == NULL) { 1602 err = EINVAL; 1603 goto done; 1604 } 1605 1606 if ((err = mac_open(ddp->dd_mac, &mh)) != 0) 1607 goto done; 1608 1609 /* 1610 * We release the reference of the MAC which mac_open() is 1611 * holding. Note that this mac will not be unregistered 1612 * because the physical device is held. 1613 */ 1614 mac_close(mh); 1615 1616 /* 1617 * Check if there is any other MAC clients, if not, hold this mac 1618 * exclusively until we are done. 1619 */ 1620 if ((err = mac_mark_exclusive(mh)) != 0) 1621 goto done; 1622 1623 /* 1624 * Update the link's linkid. 1625 */ 1626 if ((err = mod_hash_find(i_dls_devnet_id_hash, 1627 (mod_hash_key_t)(uintptr_t)id2, &val)) != MH_ERR_NOTFOUND) { 1628 mac_unmark_exclusive(mh); 1629 err = EEXIST; 1630 goto done; 1631 } 1632 1633 /* 1634 * Temporarily drop the hashmap lock for the upcall -- ddp will remain 1635 * valid because we hold i_dls_devnet_lock. Taking this is a 1636 * prerequisite for dls_devnet_unset to proceed, and it is the only 1637 * pathway through which ddp can be freed. 1638 */ 1639 i_dls_devnet_hashmap_exit(); 1640 i_dls_devnet_lock_upcall_start(); 1641 err = dls_mgmt_get_linkinfo(id2, ddp->dd_linkname, NULL, NULL, NULL); 1642 i_dls_devnet_lock_upcall_end(); 1643 1644 if (err != 0) { 1645 mac_unmark_exclusive(mh); 1646 goto done_rw_unlocked; 1647 } 1648 1649 i_dls_devnet_hashmap_write(); 1650 1651 (void) mod_hash_remove(i_dls_devnet_id_hash, 1652 (mod_hash_key_t)(uintptr_t)id1, &val); 1653 1654 ddp->dd_linkid = id2; 1655 (void) mod_hash_insert(i_dls_devnet_id_hash, 1656 (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, (mod_hash_val_t)ddp); 1657 1658 mac_unmark_exclusive(mh); 1659 1660 /* load properties for new id */ 1661 mutex_enter(&ddp->dd_mutex); 1662 ddp->dd_prop_loaded = B_FALSE; 1663 ddp->dd_prop_taskid = taskq_dispatch(system_taskq, 1664 dls_devnet_prop_task, ddp, TQ_SLEEP); 1665 mutex_exit(&ddp->dd_mutex); 1666 1667 done: 1668 i_dls_devnet_hashmap_exit(); 1669 done_rw_unlocked: 1670 i_dls_devnet_lock_exit(); 1671 1672 if (err == 0) 1673 dls_devnet_stat_rename(ddp); 1674 1675 if (mph != NULL) 1676 mac_perim_exit(mph); 1677 softmac_rele_device(ddh); 1678 return (err); 1679 } 1680 1681 static int 1682 i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop, 1683 boolean_t transient) 1684 { 1685 int err; 1686 mac_perim_handle_t mph; 1687 boolean_t upcall_done = B_FALSE; 1688 datalink_id_t linkid = ddp->dd_linkid; 1689 zoneid_t old_zoneid = ddp->dd_zid; 1690 dlmgmt_door_setzoneid_t setzid; 1691 dlmgmt_setzoneid_retval_t retval; 1692 1693 if (old_zoneid == new_zoneid) 1694 return (0); 1695 1696 if ((err = mac_perim_enter_by_macname(ddp->dd_mac, &mph)) != 0) 1697 return (err); 1698 1699 /* 1700 * When changing the zoneid of an existing link, we need to tell 1701 * dlmgmtd about it. dlmgmtd already knows the zoneid associated with 1702 * newly created links. 1703 */ 1704 if (setprop) { 1705 setzid.ld_cmd = DLMGMT_CMD_SETZONEID; 1706 setzid.ld_linkid = linkid; 1707 setzid.ld_zoneid = new_zoneid; 1708 err = i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval, 1709 sizeof (retval)); 1710 if (err != 0) 1711 goto done; 1712 1713 /* 1714 * We set upcall_done only if the upcall is 1715 * successful. This way, if dls_link_setzid() fails, 1716 * we know another upcall must be done to reset the 1717 * dlmgmtd state. 1718 */ 1719 upcall_done = B_TRUE; 1720 } 1721 if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) { 1722 ddp->dd_zid = new_zoneid; 1723 ddp->dd_transient = transient; 1724 devnet_need_rebuild = B_TRUE; 1725 } 1726 1727 done: 1728 if (err != 0 && upcall_done) { 1729 setzid.ld_zoneid = old_zoneid; 1730 (void) i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval, 1731 sizeof (retval)); 1732 } 1733 mac_perim_exit(mph); 1734 return (err); 1735 } 1736 1737 int 1738 dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid) 1739 { 1740 dls_devnet_t *ddp; 1741 int err; 1742 zoneid_t old_zid; 1743 boolean_t refheld = B_FALSE; 1744 1745 old_zid = ddh->dd_zid; 1746 1747 if (old_zid == new_zid) 1748 return (0); 1749 1750 /* 1751 * Acquire an additional reference to the link if it is being assigned 1752 * to a non-global zone from the global zone. 1753 */ 1754 if (old_zid == GLOBAL_ZONEID && new_zid != GLOBAL_ZONEID) { 1755 if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0) 1756 return (err); 1757 refheld = B_TRUE; 1758 } 1759 1760 if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE, B_FALSE)) != 0) { 1761 if (refheld) 1762 dls_devnet_rele(ddp); 1763 return (err); 1764 } 1765 1766 /* 1767 * Release the additional reference if the link is returning to the 1768 * global zone from a non-global zone. 1769 */ 1770 if (old_zid != GLOBAL_ZONEID && new_zid == GLOBAL_ZONEID) 1771 dls_devnet_rele(ddh); 1772 1773 /* Re-create kstats in the appropriate zones. */ 1774 if (old_zid != GLOBAL_ZONEID) 1775 dls_devnet_stat_destroy(ddh, old_zid); 1776 if (new_zid != GLOBAL_ZONEID) 1777 dls_devnet_stat_create(ddh, new_zid); 1778 1779 return (0); 1780 } 1781 1782 zoneid_t 1783 dls_devnet_getzid(dls_dl_handle_t ddh) 1784 { 1785 return (((dls_devnet_t *)ddh)->dd_zid); 1786 } 1787 1788 zoneid_t 1789 dls_devnet_getownerzid(dls_dl_handle_t ddh) 1790 { 1791 return (((dls_devnet_t *)ddh)->dd_owner_zid); 1792 } 1793 1794 /* 1795 * Is linkid visible from zoneid? A link is visible if it was created in the 1796 * zone, or if it is currently assigned to the zone. 1797 */ 1798 boolean_t 1799 dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid) 1800 { 1801 dls_devnet_t *ddp; 1802 boolean_t result; 1803 1804 if (dls_devnet_hold_tmp(linkid, &ddp) != 0) 1805 return (B_FALSE); 1806 result = (ddp->dd_owner_zid == zoneid || ddp->dd_zid == zoneid); 1807 dls_devnet_rele_tmp(ddp); 1808 return (result); 1809 } 1810 1811 /* 1812 * Access a vanity naming node. 1813 */ 1814 int 1815 dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp) 1816 { 1817 dls_devnet_t *ddp; 1818 dls_link_t *dlp; 1819 zoneid_t zid = getzoneid(); 1820 int err; 1821 mac_perim_handle_t mph; 1822 1823 if ((err = dls_devnet_hold_by_name(link, &ddp)) != 0) 1824 return (err); 1825 1826 dls_devnet_prop_task_wait(ddp); 1827 1828 /* 1829 * Opening a link that does not belong to the current non-global zone 1830 * is not allowed. 1831 */ 1832 if (zid != GLOBAL_ZONEID && ddp->dd_zid != zid) { 1833 dls_devnet_rele(ddp); 1834 return (ENOENT); 1835 } 1836 1837 err = mac_perim_enter_by_macname(ddp->dd_mac, &mph); 1838 if (err != 0) { 1839 dls_devnet_rele(ddp); 1840 return (err); 1841 } 1842 1843 err = dls_link_hold_create(ddp->dd_mac, &dlp); 1844 mac_perim_exit(mph); 1845 1846 if (err != 0) { 1847 dls_devnet_rele(ddp); 1848 return (err); 1849 } 1850 1851 *dhp = ddp; 1852 *devp = dls_link_dev(dlp); 1853 return (0); 1854 } 1855 1856 /* 1857 * Close access to a vanity naming node. 1858 */ 1859 void 1860 dls_devnet_close(dls_dl_handle_t dlh) 1861 { 1862 dls_devnet_t *ddp = dlh; 1863 dls_link_t *dlp; 1864 mac_perim_handle_t mph; 1865 1866 VERIFY(mac_perim_enter_by_macname(ddp->dd_mac, &mph) == 0); 1867 VERIFY(dls_link_hold(ddp->dd_mac, &dlp) == 0); 1868 1869 /* 1870 * One rele for the hold placed in dls_devnet_open, another for 1871 * the hold done just above 1872 */ 1873 dls_link_rele(dlp); 1874 dls_link_rele(dlp); 1875 mac_perim_exit(mph); 1876 1877 dls_devnet_rele(ddp); 1878 } 1879 1880 /* 1881 * This is used by /dev/net to rebuild the nodes for readdir(). It is not 1882 * critical and no protection is needed. 1883 */ 1884 boolean_t 1885 dls_devnet_rebuild() 1886 { 1887 boolean_t updated = devnet_need_rebuild; 1888 1889 devnet_need_rebuild = B_FALSE; 1890 return (updated); 1891 } 1892 1893 int 1894 dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid) 1895 { 1896 dls_link_t *dlp; 1897 dls_devnet_t *ddp; 1898 int err; 1899 mac_perim_handle_t mph; 1900 1901 /* 1902 * Holding the mac perimeter ensures that the downcall from the 1903 * dlmgmt daemon which does the property loading does not proceed 1904 * until we relinquish the perimeter. 1905 */ 1906 mac_perim_enter_by_mh(mh, &mph); 1907 /* 1908 * Make this association before we call dls_link_hold_create as 1909 * we need to use the linkid to get the user name for the link 1910 * when we create the MAC client. 1911 */ 1912 if ((err = dls_devnet_set(mh, linkid, zoneid, &ddp)) == 0) { 1913 if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) { 1914 mac_perim_exit(mph); 1915 (void) dls_devnet_unset(mh, &linkid, B_FALSE); 1916 return (err); 1917 } 1918 1919 /* 1920 * If dd_linkid is set then the link was successfully 1921 * initialized. In this case we can remove the 1922 * initializing flag and make the link visible to the 1923 * rest of the system. 1924 * 1925 * If not set then we were called by softmac and it 1926 * was unable to obtain a linkid for the physical link 1927 * because dlmgmtd is down. In that case softmac will 1928 * eventually obtain a linkid and call 1929 * dls_devnet_recreate() to complete initialization. 1930 */ 1931 mutex_enter(&ddp->dd_mutex); 1932 if (ddp->dd_linkid != DATALINK_INVALID_LINKID) 1933 ddp->dd_flags &= ~DD_INITIALIZING; 1934 mutex_exit(&ddp->dd_mutex); 1935 1936 } 1937 1938 mac_perim_exit(mph); 1939 return (err); 1940 } 1941 1942 /* 1943 * Set the linkid of the dls_devnet_t and add it into the i_dls_devnet_id_hash. 1944 * This is called in the case that the dlmgmtd daemon is started later than 1945 * the physical devices get attached, and the linkid is only known after the 1946 * daemon starts. 1947 */ 1948 int 1949 dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid) 1950 { 1951 dls_devnet_t *ddp; 1952 int err; 1953 1954 VERIFY(linkid != DATALINK_INVALID_LINKID); 1955 if ((err = dls_devnet_set(mh, linkid, GLOBAL_ZONEID, &ddp)) == 0) { 1956 mutex_enter(&ddp->dd_mutex); 1957 if (ddp->dd_linkid != DATALINK_INVALID_LINKID) 1958 ddp->dd_flags &= ~DD_INITIALIZING; 1959 mutex_exit(&ddp->dd_mutex); 1960 } 1961 1962 return (err); 1963 1964 } 1965 1966 int 1967 dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait) 1968 { 1969 int err; 1970 mac_perim_handle_t mph; 1971 1972 *idp = DATALINK_INVALID_LINKID; 1973 err = dls_devnet_unset(mh, idp, wait); 1974 1975 /* 1976 * We continue on in the face of ENOENT because the devnet 1977 * unset and DLS link release are not atomic and we may have a 1978 * scenario where there is no entry in i_dls_devnet_hash for 1979 * the MAC name but there is an entry in i_dls_link_hash. For 1980 * example, if the following occurred: 1981 * 1982 * 1. dls_devnet_unset() returns success, and 1983 * 1984 * 2. dls_link_rele_by_name() fails with ENOTEMPTY because 1985 * flows still exist, and 1986 * 1987 * 3. dls_devnet_set() fails to set the zone id and calls 1988 * dls_devnet_unset() -- leaving an entry in 1989 * i_dls_link_hash but no corresponding entry in 1990 * i_dls_devnet_hash. 1991 * 1992 * Even if #3 wasn't true the dls_devnet_set() may fail for 1993 * different reasons in the future; the point is that it _can_ 1994 * fail as part of its contract. We can't rely on it working 1995 * so we must assume that these two pieces of state (devnet 1996 * and link hashes), which should always be in sync, can get 1997 * out of sync and thus even if we get ENOENT from the devnet 1998 * hash we should still try to delete from the link hash just 1999 * in case. 2000 * 2001 * We could prevent the ENOTEMPTY from dls_link_rele_by_name() 2002 * by calling mac_disable() before calling 2003 * dls_devnet_destroy() but that's not currently possible due 2004 * to a long-standing bug. OpenSolaris 6791335: The semantics 2005 * of mac_disable() were modified by Crossbow such that 2006 * dls_devnet_destroy() needs to be called before 2007 * mac_disable() can succeed. This is because of the implicit 2008 * reference that dls has on the mac_impl_t. 2009 */ 2010 if (err != 0 && err != ENOENT) 2011 return (err); 2012 2013 mac_perim_enter_by_mh(mh, &mph); 2014 err = dls_link_rele_by_name(mac_name(mh)); 2015 mac_perim_exit(mph); 2016 2017 if (err != 0) { 2018 dls_devnet_t *ddp; 2019 2020 /* 2021 * XXX It is a general GLDv3 bug that dls_devnet_set() has to 2022 * be called to re-set the link when destroy fails. The 2023 * zoneid below will be incorrect if this function is ever 2024 * called from kernel context or from a zone other than that 2025 * which initially created the link. 2026 */ 2027 (void) dls_devnet_set(mh, *idp, crgetzoneid(CRED()), &ddp); 2028 2029 /* 2030 * You might think dd_linkid should always be set 2031 * here, but in the case where dls_devnet_unset() 2032 * returns ENOENT it will be DATALINK_INVALID_LINKID. 2033 * Stay consistent with the rest of DLS and only 2034 * remove the initializing flag if linkid is set. 2035 */ 2036 mutex_enter(&ddp->dd_mutex); 2037 if (ddp->dd_linkid != DATALINK_INVALID_LINKID) 2038 ddp->dd_flags &= ~DD_INITIALIZING; 2039 mutex_exit(&ddp->dd_mutex); 2040 } 2041 return (err); 2042 } 2043 2044 /* 2045 * Implicitly create an IP tunnel link. 2046 */ 2047 static int 2048 i_dls_devnet_create_iptun(const char *linkname, const char *drvname, 2049 datalink_id_t *linkid) 2050 { 2051 int err; 2052 iptun_kparams_t ik; 2053 uint32_t media; 2054 netstack_t *ns; 2055 major_t iptun_major; 2056 dev_info_t *iptun_dip; 2057 2058 /* First ensure that the iptun device is attached. */ 2059 if ((iptun_major = ddi_name_to_major(IPTUN_DRIVER_NAME)) == (major_t)-1) 2060 return (EINVAL); 2061 if ((iptun_dip = ddi_hold_devi_by_instance(iptun_major, 0, 0)) == NULL) 2062 return (EINVAL); 2063 2064 if (IS_IPV4_TUN(drvname)) { 2065 ik.iptun_kparam_type = IPTUN_TYPE_IPV4; 2066 media = DL_IPV4; 2067 } else if (IS_6TO4_TUN(drvname)) { 2068 ik.iptun_kparam_type = IPTUN_TYPE_6TO4; 2069 media = DL_6TO4; 2070 } else if (IS_IPV6_TUN(drvname)) { 2071 ik.iptun_kparam_type = IPTUN_TYPE_IPV6; 2072 media = DL_IPV6; 2073 } 2074 ik.iptun_kparam_flags = (IPTUN_KPARAM_TYPE | IPTUN_KPARAM_IMPLICIT); 2075 2076 /* Obtain a datalink id for this tunnel. */ 2077 err = dls_mgmt_create((char *)linkname, 0, DATALINK_CLASS_IPTUN, media, 2078 B_FALSE, &ik.iptun_kparam_linkid); 2079 if (err != 0) { 2080 ddi_release_devi(iptun_dip); 2081 return (err); 2082 } 2083 2084 ns = netstack_get_current(); 2085 err = iptun_create(&ik, CRED()); 2086 netstack_rele(ns); 2087 2088 if (err != 0) 2089 VERIFY(dls_mgmt_destroy(ik.iptun_kparam_linkid, B_FALSE) == 0); 2090 else 2091 *linkid = ik.iptun_kparam_linkid; 2092 2093 ddi_release_devi(iptun_dip); 2094 return (err); 2095 } 2096 2097 static int 2098 i_dls_devnet_destroy_iptun(datalink_id_t linkid) 2099 { 2100 int err; 2101 2102 /* 2103 * Note the use of zone_kcred() here as opposed to CRED(). This is 2104 * because the process that does the last close of this /dev/net node 2105 * may not have necessary privileges to delete this IP tunnel, but the 2106 * tunnel must always be implicitly deleted on last close. 2107 */ 2108 if ((err = iptun_delete(linkid, zone_kcred())) == 0) 2109 (void) dls_mgmt_destroy(linkid, B_FALSE); 2110 return (err); 2111 } 2112 2113 const char * 2114 dls_devnet_link(dls_dl_handle_t ddh) 2115 { 2116 return (ddh->dd_linkname); 2117 } 2118 2119 const char * 2120 dls_devnet_mac(dls_dl_handle_t ddh) 2121 { 2122 return (ddh->dd_mac); 2123 } 2124 2125 datalink_id_t 2126 dls_devnet_linkid(dls_dl_handle_t ddh) 2127 { 2128 return (ddh->dd_linkid); 2129 } 2130