1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2015 Joyent, Inc. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/cred.h> 28 #include <sys/sysmacros.h> 29 #include <sys/conf.h> 30 #include <sys/cmn_err.h> 31 #include <sys/list.h> 32 #include <sys/ksynch.h> 33 #include <sys/kmem.h> 34 #include <sys/stream.h> 35 #include <sys/modctl.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/atomic.h> 39 #include <sys/stat.h> 40 #include <sys/modhash.h> 41 #include <sys/strsubr.h> 42 #include <sys/strsun.h> 43 #include <sys/dlpi.h> 44 #include <sys/mac.h> 45 #include <sys/mac_provider.h> 46 #include <sys/mac_client.h> 47 #include <sys/mac_client_priv.h> 48 #include <sys/mac_ether.h> 49 #include <sys/dls.h> 50 #include <sys/pattr.h> 51 #include <sys/time.h> 52 #include <sys/vlan.h> 53 #include <sys/vnic.h> 54 #include <sys/vnic_impl.h> 55 #include <sys/mac_impl.h> 56 #include <sys/mac_flow_impl.h> 57 #include <inet/ip_impl.h> 58 59 /* 60 * Note that for best performance, the VNIC is a passthrough design. 61 * For each VNIC corresponds a MAC client of the underlying MAC (lower MAC). 62 * This MAC client is opened by the VNIC driver at VNIC creation, 63 * and closed when the VNIC is deleted. 64 * When a MAC client of the VNIC itself opens a VNIC, the MAC layer 65 * (upper MAC) detects that the MAC being opened is a VNIC. Instead 66 * of allocating a new MAC client, it asks the VNIC driver to return 67 * the lower MAC client handle associated with the VNIC, and that handle 68 * is returned to the upper MAC client directly. This allows access 69 * by upper MAC clients of the VNIC to have direct access to the lower 70 * MAC client for the control path and data path. 71 * 72 * Due to this passthrough, some of the entry points exported by the 73 * VNIC driver are never directly invoked. These entry points include 74 * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc. 75 * 76 * VNICs support multiple upper mac clients to enable support for 77 * multiple MAC addresses on the VNIC. When the VNIC is created the 78 * initial mac client is the primary upper mac. Any additional mac 79 * clients are secondary macs. 80 */ 81 82 static int vnic_m_start(void *); 83 static void vnic_m_stop(void *); 84 static int vnic_m_promisc(void *, boolean_t); 85 static int vnic_m_multicst(void *, boolean_t, const uint8_t *); 86 static int vnic_m_unicst(void *, const uint8_t *); 87 static int vnic_m_stat(void *, uint_t, uint64_t *); 88 static void vnic_m_ioctl(void *, queue_t *, mblk_t *); 89 static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t, 90 const void *); 91 static int vnic_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); 92 static void vnic_m_propinfo(void *, const char *, mac_prop_id_t, 93 mac_prop_info_handle_t); 94 static mblk_t *vnic_m_tx(void *, mblk_t *); 95 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *); 96 static void vnic_notify_cb(void *, mac_notify_type_t); 97 static void vnic_cleanup_secondary_macs(vnic_t *, int); 98 99 static kmem_cache_t *vnic_cache; 100 static krwlock_t vnic_lock; 101 static uint_t vnic_count; 102 103 #define ANCHOR_VNIC_MIN_MTU 576 104 #define ANCHOR_VNIC_MAX_MTU 9000 105 106 /* hash of VNICs (vnic_t's), keyed by VNIC id */ 107 static mod_hash_t *vnic_hash; 108 #define VNIC_HASHSZ 64 109 #define VNIC_HASH_KEY(vnic_id) ((mod_hash_key_t)(uintptr_t)vnic_id) 110 111 #define VNIC_M_CALLBACK_FLAGS \ 112 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) 113 114 static mac_callbacks_t vnic_m_callbacks = { 115 VNIC_M_CALLBACK_FLAGS, 116 vnic_m_stat, 117 vnic_m_start, 118 vnic_m_stop, 119 vnic_m_promisc, 120 vnic_m_multicst, 121 vnic_m_unicst, 122 vnic_m_tx, 123 NULL, 124 vnic_m_ioctl, 125 vnic_m_capab_get, 126 NULL, 127 NULL, 128 vnic_m_setprop, 129 vnic_m_getprop, 130 vnic_m_propinfo 131 }; 132 133 void 134 vnic_dev_init(void) 135 { 136 vnic_cache = kmem_cache_create("vnic_cache", 137 sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 138 139 vnic_hash = mod_hash_create_idhash("vnic_hash", 140 VNIC_HASHSZ, mod_hash_null_valdtor); 141 142 rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL); 143 144 vnic_count = 0; 145 } 146 147 void 148 vnic_dev_fini(void) 149 { 150 ASSERT(vnic_count == 0); 151 152 rw_destroy(&vnic_lock); 153 mod_hash_destroy_idhash(vnic_hash); 154 kmem_cache_destroy(vnic_cache); 155 } 156 157 uint_t 158 vnic_dev_count(void) 159 { 160 return (vnic_count); 161 } 162 163 static vnic_ioc_diag_t 164 vnic_mac2vnic_diag(mac_diag_t diag) 165 { 166 switch (diag) { 167 case MAC_DIAG_MACADDR_NIC: 168 return (VNIC_IOC_DIAG_MACADDR_NIC); 169 case MAC_DIAG_MACADDR_INUSE: 170 return (VNIC_IOC_DIAG_MACADDR_INUSE); 171 case MAC_DIAG_MACADDR_INVALID: 172 return (VNIC_IOC_DIAG_MACADDR_INVALID); 173 case MAC_DIAG_MACADDRLEN_INVALID: 174 return (VNIC_IOC_DIAG_MACADDRLEN_INVALID); 175 case MAC_DIAG_MACFACTORYSLOTINVALID: 176 return (VNIC_IOC_DIAG_MACFACTORYSLOTINVALID); 177 case MAC_DIAG_MACFACTORYSLOTUSED: 178 return (VNIC_IOC_DIAG_MACFACTORYSLOTUSED); 179 case MAC_DIAG_MACFACTORYSLOTALLUSED: 180 return (VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED); 181 case MAC_DIAG_MACFACTORYNOTSUP: 182 return (VNIC_IOC_DIAG_MACFACTORYNOTSUP); 183 case MAC_DIAG_MACPREFIX_INVALID: 184 return (VNIC_IOC_DIAG_MACPREFIX_INVALID); 185 case MAC_DIAG_MACPREFIXLEN_INVALID: 186 return (VNIC_IOC_DIAG_MACPREFIXLEN_INVALID); 187 case MAC_DIAG_MACNO_HWRINGS: 188 return (VNIC_IOC_DIAG_NO_HWRINGS); 189 default: 190 return (VNIC_IOC_DIAG_NONE); 191 } 192 } 193 194 static int 195 vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type, 196 int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg, 197 uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag, 198 uint16_t vid, boolean_t req_hwgrp_flag) 199 { 200 mac_diag_t mac_diag; 201 uint16_t mac_flags = 0; 202 int err; 203 uint_t addr_len; 204 205 if (flags & VNIC_IOC_CREATE_NODUPCHECK) 206 mac_flags |= MAC_UNICAST_NODUPCHECK; 207 208 switch (vnic_addr_type) { 209 case VNIC_MAC_ADDR_TYPE_FIXED: 210 case VNIC_MAC_ADDR_TYPE_VRID: 211 /* 212 * The MAC address value to assign to the VNIC 213 * is already provided in mac_addr_arg. addr_len_ptr_arg 214 * already contains the MAC address length. 215 */ 216 break; 217 218 case VNIC_MAC_ADDR_TYPE_RANDOM: 219 /* 220 * Random MAC address. There are two sub-cases: 221 * 222 * 1 - If mac_len == 0, a new MAC address is generated. 223 * The length of the MAC address to generated depends 224 * on the type of MAC used. The prefix to use for the MAC 225 * address is stored in the most significant bytes 226 * of the mac_addr argument, and its length is specified 227 * by the mac_prefix_len argument. This prefix can 228 * correspond to a IEEE OUI in the case of Ethernet, 229 * for example. 230 * 231 * 2 - If mac_len > 0, the address was already picked 232 * randomly, and is now passed back during VNIC 233 * re-creation. The mac_addr argument contains the MAC 234 * address that was generated. We distinguish this 235 * case from the fixed MAC address case, since we 236 * want the user consumers to know, when they query 237 * the list of VNICs, that a VNIC was assigned a 238 * random MAC address vs assigned a fixed address 239 * specified by the user. 240 */ 241 242 /* 243 * If it's a pre-generated address, we're done. mac_addr_arg 244 * and addr_len_ptr_arg already contain the MAC address 245 * value and length. 246 */ 247 if (*addr_len_ptr_arg > 0) 248 break; 249 250 /* generate a new random MAC address */ 251 if ((err = mac_addr_random(vnic->vn_mch, 252 prefix_len, mac_addr_arg, &mac_diag)) != 0) { 253 *diag = vnic_mac2vnic_diag(mac_diag); 254 return (err); 255 } 256 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh); 257 break; 258 259 case VNIC_MAC_ADDR_TYPE_FACTORY: 260 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot); 261 if (err != 0) { 262 if (err == EINVAL) 263 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID; 264 if (err == EBUSY) 265 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED; 266 if (err == ENOSPC) 267 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED; 268 return (err); 269 } 270 271 mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot, 272 mac_addr_arg, &addr_len, NULL, NULL); 273 *addr_len_ptr_arg = addr_len; 274 break; 275 276 case VNIC_MAC_ADDR_TYPE_AUTO: 277 /* first try to allocate a factory MAC address */ 278 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot); 279 if (err == 0) { 280 mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot, 281 mac_addr_arg, &addr_len, NULL, NULL); 282 vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY; 283 *addr_len_ptr_arg = addr_len; 284 break; 285 } 286 287 /* 288 * Allocating a factory MAC address failed, generate a 289 * random MAC address instead. 290 */ 291 if ((err = mac_addr_random(vnic->vn_mch, 292 prefix_len, mac_addr_arg, &mac_diag)) != 0) { 293 *diag = vnic_mac2vnic_diag(mac_diag); 294 return (err); 295 } 296 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh); 297 vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM; 298 break; 299 case VNIC_MAC_ADDR_TYPE_PRIMARY: 300 /* 301 * We get the address here since we copy it in the 302 * vnic's vn_addr. 303 * We can't ask for hardware resources since we 304 * don't currently support hardware classification 305 * for these MAC clients. 306 */ 307 if (req_hwgrp_flag) { 308 *diag = VNIC_IOC_DIAG_NO_HWRINGS; 309 return (ENOTSUP); 310 } 311 mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg); 312 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh); 313 mac_flags |= MAC_UNICAST_VNIC_PRIMARY; 314 break; 315 } 316 317 vnic->vn_addr_type = vnic_addr_type; 318 319 err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags, 320 &vnic->vn_muh, vid, &mac_diag); 321 if (err != 0) { 322 if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) { 323 /* release factory MAC address */ 324 mac_addr_factory_release(vnic->vn_mch, *addr_slot); 325 } 326 *diag = vnic_mac2vnic_diag(mac_diag); 327 } 328 329 return (err); 330 } 331 332 /* 333 * Create a new VNIC upon request from administrator. 334 * Returns 0 on success, an errno on failure. 335 */ 336 /* ARGSUSED */ 337 int 338 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, 339 vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr, 340 int *mac_slot, uint_t mac_prefix_len, uint16_t vid, vrid_t vrid, 341 int af, mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag, 342 cred_t *credp) 343 { 344 vnic_t *vnic; 345 mac_register_t *mac; 346 int err; 347 boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0); 348 char vnic_name[MAXNAMELEN]; 349 const mac_info_t *minfop; 350 uint32_t req_hwgrp_flag = B_FALSE; 351 352 *diag = VNIC_IOC_DIAG_NONE; 353 354 rw_enter(&vnic_lock, RW_WRITER); 355 356 /* does a VNIC with the same id already exist? */ 357 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 358 (mod_hash_val_t *)&vnic); 359 if (err == 0) { 360 rw_exit(&vnic_lock); 361 return (EEXIST); 362 } 363 364 vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP); 365 if (vnic == NULL) { 366 rw_exit(&vnic_lock); 367 return (ENOMEM); 368 } 369 370 bzero(vnic, sizeof (*vnic)); 371 372 vnic->vn_id = vnic_id; 373 vnic->vn_link_id = linkid; 374 vnic->vn_vrid = vrid; 375 vnic->vn_af = af; 376 377 if (!is_anchor) { 378 if (linkid == DATALINK_INVALID_LINKID) { 379 err = EINVAL; 380 goto bail; 381 } 382 383 /* 384 * Open the lower MAC and assign its initial bandwidth and 385 * MAC address. We do this here during VNIC creation and 386 * do not wait until the upper MAC client open so that we 387 * can validate the VNIC creation parameters (bandwidth, 388 * MAC address, etc) and reserve a factory MAC address if 389 * one was requested. 390 */ 391 err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh); 392 if (err != 0) 393 goto bail; 394 395 /* 396 * VNIC(vlan) over VNICs(vlans) is not supported. 397 */ 398 if (mac_is_vnic(vnic->vn_lower_mh)) { 399 err = EINVAL; 400 goto bail; 401 } 402 403 /* only ethernet support for now */ 404 minfop = mac_info(vnic->vn_lower_mh); 405 if (minfop->mi_nativemedia != DL_ETHER) { 406 err = ENOTSUP; 407 goto bail; 408 } 409 410 (void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL, 411 NULL); 412 err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch, 413 vnic_name, MAC_OPEN_FLAGS_IS_VNIC); 414 if (err != 0) 415 goto bail; 416 417 /* assign a MAC address to the VNIC */ 418 419 err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot, 420 mac_prefix_len, mac_len, mac_addr, flags, diag, vid, 421 req_hwgrp_flag); 422 if (err != 0) { 423 vnic->vn_muh = NULL; 424 if (diag != NULL && req_hwgrp_flag) 425 *diag = VNIC_IOC_DIAG_NO_HWRINGS; 426 goto bail; 427 } 428 429 /* register to receive notification from underlying MAC */ 430 vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb, 431 vnic); 432 433 *vnic_addr_type = vnic->vn_addr_type; 434 vnic->vn_addr_len = *mac_len; 435 vnic->vn_vid = vid; 436 437 bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len); 438 439 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) 440 vnic->vn_slot_id = *mac_slot; 441 442 /* 443 * Set the initial VNIC capabilities. If the VNIC is created 444 * over MACs which does not support nactive vlan, disable 445 * VNIC's hardware checksum capability if its VID is not 0, 446 * since the underlying MAC would get the hardware checksum 447 * offset wrong in case of VLAN packets. 448 */ 449 if (vid == 0 || !mac_capab_get(vnic->vn_lower_mh, 450 MAC_CAPAB_NO_NATIVEVLAN, NULL)) { 451 if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM, 452 &vnic->vn_hcksum_txflags)) 453 vnic->vn_hcksum_txflags = 0; 454 } else { 455 vnic->vn_hcksum_txflags = 0; 456 } 457 } 458 459 /* register with the MAC module */ 460 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 461 goto bail; 462 463 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 464 mac->m_driver = vnic; 465 mac->m_dip = vnic_get_dip(); 466 mac->m_instance = (uint_t)-1; 467 mac->m_src_addr = vnic->vn_addr; 468 mac->m_callbacks = &vnic_m_callbacks; 469 470 if (!is_anchor) { 471 /* 472 * If this is a VNIC based VLAN, then we check for the 473 * margin unless it has been created with the force 474 * flag. If we are configuring a VLAN over an etherstub, 475 * we don't check the margin even if force is not set. 476 */ 477 if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) { 478 if (vid != VLAN_ID_NONE) 479 vnic->vn_force = B_TRUE; 480 /* 481 * As the current margin size of the underlying mac is 482 * used to determine the margin size of the VNIC 483 * itself, request the underlying mac not to change 484 * to a smaller margin size. 485 */ 486 err = mac_margin_add(vnic->vn_lower_mh, 487 &vnic->vn_margin, B_TRUE); 488 ASSERT(err == 0); 489 } else { 490 vnic->vn_margin = VLAN_TAGSZ; 491 err = mac_margin_add(vnic->vn_lower_mh, 492 &vnic->vn_margin, B_FALSE); 493 if (err != 0) { 494 mac_free(mac); 495 if (diag != NULL) 496 *diag = VNIC_IOC_DIAG_MACMARGIN_INVALID; 497 goto bail; 498 } 499 } 500 501 mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu, 502 &mac->m_max_sdu); 503 err = mac_mtu_add(vnic->vn_lower_mh, &mac->m_max_sdu, B_FALSE); 504 if (err != 0) { 505 VERIFY(mac_margin_remove(vnic->vn_lower_mh, 506 vnic->vn_margin) == 0); 507 mac_free(mac); 508 if (diag != NULL) 509 *diag = VNIC_IOC_DIAG_MACMTU_INVALID; 510 goto bail; 511 } 512 vnic->vn_mtu = mac->m_max_sdu; 513 } else { 514 vnic->vn_margin = VLAN_TAGSZ; 515 mac->m_min_sdu = 1; 516 mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU; 517 vnic->vn_mtu = ANCHOR_VNIC_MAX_MTU; 518 } 519 520 mac->m_margin = vnic->vn_margin; 521 522 err = mac_register(mac, &vnic->vn_mh); 523 mac_free(mac); 524 if (err != 0) { 525 if (!is_anchor) { 526 VERIFY(mac_mtu_remove(vnic->vn_lower_mh, 527 vnic->vn_mtu) == 0); 528 VERIFY(mac_margin_remove(vnic->vn_lower_mh, 529 vnic->vn_margin) == 0); 530 } 531 goto bail; 532 } 533 534 /* Set the VNIC's MAC in the client */ 535 if (!is_anchor) { 536 mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp); 537 538 if (mrp != NULL) { 539 if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 || 540 (mrp->mrp_mask & MRP_TX_RINGS) != 0) { 541 req_hwgrp_flag = B_TRUE; 542 } 543 err = mac_client_set_resources(vnic->vn_mch, mrp); 544 if (err != 0) { 545 VERIFY(mac_mtu_remove(vnic->vn_lower_mh, 546 vnic->vn_mtu) == 0); 547 VERIFY(mac_margin_remove(vnic->vn_lower_mh, 548 vnic->vn_margin) == 0); 549 (void) mac_unregister(vnic->vn_mh); 550 goto bail; 551 } 552 } 553 } 554 555 err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp)); 556 if (err != 0) { 557 VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh, 558 vnic->vn_margin) == 0); 559 if (!is_anchor) { 560 VERIFY(mac_mtu_remove(vnic->vn_lower_mh, 561 vnic->vn_mtu) == 0); 562 VERIFY(mac_margin_remove(vnic->vn_lower_mh, 563 vnic->vn_margin) == 0); 564 } 565 (void) mac_unregister(vnic->vn_mh); 566 goto bail; 567 } 568 569 /* add new VNIC to hash table */ 570 err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id), 571 (mod_hash_val_t)vnic); 572 ASSERT(err == 0); 573 vnic_count++; 574 575 /* 576 * Now that we've enabled this VNIC, we should go through and update the 577 * link state by setting it to our parents. 578 */ 579 vnic->vn_enabled = B_TRUE; 580 581 if (is_anchor) { 582 mac_link_update(vnic->vn_mh, LINK_STATE_UP); 583 } else { 584 mac_link_update(vnic->vn_mh, 585 mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE)); 586 } 587 588 rw_exit(&vnic_lock); 589 590 return (0); 591 592 bail: 593 rw_exit(&vnic_lock); 594 if (!is_anchor) { 595 if (vnic->vn_mnh != NULL) 596 (void) mac_notify_remove(vnic->vn_mnh, B_TRUE); 597 if (vnic->vn_muh != NULL) 598 (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh); 599 if (vnic->vn_mch != NULL) 600 mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC); 601 if (vnic->vn_lower_mh != NULL) 602 mac_close(vnic->vn_lower_mh); 603 } 604 605 kmem_cache_free(vnic_cache, vnic); 606 return (err); 607 } 608 609 /* 610 * Modify the properties of an existing VNIC. 611 */ 612 /* ARGSUSED */ 613 int 614 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask, 615 vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr, 616 uint_t mac_slot, mac_resource_props_t *mrp) 617 { 618 vnic_t *vnic = NULL; 619 620 rw_enter(&vnic_lock, RW_WRITER); 621 622 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 623 (mod_hash_val_t *)&vnic) != 0) { 624 rw_exit(&vnic_lock); 625 return (ENOENT); 626 } 627 628 rw_exit(&vnic_lock); 629 630 return (0); 631 } 632 633 /* ARGSUSED */ 634 int 635 vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp) 636 { 637 vnic_t *vnic = NULL; 638 mod_hash_val_t val; 639 datalink_id_t tmpid; 640 int rc; 641 642 rw_enter(&vnic_lock, RW_WRITER); 643 644 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 645 (mod_hash_val_t *)&vnic) != 0) { 646 rw_exit(&vnic_lock); 647 return (ENOENT); 648 } 649 650 if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) { 651 rw_exit(&vnic_lock); 652 return (rc); 653 } 654 655 ASSERT(vnic_id == tmpid); 656 657 /* 658 * We cannot unregister the MAC yet. Unregistering would 659 * free up mac_impl_t which should not happen at this time. 660 * So disable mac_impl_t by calling mac_disable(). This will prevent 661 * any new claims on mac_impl_t. 662 */ 663 if ((rc = mac_disable(vnic->vn_mh)) != 0) { 664 (void) dls_devnet_create(vnic->vn_mh, vnic_id, 665 crgetzoneid(credp)); 666 rw_exit(&vnic_lock); 667 return (rc); 668 } 669 670 vnic_cleanup_secondary_macs(vnic, vnic->vn_nhandles); 671 672 vnic->vn_enabled = B_FALSE; 673 (void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val); 674 ASSERT(vnic == (vnic_t *)val); 675 vnic_count--; 676 rw_exit(&vnic_lock); 677 678 /* 679 * XXX-nicolas shouldn't have a void cast here, if it's 680 * expected that the function will never fail, then we should 681 * have an ASSERT(). 682 */ 683 (void) mac_unregister(vnic->vn_mh); 684 685 if (vnic->vn_lower_mh != NULL) { 686 /* 687 * Check if MAC address for the vnic was obtained from the 688 * factory MAC addresses. If yes, release it. 689 */ 690 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) { 691 (void) mac_addr_factory_release(vnic->vn_mch, 692 vnic->vn_slot_id); 693 } 694 (void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin); 695 (void) mac_mtu_remove(vnic->vn_lower_mh, vnic->vn_mtu); 696 (void) mac_notify_remove(vnic->vn_mnh, B_TRUE); 697 (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh); 698 mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC); 699 mac_close(vnic->vn_lower_mh); 700 } 701 702 kmem_cache_free(vnic_cache, vnic); 703 return (0); 704 } 705 706 /* ARGSUSED */ 707 mblk_t * 708 vnic_m_tx(void *arg, mblk_t *mp_chain) 709 { 710 /* 711 * This function could be invoked for an anchor VNIC when sending 712 * broadcast and multicast packets, and unicast packets which did 713 * not match any local known destination. 714 */ 715 freemsgchain(mp_chain); 716 return (NULL); 717 } 718 719 /*ARGSUSED*/ 720 static void 721 vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 722 { 723 miocnak(q, mp, 0, ENOTSUP); 724 } 725 726 /* 727 * This entry point cannot be passed-through, since it is invoked 728 * for the per-VNIC kstats which must be exported independently 729 * of the existence of VNIC MAC clients. 730 */ 731 static int 732 vnic_m_stat(void *arg, uint_t stat, uint64_t *val) 733 { 734 vnic_t *vnic = arg; 735 int rval = 0; 736 737 if (vnic->vn_lower_mh == NULL) { 738 /* 739 * It's an anchor VNIC, which does not have any 740 * statistics in itself. 741 */ 742 return (ENOTSUP); 743 } 744 745 /* 746 * ENOTSUP must be reported for unsupported stats, the VNIC 747 * driver reports a subset of the stats that would 748 * be returned by a real piece of hardware. 749 */ 750 751 switch (stat) { 752 case MAC_STAT_LINK_STATE: 753 case MAC_STAT_LINK_UP: 754 case MAC_STAT_PROMISC: 755 case MAC_STAT_IFSPEED: 756 case MAC_STAT_MULTIRCV: 757 case MAC_STAT_MULTIXMT: 758 case MAC_STAT_BRDCSTRCV: 759 case MAC_STAT_BRDCSTXMT: 760 case MAC_STAT_OPACKETS: 761 case MAC_STAT_OBYTES: 762 case MAC_STAT_IERRORS: 763 case MAC_STAT_OERRORS: 764 case MAC_STAT_RBYTES: 765 case MAC_STAT_IPACKETS: 766 *val = mac_client_stat_get(vnic->vn_mch, stat); 767 break; 768 default: 769 rval = ENOTSUP; 770 } 771 772 return (rval); 773 } 774 775 /* 776 * Invoked by the upper MAC to retrieve the lower MAC client handle 777 * corresponding to a VNIC. A pointer to this function is obtained 778 * by the upper MAC via capability query. 779 * 780 * XXX-nicolas Note: this currently causes all VNIC MAC clients to 781 * receive the same MAC client handle for the same VNIC. This is ok 782 * as long as we have only one VNIC MAC client which sends and 783 * receives data, but we don't currently enforce this at the MAC layer. 784 */ 785 static void * 786 vnic_mac_client_handle(void *vnic_arg) 787 { 788 vnic_t *vnic = vnic_arg; 789 790 return (vnic->vn_mch); 791 } 792 793 /* 794 * Invoked when updating the primary MAC so that the secondary MACs are 795 * kept in sync. 796 */ 797 static void 798 vnic_mac_secondary_update(void *vnic_arg) 799 { 800 vnic_t *vn = vnic_arg; 801 int i; 802 803 for (i = 1; i <= vn->vn_nhandles; i++) { 804 mac_secondary_dup(vn->vn_mc_handles[0], vn->vn_mc_handles[i]); 805 } 806 } 807 808 /* 809 * Return information about the specified capability. 810 */ 811 /* ARGSUSED */ 812 static boolean_t 813 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 814 { 815 vnic_t *vnic = arg; 816 817 switch (cap) { 818 case MAC_CAPAB_HCKSUM: { 819 uint32_t *hcksum_txflags = cap_data; 820 821 *hcksum_txflags = vnic->vn_hcksum_txflags & 822 (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM | 823 HCKSUM_INET_PARTIAL); 824 break; 825 } 826 case MAC_CAPAB_VNIC: { 827 mac_capab_vnic_t *vnic_capab = cap_data; 828 829 if (vnic->vn_lower_mh == NULL) { 830 /* 831 * It's an anchor VNIC, we don't have an underlying 832 * NIC and MAC client handle. 833 */ 834 return (B_FALSE); 835 } 836 837 if (vnic_capab != NULL) { 838 vnic_capab->mcv_arg = vnic; 839 vnic_capab->mcv_mac_client_handle = 840 vnic_mac_client_handle; 841 vnic_capab->mcv_mac_secondary_update = 842 vnic_mac_secondary_update; 843 } 844 break; 845 } 846 case MAC_CAPAB_ANCHOR_VNIC: { 847 /* since it's an anchor VNIC we don't have lower mac handle */ 848 if (vnic->vn_lower_mh == NULL) { 849 ASSERT(vnic->vn_link_id == 0); 850 return (B_TRUE); 851 } 852 return (B_FALSE); 853 } 854 case MAC_CAPAB_NO_NATIVEVLAN: 855 return (B_FALSE); 856 case MAC_CAPAB_NO_ZCOPY: 857 return (B_TRUE); 858 case MAC_CAPAB_VRRP: { 859 mac_capab_vrrp_t *vrrp_capab = cap_data; 860 861 if (vnic->vn_vrid != 0) { 862 if (vrrp_capab != NULL) 863 vrrp_capab->mcv_af = vnic->vn_af; 864 return (B_TRUE); 865 } 866 return (B_FALSE); 867 } 868 default: 869 return (B_FALSE); 870 } 871 return (B_TRUE); 872 } 873 874 /* ARGSUSED */ 875 static int 876 vnic_m_start(void *arg) 877 { 878 return (0); 879 } 880 881 /* ARGSUSED */ 882 static void 883 vnic_m_stop(void *arg) 884 { 885 } 886 887 /* ARGSUSED */ 888 static int 889 vnic_m_promisc(void *arg, boolean_t on) 890 { 891 return (0); 892 } 893 894 /* ARGSUSED */ 895 static int 896 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 897 { 898 return (0); 899 } 900 901 static int 902 vnic_m_unicst(void *arg, const uint8_t *macaddr) 903 { 904 vnic_t *vnic = arg; 905 906 return (mac_vnic_unicast_set(vnic->vn_mch, macaddr)); 907 } 908 909 static void 910 vnic_cleanup_secondary_macs(vnic_t *vn, int cnt) 911 { 912 int i; 913 914 /* Remove existing secondaries (primary is at 0) */ 915 for (i = 1; i <= cnt; i++) { 916 mac_rx_clear(vn->vn_mc_handles[i]); 917 918 /* unicast handle might not have been set yet */ 919 if (vn->vn_mu_handles[i] != NULL) 920 (void) mac_unicast_remove(vn->vn_mc_handles[i], 921 vn->vn_mu_handles[i]); 922 923 mac_secondary_cleanup(vn->vn_mc_handles[i]); 924 925 mac_client_close(vn->vn_mc_handles[i], MAC_CLOSE_FLAGS_IS_VNIC); 926 927 vn->vn_mu_handles[i] = NULL; 928 vn->vn_mc_handles[i] = NULL; 929 } 930 931 vn->vn_nhandles = 0; 932 } 933 934 /* 935 * Setup secondary MAC addresses on the vnic. Due to limitations in the mac 936 * code, each mac address must be associated with a mac_client (and the 937 * flow that goes along with the client) so we need to create those clients 938 * here. 939 */ 940 static int 941 vnic_set_secondary_macs(vnic_t *vn, mac_secondary_addr_t *msa) 942 { 943 int i, err; 944 char primary_name[MAXNAMELEN]; 945 946 /* First, remove pre-existing secondaries */ 947 ASSERT(vn->vn_nhandles < MPT_MAXMACADDR); 948 vnic_cleanup_secondary_macs(vn, vn->vn_nhandles); 949 950 if (msa->ms_addrcnt == (uint32_t)-1) 951 msa->ms_addrcnt = 0; 952 953 vn->vn_nhandles = msa->ms_addrcnt; 954 955 (void) dls_mgmt_get_linkinfo(vn->vn_id, primary_name, NULL, NULL, NULL); 956 957 /* 958 * Now add the new secondary MACs 959 * Recall that the primary MAC address is the first element. 960 * The secondary clients are named after the primary with their 961 * index to distinguish them. 962 */ 963 for (i = 1; i <= vn->vn_nhandles; i++) { 964 uint8_t *addr; 965 mac_diag_t mac_diag; 966 char secondary_name[MAXNAMELEN]; 967 968 (void) snprintf(secondary_name, sizeof (secondary_name), 969 "%s%02d", primary_name, i); 970 971 err = mac_client_open(vn->vn_lower_mh, &vn->vn_mc_handles[i], 972 secondary_name, MAC_OPEN_FLAGS_IS_VNIC); 973 if (err != 0) { 974 /* Remove any that we successfully added */ 975 vnic_cleanup_secondary_macs(vn, --i); 976 return (err); 977 } 978 979 /* 980 * Assign a MAC address to the VNIC 981 * 982 * Normally this would be done with vnic_unicast_add but since 983 * we know these are fixed adddresses, and since we need to 984 * save this in the proper array slot, we bypass that function 985 * and go direct. 986 */ 987 addr = msa->ms_addrs[i - 1]; 988 err = mac_unicast_add(vn->vn_mc_handles[i], addr, 0, 989 &vn->vn_mu_handles[i], vn->vn_vid, &mac_diag); 990 if (err != 0) { 991 /* Remove any that we successfully added */ 992 vnic_cleanup_secondary_macs(vn, i); 993 return (err); 994 } 995 996 /* 997 * Setup the secondary the same way as the primary (i.e. 998 * receiver function/argument (e.g. i_dls_link_rx, mac_pkt_drop, 999 * etc.), the promisc list, and the resource controls). 1000 */ 1001 mac_secondary_dup(vn->vn_mc_handles[0], vn->vn_mc_handles[i]); 1002 } 1003 1004 return (0); 1005 } 1006 1007 static int 1008 vnic_get_secondary_macs(vnic_t *vn, uint_t pr_valsize, void *pr_val) 1009 { 1010 int i; 1011 mac_secondary_addr_t msa; 1012 1013 if (pr_valsize < sizeof (msa)) 1014 return (EINVAL); 1015 1016 /* Get existing addresses (primary is at 0) */ 1017 ASSERT(vn->vn_nhandles < MPT_MAXMACADDR); 1018 for (i = 1; i <= vn->vn_nhandles; i++) { 1019 ASSERT(vn->vn_mc_handles[i] != NULL); 1020 mac_unicast_secondary_get(vn->vn_mc_handles[i], 1021 msa.ms_addrs[i - 1]); 1022 } 1023 msa.ms_addrcnt = vn->vn_nhandles; 1024 1025 bcopy(&msa, pr_val, sizeof (msa)); 1026 return (0); 1027 } 1028 1029 /* 1030 * Callback functions for set/get of properties 1031 */ 1032 /*ARGSUSED*/ 1033 static int 1034 vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, 1035 uint_t pr_valsize, const void *pr_val) 1036 { 1037 int err = 0; 1038 vnic_t *vn = m_driver; 1039 1040 switch (pr_num) { 1041 case MAC_PROP_MTU: { 1042 uint32_t mtu; 1043 1044 if (pr_valsize < sizeof (mtu)) { 1045 err = EINVAL; 1046 break; 1047 } 1048 bcopy(pr_val, &mtu, sizeof (mtu)); 1049 1050 if (vn->vn_link_id == DATALINK_INVALID_LINKID) { 1051 if (mtu < ANCHOR_VNIC_MIN_MTU || 1052 mtu > ANCHOR_VNIC_MAX_MTU) { 1053 err = EINVAL; 1054 break; 1055 } 1056 } else { 1057 err = mac_mtu_add(vn->vn_lower_mh, &mtu, B_FALSE); 1058 /* 1059 * If it's not supported to set a value here, translate 1060 * that to EINVAL, so user land gets a better idea of 1061 * what went wrong. This realistically means that they 1062 * violated the output of prop info. 1063 */ 1064 if (err == ENOTSUP) 1065 err = EINVAL; 1066 if (err != 0) 1067 break; 1068 VERIFY(mac_mtu_remove(vn->vn_lower_mh, 1069 vn->vn_mtu) == 0); 1070 } 1071 vn->vn_mtu = mtu; 1072 err = mac_maxsdu_update(vn->vn_mh, mtu); 1073 break; 1074 } 1075 case MAC_PROP_VN_PROMISC_FILTERED: { 1076 boolean_t filtered; 1077 1078 if (pr_valsize < sizeof (filtered)) { 1079 err = EINVAL; 1080 break; 1081 } 1082 1083 bcopy(pr_val, &filtered, sizeof (filtered)); 1084 mac_set_promisc_filtered(vn->vn_mch, filtered); 1085 break; 1086 } 1087 case MAC_PROP_SECONDARY_ADDRS: { 1088 mac_secondary_addr_t msa; 1089 1090 bcopy(pr_val, &msa, sizeof (msa)); 1091 err = vnic_set_secondary_macs(vn, &msa); 1092 break; 1093 } 1094 default: 1095 err = ENOTSUP; 1096 break; 1097 } 1098 return (err); 1099 } 1100 1101 /* ARGSUSED */ 1102 static int 1103 vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1104 uint_t pr_valsize, void *pr_val) 1105 { 1106 vnic_t *vn = arg; 1107 int ret = 0; 1108 boolean_t out; 1109 1110 switch (pr_num) { 1111 case MAC_PROP_VN_PROMISC_FILTERED: 1112 out = mac_get_promisc_filtered(vn->vn_mch); 1113 ASSERT(pr_valsize >= sizeof (boolean_t)); 1114 bcopy(&out, pr_val, sizeof (boolean_t)); 1115 break; 1116 case MAC_PROP_SECONDARY_ADDRS: 1117 ret = vnic_get_secondary_macs(vn, pr_valsize, pr_val); 1118 break; 1119 default: 1120 ret = EINVAL; 1121 break; 1122 } 1123 1124 return (ret); 1125 } 1126 1127 /* ARGSUSED */ 1128 static void vnic_m_propinfo(void *m_driver, const char *pr_name, 1129 mac_prop_id_t pr_num, mac_prop_info_handle_t prh) 1130 { 1131 vnic_t *vn = m_driver; 1132 1133 switch (pr_num) { 1134 case MAC_PROP_MTU: 1135 if (vn->vn_link_id == DATALINK_INVALID_LINKID) { 1136 mac_prop_info_set_range_uint32(prh, 1137 ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU); 1138 } else { 1139 uint32_t max; 1140 mac_perim_handle_t mph; 1141 mac_propval_range_t range; 1142 1143 /* 1144 * The valid range for a VNIC's MTU is the minimum that 1145 * the device supports and the current value of the 1146 * device. A VNIC cannot increase the current MTU of the 1147 * device. Therefore we need to get the range from the 1148 * propinfo endpoint and current mtu from the 1149 * traditional property endpoint. 1150 */ 1151 mac_perim_enter_by_mh(vn->vn_lower_mh, &mph); 1152 if (mac_get_prop(vn->vn_lower_mh, MAC_PROP_MTU, "mtu", 1153 &max, sizeof (uint32_t)) != 0) { 1154 mac_perim_exit(mph); 1155 return; 1156 } 1157 1158 range.mpr_count = 1; 1159 if (mac_prop_info(vn->vn_lower_mh, MAC_PROP_MTU, "mtu", 1160 NULL, 0, &range, NULL) != 0) { 1161 mac_perim_exit(mph); 1162 return; 1163 } 1164 1165 mac_prop_info_set_default_uint32(prh, max); 1166 mac_prop_info_set_range_uint32(prh, 1167 range.mpr_range_uint32[0].mpur_min, max); 1168 mac_perim_exit(mph); 1169 } 1170 break; 1171 } 1172 } 1173 1174 1175 int 1176 vnic_info(vnic_info_t *info, cred_t *credp) 1177 { 1178 vnic_t *vnic; 1179 int err; 1180 1181 /* Make sure that the VNIC link is visible from the caller's zone. */ 1182 if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp))) 1183 return (ENOENT); 1184 1185 rw_enter(&vnic_lock, RW_WRITER); 1186 1187 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id), 1188 (mod_hash_val_t *)&vnic); 1189 if (err != 0) { 1190 rw_exit(&vnic_lock); 1191 return (ENOENT); 1192 } 1193 1194 info->vn_link_id = vnic->vn_link_id; 1195 info->vn_mac_addr_type = vnic->vn_addr_type; 1196 info->vn_mac_len = vnic->vn_addr_len; 1197 bcopy(vnic->vn_addr, info->vn_mac_addr, MAXMACADDRLEN); 1198 info->vn_mac_slot = vnic->vn_slot_id; 1199 info->vn_mac_prefix_len = 0; 1200 info->vn_vid = vnic->vn_vid; 1201 info->vn_force = vnic->vn_force; 1202 info->vn_vrid = vnic->vn_vrid; 1203 info->vn_af = vnic->vn_af; 1204 1205 bzero(&info->vn_resource_props, sizeof (mac_resource_props_t)); 1206 if (vnic->vn_mch != NULL) 1207 mac_client_get_resources(vnic->vn_mch, 1208 &info->vn_resource_props); 1209 1210 rw_exit(&vnic_lock); 1211 return (0); 1212 } 1213 1214 static void 1215 vnic_notify_cb(void *arg, mac_notify_type_t type) 1216 { 1217 vnic_t *vnic = arg; 1218 1219 /* 1220 * Do not deliver notifications if the vnic is not fully initialized 1221 * or is in process of being torn down. 1222 */ 1223 if (!vnic->vn_enabled) 1224 return; 1225 1226 switch (type) { 1227 case MAC_NOTE_UNICST: 1228 /* 1229 * Only the VLAN VNIC needs to be notified with primary MAC 1230 * address change. 1231 */ 1232 if (vnic->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY) 1233 return; 1234 1235 /* the unicast MAC address value */ 1236 mac_unicast_primary_get(vnic->vn_lower_mh, vnic->vn_addr); 1237 1238 /* notify its upper layer MAC about MAC address change */ 1239 mac_unicst_update(vnic->vn_mh, (const uint8_t *)vnic->vn_addr); 1240 break; 1241 1242 case MAC_NOTE_LINK: 1243 mac_link_update(vnic->vn_mh, 1244 mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE)); 1245 break; 1246 1247 default: 1248 break; 1249 } 1250 } 1251