1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2015 Joyent, Inc. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/cred.h> 28 #include <sys/sysmacros.h> 29 #include <sys/conf.h> 30 #include <sys/cmn_err.h> 31 #include <sys/list.h> 32 #include <sys/ksynch.h> 33 #include <sys/kmem.h> 34 #include <sys/stream.h> 35 #include <sys/modctl.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/atomic.h> 39 #include <sys/stat.h> 40 #include <sys/modhash.h> 41 #include <sys/strsubr.h> 42 #include <sys/strsun.h> 43 #include <sys/dlpi.h> 44 #include <sys/mac.h> 45 #include <sys/mac_provider.h> 46 #include <sys/mac_client.h> 47 #include <sys/mac_client_priv.h> 48 #include <sys/mac_ether.h> 49 #include <sys/dls.h> 50 #include <sys/pattr.h> 51 #include <sys/time.h> 52 #include <sys/vlan.h> 53 #include <sys/vnic.h> 54 #include <sys/vnic_impl.h> 55 #include <sys/mac_flow_impl.h> 56 #include <inet/ip_impl.h> 57 58 /* 59 * Note that for best performance, the VNIC is a passthrough design. 60 * For each VNIC corresponds a MAC client of the underlying MAC (lower MAC). 61 * This MAC client is opened by the VNIC driver at VNIC creation, 62 * and closed when the VNIC is deleted. 63 * When a MAC client of the VNIC itself opens a VNIC, the MAC layer 64 * (upper MAC) detects that the MAC being opened is a VNIC. Instead 65 * of allocating a new MAC client, it asks the VNIC driver to return 66 * the lower MAC client handle associated with the VNIC, and that handle 67 * is returned to the upper MAC client directly. This allows access 68 * by upper MAC clients of the VNIC to have direct access to the lower 69 * MAC client for the control path and data path. 70 * 71 * Due to this passthrough, some of the entry points exported by the 72 * VNIC driver are never directly invoked. These entry points include 73 * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc. 74 * 75 * VNICs support multiple upper mac clients to enable support for 76 * multiple MAC addresses on the VNIC. When the VNIC is created the 77 * initial mac client is the primary upper mac. Any additional mac 78 * clients are secondary macs. 79 */ 80 81 static int vnic_m_start(void *); 82 static void vnic_m_stop(void *); 83 static int vnic_m_promisc(void *, boolean_t); 84 static int vnic_m_multicst(void *, boolean_t, const uint8_t *); 85 static int vnic_m_unicst(void *, const uint8_t *); 86 static int vnic_m_stat(void *, uint_t, uint64_t *); 87 static void vnic_m_ioctl(void *, queue_t *, mblk_t *); 88 static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t, 89 const void *); 90 static int vnic_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); 91 static void vnic_m_propinfo(void *, const char *, mac_prop_id_t, 92 mac_prop_info_handle_t); 93 static mblk_t *vnic_m_tx(void *, mblk_t *); 94 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *); 95 static void vnic_notify_cb(void *, mac_notify_type_t); 96 static void vnic_cleanup_secondary_macs(vnic_t *, int); 97 98 static kmem_cache_t *vnic_cache; 99 static krwlock_t vnic_lock; 100 static uint_t vnic_count; 101 102 #define ANCHOR_VNIC_MIN_MTU 576 103 #define ANCHOR_VNIC_MAX_MTU 9000 104 105 /* hash of VNICs (vnic_t's), keyed by VNIC id */ 106 static mod_hash_t *vnic_hash; 107 #define VNIC_HASHSZ 64 108 #define VNIC_HASH_KEY(vnic_id) ((mod_hash_key_t)(uintptr_t)vnic_id) 109 110 #define VNIC_M_CALLBACK_FLAGS \ 111 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) 112 113 static mac_callbacks_t vnic_m_callbacks = { 114 VNIC_M_CALLBACK_FLAGS, 115 vnic_m_stat, 116 vnic_m_start, 117 vnic_m_stop, 118 vnic_m_promisc, 119 vnic_m_multicst, 120 vnic_m_unicst, 121 vnic_m_tx, 122 NULL, 123 vnic_m_ioctl, 124 vnic_m_capab_get, 125 NULL, 126 NULL, 127 vnic_m_setprop, 128 vnic_m_getprop, 129 vnic_m_propinfo 130 }; 131 132 void 133 vnic_dev_init(void) 134 { 135 vnic_cache = kmem_cache_create("vnic_cache", 136 sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 137 138 vnic_hash = mod_hash_create_idhash("vnic_hash", 139 VNIC_HASHSZ, mod_hash_null_valdtor); 140 141 rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL); 142 143 vnic_count = 0; 144 } 145 146 void 147 vnic_dev_fini(void) 148 { 149 ASSERT(vnic_count == 0); 150 151 rw_destroy(&vnic_lock); 152 mod_hash_destroy_idhash(vnic_hash); 153 kmem_cache_destroy(vnic_cache); 154 } 155 156 uint_t 157 vnic_dev_count(void) 158 { 159 return (vnic_count); 160 } 161 162 static vnic_ioc_diag_t 163 vnic_mac2vnic_diag(mac_diag_t diag) 164 { 165 switch (diag) { 166 case MAC_DIAG_MACADDR_NIC: 167 return (VNIC_IOC_DIAG_MACADDR_NIC); 168 case MAC_DIAG_MACADDR_INUSE: 169 return (VNIC_IOC_DIAG_MACADDR_INUSE); 170 case MAC_DIAG_MACADDR_INVALID: 171 return (VNIC_IOC_DIAG_MACADDR_INVALID); 172 case MAC_DIAG_MACADDRLEN_INVALID: 173 return (VNIC_IOC_DIAG_MACADDRLEN_INVALID); 174 case MAC_DIAG_MACFACTORYSLOTINVALID: 175 return (VNIC_IOC_DIAG_MACFACTORYSLOTINVALID); 176 case MAC_DIAG_MACFACTORYSLOTUSED: 177 return (VNIC_IOC_DIAG_MACFACTORYSLOTUSED); 178 case MAC_DIAG_MACFACTORYSLOTALLUSED: 179 return (VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED); 180 case MAC_DIAG_MACFACTORYNOTSUP: 181 return (VNIC_IOC_DIAG_MACFACTORYNOTSUP); 182 case MAC_DIAG_MACPREFIX_INVALID: 183 return (VNIC_IOC_DIAG_MACPREFIX_INVALID); 184 case MAC_DIAG_MACPREFIXLEN_INVALID: 185 return (VNIC_IOC_DIAG_MACPREFIXLEN_INVALID); 186 case MAC_DIAG_MACNO_HWRINGS: 187 return (VNIC_IOC_DIAG_NO_HWRINGS); 188 default: 189 return (VNIC_IOC_DIAG_NONE); 190 } 191 } 192 193 static int 194 vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type, 195 int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg, 196 uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag, 197 uint16_t vid, boolean_t req_hwgrp_flag) 198 { 199 mac_diag_t mac_diag; 200 uint16_t mac_flags = 0; 201 int err; 202 uint_t addr_len; 203 204 if (flags & VNIC_IOC_CREATE_NODUPCHECK) 205 mac_flags |= MAC_UNICAST_NODUPCHECK; 206 207 switch (vnic_addr_type) { 208 case VNIC_MAC_ADDR_TYPE_FIXED: 209 case VNIC_MAC_ADDR_TYPE_VRID: 210 /* 211 * The MAC address value to assign to the VNIC 212 * is already provided in mac_addr_arg. addr_len_ptr_arg 213 * already contains the MAC address length. 214 */ 215 break; 216 217 case VNIC_MAC_ADDR_TYPE_RANDOM: 218 /* 219 * Random MAC address. There are two sub-cases: 220 * 221 * 1 - If mac_len == 0, a new MAC address is generated. 222 * The length of the MAC address to generated depends 223 * on the type of MAC used. The prefix to use for the MAC 224 * address is stored in the most significant bytes 225 * of the mac_addr argument, and its length is specified 226 * by the mac_prefix_len argument. This prefix can 227 * correspond to a IEEE OUI in the case of Ethernet, 228 * for example. 229 * 230 * 2 - If mac_len > 0, the address was already picked 231 * randomly, and is now passed back during VNIC 232 * re-creation. The mac_addr argument contains the MAC 233 * address that was generated. We distinguish this 234 * case from the fixed MAC address case, since we 235 * want the user consumers to know, when they query 236 * the list of VNICs, that a VNIC was assigned a 237 * random MAC address vs assigned a fixed address 238 * specified by the user. 239 */ 240 241 /* 242 * If it's a pre-generated address, we're done. mac_addr_arg 243 * and addr_len_ptr_arg already contain the MAC address 244 * value and length. 245 */ 246 if (*addr_len_ptr_arg > 0) 247 break; 248 249 /* generate a new random MAC address */ 250 if ((err = mac_addr_random(vnic->vn_mch, 251 prefix_len, mac_addr_arg, &mac_diag)) != 0) { 252 *diag = vnic_mac2vnic_diag(mac_diag); 253 return (err); 254 } 255 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh); 256 break; 257 258 case VNIC_MAC_ADDR_TYPE_FACTORY: 259 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot); 260 if (err != 0) { 261 if (err == EINVAL) 262 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID; 263 if (err == EBUSY) 264 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED; 265 if (err == ENOSPC) 266 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED; 267 return (err); 268 } 269 270 mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot, 271 mac_addr_arg, &addr_len, NULL, NULL); 272 *addr_len_ptr_arg = addr_len; 273 break; 274 275 case VNIC_MAC_ADDR_TYPE_AUTO: 276 /* first try to allocate a factory MAC address */ 277 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot); 278 if (err == 0) { 279 mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot, 280 mac_addr_arg, &addr_len, NULL, NULL); 281 vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY; 282 *addr_len_ptr_arg = addr_len; 283 break; 284 } 285 286 /* 287 * Allocating a factory MAC address failed, generate a 288 * random MAC address instead. 289 */ 290 if ((err = mac_addr_random(vnic->vn_mch, 291 prefix_len, mac_addr_arg, &mac_diag)) != 0) { 292 *diag = vnic_mac2vnic_diag(mac_diag); 293 return (err); 294 } 295 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh); 296 vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM; 297 break; 298 case VNIC_MAC_ADDR_TYPE_PRIMARY: 299 /* 300 * We get the address here since we copy it in the 301 * vnic's vn_addr. 302 * We can't ask for hardware resources since we 303 * don't currently support hardware classification 304 * for these MAC clients. 305 */ 306 if (req_hwgrp_flag) { 307 *diag = VNIC_IOC_DIAG_NO_HWRINGS; 308 return (ENOTSUP); 309 } 310 mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg); 311 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh); 312 mac_flags |= MAC_UNICAST_VNIC_PRIMARY; 313 break; 314 } 315 316 vnic->vn_addr_type = vnic_addr_type; 317 318 err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags, 319 &vnic->vn_muh, vid, &mac_diag); 320 if (err != 0) { 321 if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) { 322 /* release factory MAC address */ 323 mac_addr_factory_release(vnic->vn_mch, *addr_slot); 324 } 325 *diag = vnic_mac2vnic_diag(mac_diag); 326 } 327 328 return (err); 329 } 330 331 /* 332 * Create a new VNIC upon request from administrator. 333 * Returns 0 on success, an errno on failure. 334 */ 335 /* ARGSUSED */ 336 int 337 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, 338 vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr, 339 int *mac_slot, uint_t mac_prefix_len, uint16_t vid, vrid_t vrid, 340 int af, mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag, 341 cred_t *credp) 342 { 343 vnic_t *vnic; 344 mac_register_t *mac; 345 int err; 346 boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0); 347 char vnic_name[MAXNAMELEN]; 348 const mac_info_t *minfop; 349 uint32_t req_hwgrp_flag = B_FALSE; 350 351 *diag = VNIC_IOC_DIAG_NONE; 352 353 rw_enter(&vnic_lock, RW_WRITER); 354 355 /* does a VNIC with the same id already exist? */ 356 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 357 (mod_hash_val_t *)&vnic); 358 if (err == 0) { 359 rw_exit(&vnic_lock); 360 return (EEXIST); 361 } 362 363 vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP); 364 if (vnic == NULL) { 365 rw_exit(&vnic_lock); 366 return (ENOMEM); 367 } 368 369 bzero(vnic, sizeof (*vnic)); 370 371 vnic->vn_id = vnic_id; 372 vnic->vn_link_id = linkid; 373 vnic->vn_vrid = vrid; 374 vnic->vn_af = af; 375 376 if (!is_anchor) { 377 if (linkid == DATALINK_INVALID_LINKID) { 378 err = EINVAL; 379 goto bail; 380 } 381 382 /* 383 * Open the lower MAC and assign its initial bandwidth and 384 * MAC address. We do this here during VNIC creation and 385 * do not wait until the upper MAC client open so that we 386 * can validate the VNIC creation parameters (bandwidth, 387 * MAC address, etc) and reserve a factory MAC address if 388 * one was requested. 389 */ 390 err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh); 391 if (err != 0) 392 goto bail; 393 394 /* 395 * VNIC(vlan) over VNICs(vlans) is not supported. 396 */ 397 if (mac_is_vnic(vnic->vn_lower_mh)) { 398 err = EINVAL; 399 goto bail; 400 } 401 402 /* only ethernet support for now */ 403 minfop = mac_info(vnic->vn_lower_mh); 404 if (minfop->mi_nativemedia != DL_ETHER) { 405 err = ENOTSUP; 406 goto bail; 407 } 408 409 (void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL, 410 NULL); 411 err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch, 412 vnic_name, MAC_OPEN_FLAGS_IS_VNIC); 413 if (err != 0) 414 goto bail; 415 416 /* assign a MAC address to the VNIC */ 417 418 err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot, 419 mac_prefix_len, mac_len, mac_addr, flags, diag, vid, 420 req_hwgrp_flag); 421 if (err != 0) { 422 vnic->vn_muh = NULL; 423 if (diag != NULL && req_hwgrp_flag) 424 *diag = VNIC_IOC_DIAG_NO_HWRINGS; 425 goto bail; 426 } 427 428 /* register to receive notification from underlying MAC */ 429 vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb, 430 vnic); 431 432 *vnic_addr_type = vnic->vn_addr_type; 433 vnic->vn_addr_len = *mac_len; 434 vnic->vn_vid = vid; 435 436 bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len); 437 438 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) 439 vnic->vn_slot_id = *mac_slot; 440 441 /* 442 * Set the initial VNIC capabilities. If the VNIC is created 443 * over MACs which does not support nactive vlan, disable 444 * VNIC's hardware checksum capability if its VID is not 0, 445 * since the underlying MAC would get the hardware checksum 446 * offset wrong in case of VLAN packets. 447 */ 448 if (vid == 0 || !mac_capab_get(vnic->vn_lower_mh, 449 MAC_CAPAB_NO_NATIVEVLAN, NULL)) { 450 if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM, 451 &vnic->vn_hcksum_txflags)) 452 vnic->vn_hcksum_txflags = 0; 453 } else { 454 vnic->vn_hcksum_txflags = 0; 455 } 456 } 457 458 /* register with the MAC module */ 459 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 460 goto bail; 461 462 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 463 mac->m_driver = vnic; 464 mac->m_dip = vnic_get_dip(); 465 mac->m_instance = (uint_t)-1; 466 mac->m_src_addr = vnic->vn_addr; 467 mac->m_callbacks = &vnic_m_callbacks; 468 469 if (!is_anchor) { 470 /* 471 * If this is a VNIC based VLAN, then we check for the 472 * margin unless it has been created with the force 473 * flag. If we are configuring a VLAN over an etherstub, 474 * we don't check the margin even if force is not set. 475 */ 476 if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) { 477 if (vid != VLAN_ID_NONE) 478 vnic->vn_force = B_TRUE; 479 /* 480 * As the current margin size of the underlying mac is 481 * used to determine the margin size of the VNIC 482 * itself, request the underlying mac not to change 483 * to a smaller margin size. 484 */ 485 err = mac_margin_add(vnic->vn_lower_mh, 486 &vnic->vn_margin, B_TRUE); 487 ASSERT(err == 0); 488 } else { 489 vnic->vn_margin = VLAN_TAGSZ; 490 err = mac_margin_add(vnic->vn_lower_mh, 491 &vnic->vn_margin, B_FALSE); 492 if (err != 0) { 493 mac_free(mac); 494 if (diag != NULL) 495 *diag = VNIC_IOC_DIAG_MACMARGIN_INVALID; 496 goto bail; 497 } 498 } 499 500 mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu, 501 &mac->m_max_sdu); 502 err = mac_mtu_add(vnic->vn_lower_mh, &mac->m_max_sdu, B_FALSE); 503 if (err != 0) { 504 VERIFY(mac_margin_remove(vnic->vn_lower_mh, 505 vnic->vn_margin) == 0); 506 mac_free(mac); 507 if (diag != NULL) 508 *diag = VNIC_IOC_DIAG_MACMTU_INVALID; 509 goto bail; 510 } 511 vnic->vn_mtu = mac->m_max_sdu; 512 } else { 513 vnic->vn_margin = VLAN_TAGSZ; 514 mac->m_min_sdu = 1; 515 mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU; 516 vnic->vn_mtu = ANCHOR_VNIC_MAX_MTU; 517 } 518 519 mac->m_margin = vnic->vn_margin; 520 521 err = mac_register(mac, &vnic->vn_mh); 522 mac_free(mac); 523 if (err != 0) { 524 if (!is_anchor) { 525 VERIFY(mac_mtu_remove(vnic->vn_lower_mh, 526 vnic->vn_mtu) == 0); 527 VERIFY(mac_margin_remove(vnic->vn_lower_mh, 528 vnic->vn_margin) == 0); 529 } 530 goto bail; 531 } 532 533 /* Set the VNIC's MAC in the client */ 534 if (!is_anchor) { 535 mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp); 536 537 if (mrp != NULL) { 538 if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 || 539 (mrp->mrp_mask & MRP_TX_RINGS) != 0) { 540 req_hwgrp_flag = B_TRUE; 541 } 542 err = mac_client_set_resources(vnic->vn_mch, mrp); 543 if (err != 0) { 544 VERIFY(mac_mtu_remove(vnic->vn_lower_mh, 545 vnic->vn_mtu) == 0); 546 VERIFY(mac_margin_remove(vnic->vn_lower_mh, 547 vnic->vn_margin) == 0); 548 (void) mac_unregister(vnic->vn_mh); 549 goto bail; 550 } 551 } 552 } 553 554 err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp)); 555 if (err != 0) { 556 VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh, 557 vnic->vn_margin) == 0); 558 if (!is_anchor) { 559 VERIFY(mac_mtu_remove(vnic->vn_lower_mh, 560 vnic->vn_mtu) == 0); 561 VERIFY(mac_margin_remove(vnic->vn_lower_mh, 562 vnic->vn_margin) == 0); 563 } 564 (void) mac_unregister(vnic->vn_mh); 565 goto bail; 566 } 567 568 /* add new VNIC to hash table */ 569 err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id), 570 (mod_hash_val_t)vnic); 571 ASSERT(err == 0); 572 vnic_count++; 573 574 /* 575 * Now that we've enabled this VNIC, we should go through and update the 576 * link state by setting it to our parents. 577 */ 578 vnic->vn_enabled = B_TRUE; 579 580 if (is_anchor) { 581 mac_link_update(vnic->vn_mh, LINK_STATE_UP); 582 } else { 583 mac_link_update(vnic->vn_mh, 584 mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE)); 585 } 586 587 rw_exit(&vnic_lock); 588 589 return (0); 590 591 bail: 592 rw_exit(&vnic_lock); 593 if (!is_anchor) { 594 if (vnic->vn_mnh != NULL) 595 (void) mac_notify_remove(vnic->vn_mnh, B_TRUE); 596 if (vnic->vn_muh != NULL) 597 (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh); 598 if (vnic->vn_mch != NULL) 599 mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC); 600 if (vnic->vn_lower_mh != NULL) 601 mac_close(vnic->vn_lower_mh); 602 } 603 604 kmem_cache_free(vnic_cache, vnic); 605 return (err); 606 } 607 608 /* 609 * Modify the properties of an existing VNIC. 610 */ 611 /* ARGSUSED */ 612 int 613 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask, 614 vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr, 615 uint_t mac_slot, mac_resource_props_t *mrp) 616 { 617 vnic_t *vnic = NULL; 618 619 rw_enter(&vnic_lock, RW_WRITER); 620 621 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 622 (mod_hash_val_t *)&vnic) != 0) { 623 rw_exit(&vnic_lock); 624 return (ENOENT); 625 } 626 627 rw_exit(&vnic_lock); 628 629 return (0); 630 } 631 632 /* ARGSUSED */ 633 int 634 vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp) 635 { 636 vnic_t *vnic = NULL; 637 mod_hash_val_t val; 638 datalink_id_t tmpid; 639 int rc; 640 641 rw_enter(&vnic_lock, RW_WRITER); 642 643 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 644 (mod_hash_val_t *)&vnic) != 0) { 645 rw_exit(&vnic_lock); 646 return (ENOENT); 647 } 648 649 if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) { 650 rw_exit(&vnic_lock); 651 return (rc); 652 } 653 654 ASSERT(vnic_id == tmpid); 655 656 /* 657 * We cannot unregister the MAC yet. Unregistering would 658 * free up mac_impl_t which should not happen at this time. 659 * So disable mac_impl_t by calling mac_disable(). This will prevent 660 * any new claims on mac_impl_t. 661 */ 662 if ((rc = mac_disable(vnic->vn_mh)) != 0) { 663 (void) dls_devnet_create(vnic->vn_mh, vnic_id, 664 crgetzoneid(credp)); 665 rw_exit(&vnic_lock); 666 return (rc); 667 } 668 669 vnic_cleanup_secondary_macs(vnic, vnic->vn_nhandles); 670 671 vnic->vn_enabled = B_FALSE; 672 (void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val); 673 ASSERT(vnic == (vnic_t *)val); 674 vnic_count--; 675 rw_exit(&vnic_lock); 676 677 /* 678 * XXX-nicolas shouldn't have a void cast here, if it's 679 * expected that the function will never fail, then we should 680 * have an ASSERT(). 681 */ 682 (void) mac_unregister(vnic->vn_mh); 683 684 if (vnic->vn_lower_mh != NULL) { 685 /* 686 * Check if MAC address for the vnic was obtained from the 687 * factory MAC addresses. If yes, release it. 688 */ 689 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) { 690 (void) mac_addr_factory_release(vnic->vn_mch, 691 vnic->vn_slot_id); 692 } 693 (void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin); 694 (void) mac_mtu_remove(vnic->vn_lower_mh, vnic->vn_mtu); 695 (void) mac_notify_remove(vnic->vn_mnh, B_TRUE); 696 (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh); 697 mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC); 698 mac_close(vnic->vn_lower_mh); 699 } 700 701 kmem_cache_free(vnic_cache, vnic); 702 return (0); 703 } 704 705 /* ARGSUSED */ 706 mblk_t * 707 vnic_m_tx(void *arg, mblk_t *mp_chain) 708 { 709 /* 710 * This function could be invoked for an anchor VNIC when sending 711 * broadcast and multicast packets, and unicast packets which did 712 * not match any local known destination. 713 */ 714 freemsgchain(mp_chain); 715 return (NULL); 716 } 717 718 /*ARGSUSED*/ 719 static void 720 vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 721 { 722 miocnak(q, mp, 0, ENOTSUP); 723 } 724 725 /* 726 * This entry point cannot be passed-through, since it is invoked 727 * for the per-VNIC kstats which must be exported independently 728 * of the existence of VNIC MAC clients. 729 */ 730 static int 731 vnic_m_stat(void *arg, uint_t stat, uint64_t *val) 732 { 733 vnic_t *vnic = arg; 734 int rval = 0; 735 736 if (vnic->vn_lower_mh == NULL) { 737 /* 738 * It's an anchor VNIC, which does not have any 739 * statistics in itself. 740 */ 741 return (ENOTSUP); 742 } 743 744 /* 745 * ENOTSUP must be reported for unsupported stats, the VNIC 746 * driver reports a subset of the stats that would 747 * be returned by a real piece of hardware. 748 */ 749 750 switch (stat) { 751 case MAC_STAT_LINK_STATE: 752 case MAC_STAT_LINK_UP: 753 case MAC_STAT_PROMISC: 754 case MAC_STAT_IFSPEED: 755 case MAC_STAT_MULTIRCV: 756 case MAC_STAT_MULTIXMT: 757 case MAC_STAT_BRDCSTRCV: 758 case MAC_STAT_BRDCSTXMT: 759 case MAC_STAT_OPACKETS: 760 case MAC_STAT_OBYTES: 761 case MAC_STAT_IERRORS: 762 case MAC_STAT_OERRORS: 763 case MAC_STAT_RBYTES: 764 case MAC_STAT_IPACKETS: 765 *val = mac_client_stat_get(vnic->vn_mch, stat); 766 break; 767 default: 768 rval = ENOTSUP; 769 } 770 771 return (rval); 772 } 773 774 /* 775 * Invoked by the upper MAC to retrieve the lower MAC client handle 776 * corresponding to a VNIC. A pointer to this function is obtained 777 * by the upper MAC via capability query. 778 * 779 * XXX-nicolas Note: this currently causes all VNIC MAC clients to 780 * receive the same MAC client handle for the same VNIC. This is ok 781 * as long as we have only one VNIC MAC client which sends and 782 * receives data, but we don't currently enforce this at the MAC layer. 783 */ 784 static void * 785 vnic_mac_client_handle(void *vnic_arg) 786 { 787 vnic_t *vnic = vnic_arg; 788 789 return (vnic->vn_mch); 790 } 791 792 /* 793 * Invoked when updating the primary MAC so that the secondary MACs are 794 * kept in sync. 795 */ 796 static void 797 vnic_mac_secondary_update(void *vnic_arg) 798 { 799 vnic_t *vn = vnic_arg; 800 int i; 801 802 for (i = 1; i <= vn->vn_nhandles; i++) { 803 mac_secondary_dup(vn->vn_mc_handles[0], vn->vn_mc_handles[i]); 804 } 805 } 806 807 /* 808 * Return information about the specified capability. 809 */ 810 /* ARGSUSED */ 811 static boolean_t 812 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 813 { 814 vnic_t *vnic = arg; 815 816 switch (cap) { 817 case MAC_CAPAB_HCKSUM: { 818 uint32_t *hcksum_txflags = cap_data; 819 820 *hcksum_txflags = vnic->vn_hcksum_txflags & 821 (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM | 822 HCKSUM_INET_PARTIAL); 823 break; 824 } 825 case MAC_CAPAB_VNIC: { 826 mac_capab_vnic_t *vnic_capab = cap_data; 827 828 if (vnic->vn_lower_mh == NULL) { 829 /* 830 * It's an anchor VNIC, we don't have an underlying 831 * NIC and MAC client handle. 832 */ 833 return (B_FALSE); 834 } 835 836 if (vnic_capab != NULL) { 837 vnic_capab->mcv_arg = vnic; 838 vnic_capab->mcv_mac_client_handle = 839 vnic_mac_client_handle; 840 vnic_capab->mcv_mac_secondary_update = 841 vnic_mac_secondary_update; 842 } 843 break; 844 } 845 case MAC_CAPAB_ANCHOR_VNIC: { 846 /* since it's an anchor VNIC we don't have lower mac handle */ 847 if (vnic->vn_lower_mh == NULL) { 848 ASSERT(vnic->vn_link_id == 0); 849 return (B_TRUE); 850 } 851 return (B_FALSE); 852 } 853 case MAC_CAPAB_NO_NATIVEVLAN: 854 return (B_FALSE); 855 case MAC_CAPAB_NO_ZCOPY: 856 return (B_TRUE); 857 case MAC_CAPAB_VRRP: { 858 mac_capab_vrrp_t *vrrp_capab = cap_data; 859 860 if (vnic->vn_vrid != 0) { 861 if (vrrp_capab != NULL) 862 vrrp_capab->mcv_af = vnic->vn_af; 863 return (B_TRUE); 864 } 865 return (B_FALSE); 866 } 867 default: 868 return (B_FALSE); 869 } 870 return (B_TRUE); 871 } 872 873 /* ARGSUSED */ 874 static int 875 vnic_m_start(void *arg) 876 { 877 return (0); 878 } 879 880 /* ARGSUSED */ 881 static void 882 vnic_m_stop(void *arg) 883 { 884 } 885 886 /* ARGSUSED */ 887 static int 888 vnic_m_promisc(void *arg, boolean_t on) 889 { 890 return (0); 891 } 892 893 /* ARGSUSED */ 894 static int 895 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 896 { 897 return (0); 898 } 899 900 static int 901 vnic_m_unicst(void *arg, const uint8_t *macaddr) 902 { 903 vnic_t *vnic = arg; 904 905 return (mac_vnic_unicast_set(vnic->vn_mch, macaddr)); 906 } 907 908 static void 909 vnic_cleanup_secondary_macs(vnic_t *vn, int cnt) 910 { 911 int i; 912 913 /* Remove existing secondaries (primary is at 0) */ 914 for (i = 1; i <= cnt; i++) { 915 mac_rx_clear(vn->vn_mc_handles[i]); 916 917 /* unicast handle might not have been set yet */ 918 if (vn->vn_mu_handles[i] != NULL) 919 (void) mac_unicast_remove(vn->vn_mc_handles[i], 920 vn->vn_mu_handles[i]); 921 922 mac_secondary_cleanup(vn->vn_mc_handles[i]); 923 924 mac_client_close(vn->vn_mc_handles[i], MAC_CLOSE_FLAGS_IS_VNIC); 925 926 vn->vn_mu_handles[i] = NULL; 927 vn->vn_mc_handles[i] = NULL; 928 } 929 930 vn->vn_nhandles = 0; 931 } 932 933 /* 934 * Setup secondary MAC addresses on the vnic. Due to limitations in the mac 935 * code, each mac address must be associated with a mac_client (and the 936 * flow that goes along with the client) so we need to create those clients 937 * here. 938 */ 939 static int 940 vnic_set_secondary_macs(vnic_t *vn, mac_secondary_addr_t *msa) 941 { 942 int i, err; 943 char primary_name[MAXNAMELEN]; 944 945 /* First, remove pre-existing secondaries */ 946 ASSERT(vn->vn_nhandles < MPT_MAXMACADDR); 947 vnic_cleanup_secondary_macs(vn, vn->vn_nhandles); 948 949 if (msa->ms_addrcnt == (uint32_t)-1) 950 msa->ms_addrcnt = 0; 951 952 vn->vn_nhandles = msa->ms_addrcnt; 953 954 (void) dls_mgmt_get_linkinfo(vn->vn_id, primary_name, NULL, NULL, NULL); 955 956 /* 957 * Now add the new secondary MACs 958 * Recall that the primary MAC address is the first element. 959 * The secondary clients are named after the primary with their 960 * index to distinguish them. 961 */ 962 for (i = 1; i <= vn->vn_nhandles; i++) { 963 uint8_t *addr; 964 mac_diag_t mac_diag; 965 char secondary_name[MAXNAMELEN]; 966 967 (void) snprintf(secondary_name, sizeof (secondary_name), 968 "%s%02d", primary_name, i); 969 970 err = mac_client_open(vn->vn_lower_mh, &vn->vn_mc_handles[i], 971 secondary_name, MAC_OPEN_FLAGS_IS_VNIC); 972 if (err != 0) { 973 /* Remove any that we successfully added */ 974 vnic_cleanup_secondary_macs(vn, --i); 975 return (err); 976 } 977 978 /* 979 * Assign a MAC address to the VNIC 980 * 981 * Normally this would be done with vnic_unicast_add but since 982 * we know these are fixed adddresses, and since we need to 983 * save this in the proper array slot, we bypass that function 984 * and go direct. 985 */ 986 addr = msa->ms_addrs[i - 1]; 987 err = mac_unicast_add(vn->vn_mc_handles[i], addr, 0, 988 &vn->vn_mu_handles[i], vn->vn_vid, &mac_diag); 989 if (err != 0) { 990 /* Remove any that we successfully added */ 991 vnic_cleanup_secondary_macs(vn, i); 992 return (err); 993 } 994 995 /* 996 * Setup the secondary the same way as the primary (i.e. 997 * receiver function/argument (e.g. i_dls_link_rx, mac_pkt_drop, 998 * etc.), the promisc list, and the resource controls). 999 */ 1000 mac_secondary_dup(vn->vn_mc_handles[0], vn->vn_mc_handles[i]); 1001 } 1002 1003 return (0); 1004 } 1005 1006 static int 1007 vnic_get_secondary_macs(vnic_t *vn, uint_t pr_valsize, void *pr_val) 1008 { 1009 int i; 1010 mac_secondary_addr_t msa; 1011 1012 if (pr_valsize < sizeof (msa)) 1013 return (EINVAL); 1014 1015 /* Get existing addresses (primary is at 0) */ 1016 ASSERT(vn->vn_nhandles < MPT_MAXMACADDR); 1017 for (i = 1; i <= vn->vn_nhandles; i++) { 1018 ASSERT(vn->vn_mc_handles[i] != NULL); 1019 mac_unicast_secondary_get(vn->vn_mc_handles[i], 1020 msa.ms_addrs[i - 1]); 1021 } 1022 msa.ms_addrcnt = vn->vn_nhandles; 1023 1024 bcopy(&msa, pr_val, sizeof (msa)); 1025 return (0); 1026 } 1027 1028 /* 1029 * Callback functions for set/get of properties 1030 */ 1031 /*ARGSUSED*/ 1032 static int 1033 vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, 1034 uint_t pr_valsize, const void *pr_val) 1035 { 1036 int err = 0; 1037 vnic_t *vn = m_driver; 1038 1039 switch (pr_num) { 1040 case MAC_PROP_MTU: { 1041 uint32_t mtu; 1042 1043 if (pr_valsize < sizeof (mtu)) { 1044 err = EINVAL; 1045 break; 1046 } 1047 bcopy(pr_val, &mtu, sizeof (mtu)); 1048 1049 if (vn->vn_link_id == DATALINK_INVALID_LINKID) { 1050 if (mtu < ANCHOR_VNIC_MIN_MTU || 1051 mtu > ANCHOR_VNIC_MAX_MTU) { 1052 err = EINVAL; 1053 break; 1054 } 1055 } else { 1056 err = mac_mtu_add(vn->vn_lower_mh, &mtu, B_FALSE); 1057 /* 1058 * If it's not supported to set a value here, translate 1059 * that to EINVAL, so user land gets a better idea of 1060 * what went wrong. This realistically means that they 1061 * violated the output of prop info. 1062 */ 1063 if (err == ENOTSUP) 1064 err = EINVAL; 1065 if (err != 0) 1066 break; 1067 VERIFY(mac_mtu_remove(vn->vn_lower_mh, 1068 vn->vn_mtu) == 0); 1069 } 1070 vn->vn_mtu = mtu; 1071 err = mac_maxsdu_update(vn->vn_mh, mtu); 1072 break; 1073 } 1074 case MAC_PROP_SECONDARY_ADDRS: { 1075 mac_secondary_addr_t msa; 1076 1077 bcopy(pr_val, &msa, sizeof (msa)); 1078 err = vnic_set_secondary_macs(vn, &msa); 1079 break; 1080 } 1081 default: 1082 err = ENOTSUP; 1083 break; 1084 } 1085 return (err); 1086 } 1087 1088 /* ARGSUSED */ 1089 static int 1090 vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1091 uint_t pr_valsize, void *pr_val) 1092 { 1093 vnic_t *vn = arg; 1094 int ret = 0; 1095 1096 switch (pr_num) { 1097 case MAC_PROP_SECONDARY_ADDRS: 1098 ret = vnic_get_secondary_macs(vn, pr_valsize, pr_val); 1099 break; 1100 default: 1101 ret = EINVAL; 1102 break; 1103 } 1104 1105 return (ret); 1106 } 1107 1108 /* ARGSUSED */ 1109 static void vnic_m_propinfo(void *m_driver, const char *pr_name, 1110 mac_prop_id_t pr_num, mac_prop_info_handle_t prh) 1111 { 1112 vnic_t *vn = m_driver; 1113 1114 switch (pr_num) { 1115 case MAC_PROP_MTU: 1116 if (vn->vn_link_id == DATALINK_INVALID_LINKID) { 1117 mac_prop_info_set_range_uint32(prh, 1118 ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU); 1119 } else { 1120 uint32_t max; 1121 mac_perim_handle_t mph; 1122 mac_propval_range_t range; 1123 1124 /* 1125 * The valid range for a VNIC's MTU is the minimum that 1126 * the device supports and the current value of the 1127 * device. A VNIC cannot increase the current MTU of the 1128 * device. Therefore we need to get the range from the 1129 * propinfo endpoint and current mtu from the 1130 * traditional property endpoint. 1131 */ 1132 mac_perim_enter_by_mh(vn->vn_lower_mh, &mph); 1133 if (mac_get_prop(vn->vn_lower_mh, MAC_PROP_MTU, "mtu", 1134 &max, sizeof (uint32_t)) != 0) { 1135 mac_perim_exit(mph); 1136 return; 1137 } 1138 1139 range.mpr_count = 1; 1140 if (mac_prop_info(vn->vn_lower_mh, MAC_PROP_MTU, "mtu", 1141 NULL, 0, &range, NULL) != 0) { 1142 mac_perim_exit(mph); 1143 return; 1144 } 1145 1146 mac_prop_info_set_default_uint32(prh, max); 1147 mac_prop_info_set_range_uint32(prh, 1148 range.mpr_range_uint32[0].mpur_min, max); 1149 mac_perim_exit(mph); 1150 } 1151 break; 1152 } 1153 } 1154 1155 1156 int 1157 vnic_info(vnic_info_t *info, cred_t *credp) 1158 { 1159 vnic_t *vnic; 1160 int err; 1161 1162 /* Make sure that the VNIC link is visible from the caller's zone. */ 1163 if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp))) 1164 return (ENOENT); 1165 1166 rw_enter(&vnic_lock, RW_WRITER); 1167 1168 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id), 1169 (mod_hash_val_t *)&vnic); 1170 if (err != 0) { 1171 rw_exit(&vnic_lock); 1172 return (ENOENT); 1173 } 1174 1175 info->vn_link_id = vnic->vn_link_id; 1176 info->vn_mac_addr_type = vnic->vn_addr_type; 1177 info->vn_mac_len = vnic->vn_addr_len; 1178 bcopy(vnic->vn_addr, info->vn_mac_addr, MAXMACADDRLEN); 1179 info->vn_mac_slot = vnic->vn_slot_id; 1180 info->vn_mac_prefix_len = 0; 1181 info->vn_vid = vnic->vn_vid; 1182 info->vn_force = vnic->vn_force; 1183 info->vn_vrid = vnic->vn_vrid; 1184 info->vn_af = vnic->vn_af; 1185 1186 bzero(&info->vn_resource_props, sizeof (mac_resource_props_t)); 1187 if (vnic->vn_mch != NULL) 1188 mac_client_get_resources(vnic->vn_mch, 1189 &info->vn_resource_props); 1190 1191 rw_exit(&vnic_lock); 1192 return (0); 1193 } 1194 1195 static void 1196 vnic_notify_cb(void *arg, mac_notify_type_t type) 1197 { 1198 vnic_t *vnic = arg; 1199 1200 /* 1201 * Do not deliver notifications if the vnic is not fully initialized 1202 * or is in process of being torn down. 1203 */ 1204 if (!vnic->vn_enabled) 1205 return; 1206 1207 switch (type) { 1208 case MAC_NOTE_UNICST: 1209 /* 1210 * Only the VLAN VNIC needs to be notified with primary MAC 1211 * address change. 1212 */ 1213 if (vnic->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY) 1214 return; 1215 1216 /* the unicast MAC address value */ 1217 mac_unicast_primary_get(vnic->vn_lower_mh, vnic->vn_addr); 1218 1219 /* notify its upper layer MAC about MAC address change */ 1220 mac_unicst_update(vnic->vn_mh, (const uint8_t *)vnic->vn_addr); 1221 break; 1222 1223 case MAC_NOTE_LINK: 1224 mac_link_update(vnic->vn_mh, 1225 mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE)); 1226 break; 1227 1228 default: 1229 break; 1230 } 1231 } 1232