1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2015 Joyent, Inc. 24 * Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/cred.h> 29 #include <sys/sysmacros.h> 30 #include <sys/conf.h> 31 #include <sys/cmn_err.h> 32 #include <sys/list.h> 33 #include <sys/ksynch.h> 34 #include <sys/kmem.h> 35 #include <sys/stream.h> 36 #include <sys/modctl.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/atomic.h> 40 #include <sys/stat.h> 41 #include <sys/modhash.h> 42 #include <sys/strsubr.h> 43 #include <sys/strsun.h> 44 #include <sys/dlpi.h> 45 #include <sys/mac.h> 46 #include <sys/mac_provider.h> 47 #include <sys/mac_client.h> 48 #include <sys/mac_client_priv.h> 49 #include <sys/mac_ether.h> 50 #include <sys/dls.h> 51 #include <sys/pattr.h> 52 #include <sys/time.h> 53 #include <sys/vlan.h> 54 #include <sys/vnic.h> 55 #include <sys/vnic_impl.h> 56 #include <sys/mac_impl.h> 57 #include <sys/mac_flow_impl.h> 58 #include <inet/ip_impl.h> 59 60 /* 61 * Note that for best performance, the VNIC is a passthrough design. 62 * For each VNIC corresponds a MAC client of the underlying MAC (lower MAC). 63 * This MAC client is opened by the VNIC driver at VNIC creation, 64 * and closed when the VNIC is deleted. 65 * When a MAC client of the VNIC itself opens a VNIC, the MAC layer 66 * (upper MAC) detects that the MAC being opened is a VNIC. Instead 67 * of allocating a new MAC client, it asks the VNIC driver to return 68 * the lower MAC client handle associated with the VNIC, and that handle 69 * is returned to the upper MAC client directly. This allows access 70 * by upper MAC clients of the VNIC to have direct access to the lower 71 * MAC client for the control path and data path. 72 * 73 * Due to this passthrough, some of the entry points exported by the 74 * VNIC driver are never directly invoked. These entry points include 75 * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc. 76 * 77 * VNICs support multiple upper mac clients to enable support for 78 * multiple MAC addresses on the VNIC. When the VNIC is created the 79 * initial mac client is the primary upper mac. Any additional mac 80 * clients are secondary macs. 81 */ 82 83 static int vnic_m_start(void *); 84 static void vnic_m_stop(void *); 85 static int vnic_m_promisc(void *, boolean_t); 86 static int vnic_m_multicst(void *, boolean_t, const uint8_t *); 87 static int vnic_m_unicst(void *, const uint8_t *); 88 static int vnic_m_stat(void *, uint_t, uint64_t *); 89 static void vnic_m_ioctl(void *, queue_t *, mblk_t *); 90 static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t, 91 const void *); 92 static int vnic_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); 93 static void vnic_m_propinfo(void *, const char *, mac_prop_id_t, 94 mac_prop_info_handle_t); 95 static mblk_t *vnic_m_tx(void *, mblk_t *); 96 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *); 97 static void vnic_notify_cb(void *, mac_notify_type_t); 98 static void vnic_cleanup_secondary_macs(vnic_t *, int); 99 100 static kmem_cache_t *vnic_cache; 101 static krwlock_t vnic_lock; 102 static uint_t vnic_count; 103 104 #define ANCHOR_VNIC_MIN_MTU 576 105 #define ANCHOR_VNIC_MAX_MTU 9000 106 107 /* hash of VNICs (vnic_t's), keyed by VNIC id */ 108 static mod_hash_t *vnic_hash; 109 #define VNIC_HASHSZ 64 110 #define VNIC_HASH_KEY(vnic_id) ((mod_hash_key_t)(uintptr_t)vnic_id) 111 112 #define VNIC_M_CALLBACK_FLAGS \ 113 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) 114 115 static mac_callbacks_t vnic_m_callbacks = { 116 VNIC_M_CALLBACK_FLAGS, 117 vnic_m_stat, 118 vnic_m_start, 119 vnic_m_stop, 120 vnic_m_promisc, 121 vnic_m_multicst, 122 vnic_m_unicst, 123 vnic_m_tx, 124 NULL, 125 vnic_m_ioctl, 126 vnic_m_capab_get, 127 NULL, 128 NULL, 129 vnic_m_setprop, 130 vnic_m_getprop, 131 vnic_m_propinfo 132 }; 133 134 void 135 vnic_dev_init(void) 136 { 137 vnic_cache = kmem_cache_create("vnic_cache", 138 sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 139 140 vnic_hash = mod_hash_create_idhash("vnic_hash", 141 VNIC_HASHSZ, mod_hash_null_valdtor); 142 143 rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL); 144 145 vnic_count = 0; 146 } 147 148 void 149 vnic_dev_fini(void) 150 { 151 ASSERT(vnic_count == 0); 152 153 rw_destroy(&vnic_lock); 154 mod_hash_destroy_idhash(vnic_hash); 155 kmem_cache_destroy(vnic_cache); 156 } 157 158 uint_t 159 vnic_dev_count(void) 160 { 161 return (vnic_count); 162 } 163 164 static vnic_ioc_diag_t 165 vnic_mac2vnic_diag(mac_diag_t diag) 166 { 167 switch (diag) { 168 case MAC_DIAG_MACADDR_NIC: 169 return (VNIC_IOC_DIAG_MACADDR_NIC); 170 case MAC_DIAG_MACADDR_INUSE: 171 return (VNIC_IOC_DIAG_MACADDR_INUSE); 172 case MAC_DIAG_MACADDR_INVALID: 173 return (VNIC_IOC_DIAG_MACADDR_INVALID); 174 case MAC_DIAG_MACADDRLEN_INVALID: 175 return (VNIC_IOC_DIAG_MACADDRLEN_INVALID); 176 case MAC_DIAG_MACFACTORYSLOTINVALID: 177 return (VNIC_IOC_DIAG_MACFACTORYSLOTINVALID); 178 case MAC_DIAG_MACFACTORYSLOTUSED: 179 return (VNIC_IOC_DIAG_MACFACTORYSLOTUSED); 180 case MAC_DIAG_MACFACTORYSLOTALLUSED: 181 return (VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED); 182 case MAC_DIAG_MACFACTORYNOTSUP: 183 return (VNIC_IOC_DIAG_MACFACTORYNOTSUP); 184 case MAC_DIAG_MACPREFIX_INVALID: 185 return (VNIC_IOC_DIAG_MACPREFIX_INVALID); 186 case MAC_DIAG_MACPREFIXLEN_INVALID: 187 return (VNIC_IOC_DIAG_MACPREFIXLEN_INVALID); 188 case MAC_DIAG_MACNO_HWRINGS: 189 return (VNIC_IOC_DIAG_NO_HWRINGS); 190 default: 191 return (VNIC_IOC_DIAG_NONE); 192 } 193 } 194 195 static int 196 vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type, 197 int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg, 198 uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag, 199 uint16_t vid, boolean_t req_hwgrp_flag) 200 { 201 mac_diag_t mac_diag; 202 uint16_t mac_flags = 0; 203 int err; 204 uint_t addr_len; 205 206 if (flags & VNIC_IOC_CREATE_NODUPCHECK) 207 mac_flags |= MAC_UNICAST_NODUPCHECK; 208 209 switch (vnic_addr_type) { 210 case VNIC_MAC_ADDR_TYPE_FIXED: 211 case VNIC_MAC_ADDR_TYPE_VRID: 212 /* 213 * The MAC address value to assign to the VNIC 214 * is already provided in mac_addr_arg. addr_len_ptr_arg 215 * already contains the MAC address length. 216 */ 217 break; 218 219 case VNIC_MAC_ADDR_TYPE_RANDOM: 220 /* 221 * Random MAC address. There are two sub-cases: 222 * 223 * 1 - If mac_len == 0, a new MAC address is generated. 224 * The length of the MAC address to generated depends 225 * on the type of MAC used. The prefix to use for the MAC 226 * address is stored in the most significant bytes 227 * of the mac_addr argument, and its length is specified 228 * by the mac_prefix_len argument. This prefix can 229 * correspond to a IEEE OUI in the case of Ethernet, 230 * for example. 231 * 232 * 2 - If mac_len > 0, the address was already picked 233 * randomly, and is now passed back during VNIC 234 * re-creation. The mac_addr argument contains the MAC 235 * address that was generated. We distinguish this 236 * case from the fixed MAC address case, since we 237 * want the user consumers to know, when they query 238 * the list of VNICs, that a VNIC was assigned a 239 * random MAC address vs assigned a fixed address 240 * specified by the user. 241 */ 242 243 /* 244 * If it's a pre-generated address, we're done. mac_addr_arg 245 * and addr_len_ptr_arg already contain the MAC address 246 * value and length. 247 */ 248 if (*addr_len_ptr_arg > 0) 249 break; 250 251 /* generate a new random MAC address */ 252 if ((err = mac_addr_random(vnic->vn_mch, 253 prefix_len, mac_addr_arg, &mac_diag)) != 0) { 254 *diag = vnic_mac2vnic_diag(mac_diag); 255 return (err); 256 } 257 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh); 258 break; 259 260 case VNIC_MAC_ADDR_TYPE_FACTORY: 261 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot); 262 if (err != 0) { 263 if (err == EINVAL) 264 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID; 265 if (err == EBUSY) 266 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED; 267 if (err == ENOSPC) 268 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED; 269 return (err); 270 } 271 272 mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot, 273 mac_addr_arg, &addr_len, NULL, NULL); 274 *addr_len_ptr_arg = addr_len; 275 break; 276 277 case VNIC_MAC_ADDR_TYPE_AUTO: 278 /* first try to allocate a factory MAC address */ 279 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot); 280 if (err == 0) { 281 mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot, 282 mac_addr_arg, &addr_len, NULL, NULL); 283 vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY; 284 *addr_len_ptr_arg = addr_len; 285 break; 286 } 287 288 /* 289 * Allocating a factory MAC address failed, generate a 290 * random MAC address instead. 291 */ 292 if ((err = mac_addr_random(vnic->vn_mch, 293 prefix_len, mac_addr_arg, &mac_diag)) != 0) { 294 *diag = vnic_mac2vnic_diag(mac_diag); 295 return (err); 296 } 297 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh); 298 vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM; 299 break; 300 case VNIC_MAC_ADDR_TYPE_PRIMARY: 301 /* 302 * We get the address here since we copy it in the 303 * vnic's vn_addr. 304 * We can't ask for hardware resources since we 305 * don't currently support hardware classification 306 * for these MAC clients. 307 */ 308 if (req_hwgrp_flag) { 309 *diag = VNIC_IOC_DIAG_NO_HWRINGS; 310 return (ENOTSUP); 311 } 312 mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg); 313 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh); 314 mac_flags |= MAC_UNICAST_VNIC_PRIMARY; 315 break; 316 } 317 318 vnic->vn_addr_type = vnic_addr_type; 319 320 err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags, 321 &vnic->vn_muh, vid, &mac_diag); 322 if (err != 0) { 323 if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) { 324 /* release factory MAC address */ 325 mac_addr_factory_release(vnic->vn_mch, *addr_slot); 326 } 327 *diag = vnic_mac2vnic_diag(mac_diag); 328 } 329 330 return (err); 331 } 332 333 /* 334 * Create a new VNIC upon request from administrator. 335 * Returns 0 on success, an errno on failure. 336 */ 337 /* ARGSUSED */ 338 int 339 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, 340 vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr, 341 int *mac_slot, uint_t mac_prefix_len, uint16_t vid, vrid_t vrid, 342 int af, mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag, 343 cred_t *credp) 344 { 345 vnic_t *vnic; 346 mac_register_t *mac; 347 int err; 348 boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0); 349 char vnic_name[MAXNAMELEN]; 350 const mac_info_t *minfop; 351 uint32_t req_hwgrp_flag = B_FALSE; 352 353 *diag = VNIC_IOC_DIAG_NONE; 354 355 rw_enter(&vnic_lock, RW_WRITER); 356 357 /* does a VNIC with the same id already exist? */ 358 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 359 (mod_hash_val_t *)&vnic); 360 if (err == 0) { 361 rw_exit(&vnic_lock); 362 return (EEXIST); 363 } 364 365 vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP); 366 if (vnic == NULL) { 367 rw_exit(&vnic_lock); 368 return (ENOMEM); 369 } 370 371 bzero(vnic, sizeof (*vnic)); 372 373 vnic->vn_id = vnic_id; 374 vnic->vn_link_id = linkid; 375 vnic->vn_vrid = vrid; 376 vnic->vn_af = af; 377 378 if (!is_anchor) { 379 if (linkid == DATALINK_INVALID_LINKID) { 380 err = EINVAL; 381 goto bail; 382 } 383 384 /* 385 * Open the lower MAC and assign its initial bandwidth and 386 * MAC address. We do this here during VNIC creation and 387 * do not wait until the upper MAC client open so that we 388 * can validate the VNIC creation parameters (bandwidth, 389 * MAC address, etc) and reserve a factory MAC address if 390 * one was requested. 391 */ 392 err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh); 393 if (err != 0) 394 goto bail; 395 396 /* 397 * VNIC(vlan) over VNICs(vlans) is not supported. 398 */ 399 if (mac_is_vnic(vnic->vn_lower_mh)) { 400 err = EINVAL; 401 goto bail; 402 } 403 404 /* only ethernet support for now */ 405 minfop = mac_info(vnic->vn_lower_mh); 406 if (minfop->mi_nativemedia != DL_ETHER) { 407 err = ENOTSUP; 408 goto bail; 409 } 410 411 (void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL, 412 NULL); 413 err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch, 414 vnic_name, MAC_OPEN_FLAGS_IS_VNIC); 415 if (err != 0) 416 goto bail; 417 418 /* assign a MAC address to the VNIC */ 419 420 err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot, 421 mac_prefix_len, mac_len, mac_addr, flags, diag, vid, 422 req_hwgrp_flag); 423 if (err != 0) { 424 vnic->vn_muh = NULL; 425 if (diag != NULL && req_hwgrp_flag) 426 *diag = VNIC_IOC_DIAG_NO_HWRINGS; 427 goto bail; 428 } 429 430 /* register to receive notification from underlying MAC */ 431 vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb, 432 vnic); 433 434 *vnic_addr_type = vnic->vn_addr_type; 435 vnic->vn_addr_len = *mac_len; 436 vnic->vn_vid = vid; 437 438 bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len); 439 440 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) 441 vnic->vn_slot_id = *mac_slot; 442 443 /* 444 * Set the initial VNIC capabilities. If the VNIC is created 445 * over MACs which does not support nactive vlan, disable 446 * VNIC's hardware checksum capability if its VID is not 0, 447 * since the underlying MAC would get the hardware checksum 448 * offset wrong in case of VLAN packets. 449 */ 450 if (vid == 0 || !mac_capab_get(vnic->vn_lower_mh, 451 MAC_CAPAB_NO_NATIVEVLAN, NULL)) { 452 if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM, 453 &vnic->vn_hcksum_txflags)) 454 vnic->vn_hcksum_txflags = 0; 455 } else { 456 vnic->vn_hcksum_txflags = 0; 457 } 458 } 459 460 /* register with the MAC module */ 461 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 462 goto bail; 463 464 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 465 mac->m_driver = vnic; 466 mac->m_dip = vnic_get_dip(); 467 mac->m_instance = (uint_t)-1; 468 mac->m_src_addr = vnic->vn_addr; 469 mac->m_callbacks = &vnic_m_callbacks; 470 471 if (!is_anchor) { 472 /* 473 * If this is a VNIC based VLAN, then we check for the 474 * margin unless it has been created with the force 475 * flag. If we are configuring a VLAN over an etherstub, 476 * we don't check the margin even if force is not set. 477 */ 478 if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) { 479 if (vid != VLAN_ID_NONE) 480 vnic->vn_force = B_TRUE; 481 /* 482 * As the current margin size of the underlying mac is 483 * used to determine the margin size of the VNIC 484 * itself, request the underlying mac not to change 485 * to a smaller margin size. 486 */ 487 err = mac_margin_add(vnic->vn_lower_mh, 488 &vnic->vn_margin, B_TRUE); 489 ASSERT(err == 0); 490 } else { 491 vnic->vn_margin = VLAN_TAGSZ; 492 err = mac_margin_add(vnic->vn_lower_mh, 493 &vnic->vn_margin, B_FALSE); 494 if (err != 0) { 495 mac_free(mac); 496 if (diag != NULL) 497 *diag = VNIC_IOC_DIAG_MACMARGIN_INVALID; 498 goto bail; 499 } 500 } 501 502 mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu, 503 &mac->m_max_sdu); 504 err = mac_mtu_add(vnic->vn_lower_mh, &mac->m_max_sdu, B_FALSE); 505 if (err != 0) { 506 VERIFY(mac_margin_remove(vnic->vn_lower_mh, 507 vnic->vn_margin) == 0); 508 mac_free(mac); 509 if (diag != NULL) 510 *diag = VNIC_IOC_DIAG_MACMTU_INVALID; 511 goto bail; 512 } 513 vnic->vn_mtu = mac->m_max_sdu; 514 } else { 515 vnic->vn_margin = VLAN_TAGSZ; 516 mac->m_min_sdu = 1; 517 mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU; 518 vnic->vn_mtu = ANCHOR_VNIC_MAX_MTU; 519 } 520 521 mac->m_margin = vnic->vn_margin; 522 523 err = mac_register(mac, &vnic->vn_mh); 524 mac_free(mac); 525 if (err != 0) { 526 if (!is_anchor) { 527 VERIFY(mac_mtu_remove(vnic->vn_lower_mh, 528 vnic->vn_mtu) == 0); 529 VERIFY(mac_margin_remove(vnic->vn_lower_mh, 530 vnic->vn_margin) == 0); 531 } 532 goto bail; 533 } 534 535 /* Set the VNIC's MAC in the client */ 536 if (!is_anchor) { 537 mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp); 538 539 if (mrp != NULL) { 540 if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 || 541 (mrp->mrp_mask & MRP_TX_RINGS) != 0) { 542 req_hwgrp_flag = B_TRUE; 543 } 544 err = mac_client_set_resources(vnic->vn_mch, mrp); 545 if (err != 0) { 546 VERIFY(mac_mtu_remove(vnic->vn_lower_mh, 547 vnic->vn_mtu) == 0); 548 VERIFY(mac_margin_remove(vnic->vn_lower_mh, 549 vnic->vn_margin) == 0); 550 (void) mac_unregister(vnic->vn_mh); 551 goto bail; 552 } 553 } 554 } 555 556 err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp)); 557 if (err != 0) { 558 VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh, 559 vnic->vn_margin) == 0); 560 if (!is_anchor) { 561 VERIFY(mac_mtu_remove(vnic->vn_lower_mh, 562 vnic->vn_mtu) == 0); 563 VERIFY(mac_margin_remove(vnic->vn_lower_mh, 564 vnic->vn_margin) == 0); 565 } 566 (void) mac_unregister(vnic->vn_mh); 567 goto bail; 568 } 569 570 /* add new VNIC to hash table */ 571 err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id), 572 (mod_hash_val_t)vnic); 573 ASSERT(err == 0); 574 vnic_count++; 575 576 /* 577 * Now that we've enabled this VNIC, we should go through and update the 578 * link state by setting it to our parents. 579 */ 580 vnic->vn_enabled = B_TRUE; 581 582 if (is_anchor) { 583 mac_link_update(vnic->vn_mh, LINK_STATE_UP); 584 } else { 585 mac_link_update(vnic->vn_mh, 586 mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE)); 587 } 588 589 rw_exit(&vnic_lock); 590 591 return (0); 592 593 bail: 594 rw_exit(&vnic_lock); 595 if (!is_anchor) { 596 if (vnic->vn_mnh != NULL) 597 (void) mac_notify_remove(vnic->vn_mnh, B_TRUE); 598 if (vnic->vn_muh != NULL) 599 (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh); 600 if (vnic->vn_mch != NULL) 601 mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC); 602 if (vnic->vn_lower_mh != NULL) 603 mac_close(vnic->vn_lower_mh); 604 } 605 606 kmem_cache_free(vnic_cache, vnic); 607 return (err); 608 } 609 610 /* 611 * Modify the properties of an existing VNIC. 612 */ 613 /* ARGSUSED */ 614 int 615 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask, 616 vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr, 617 uint_t mac_slot, mac_resource_props_t *mrp) 618 { 619 vnic_t *vnic = NULL; 620 621 rw_enter(&vnic_lock, RW_WRITER); 622 623 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 624 (mod_hash_val_t *)&vnic) != 0) { 625 rw_exit(&vnic_lock); 626 return (ENOENT); 627 } 628 629 rw_exit(&vnic_lock); 630 631 return (0); 632 } 633 634 /* ARGSUSED */ 635 int 636 vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp) 637 { 638 vnic_t *vnic = NULL; 639 mod_hash_val_t val; 640 datalink_id_t tmpid; 641 int rc; 642 643 rw_enter(&vnic_lock, RW_WRITER); 644 645 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 646 (mod_hash_val_t *)&vnic) != 0) { 647 rw_exit(&vnic_lock); 648 return (ENOENT); 649 } 650 651 if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) { 652 rw_exit(&vnic_lock); 653 return (rc); 654 } 655 656 ASSERT(vnic_id == tmpid); 657 658 /* 659 * We cannot unregister the MAC yet. Unregistering would 660 * free up mac_impl_t which should not happen at this time. 661 * So disable mac_impl_t by calling mac_disable(). This will prevent 662 * any new claims on mac_impl_t. 663 */ 664 if ((rc = mac_disable(vnic->vn_mh)) != 0) { 665 (void) dls_devnet_create(vnic->vn_mh, vnic_id, 666 crgetzoneid(credp)); 667 rw_exit(&vnic_lock); 668 return (rc); 669 } 670 671 vnic_cleanup_secondary_macs(vnic, vnic->vn_nhandles); 672 673 vnic->vn_enabled = B_FALSE; 674 (void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val); 675 ASSERT(vnic == (vnic_t *)val); 676 vnic_count--; 677 rw_exit(&vnic_lock); 678 679 /* 680 * XXX-nicolas shouldn't have a void cast here, if it's 681 * expected that the function will never fail, then we should 682 * have an ASSERT(). 683 */ 684 (void) mac_unregister(vnic->vn_mh); 685 686 if (vnic->vn_lower_mh != NULL) { 687 /* 688 * Check if MAC address for the vnic was obtained from the 689 * factory MAC addresses. If yes, release it. 690 */ 691 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) { 692 (void) mac_addr_factory_release(vnic->vn_mch, 693 vnic->vn_slot_id); 694 } 695 (void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin); 696 (void) mac_mtu_remove(vnic->vn_lower_mh, vnic->vn_mtu); 697 (void) mac_notify_remove(vnic->vn_mnh, B_TRUE); 698 (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh); 699 mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC); 700 mac_close(vnic->vn_lower_mh); 701 } 702 703 kmem_cache_free(vnic_cache, vnic); 704 return (0); 705 } 706 707 /* ARGSUSED */ 708 mblk_t * 709 vnic_m_tx(void *arg, mblk_t *mp_chain) 710 { 711 /* 712 * This function could be invoked for an anchor VNIC when sending 713 * broadcast and multicast packets, and unicast packets which did 714 * not match any local known destination. 715 */ 716 freemsgchain(mp_chain); 717 return (NULL); 718 } 719 720 /*ARGSUSED*/ 721 static void 722 vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 723 { 724 miocnak(q, mp, 0, ENOTSUP); 725 } 726 727 /* 728 * This entry point cannot be passed-through, since it is invoked 729 * for the per-VNIC kstats which must be exported independently 730 * of the existence of VNIC MAC clients. 731 */ 732 static int 733 vnic_m_stat(void *arg, uint_t stat, uint64_t *val) 734 { 735 vnic_t *vnic = arg; 736 int rval = 0; 737 738 if (vnic->vn_lower_mh == NULL) { 739 /* 740 * It's an anchor VNIC, which does not have any 741 * statistics in itself. 742 */ 743 return (ENOTSUP); 744 } 745 746 /* 747 * ENOTSUP must be reported for unsupported stats, the VNIC 748 * driver reports a subset of the stats that would 749 * be returned by a real piece of hardware. 750 */ 751 752 switch (stat) { 753 case MAC_STAT_LINK_STATE: 754 case MAC_STAT_LINK_UP: 755 case MAC_STAT_PROMISC: 756 case MAC_STAT_IFSPEED: 757 case MAC_STAT_MULTIRCV: 758 case MAC_STAT_MULTIXMT: 759 case MAC_STAT_BRDCSTRCV: 760 case MAC_STAT_BRDCSTXMT: 761 case MAC_STAT_OPACKETS: 762 case MAC_STAT_OBYTES: 763 case MAC_STAT_IERRORS: 764 case MAC_STAT_OERRORS: 765 case MAC_STAT_RBYTES: 766 case MAC_STAT_IPACKETS: 767 *val = mac_client_stat_get(vnic->vn_mch, stat); 768 break; 769 default: 770 rval = ENOTSUP; 771 } 772 773 return (rval); 774 } 775 776 /* 777 * Invoked by the upper MAC to retrieve the lower MAC client handle 778 * corresponding to a VNIC. A pointer to this function is obtained 779 * by the upper MAC via capability query. 780 * 781 * XXX-nicolas Note: this currently causes all VNIC MAC clients to 782 * receive the same MAC client handle for the same VNIC. This is ok 783 * as long as we have only one VNIC MAC client which sends and 784 * receives data, but we don't currently enforce this at the MAC layer. 785 */ 786 static void * 787 vnic_mac_client_handle(void *vnic_arg) 788 { 789 vnic_t *vnic = vnic_arg; 790 791 return (vnic->vn_mch); 792 } 793 794 /* 795 * Invoked when updating the primary MAC so that the secondary MACs are 796 * kept in sync. 797 */ 798 static void 799 vnic_mac_secondary_update(void *vnic_arg) 800 { 801 vnic_t *vn = vnic_arg; 802 int i; 803 804 for (i = 1; i <= vn->vn_nhandles; i++) { 805 mac_secondary_dup(vn->vn_mc_handles[0], vn->vn_mc_handles[i]); 806 } 807 } 808 809 /* 810 * Return information about the specified capability. 811 */ 812 /* ARGSUSED */ 813 static boolean_t 814 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 815 { 816 vnic_t *vnic = arg; 817 818 switch (cap) { 819 case MAC_CAPAB_HCKSUM: { 820 uint32_t *hcksum_txflags = cap_data; 821 822 *hcksum_txflags = vnic->vn_hcksum_txflags & 823 (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM | 824 HCKSUM_INET_PARTIAL); 825 break; 826 } 827 case MAC_CAPAB_VNIC: { 828 mac_capab_vnic_t *vnic_capab = cap_data; 829 830 if (vnic->vn_lower_mh == NULL) { 831 /* 832 * It's an anchor VNIC, we don't have an underlying 833 * NIC and MAC client handle. 834 */ 835 return (B_FALSE); 836 } 837 838 if (vnic_capab != NULL) { 839 vnic_capab->mcv_arg = vnic; 840 vnic_capab->mcv_mac_client_handle = 841 vnic_mac_client_handle; 842 vnic_capab->mcv_mac_secondary_update = 843 vnic_mac_secondary_update; 844 } 845 break; 846 } 847 case MAC_CAPAB_ANCHOR_VNIC: { 848 /* since it's an anchor VNIC we don't have lower mac handle */ 849 if (vnic->vn_lower_mh == NULL) { 850 ASSERT(vnic->vn_link_id == 0); 851 return (B_TRUE); 852 } 853 return (B_FALSE); 854 } 855 case MAC_CAPAB_NO_NATIVEVLAN: 856 return (B_FALSE); 857 case MAC_CAPAB_NO_ZCOPY: 858 return (B_TRUE); 859 case MAC_CAPAB_VRRP: { 860 mac_capab_vrrp_t *vrrp_capab = cap_data; 861 862 if (vnic->vn_vrid != 0) { 863 if (vrrp_capab != NULL) 864 vrrp_capab->mcv_af = vnic->vn_af; 865 return (B_TRUE); 866 } 867 return (B_FALSE); 868 } 869 default: 870 return (B_FALSE); 871 } 872 return (B_TRUE); 873 } 874 875 /* ARGSUSED */ 876 static int 877 vnic_m_start(void *arg) 878 { 879 return (0); 880 } 881 882 /* ARGSUSED */ 883 static void 884 vnic_m_stop(void *arg) 885 { 886 } 887 888 /* ARGSUSED */ 889 static int 890 vnic_m_promisc(void *arg, boolean_t on) 891 { 892 return (0); 893 } 894 895 /* ARGSUSED */ 896 static int 897 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 898 { 899 return (0); 900 } 901 902 static int 903 vnic_m_unicst(void *arg, const uint8_t *macaddr) 904 { 905 vnic_t *vnic = arg; 906 907 return (mac_vnic_unicast_set(vnic->vn_mch, macaddr)); 908 } 909 910 static void 911 vnic_cleanup_secondary_macs(vnic_t *vn, int cnt) 912 { 913 int i; 914 915 /* Remove existing secondaries (primary is at 0) */ 916 for (i = 1; i <= cnt; i++) { 917 mac_rx_clear(vn->vn_mc_handles[i]); 918 919 /* unicast handle might not have been set yet */ 920 if (vn->vn_mu_handles[i] != NULL) 921 (void) mac_unicast_remove(vn->vn_mc_handles[i], 922 vn->vn_mu_handles[i]); 923 924 mac_secondary_cleanup(vn->vn_mc_handles[i]); 925 926 mac_client_close(vn->vn_mc_handles[i], MAC_CLOSE_FLAGS_IS_VNIC); 927 928 vn->vn_mu_handles[i] = NULL; 929 vn->vn_mc_handles[i] = NULL; 930 } 931 932 vn->vn_nhandles = 0; 933 } 934 935 /* 936 * Setup secondary MAC addresses on the vnic. Due to limitations in the mac 937 * code, each mac address must be associated with a mac_client (and the 938 * flow that goes along with the client) so we need to create those clients 939 * here. 940 */ 941 static int 942 vnic_set_secondary_macs(vnic_t *vn, mac_secondary_addr_t *msa) 943 { 944 int i, err; 945 char primary_name[MAXNAMELEN]; 946 947 /* First, remove pre-existing secondaries */ 948 ASSERT(vn->vn_nhandles < MPT_MAXMACADDR); 949 vnic_cleanup_secondary_macs(vn, vn->vn_nhandles); 950 951 if (msa->ms_addrcnt == (uint32_t)-1) 952 msa->ms_addrcnt = 0; 953 954 vn->vn_nhandles = msa->ms_addrcnt; 955 956 (void) dls_mgmt_get_linkinfo(vn->vn_id, primary_name, NULL, NULL, NULL); 957 958 /* 959 * Now add the new secondary MACs 960 * Recall that the primary MAC address is the first element. 961 * The secondary clients are named after the primary with their 962 * index to distinguish them. 963 */ 964 for (i = 1; i <= vn->vn_nhandles; i++) { 965 uint8_t *addr; 966 mac_diag_t mac_diag; 967 char secondary_name[MAXNAMELEN]; 968 969 (void) snprintf(secondary_name, sizeof (secondary_name), 970 "%s%02d", primary_name, i); 971 972 err = mac_client_open(vn->vn_lower_mh, &vn->vn_mc_handles[i], 973 secondary_name, MAC_OPEN_FLAGS_IS_VNIC); 974 if (err != 0) { 975 /* Remove any that we successfully added */ 976 vnic_cleanup_secondary_macs(vn, --i); 977 return (err); 978 } 979 980 /* 981 * Assign a MAC address to the VNIC 982 * 983 * Normally this would be done with vnic_unicast_add but since 984 * we know these are fixed adddresses, and since we need to 985 * save this in the proper array slot, we bypass that function 986 * and go direct. 987 */ 988 addr = msa->ms_addrs[i - 1]; 989 err = mac_unicast_add(vn->vn_mc_handles[i], addr, 0, 990 &vn->vn_mu_handles[i], vn->vn_vid, &mac_diag); 991 if (err != 0) { 992 /* Remove any that we successfully added */ 993 vnic_cleanup_secondary_macs(vn, i); 994 return (err); 995 } 996 997 /* 998 * Setup the secondary the same way as the primary (i.e. 999 * receiver function/argument (e.g. i_dls_link_rx, mac_pkt_drop, 1000 * etc.), the promisc list, and the resource controls). 1001 */ 1002 mac_secondary_dup(vn->vn_mc_handles[0], vn->vn_mc_handles[i]); 1003 } 1004 1005 return (0); 1006 } 1007 1008 static int 1009 vnic_get_secondary_macs(vnic_t *vn, uint_t pr_valsize, void *pr_val) 1010 { 1011 int i; 1012 mac_secondary_addr_t msa; 1013 1014 if (pr_valsize < sizeof (msa)) 1015 return (EINVAL); 1016 1017 /* Get existing addresses (primary is at 0) */ 1018 ASSERT(vn->vn_nhandles < MPT_MAXMACADDR); 1019 for (i = 1; i <= vn->vn_nhandles; i++) { 1020 ASSERT(vn->vn_mc_handles[i] != NULL); 1021 mac_unicast_secondary_get(vn->vn_mc_handles[i], 1022 msa.ms_addrs[i - 1]); 1023 } 1024 msa.ms_addrcnt = vn->vn_nhandles; 1025 1026 bcopy(&msa, pr_val, sizeof (msa)); 1027 return (0); 1028 } 1029 1030 /* 1031 * Callback functions for set/get of properties 1032 */ 1033 /*ARGSUSED*/ 1034 static int 1035 vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, 1036 uint_t pr_valsize, const void *pr_val) 1037 { 1038 int err = 0; 1039 vnic_t *vn = m_driver; 1040 1041 switch (pr_num) { 1042 case MAC_PROP_MTU: { 1043 uint32_t mtu; 1044 1045 if (pr_valsize < sizeof (mtu)) { 1046 err = EINVAL; 1047 break; 1048 } 1049 bcopy(pr_val, &mtu, sizeof (mtu)); 1050 1051 if (vn->vn_link_id == DATALINK_INVALID_LINKID) { 1052 if (mtu < ANCHOR_VNIC_MIN_MTU || 1053 mtu > ANCHOR_VNIC_MAX_MTU) { 1054 err = EINVAL; 1055 break; 1056 } 1057 } else { 1058 err = mac_mtu_add(vn->vn_lower_mh, &mtu, B_FALSE); 1059 /* 1060 * If it's not supported to set a value here, translate 1061 * that to EINVAL, so user land gets a better idea of 1062 * what went wrong. This realistically means that they 1063 * violated the output of prop info. 1064 */ 1065 if (err == ENOTSUP) 1066 err = EINVAL; 1067 if (err != 0) 1068 break; 1069 VERIFY(mac_mtu_remove(vn->vn_lower_mh, 1070 vn->vn_mtu) == 0); 1071 } 1072 vn->vn_mtu = mtu; 1073 err = mac_maxsdu_update(vn->vn_mh, mtu); 1074 break; 1075 } 1076 case MAC_PROP_VN_PROMISC_FILTERED: { 1077 boolean_t filtered; 1078 1079 if (pr_valsize < sizeof (filtered)) { 1080 err = EINVAL; 1081 break; 1082 } 1083 1084 bcopy(pr_val, &filtered, sizeof (filtered)); 1085 mac_set_promisc_filtered(vn->vn_mch, filtered); 1086 break; 1087 } 1088 case MAC_PROP_SECONDARY_ADDRS: { 1089 mac_secondary_addr_t msa; 1090 1091 bcopy(pr_val, &msa, sizeof (msa)); 1092 err = vnic_set_secondary_macs(vn, &msa); 1093 break; 1094 } 1095 default: 1096 err = ENOTSUP; 1097 break; 1098 } 1099 return (err); 1100 } 1101 1102 /* ARGSUSED */ 1103 static int 1104 vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1105 uint_t pr_valsize, void *pr_val) 1106 { 1107 vnic_t *vn = arg; 1108 int ret = 0; 1109 boolean_t out; 1110 1111 switch (pr_num) { 1112 case MAC_PROP_VN_PROMISC_FILTERED: 1113 out = mac_get_promisc_filtered(vn->vn_mch); 1114 ASSERT(pr_valsize >= sizeof (boolean_t)); 1115 bcopy(&out, pr_val, sizeof (boolean_t)); 1116 break; 1117 case MAC_PROP_SECONDARY_ADDRS: 1118 ret = vnic_get_secondary_macs(vn, pr_valsize, pr_val); 1119 break; 1120 default: 1121 ret = ENOTSUP; 1122 break; 1123 } 1124 1125 return (ret); 1126 } 1127 1128 /* ARGSUSED */ 1129 static void vnic_m_propinfo(void *m_driver, const char *pr_name, 1130 mac_prop_id_t pr_num, mac_prop_info_handle_t prh) 1131 { 1132 vnic_t *vn = m_driver; 1133 1134 switch (pr_num) { 1135 case MAC_PROP_MTU: 1136 if (vn->vn_link_id == DATALINK_INVALID_LINKID) { 1137 mac_prop_info_set_range_uint32(prh, 1138 ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU); 1139 } else { 1140 uint32_t max; 1141 mac_perim_handle_t mph; 1142 mac_propval_range_t range; 1143 1144 /* 1145 * The valid range for a VNIC's MTU is the minimum that 1146 * the device supports and the current value of the 1147 * device. A VNIC cannot increase the current MTU of the 1148 * device. Therefore we need to get the range from the 1149 * propinfo endpoint and current mtu from the 1150 * traditional property endpoint. 1151 */ 1152 mac_perim_enter_by_mh(vn->vn_lower_mh, &mph); 1153 if (mac_get_prop(vn->vn_lower_mh, MAC_PROP_MTU, "mtu", 1154 &max, sizeof (uint32_t)) != 0) { 1155 mac_perim_exit(mph); 1156 return; 1157 } 1158 1159 range.mpr_count = 1; 1160 if (mac_prop_info(vn->vn_lower_mh, MAC_PROP_MTU, "mtu", 1161 NULL, 0, &range, NULL) != 0) { 1162 mac_perim_exit(mph); 1163 return; 1164 } 1165 1166 mac_prop_info_set_default_uint32(prh, max); 1167 mac_prop_info_set_range_uint32(prh, 1168 range.mpr_range_uint32[0].mpur_min, max); 1169 mac_perim_exit(mph); 1170 } 1171 break; 1172 } 1173 } 1174 1175 1176 int 1177 vnic_info(vnic_info_t *info, cred_t *credp) 1178 { 1179 vnic_t *vnic; 1180 int err; 1181 1182 /* Make sure that the VNIC link is visible from the caller's zone. */ 1183 if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp))) 1184 return (ENOENT); 1185 1186 rw_enter(&vnic_lock, RW_WRITER); 1187 1188 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id), 1189 (mod_hash_val_t *)&vnic); 1190 if (err != 0) { 1191 rw_exit(&vnic_lock); 1192 return (ENOENT); 1193 } 1194 1195 info->vn_link_id = vnic->vn_link_id; 1196 info->vn_mac_addr_type = vnic->vn_addr_type; 1197 info->vn_mac_len = vnic->vn_addr_len; 1198 bcopy(vnic->vn_addr, info->vn_mac_addr, MAXMACADDRLEN); 1199 info->vn_mac_slot = vnic->vn_slot_id; 1200 info->vn_mac_prefix_len = 0; 1201 info->vn_vid = vnic->vn_vid; 1202 info->vn_force = vnic->vn_force; 1203 info->vn_vrid = vnic->vn_vrid; 1204 info->vn_af = vnic->vn_af; 1205 1206 bzero(&info->vn_resource_props, sizeof (mac_resource_props_t)); 1207 if (vnic->vn_mch != NULL) 1208 mac_client_get_resources(vnic->vn_mch, 1209 &info->vn_resource_props); 1210 1211 rw_exit(&vnic_lock); 1212 return (0); 1213 } 1214 1215 static void 1216 vnic_notify_cb(void *arg, mac_notify_type_t type) 1217 { 1218 vnic_t *vnic = arg; 1219 1220 /* 1221 * Do not deliver notifications if the vnic is not fully initialized 1222 * or is in process of being torn down. 1223 */ 1224 if (!vnic->vn_enabled) 1225 return; 1226 1227 switch (type) { 1228 case MAC_NOTE_UNICST: 1229 /* 1230 * Only the VLAN VNIC needs to be notified with primary MAC 1231 * address change. 1232 */ 1233 if (vnic->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY) 1234 return; 1235 1236 /* the unicast MAC address value */ 1237 mac_unicast_primary_get(vnic->vn_lower_mh, vnic->vn_addr); 1238 1239 /* notify its upper layer MAC about MAC address change */ 1240 mac_unicst_update(vnic->vn_mh, (const uint8_t *)vnic->vn_addr); 1241 break; 1242 1243 case MAC_NOTE_LINK: 1244 mac_link_update(vnic->vn_mh, 1245 mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE)); 1246 break; 1247 1248 default: 1249 break; 1250 } 1251 } 1252