1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/sysmacros.h> 30 #include <sys/conf.h> 31 #include <sys/cmn_err.h> 32 #include <sys/list.h> 33 #include <sys/ksynch.h> 34 #include <sys/kmem.h> 35 #include <sys/stream.h> 36 #include <sys/modctl.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/atomic.h> 40 #include <sys/stat.h> 41 #include <sys/modhash.h> 42 #include <sys/strsubr.h> 43 #include <sys/strsun.h> 44 #include <sys/dlpi.h> 45 #include <sys/mac.h> 46 #include <sys/mac_ether.h> 47 #include <sys/dls.h> 48 #include <sys/pattr.h> 49 #include <sys/vnic.h> 50 #include <sys/vnic_impl.h> 51 #include <sys/gld.h> 52 #include <inet/ip.h> 53 #include <inet/ip_impl.h> 54 55 static int vnic_m_start(void *); 56 static void vnic_m_stop(void *); 57 static int vnic_m_promisc(void *, boolean_t); 58 static int vnic_m_multicst(void *, boolean_t, const uint8_t *); 59 static int vnic_m_unicst(void *, const uint8_t *); 60 static int vnic_m_stat(void *, uint_t, uint64_t *); 61 
static void vnic_m_resources(void *);
static mblk_t *vnic_m_tx(void *, mblk_t *);
static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
static void vnic_mac_free(vnic_mac_t *);
static uint_t vnic_info_walker(mod_hash_key_t, mod_hash_val_t *, void *);
static void vnic_notify_cb(void *, mac_notify_type_t);
static int vnic_modify_mac_addr(vnic_t *, uint_t, uchar_t *);
static mblk_t *vnic_active_tx(void *, mblk_t *);
static int vnic_promisc_set(vnic_t *, boolean_t);

/* kmem caches for vnic_t and vnic_mac_t allocations */
static kmem_cache_t	*vnic_cache;
static kmem_cache_t	*vnic_mac_cache;
/* protects vnic_hash and vnic_count */
static krwlock_t	vnic_lock;
/* protects vnic_mac_hash and the vnic_mac_t reference counts */
static kmutex_t		vnic_mac_lock;
static uint_t		vnic_count;

/* hash of VNICs (vnic_t's), keyed by VNIC id */
static mod_hash_t	*vnic_hash;
#define	VNIC_HASHSZ	64
#define	VNIC_HASH_KEY(vnic_id)	((mod_hash_key_t)(uintptr_t)vnic_id)

/*
 * Hash of underlying open MACs (vnic_mac_t's), keyed by the
 * datalink id of the underlying link (an idhash; see vnic_dev_init()
 * and vnic_mac_open()).
 */
static mod_hash_t	*vnic_mac_hash;
#define	VNIC_MAC_HASHSZ	64

/* take a reference on an open lower MAC; caller holds vnic_mac_lock */
#define	VNIC_MAC_REFHOLD(va) {			\
	ASSERT(MUTEX_HELD(&vnic_mac_lock));	\
	(va)->va_refs++;			\
	ASSERT((va)->va_refs != 0);		\
}

/* drop a reference; the last release frees the vnic_mac_t */
#define	VNIC_MAC_REFRELE(va) {			\
	ASSERT(MUTEX_HELD(&vnic_mac_lock));	\
	ASSERT((va)->va_refs != 0);		\
	if (--((va)->va_refs) == 0)		\
		vnic_mac_free(va);		\
}

static uchar_t vnic_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

/* used by vnic_walker */
typedef struct vnic_info_state {
	datalink_id_t		vs_vnic_id;
	datalink_id_t		vs_linkid;
	boolean_t		vs_vnic_found;
	vnic_info_new_vnic_fn_t	vs_new_vnic_fn;
	void			*vs_fn_arg;
	int			vs_rc;
} vnic_info_state_t;

#define	VNIC_M_CALLBACK_FLAGS	(MC_RESOURCES | MC_GETCAPAB)

static mac_callbacks_t vnic_m_callbacks = {
	VNIC_M_CALLBACK_FLAGS,
	vnic_m_stat,
	vnic_m_start,
	vnic_m_stop,
	vnic_m_promisc,
	vnic_m_multicst,
	vnic_m_unicst,
	vnic_m_tx,
	vnic_m_resources,
	NULL,			/* m_ioctl */
	vnic_m_capab_get
};

/*
 * kmem cache constructor for vnic_mac_t: zero the structure and
 * initialize its embedded locks.
 */
/* ARGSUSED */
static int
vnic_mac_ctor(void *buf, void *arg, int kmflag)
{
	vnic_mac_t *vnic_mac = buf;

	bzero(vnic_mac, sizeof (vnic_mac_t));
	rw_init(&vnic_mac->va_bcast_grp_lock, NULL, RW_DRIVER, NULL);
	rw_init(&vnic_mac->va_promisc_lock, NULL, RW_DRIVER, NULL);

	return (0);
}

/*
 * kmem cache destructor for vnic_mac_t: tear down the locks set up
 * by vnic_mac_ctor().
 */
/* ARGSUSED */
static void
vnic_mac_dtor(void *buf, void *arg)
{
	vnic_mac_t *vnic_mac = buf;

	rw_destroy(&vnic_mac->va_promisc_lock);
	rw_destroy(&vnic_mac->va_bcast_grp_lock);
}

/*
 * Module-global initialization: create the kmem caches, the two
 * hash tables, and the global locks. Called once at attach time.
 */
void
vnic_dev_init(void)
{
	vnic_cache = kmem_cache_create("vnic_cache",
	    sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	vnic_mac_cache = kmem_cache_create("vnic_mac_cache",
	    sizeof (vnic_mac_t), 0, vnic_mac_ctor, vnic_mac_dtor,
	    NULL, NULL, NULL, 0);

	vnic_hash = mod_hash_create_idhash("vnic_hash",
	    VNIC_HASHSZ, mod_hash_null_valdtor);

	vnic_mac_hash = mod_hash_create_idhash("vnic_mac_hash",
	    VNIC_MAC_HASHSZ, mod_hash_null_valdtor);

	rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);

	mutex_init(&vnic_mac_lock, NULL, MUTEX_DEFAULT, NULL);

	vnic_count = 0;
}

/*
 * Module-global teardown; must only be called once all VNICs have
 * been deleted (asserted below).
 */
void
vnic_dev_fini(void)
{
	ASSERT(vnic_count == 0);

	mutex_destroy(&vnic_mac_lock);
	rw_destroy(&vnic_lock);
	mod_hash_destroy_idhash(vnic_mac_hash);
	mod_hash_destroy_idhash(vnic_hash);
	kmem_cache_destroy(vnic_mac_cache);
	kmem_cache_destroy(vnic_cache);
}

/* return the number of VNICs currently defined */
uint_t
vnic_dev_count(void)
{
	return (vnic_count);
}

/*
 * Open the underlying MAC identified by linkid, or take an additional
 * reference if it is already open. On success *vmp points to the
 * (possibly shared) vnic_mac_t and 0 is returned; otherwise an errno
 * is returned and *vmp is NULL.
 */
static int
vnic_mac_open(datalink_id_t linkid, vnic_mac_t **vmp)
{
	int err;
	vnic_mac_t *vnic_mac = NULL;
	const mac_info_t *mip;

	*vmp = NULL;

	mutex_enter(&vnic_mac_lock);

	err = mod_hash_find(vnic_mac_hash, (mod_hash_key_t)(uintptr_t)linkid,
	    (mod_hash_val_t *)&vnic_mac);
	if (err == 0) {
		/* this MAC is already opened, increment reference count */
		VNIC_MAC_REFHOLD(vnic_mac);
		mutex_exit(&vnic_mac_lock);
		*vmp = vnic_mac;
		return (0);
	}

	vnic_mac = kmem_cache_alloc(vnic_mac_cache, KM_SLEEP);
	if ((err = mac_open_by_linkid(linkid, &vnic_mac->va_mh)) != 0) {
		vnic_mac->va_mh = NULL;
		goto bail;
	}

	/*
	 * For now, we do not support VNICs over legacy drivers. This will
	 * soon be changed.
	 */
	if (mac_is_legacy(vnic_mac->va_mh)) {
		err = ENOTSUP;
		goto bail;
	}

	/* only ethernet support, for now */
	mip = mac_info(vnic_mac->va_mh);
	if (mip->mi_media != DL_ETHER) {
		err = ENOTSUP;
		goto bail;
	}
	if (mip->mi_media != mip->mi_nativemedia) {
		err = ENOTSUP;
		goto bail;
	}

	vnic_mac->va_linkid = linkid;

	/* add entry to hash table */
	err = mod_hash_insert(vnic_mac_hash, (mod_hash_key_t)(uintptr_t)linkid,
	    (mod_hash_val_t)vnic_mac);
	ASSERT(err == 0);

	/* initialize the flow table associated with lower MAC */
	vnic_mac->va_addr_len = ETHERADDRL;
	(void) vnic_classifier_flow_tab_init(vnic_mac, vnic_mac->va_addr_len,
	    KM_SLEEP);

	vnic_mac->va_txinfo = mac_vnic_tx_get(vnic_mac->va_mh);
	vnic_mac->va_notify_hdl = mac_notify_add(vnic_mac->va_mh,
	    vnic_notify_cb, vnic_mac);

	VNIC_MAC_REFHOLD(vnic_mac);
	*vmp = vnic_mac;
	mutex_exit(&vnic_mac_lock);
	return (0);

bail:
	if (vnic_mac != NULL) {
		if (vnic_mac->va_mh != NULL)
			mac_close(vnic_mac->va_mh);
		kmem_cache_free(vnic_mac_cache, vnic_mac);
	}
	mutex_exit(&vnic_mac_lock);
	return (err);
}

/*
 * Create a new flow for the active MAC client sharing the NIC
 * with the VNICs. This allows the unicast packets for that NIC
 * to be classified and passed up to the active MAC client.
 * It also allows packets sent from a VNIC to the active link to
 * be classified by the VNIC transmit function and delivered via
 * the MAC module locally. Returns B_TRUE on success, B_FALSE on
 * failure.
 */
static int
vnic_init_active_rx(vnic_mac_t *vnic_mac)
{
	uchar_t nic_mac_addr[MAXMACADDRLEN];

	/* already set up; nothing to do */
	if (vnic_mac->va_active_flow != NULL)
		return (B_TRUE);

	mac_unicst_get(vnic_mac->va_mh, nic_mac_addr);

	/*
	 * NOTE(review): with KM_SLEEP the flow allocation cannot fail,
	 * so B_FALSE is currently never returned by this function.
	 */
	vnic_mac->va_active_flow = vnic_classifier_flow_create(
	    vnic_mac->va_addr_len, nic_mac_addr, NULL, B_TRUE, KM_SLEEP);

	vnic_classifier_flow_add(vnic_mac, vnic_mac->va_active_flow,
	    (vnic_rx_fn_t)mac_active_rx, vnic_mac->va_mh, NULL);
	return (B_TRUE);
}

/*
 * Tear down the active MAC client flow, if one was created by
 * vnic_init_active_rx().
 */
static void
vnic_fini_active_rx(vnic_mac_t *vnic_mac)
{
	if (vnic_mac->va_active_flow == NULL)
		return;

	vnic_classifier_flow_remove(vnic_mac, vnic_mac->va_active_flow);
	vnic_classifier_flow_destroy(vnic_mac->va_active_flow);
	vnic_mac->va_active_flow = NULL;
}

/*
 * Re-create the active MAC client flow so that it picks up the
 * current unicast address of the underlying NIC. No-op when no
 * active flow exists.
 */
static void
vnic_update_active_rx(vnic_mac_t *vnic_mac)
{
	if (vnic_mac->va_active_flow == NULL)
		return;

	vnic_fini_active_rx(vnic_mac);
	(void) vnic_init_active_rx(vnic_mac);
}

/*
 * Copy an mblk, preserving its hardware checksum flags.
 * Returns NULL if the copy fails.
 */
mblk_t *
vnic_copymsg_cksum(mblk_t *mp)
{
	mblk_t *mp1;
	uint32_t start, stuff, end, value, flags;

	mp1 = copymsg(mp);
	if (mp1 == NULL)
		return (NULL);

	/* carry the offload state of the original over to the copy */
	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
	(void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value,
	    flags, KM_NOSLEEP);

	return (mp1);
}

/*
 * Copy an mblk chain, preserving the hardware checksum flags of the
 * individual mblks.
 */
mblk_t *
vnic_copymsgchain_cksum(mblk_t *mp)
{
	mblk_t *nmp = NULL;
	mblk_t **nmpp = &nmp;

	/* copy each message; on failure free the partial chain */
	for (; mp != NULL; mp = mp->b_next) {
		if ((*nmpp = vnic_copymsg_cksum(mp)) == NULL) {
			freemsgchain(nmp);
			return (NULL);
		}

		nmpp = &((*nmpp)->b_next);
	}

	return (nmp);
}


/*
 * Process the specified mblk chain for proper handling of hardware
 * checksum offload. This routine is invoked for loopback VNIC traffic.
 * The function handles a NULL mblk chain passed as argument.
 */
mblk_t *
vnic_fix_cksum(mblk_t *mp_chain)
{
	mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1;
	uint32_t flags, start, stuff, end, value;

	for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) {
		uint16_t len;
		uint32_t offset;
		struct ether_header *ehp;
		uint16_t sap;

		hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value,
		    &flags);
		/* no offload was requested for this message */
		if (flags == 0)
			continue;

		/*
		 * Since the processing of checksum offload for loopback
		 * traffic requires modification of the packet contents,
		 * ensure that we are always modifying our own copy.
		 */
		if (DB_REF(mp) > 1) {
			mp1 = copymsg(mp);
			if (mp1 == NULL)
				continue;
			/* splice the copy into the chain in place of mp */
			mp1->b_next = mp->b_next;
			mp->b_next = NULL;
			freemsg(mp);
			if (prev != NULL)
				prev->b_next = mp1;
			else
				new_chain = mp1;
			mp = mp1;
		}

		/*
		 * Ethernet, and optionally VLAN header.
		 */
		/*LINTED*/
		ehp = (struct ether_header *)mp->b_rptr;
		if (ntohs(ehp->ether_type) == VLAN_TPID) {
			struct ether_vlan_header *evhp;

			ASSERT(MBLKL(mp) >=
			    sizeof (struct ether_vlan_header));
			/*LINTED*/
			evhp = (struct ether_vlan_header *)mp->b_rptr;
			sap = ntohs(evhp->ether_type);
			offset = sizeof (struct ether_vlan_header);
		} else {
			sap = ntohs(ehp->ether_type);
			offset = sizeof (struct ether_header);
		}

		/* step past leading mblk(s) if the L2 header fills them */
		if (MBLKL(mp) <= offset) {
			offset -= MBLKL(mp);
			if (mp->b_cont == NULL) {
				/* corrupted packet, skip it */
				if (prev != NULL)
					prev->b_next = mp->b_next;
				else
					new_chain = mp->b_next;
				mp1 = mp->b_next;
				mp->b_next = NULL;
				freemsg(mp);
				mp = mp1;
				continue;
			}
			mp = mp->b_cont;
		}

		if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) {
			ipha_t *ipha = NULL;

			/*
			 * In order to compute the full and header
			 * checksums, we need to find and parse
			 * the IP and/or ULP headers.
			 */

			sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;

			/*
			 * IP header.
			 */
			if (sap != ETHERTYPE_IP)
				continue;

			ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t));
			/*LINTED*/
			ipha = (ipha_t *)(mp->b_rptr + offset);

			if (flags & HCK_FULLCKSUM) {
				ipaddr_t src, dst;
				uint32_t cksum;
				uint16_t *up;
				uint8_t proto;

				/*
				 * Pointer to checksum field in ULP header.
				 */
				proto = ipha->ipha_protocol;
				ASSERT(ipha->ipha_version_and_hdr_length ==
				    IP_SIMPLE_HDR_VERSION);
				if (proto == IPPROTO_TCP) {
					/*LINTED*/
					up = IPH_TCPH_CHECKSUMP(ipha,
					    IP_SIMPLE_HDR_LENGTH);
				} else {
					ASSERT(proto == IPPROTO_UDP);
					/*LINTED*/
					up = IPH_UDPH_CHECKSUMP(ipha,
					    IP_SIMPLE_HDR_LENGTH);
				}

				/*
				 * Pseudo-header checksum.
				 */
				src = ipha->ipha_src;
				dst = ipha->ipha_dst;
				len = ntohs(ipha->ipha_length) -
				    IP_SIMPLE_HDR_LENGTH;

				cksum = (dst >> 16) + (dst & 0xFFFF) +
				    (src >> 16) + (src & 0xFFFF);
				cksum += htons(len);

				/*
				 * The checksum value stored in the packet needs
				 * to be correct. Compute it here.
				 */
				*up = 0;
				cksum += (((proto) == IPPROTO_UDP) ?
				    IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP);
				cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH +
				    offset, cksum);
				*(up) = (uint16_t)(cksum ? cksum : ~cksum);

				flags |= HCK_FULLCKSUM_OK;
				value = 0xffff;
			}

			if (flags & HCK_IPV4_HDRCKSUM) {
				ASSERT(ipha != NULL);
				ipha->ipha_hdr_checksum =
				    (uint16_t)ip_csum_hdr(ipha);
			}
		}

		if (flags & HCK_PARTIALCKSUM) {
			uint16_t *up, partial, cksum;
			uchar_t *ipp;	/* ptr to beginning of IP header */

			/* the checksummed range must be contiguous */
			if (mp->b_cont != NULL) {
				mblk_t *mp1;

				mp1 = msgpullup(mp, offset + end);
				if (mp1 == NULL)
					continue;
				mp1->b_next = mp->b_next;
				mp->b_next = NULL;
				freemsg(mp);
				if (prev != NULL)
					prev->b_next = mp1;
				else
					new_chain = mp1;
				mp = mp1;
			}

			ipp = mp->b_rptr + offset;
			/*LINTED*/
			up = (uint16_t *)((uchar_t *)ipp + stuff);
			partial = *up;
			*up = 0;

			cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start,
			    end - start, partial);
			cksum = ~cksum;
			*up = cksum ? cksum : ~cksum;

			/*
			 * Since we already computed the whole checksum,
			 * indicate to the stack that it has already
			 * been verified by the hardware.
			 */
			flags &= ~HCK_PARTIALCKSUM;
			flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK);
			value = 0xffff;
		}

		(void) hcksum_assoc(mp, NULL, NULL, start, stuff, end,
		    value, flags, KM_NOSLEEP);
	}

	return (new_chain);
}

/*
 * Drop a reference on the underlying MAC; the vnic_mac_t is freed
 * (via VNIC_MAC_REFRELE -> vnic_mac_free) on the last release.
 */
static void
vnic_mac_close(vnic_mac_t *vnic_mac)
{
	mutex_enter(&vnic_mac_lock);
	VNIC_MAC_REFRELE(vnic_mac);
	mutex_exit(&vnic_mac_lock);
}

/*
 * Free an underlying MAC entry once its last reference is gone:
 * remove flows and notifications, close the MAC, and remove it from
 * vnic_mac_hash. Called with vnic_mac_lock held.
 */
static void
vnic_mac_free(vnic_mac_t *vnic_mac)
{
	mod_hash_val_t val;

	ASSERT(MUTEX_HELD(&vnic_mac_lock));
	vnic_fini_active_rx(vnic_mac);
	mac_notify_remove(vnic_mac->va_mh, vnic_mac->va_notify_hdl);
	if (vnic_mac->va_mac_set) {
		vnic_mac->va_mac_set = B_FALSE;
		mac_vnic_clear(vnic_mac->va_mh);
	}
	vnic_classifier_flow_tab_fini(vnic_mac);
	mac_close(vnic_mac->va_mh);

	(void) mod_hash_remove(vnic_mac_hash,
	    (mod_hash_key_t)(uintptr_t)vnic_mac->va_linkid, &val);
	ASSERT(vnic_mac == (vnic_mac_t *)val);

	kmem_cache_free(vnic_mac_cache, vnic_mac);
}

/*
 * Initial VNIC receive routine. Invoked for packets that are steered
 * to a VNIC but the VNIC has not been started yet. Such packets are
 * counted as input errors and dropped.
 */
/* ARGSUSED */
static void
vnic_rx_initial(void *arg1, void *arg2, mblk_t *mp_chain)
{
	vnic_t *vnic = arg1;
	mblk_t *mp;

	/* update stats */
	for (mp = mp_chain; mp != NULL; mp = mp->b_next)
		vnic->vn_stat_ierrors++;
	freemsgchain(mp_chain);
}

/*
 * VNIC receive routine invoked after the classifier for the VNIC
 * has been initialized and the VNIC has been started.
 */
/* ARGSUSED */
void
vnic_rx(void *arg1, void *arg2, mblk_t *mp_chain)
{
	vnic_t *vnic = arg1;
	mblk_t *mp;

	/* update stats */
	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
		vnic->vn_stat_ipackets++;
		vnic->vn_stat_rbytes += msgdsize(mp);
	}

	/* pass packet up */
	mac_rx(vnic->vn_mh, NULL, mp_chain);
}

/*
 * Routine to create a MAC-based VNIC. Adds the passed MAC address
 * to an unused slot in the NIC if one is available. Otherwise it
 * sets the NIC in promiscuous mode and assigns the MAC address to
 * a Rx ring if available or a soft ring.
 *
 * Returns 0 on success, or an errno if neither a hardware slot nor
 * promiscuous mode could be set up.
 */
static int
vnic_add_unicstaddr(vnic_t *vnic, mac_multi_addr_t *maddr)
{
	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
	int err;

	/* reject addresses the underlying MAC considers invalid */
	if (mac_unicst_verify(vnic_mac->va_mh, maddr->mma_addr,
	    maddr->mma_addrlen) == B_FALSE)
		return (EINVAL);

	if (mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_MULTIADDRESS,
	    &(vnic->vn_mma_capab))) {
		if (vnic->vn_maddr_naddrfree == 0) {
			/*
			 * No free address slots available.
			 * Enable promiscuous mode.
			 */
			goto set_promisc;
		}

		err = vnic->vn_maddr_add(vnic->vn_maddr_handle, maddr);
		if (err != 0) {
			if (err == ENOSPC) {
				/*
				 * There was a race to add addresses
				 * with other multiple address consumers,
				 * and we lost out. Use promisc mode.
				 */
				goto set_promisc;
			}

			return (err);
		}

		/* remember the slot so it can be released on delete */
		vnic->vn_slot_id = maddr->mma_slot;
		vnic->vn_multi_mac = B_TRUE;
	} else {
		/*
		 * Either multiple MAC address support is not
		 * available or all available addresses have
		 * been used up.
		 */
	set_promisc:
		if ((err = mac_promisc_set(vnic_mac->va_mh, B_TRUE,
		    MAC_DEVPROMISC)) != 0) {
			return (err);
		}

		vnic->vn_promisc_mac = B_TRUE;
	}
	return (err);
}

/*
 * VNIC is getting deleted. Remove the MAC address from the slot.
696 * If promiscuous mode was being used, then unset the promiscuous mode. 697 */ 698 static int 699 vnic_remove_unicstaddr(vnic_t *vnic) 700 { 701 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 702 int err; 703 704 if (vnic->vn_multi_mac) { 705 ASSERT(vnic->vn_promisc_mac == B_FALSE); 706 err = vnic->vn_maddr_remove(vnic->vn_maddr_handle, 707 vnic->vn_slot_id); 708 vnic->vn_multi_mac = B_FALSE; 709 } 710 711 if (vnic->vn_promisc_mac) { 712 ASSERT(vnic->vn_multi_mac == B_FALSE); 713 err = mac_promisc_set(vnic_mac->va_mh, B_FALSE, MAC_DEVPROMISC); 714 vnic->vn_promisc_mac = B_FALSE; 715 } 716 717 return (err); 718 } 719 720 /* 721 * Create a new VNIC upon request from administrator. 722 * Returns 0 on success, an errno on failure. 723 */ 724 int 725 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, int mac_len, 726 uchar_t *mac_addr) 727 { 728 vnic_t *vnic = NULL; 729 mac_register_t *mac; 730 int err; 731 vnic_mac_t *vnic_mac; 732 const mac_info_t *lower_mac_info; 733 mac_multi_addr_t maddr; 734 mac_txinfo_t tx_info; 735 736 if (mac_len != ETHERADDRL) { 737 /* currently only ethernet NICs are supported */ 738 return (EINVAL); 739 } 740 741 rw_enter(&vnic_lock, RW_WRITER); 742 743 /* does a VNIC with the same id already exist? 
*/ 744 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 745 (mod_hash_val_t *)&vnic); 746 if (err == 0) { 747 rw_exit(&vnic_lock); 748 return (EEXIST); 749 } 750 751 vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP); 752 if (vnic == NULL) { 753 rw_exit(&vnic_lock); 754 return (ENOMEM); 755 } 756 757 /* open underlying MAC */ 758 err = vnic_mac_open(linkid, &vnic_mac); 759 if (err != 0) { 760 kmem_cache_free(vnic_cache, vnic); 761 rw_exit(&vnic_lock); 762 return (err); 763 } 764 765 bzero(vnic, sizeof (*vnic)); 766 vnic->vn_id = vnic_id; 767 vnic->vn_vnic_mac = vnic_mac; 768 769 vnic->vn_started = B_FALSE; 770 vnic->vn_promisc = B_FALSE; 771 vnic->vn_multi_mac = B_FALSE; 772 vnic->vn_bcast_grp = B_FALSE; 773 774 /* set the VNIC MAC address */ 775 maddr.mma_addrlen = mac_len; 776 maddr.mma_slot = 0; 777 maddr.mma_flags = 0; 778 bcopy(mac_addr, maddr.mma_addr, mac_len); 779 if ((err = vnic_add_unicstaddr(vnic, &maddr)) != 0) 780 goto bail; 781 bcopy(mac_addr, vnic->vn_addr, mac_len); 782 783 /* set the initial VNIC capabilities */ 784 if (!mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_HCKSUM, 785 &vnic->vn_hcksum_txflags)) 786 vnic->vn_hcksum_txflags = 0; 787 788 /* register with the MAC module */ 789 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 790 goto bail; 791 792 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 793 mac->m_driver = vnic; 794 mac->m_dip = vnic_get_dip(); 795 mac->m_instance = (uint_t)-1; 796 mac->m_src_addr = vnic->vn_addr; 797 mac->m_callbacks = &vnic_m_callbacks; 798 799 lower_mac_info = mac_info(vnic_mac->va_mh); 800 mac->m_min_sdu = lower_mac_info->mi_sdu_min; 801 mac->m_max_sdu = lower_mac_info->mi_sdu_max; 802 803 /* 804 * As the current margin size of the underlying mac is used to 805 * determine the margin size of the VNIC itself, request the 806 * underlying mac not to change to a smaller margin size. 
807 */ 808 err = mac_margin_add(vnic_mac->va_mh, &(vnic->vn_margin), B_TRUE); 809 if (err != 0) 810 goto bail; 811 mac->m_margin = vnic->vn_margin; 812 err = mac_register(mac, &vnic->vn_mh); 813 mac_free(mac); 814 if (err != 0) { 815 VERIFY(mac_margin_remove(vnic_mac->va_mh, 816 vnic->vn_margin) == 0); 817 goto bail; 818 } 819 820 if ((err = dls_devnet_create(vnic->vn_mh, vnic->vn_id)) != 0) { 821 VERIFY(mac_margin_remove(vnic_mac->va_mh, 822 vnic->vn_margin) == 0); 823 (void) mac_unregister(vnic->vn_mh); 824 goto bail; 825 } 826 827 /* add new VNIC to hash table */ 828 err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id), 829 (mod_hash_val_t)vnic); 830 ASSERT(err == 0); 831 vnic_count++; 832 833 rw_exit(&vnic_lock); 834 835 /* Create a flow, initialized with the MAC address of the VNIC */ 836 if ((vnic->vn_flow_ent = vnic_classifier_flow_create(mac_len, mac_addr, 837 NULL, B_FALSE, KM_SLEEP)) == NULL) { 838 (void) vnic_dev_delete(vnic_id); 839 vnic = NULL; 840 err = ENOMEM; 841 goto bail_unlocked; 842 } 843 844 vnic_classifier_flow_add(vnic_mac, vnic->vn_flow_ent, vnic_rx_initial, 845 vnic, vnic); 846 847 /* setup VNIC to receive broadcast packets */ 848 err = vnic_bcast_add(vnic, vnic_brdcst_mac, MAC_ADDRTYPE_BROADCAST); 849 if (err != 0) { 850 (void) vnic_dev_delete(vnic_id); 851 vnic = NULL; 852 goto bail_unlocked; 853 } 854 vnic->vn_bcast_grp = B_TRUE; 855 856 mutex_enter(&vnic_mac_lock); 857 if (!vnic_mac->va_mac_set) { 858 /* 859 * We want to MAC layer to call the VNIC tx outbound 860 * routine, so that local broadcast packets sent by 861 * the active interface sharing the underlying NIC (if 862 * any), can be broadcast to every VNIC. 
863 */ 864 tx_info.mt_fn = vnic_active_tx; 865 tx_info.mt_arg = vnic_mac; 866 if (!mac_vnic_set(vnic_mac->va_mh, &tx_info, 867 vnic_m_capab_get, vnic)) { 868 mutex_exit(&vnic_mac_lock); 869 (void) vnic_dev_delete(vnic_id); 870 vnic = NULL; 871 err = EBUSY; 872 goto bail_unlocked; 873 } 874 vnic_mac->va_mac_set = B_TRUE; 875 } 876 mutex_exit(&vnic_mac_lock); 877 878 /* allow passing packets to NIC's active MAC client */ 879 if (!vnic_init_active_rx(vnic_mac)) { 880 (void) vnic_dev_delete(vnic_id); 881 vnic = NULL; 882 err = ENOMEM; 883 goto bail_unlocked; 884 } 885 886 return (0); 887 888 bail: 889 (void) vnic_remove_unicstaddr(vnic); 890 vnic_mac_close(vnic_mac); 891 rw_exit(&vnic_lock); 892 893 bail_unlocked: 894 if (vnic != NULL) { 895 kmem_cache_free(vnic_cache, vnic); 896 } 897 898 return (err); 899 } 900 901 /* 902 * Modify the properties of an existing VNIC. 903 */ 904 /* ARGSUSED */ 905 int 906 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask, 907 vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr) 908 { 909 vnic_t *vnic = NULL; 910 int rv = 0; 911 boolean_t notify_mac_addr = B_FALSE; 912 913 rw_enter(&vnic_lock, RW_WRITER); 914 915 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 916 (mod_hash_val_t *)&vnic) != 0) { 917 rw_exit(&vnic_lock); 918 return (ENOENT); 919 } 920 921 if (modify_mask & VNIC_IOC_MODIFY_ADDR) { 922 rv = vnic_modify_mac_addr(vnic, mac_len, mac_addr); 923 if (rv == 0) 924 notify_mac_addr = B_TRUE; 925 } 926 927 rw_exit(&vnic_lock); 928 929 if (notify_mac_addr) 930 mac_unicst_update(vnic->vn_mh, mac_addr); 931 932 return (rv); 933 } 934 935 int 936 vnic_dev_delete(datalink_id_t vnic_id) 937 { 938 vnic_t *vnic = NULL; 939 mod_hash_val_t val; 940 vnic_flow_t *flent; 941 datalink_id_t tmpid; 942 int rc; 943 vnic_mac_t *vnic_mac; 944 945 rw_enter(&vnic_lock, RW_WRITER); 946 947 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 948 (mod_hash_val_t *)&vnic) != 0) { 949 rw_exit(&vnic_lock); 950 return (ENOENT); 
951 } 952 953 if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid)) != 0) { 954 rw_exit(&vnic_lock); 955 return (rc); 956 } 957 958 ASSERT(vnic_id == tmpid); 959 960 /* 961 * We cannot unregister the MAC yet. Unregistering would 962 * free up mac_impl_t which should not happen at this time. 963 * Packets could be entering vnic_rx() through the 964 * flow entry and so mac_impl_t cannot be NULL. So disable 965 * mac_impl_t by calling mac_disable(). This will prevent any 966 * new claims on mac_impl_t. 967 */ 968 if (mac_disable(vnic->vn_mh) != 0) { 969 (void) dls_devnet_create(vnic->vn_mh, vnic_id); 970 rw_exit(&vnic_lock); 971 return (EBUSY); 972 } 973 974 (void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val); 975 ASSERT(vnic == (vnic_t *)val); 976 977 if (vnic->vn_bcast_grp) 978 (void) vnic_bcast_delete(vnic, vnic_brdcst_mac); 979 980 flent = vnic->vn_flow_ent; 981 if (flent != NULL) { 982 /* 983 * vnic_classifier_flow_destroy() ensures that the 984 * flow is no longer used. 985 */ 986 vnic_classifier_flow_remove(vnic->vn_vnic_mac, flent); 987 vnic_classifier_flow_destroy(flent); 988 } 989 990 rc = mac_margin_remove(vnic->vn_vnic_mac->va_mh, vnic->vn_margin); 991 ASSERT(rc == 0); 992 rc = mac_unregister(vnic->vn_mh); 993 ASSERT(rc == 0); 994 (void) vnic_remove_unicstaddr(vnic); 995 vnic_mac = vnic->vn_vnic_mac; 996 kmem_cache_free(vnic_cache, vnic); 997 vnic_count--; 998 rw_exit(&vnic_lock); 999 vnic_mac_close(vnic_mac); 1000 return (0); 1001 } 1002 1003 /* 1004 * For the specified packet chain, return a sub-chain to be sent 1005 * and the transmit function to be used to send the packet. Also 1006 * return a pointer to the sub-chain of packets that should 1007 * be re-classified. If the function returns NULL, the packet 1008 * should be sent using the underlying NIC. 
 */
static vnic_flow_t *
vnic_classify(vnic_mac_t *vnic_mac, mblk_t *mp, mblk_t **mp_chain_rest)
{
	vnic_flow_t *flow_ent;

	/* one packet at a time */
	*mp_chain_rest = mp->b_next;
	mp->b_next = NULL;

	/* do classification on the packet */
	flow_ent = vnic_classifier_get_flow(vnic_mac, mp);

	return (flow_ent);
}

/*
 * Send a packet chain to a local VNIC or an active MAC client.
 */
static void
vnic_local_tx(vnic_mac_t *vnic_mac, vnic_flow_t *flow_ent, mblk_t *mp_chain)
{
	mblk_t *mp1;
	const vnic_flow_fn_info_t *fn_info;
	vnic_t *vnic;

	if (!vnic_classifier_is_active(flow_ent) &&
	    mac_promisc_get(vnic_mac->va_mh, MAC_PROMISC)) {
		/*
		 * If the MAC is in promiscuous mode,
		 * send a copy to the active client.
		 */
		if ((mp1 = vnic_copymsgchain_cksum(mp_chain)) == NULL)
			goto sendit;
		if ((mp1 = vnic_fix_cksum(mp1)) == NULL)
			goto sendit;
		mac_active_rx(vnic_mac->va_mh, NULL, mp1);
	}
sendit:
	fn_info = vnic_classifier_get_fn_info(flow_ent);
	/*
	 * If the vnic to which we would deliver this packet is in
	 * promiscuous mode then it already received the packet via
	 * vnic_promisc_rx().
	 *
	 * XXX assumes that ff_arg2 is a vnic_t pointer if it is
	 * non-NULL (currently always true).
	 */
	vnic = (vnic_t *)fn_info->ff_arg2;
	if ((vnic != NULL) && vnic->vn_promisc)
		freemsg(mp_chain);
	else if ((mp1 = vnic_fix_cksum(mp_chain)) != NULL)
		(fn_info->ff_fn)(fn_info->ff_arg1, fn_info->ff_arg2, mp1);
}

/*
 * This function is invoked when a MAC client needs to send a packet
 * to a NIC which is shared by VNICs. It is passed to the MAC layer
 * by a call to mac_vnic_set() when the NIC is opened, and is returned
 * to MAC clients by mac_tx_get() when VNICs are present.
 */
mblk_t *
vnic_active_tx(void *arg, mblk_t *mp_chain)
{
	vnic_mac_t *vnic_mac = arg;
	mblk_t *mp, *extra_mp = NULL;
	vnic_flow_t *flow_ent;
	void *flow_cookie;
	const mac_txinfo_t *mtp = vnic_mac->va_txinfo;

	for (mp = mp_chain; mp != NULL; mp = extra_mp) {
		mblk_t *next;

		next = mp->b_next;
		mp->b_next = NULL;

		/* let promiscuous VNICs snoop the outbound packet */
		vnic_promisc_rx(vnic_mac, (vnic_t *)-1, mp);

		flow_ent = vnic_classify(vnic_mac, mp, &extra_mp);
		ASSERT(extra_mp == NULL);
		extra_mp = next;

		if (flow_ent != NULL) {
			flow_cookie = vnic_classifier_get_client_cookie(
			    flow_ent);
			if (flow_cookie != NULL) {
				/*
				 * Send a copy to every VNIC defined on the
				 * interface, as well as the underlying MAC.
				 */
				vnic_bcast_send(flow_cookie, (vnic_t *)-1, mp);
			} else {
				/*
				 * loopback the packet to a local VNIC or
				 * an active MAC client.
				 */
				vnic_local_tx(vnic_mac, flow_ent, mp);
			}
			VNIC_FLOW_REFRELE(flow_ent);
			mp_chain = NULL;
		} else {
			/*
			 * Non-VNIC destination, send via the underlying
			 * NIC. In order to avoid a recursive call
			 * to this function, we ensured that mtp points
			 * to the underlying NIC transmit function
			 * by initializing through mac_vnic_tx_get().
			 */
			mp_chain = mtp->mt_fn(mtp->mt_arg, mp);
			/* lower MAC flow-controlled; stop and return rest */
			if (mp_chain != NULL)
				break;
		}
	}

	/* re-attach any unprocessed tail to the unsent packet */
	if ((mp_chain != NULL) && (extra_mp != NULL)) {
		ASSERT(mp_chain->b_next == NULL);
		mp_chain->b_next = extra_mp;
	}
	return (mp_chain);
}

/*
 * VNIC transmit function.
 */
mblk_t *
vnic_m_tx(void *arg, mblk_t *mp_chain)
{
	vnic_t *vnic = arg;
	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
	mblk_t *mp, *extra_mp = NULL;
	vnic_flow_t *flow_ent;
	void *flow_cookie;

	/*
	 * Update stats.
	 */
	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
		vnic->vn_stat_opackets++;
		vnic->vn_stat_obytes += msgdsize(mp);
	}

	for (mp = mp_chain; mp != NULL; mp = extra_mp) {
		mblk_t *next;

		next = mp->b_next;
		mp->b_next = NULL;

		/* let promiscuous VNICs snoop the outbound packet */
		vnic_promisc_rx(vnic->vn_vnic_mac, vnic, mp);

		flow_ent = vnic_classify(vnic->vn_vnic_mac, mp, &extra_mp);
		ASSERT(extra_mp == NULL);
		extra_mp = next;

		if (flow_ent != NULL) {
			flow_cookie = vnic_classifier_get_client_cookie(
			    flow_ent);
			if (flow_cookie != NULL) {
				/*
				 * The vnic_bcast_send function expects
				 * to receive the sender VNIC as value
				 * for arg2.
				 */
				vnic_bcast_send(flow_cookie, vnic, mp);
			} else {
				/*
				 * loopback the packet to a local VNIC or
				 * an active MAC client.
				 */
				vnic_local_tx(vnic_mac, flow_ent, mp);
			}
			VNIC_FLOW_REFRELE(flow_ent);
			mp_chain = NULL;
		} else {
			/*
			 * Non-local destination, send via the underlying
			 * NIC.
			 */
			const mac_txinfo_t *mtp = vnic->vn_txinfo;
			mp_chain = mtp->mt_fn(mtp->mt_arg, mp);
			/* lower MAC flow-controlled; stop here */
			if (mp_chain != NULL)
				break;
		}
	}

	/* update stats to account for unsent packets */
	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
		vnic->vn_stat_opackets--;
		vnic->vn_stat_obytes -= msgdsize(mp);
		vnic->vn_stat_oerrors++;
		/*
		 * link back in the last portion not counted due to bandwidth
		 * control.
		 */
		if (mp->b_next == NULL) {
			mp->b_next = extra_mp;
			break;
		}
	}

	return (mp_chain);
}

/*
 * MC_RESOURCES entry point; VNICs advertise no RX resources of
 * their own.
 */
/* ARGSUSED */
static void
vnic_m_resources(void *arg)
{
	/* no resources to advertise */
}

/*
 * MC_GETSTAT entry point: return the requested statistic for the
 * VNIC. Link speed and duplex are forwarded from the underlying MAC;
 * packet and byte counters are maintained per-VNIC.
 */
static int
vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnic_t *vnic = arg;
	int rval = 0;

	rw_enter(&vnic_lock, RW_READER);

	switch (stat) {
	case ETHER_STAT_LINK_DUPLEX:
		*val = mac_stat_get(vnic->vn_vnic_mac->va_mh,
		    ETHER_STAT_LINK_DUPLEX);
		break;
	case MAC_STAT_IFSPEED:
		*val = mac_stat_get(vnic->vn_vnic_mac->va_mh,
		    MAC_STAT_IFSPEED);
		break;
	case MAC_STAT_MULTIRCV:
		*val = vnic->vn_stat_multircv;
		break;
	case MAC_STAT_BRDCSTRCV:
		*val = vnic->vn_stat_brdcstrcv;
		break;
	case MAC_STAT_MULTIXMT:
		*val = vnic->vn_stat_multixmt;
		break;
	case MAC_STAT_BRDCSTXMT:
		*val = vnic->vn_stat_brdcstxmt;
		break;
	case MAC_STAT_IERRORS:
		*val = vnic->vn_stat_ierrors;
		break;
	case MAC_STAT_OERRORS:
		*val = vnic->vn_stat_oerrors;
		break;
	case MAC_STAT_RBYTES:
		*val = vnic->vn_stat_rbytes;
		break;
	case MAC_STAT_IPACKETS:
		*val = vnic->vn_stat_ipackets;
		break;
	case MAC_STAT_OBYTES:
		*val = vnic->vn_stat_obytes;
		break;
	case MAC_STAT_OPACKETS:
		*val = vnic->vn_stat_opackets;
		break;
	default:
		rval = ENOTSUP;
	}

	rw_exit(&vnic_lock);
	return (rval);
}

/*
 * Return information about the specified capability.
1275 */ 1276 /* ARGSUSED */ 1277 static boolean_t 1278 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 1279 { 1280 vnic_t *vnic = arg; 1281 1282 switch (cap) { 1283 case MAC_CAPAB_POLL: 1284 return (B_TRUE); 1285 case MAC_CAPAB_HCKSUM: { 1286 uint32_t *hcksum_txflags = cap_data; 1287 1288 *hcksum_txflags = vnic->vn_hcksum_txflags & 1289 (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM | 1290 HCKSUM_INET_PARTIAL); 1291 break; 1292 } 1293 default: 1294 return (B_FALSE); 1295 } 1296 return (B_TRUE); 1297 } 1298 1299 static int 1300 vnic_m_start(void *arg) 1301 { 1302 vnic_t *vnic = arg; 1303 mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh; 1304 int rc; 1305 1306 rc = mac_start(lower_mh); 1307 if (rc != 0) 1308 return (rc); 1309 1310 vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx, vnic, vnic); 1311 return (0); 1312 } 1313 1314 static void 1315 vnic_m_stop(void *arg) 1316 { 1317 vnic_t *vnic = arg; 1318 mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh; 1319 1320 vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx_initial, 1321 vnic, vnic); 1322 mac_stop(lower_mh); 1323 } 1324 1325 /* ARGSUSED */ 1326 static int 1327 vnic_m_promisc(void *arg, boolean_t on) 1328 { 1329 vnic_t *vnic = arg; 1330 1331 return (vnic_promisc_set(vnic, on)); 1332 } 1333 1334 static int 1335 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 1336 { 1337 vnic_t *vnic = arg; 1338 int rc = 0; 1339 1340 if (add) 1341 rc = vnic_bcast_add(vnic, addrp, MAC_ADDRTYPE_MULTICAST); 1342 else 1343 vnic_bcast_delete(vnic, addrp); 1344 1345 return (rc); 1346 } 1347 1348 static int 1349 vnic_m_unicst(void *arg, const uint8_t *mac_addr) 1350 { 1351 vnic_t *vnic = arg; 1352 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1353 int rv; 1354 1355 rw_enter(&vnic_lock, RW_WRITER); 1356 rv = vnic_modify_mac_addr(vnic, vnic_mac->va_addr_len, 1357 (uchar_t *)mac_addr); 1358 rw_exit(&vnic_lock); 1359 1360 if (rv == 0) 1361 mac_unicst_update(vnic->vn_mh, mac_addr); 1362 return (0); 1363 } 1364 
1365 int 1366 vnic_info(uint_t *nvnics, datalink_id_t vnic_id, datalink_id_t linkid, 1367 void *fn_arg, vnic_info_new_vnic_fn_t new_vnic_fn) 1368 { 1369 vnic_info_state_t state; 1370 int rc = 0; 1371 1372 rw_enter(&vnic_lock, RW_READER); 1373 1374 *nvnics = vnic_count; 1375 1376 bzero(&state, sizeof (state)); 1377 state.vs_vnic_id = vnic_id; 1378 state.vs_linkid = linkid; 1379 state.vs_new_vnic_fn = new_vnic_fn; 1380 state.vs_fn_arg = fn_arg; 1381 1382 mod_hash_walk(vnic_hash, vnic_info_walker, &state); 1383 1384 if ((rc = state.vs_rc) == 0 && vnic_id != DATALINK_ALL_LINKID && 1385 !state.vs_vnic_found) 1386 rc = ENOENT; 1387 1388 rw_exit(&vnic_lock); 1389 return (rc); 1390 } 1391 1392 /* 1393 * Walker invoked when building a list of vnics that must be passed 1394 * up to user space. 1395 */ 1396 /*ARGSUSED*/ 1397 static uint_t 1398 vnic_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 1399 { 1400 vnic_t *vnic; 1401 vnic_info_state_t *state = arg; 1402 1403 if (state->vs_rc != 0) 1404 return (MH_WALK_TERMINATE); /* terminate walk */ 1405 1406 vnic = (vnic_t *)val; 1407 1408 if (state->vs_vnic_id != DATALINK_ALL_LINKID && 1409 vnic->vn_id != state->vs_vnic_id) { 1410 goto bail; 1411 } 1412 1413 state->vs_vnic_found = B_TRUE; 1414 1415 state->vs_rc = state->vs_new_vnic_fn(state->vs_fn_arg, 1416 vnic->vn_id, vnic->vn_addr_type, vnic->vn_vnic_mac->va_addr_len, 1417 vnic->vn_addr, vnic->vn_vnic_mac->va_linkid); 1418 bail: 1419 return ((state->vs_rc == 0) ? MH_WALK_CONTINUE : MH_WALK_TERMINATE); 1420 } 1421 1422 /* 1423 * vnic_notify_cb() and vnic_notify_walker() below are used to 1424 * process events received from an underlying NIC and, if needed, 1425 * forward these events to the VNICs defined on top of that NIC. 
 */

typedef struct vnic_notify_state {
	mac_notify_type_t	vo_type;	/* event being forwarded */
	vnic_mac_t		*vo_vnic_mac;	/* underlying MAC of interest */
} vnic_notify_state_t;

/*
 * mod_hash walker: forward one NIC event (described by 'arg', a
 * vnic_notify_state_t) to a single VNIC, skipping VNICs built on a
 * different underlying MAC.
 */
/* ARGSUSED */
static uint_t
vnic_notify_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	vnic_t *vnic = (vnic_t *)val;
	vnic_notify_state_t *state = arg;

	/* ignore VNICs that don't use the specified underlying MAC */
	if (vnic->vn_vnic_mac != state->vo_vnic_mac)
		return (MH_WALK_CONTINUE);

	switch (state->vo_type) {
	case MAC_NOTE_TX:
		/* the NIC can transmit again; notify this VNIC's clients */
		mac_tx_update(vnic->vn_mh);
		break;
	case MAC_NOTE_LINK:
		/*
		 * The VNIC link state must be up regardless of
		 * the link state of the underlying NIC to maintain
		 * connectivity between VNICs on the same host.
		 */
		mac_link_update(vnic->vn_mh, LINK_STATE_UP);
		break;
	case MAC_NOTE_UNICST:
		/* the NIC's primary address changed; recompute RX setup */
		vnic_update_active_rx(vnic->vn_vnic_mac);
		break;
	case MAC_NOTE_VNIC:
		/* only for clients which share a NIC with a VNIC */
		break;
	case MAC_NOTE_PROMISC:
		/*
		 * Refresh the cached transmit routine via
		 * mac_vnic_tx_get() -- presumably it depends on the
		 * NIC's promiscuous state; confirm against the MAC
		 * layer.  Other notification types are deliberately
		 * ignored (no default case).
		 */
		mutex_enter(&vnic_mac_lock);
		vnic->vn_vnic_mac->va_txinfo = mac_vnic_tx_get(
		    vnic->vn_vnic_mac->va_mh);
		mutex_exit(&vnic_mac_lock);
		break;
	}

	return (MH_WALK_CONTINUE);
}

/*
 * Notification callback for events from an underlying NIC (see the
 * block comment above); fans the event out to every VNIC via the
 * walker.
 */
static void
vnic_notify_cb(void *arg, mac_notify_type_t type)
{
	vnic_mac_t *vnic = arg;
	vnic_notify_state_t state;

	state.vo_type = type;
	state.vo_vnic_mac = vnic;

	/* vnic_lock protects the vnic_hash walk */
	rw_enter(&vnic_lock, RW_READER);
	mod_hash_walk(vnic_hash, vnic_notify_walker, &state);
	rw_exit(&vnic_lock);
}

/*
 * Change the MAC address recorded in the VNIC's classifier flow.
 * Returns EINVAL when the supplied length doesn't match the underlying
 * MAC's address length.  Caller must hold vnic_lock as writer.
 */
static int
vnic_modify_mac_addr(vnic_t *vnic, uint_t mac_len, uchar_t *mac_addr)
{
	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
	vnic_flow_t *vnic_flow = vnic->vn_flow_ent;

	ASSERT(RW_WRITE_HELD(&vnic_lock));

	if (mac_len != vnic_mac->va_addr_len)
		return (EINVAL);

	vnic_classifier_flow_update_addr(vnic_flow, mac_addr);
	return (0);
}

/*
 * Enable or disable promiscuous mode for a VNIC.  Enabling puts the
 * underlying NIC into device-promiscuous mode and links the VNIC onto
 * the vnic_mac's promiscuous list; disabling reverses both steps.  The
 * va_promisc_gen generation counter is bumped on every list change so
 * that vnic_promisc_rx() can detect concurrent list updates.
 */
static int
vnic_promisc_set(vnic_t *vnic, boolean_t on)
{
	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
	int r = -1;

	/* already in the requested state; nothing to do */
	if (vnic->vn_promisc == on)
		return (0);

	if (on) {
		/* NIC device-promisc first, then link onto the list */
		if ((r = mac_promisc_set(vnic_mac->va_mh, B_TRUE,
		    MAC_DEVPROMISC)) != 0) {
			return (r);
		}

		rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER);
		vnic->vn_promisc_next = vnic_mac->va_promisc;
		vnic_mac->va_promisc = vnic;
		vnic_mac->va_promisc_gen++;

		vnic->vn_promisc = B_TRUE;
		rw_exit(&vnic_mac->va_promisc_lock);

		return (0);
	} else {
		vnic_t *loop, *prev = NULL;

		rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER);
		/* locate this VNIC on the singly-linked promiscuous list */
		loop = vnic_mac->va_promisc;

		while ((loop != NULL) && (loop != vnic)) {
			prev = loop;
			loop = loop->vn_promisc_next;
		}

		/*
		 * Unlink only when found and the NIC accepted leaving
		 * device-promiscuous mode.  NOTE(review): if the VNIC
		 * is not on the list, r stays -1 -- confirm callers
		 * treat that as a failure code.
		 */
		if ((loop != NULL) &&
		    ((r = mac_promisc_set(vnic_mac->va_mh, B_FALSE,
		    MAC_DEVPROMISC)) == 0)) {
			if (prev != NULL)
				prev->vn_promisc_next = loop->vn_promisc_next;
			else
				vnic_mac->va_promisc = loop->vn_promisc_next;
			vnic_mac->va_promisc_gen++;

			vnic->vn_promisc = B_FALSE;
		}
		rw_exit(&vnic_mac->va_promisc_lock);

		return (r);
	}
}

/*
 * Deliver a copy of 'mp' to every promiscuous-mode VNIC on 'vnic_mac'
 * except 'sender' itself.  Unicast packets are delivered only to the
 * VNIC whose address matches the destination; broadcast and multicast
 * packets are delivered unconditionally.  'mp' itself is never
 * consumed -- each recipient gets its own copy.
 */
void
vnic_promisc_rx(vnic_mac_t *vnic_mac, vnic_t *sender, mblk_t *mp)
{
	vnic_t *loop;
	vnic_flow_t *flow;
	const vnic_flow_fn_info_t *fn_info;
	mac_header_info_t hdr_info;
	boolean_t dst_must_match = B_TRUE;

	ASSERT(mp->b_next == NULL);

	rw_enter(&vnic_mac->va_promisc_lock, RW_READER);
	if (vnic_mac->va_promisc == NULL)
		goto done;	/* no promiscuous listeners */

	if (mac_header_info(vnic_mac->va_mh, mp, &hdr_info) != 0)
		goto done;	/* unparsable header; skip delivery */

	/*
	 * If this is broadcast or multicast then the destination
	 * address need not match for us to deliver it.
	 */
	if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) ||
	    (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST))
		dst_must_match = B_FALSE;

	for (loop = vnic_mac->va_promisc;
	    loop != NULL;
	    loop = loop->vn_promisc_next) {
		/* never reflect a packet back to its sender */
		if (loop == sender)
			continue;

		/*
		 * NOTE(review): this compares sizeof (loop->vn_addr) --
		 * the full size of the vn_addr field -- rather than the
		 * MAC's actual address length (va_addr_len); confirm
		 * vn_addr is declared exactly the link-layer address
		 * length.
		 */
		if (dst_must_match &&
		    (bcmp(hdr_info.mhi_daddr, loop->vn_addr,
		    sizeof (loop->vn_addr)) != 0))
			continue;

		flow = loop->vn_flow_ent;
		ASSERT(flow != NULL);

		if (!flow->vf_is_active) {
			mblk_t *copy;
			uint64_t gen;

			if ((copy = vnic_copymsg_cksum(mp)) == NULL)
				break;	/* copy failed; stop delivering */
			/*
			 * Fix up checksums for locally-sent packets --
			 * presumably their checksums may be deferred;
			 * confirm against vnic_fix_cksum()'s contract.
			 */
			if ((sender != NULL) &&
			    ((copy = vnic_fix_cksum(copy)) == NULL))
				break;

			/*
			 * Drop va_promisc_lock around the receive
			 * callback, holding a reference on the flow and
			 * a snapshot of the generation counter.  If the
			 * promiscuous list changed while the lock was
			 * dropped, 'loop' may be stale -- abandon the
			 * walk.
			 */
			VNIC_FLOW_REFHOLD(flow);
			gen = vnic_mac->va_promisc_gen;
			rw_exit(&vnic_mac->va_promisc_lock);

			fn_info = vnic_classifier_get_fn_info(flow);
			(fn_info->ff_fn)(fn_info->ff_arg1,
			    fn_info->ff_arg2, copy);

			VNIC_FLOW_REFRELE(flow);
			rw_enter(&vnic_mac->va_promisc_lock, RW_READER);
			if (vnic_mac->va_promisc_gen != gen)
				break;
		}
	}
done:
	rw_exit(&vnic_mac->va_promisc_lock);
}