1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/sysmacros.h> 30 #include <sys/conf.h> 31 #include <sys/cmn_err.h> 32 #include <sys/list.h> 33 #include <sys/ksynch.h> 34 #include <sys/kmem.h> 35 #include <sys/stream.h> 36 #include <sys/modctl.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/atomic.h> 40 #include <sys/stat.h> 41 #include <sys/modhash.h> 42 #include <sys/strsubr.h> 43 #include <sys/strsun.h> 44 #include <sys/dlpi.h> 45 #include <sys/mac.h> 46 #include <sys/mac_ether.h> 47 #include <sys/dls.h> 48 #include <sys/pattr.h> 49 #include <sys/vnic.h> 50 #include <sys/vnic_impl.h> 51 #include <sys/gld.h> 52 #include <inet/ip.h> 53 #include <inet/ip_impl.h> 54 55 static int vnic_m_start(void *); 56 static void vnic_m_stop(void *); 57 static int vnic_m_promisc(void *, boolean_t); 58 static int vnic_m_multicst(void *, boolean_t, const uint8_t *); 59 static int vnic_m_unicst(void *, const uint8_t *); 60 static int vnic_m_stat(void *, uint_t, uint64_t *); 61 
static void vnic_m_resources(void *); 62 static mblk_t *vnic_m_tx(void *, mblk_t *); 63 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *); 64 static void vnic_mac_free(vnic_mac_t *); 65 static uint_t vnic_info_walker(mod_hash_key_t, mod_hash_val_t *, void *); 66 static void vnic_notify_cb(void *, mac_notify_type_t); 67 static int vnic_modify_mac_addr(vnic_t *, uint_t, uchar_t *); 68 static mblk_t *vnic_active_tx(void *, mblk_t *); 69 static int vnic_promisc_set(vnic_t *, boolean_t); 70 71 static kmem_cache_t *vnic_cache; 72 static kmem_cache_t *vnic_mac_cache; 73 static krwlock_t vnic_lock; 74 static kmutex_t vnic_mac_lock; 75 static uint_t vnic_count; 76 77 /* hash of VNICs (vnic_t's), keyed by VNIC id */ 78 static mod_hash_t *vnic_hash; 79 #define VNIC_HASHSZ 64 80 #define VNIC_HASH_KEY(vnic_id) ((mod_hash_key_t)(uintptr_t)vnic_id) 81 82 /* 83 * Hash of underlying open MACs (vnic_mac_t's), keyed by the string 84 * "<device name><instance number>/<port number>". 85 */ 86 static mod_hash_t *vnic_mac_hash; 87 #define VNIC_MAC_HASHSZ 64 88 89 #define VNIC_MAC_REFHOLD(va) { \ 90 ASSERT(MUTEX_HELD(&vnic_mac_lock)); \ 91 (va)->va_refs++; \ 92 ASSERT((va)->va_refs != 0); \ 93 } 94 95 #define VNIC_MAC_REFRELE(va) { \ 96 ASSERT(MUTEX_HELD(&vnic_mac_lock)); \ 97 ASSERT((va)->va_refs != 0); \ 98 if (--((va)->va_refs) == 0) \ 99 vnic_mac_free(va); \ 100 } 101 102 static uchar_t vnic_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 103 104 /* used by vnic_walker */ 105 typedef struct vnic_info_state { 106 datalink_id_t vs_vnic_id; 107 datalink_id_t vs_linkid; 108 boolean_t vs_vnic_found; 109 vnic_info_new_vnic_fn_t vs_new_vnic_fn; 110 void *vs_fn_arg; 111 int vs_rc; 112 } vnic_info_state_t; 113 114 #define VNIC_M_CALLBACK_FLAGS (MC_RESOURCES | MC_GETCAPAB) 115 116 static mac_callbacks_t vnic_m_callbacks = { 117 VNIC_M_CALLBACK_FLAGS, 118 vnic_m_stat, 119 vnic_m_start, 120 vnic_m_stop, 121 vnic_m_promisc, 122 vnic_m_multicst, 123 vnic_m_unicst, 124 vnic_m_tx, 
125 vnic_m_resources, 126 NULL, /* m_ioctl */ 127 vnic_m_capab_get 128 }; 129 130 /* ARGSUSED */ 131 static int 132 vnic_mac_ctor(void *buf, void *arg, int kmflag) 133 { 134 vnic_mac_t *vnic_mac = buf; 135 136 bzero(vnic_mac, sizeof (vnic_mac_t)); 137 rw_init(&vnic_mac->va_bcast_grp_lock, NULL, RW_DRIVER, NULL); 138 rw_init(&vnic_mac->va_promisc_lock, NULL, RW_DRIVER, NULL); 139 140 return (0); 141 } 142 143 /* ARGSUSED */ 144 static void 145 vnic_mac_dtor(void *buf, void *arg) 146 { 147 vnic_mac_t *vnic_mac = buf; 148 149 rw_destroy(&vnic_mac->va_promisc_lock); 150 rw_destroy(&vnic_mac->va_bcast_grp_lock); 151 } 152 153 void 154 vnic_dev_init(void) 155 { 156 vnic_cache = kmem_cache_create("vnic_cache", 157 sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 158 159 vnic_mac_cache = kmem_cache_create("vnic_mac_cache", 160 sizeof (vnic_mac_t), 0, vnic_mac_ctor, vnic_mac_dtor, 161 NULL, NULL, NULL, 0); 162 163 vnic_hash = mod_hash_create_idhash("vnic_hash", 164 VNIC_HASHSZ, mod_hash_null_valdtor); 165 166 vnic_mac_hash = mod_hash_create_idhash("vnic_mac_hash", 167 VNIC_MAC_HASHSZ, mod_hash_null_valdtor); 168 169 rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL); 170 171 mutex_init(&vnic_mac_lock, NULL, MUTEX_DEFAULT, NULL); 172 173 vnic_count = 0; 174 } 175 176 void 177 vnic_dev_fini(void) 178 { 179 ASSERT(vnic_count == 0); 180 181 mutex_destroy(&vnic_mac_lock); 182 rw_destroy(&vnic_lock); 183 mod_hash_destroy_idhash(vnic_mac_hash); 184 mod_hash_destroy_idhash(vnic_hash); 185 kmem_cache_destroy(vnic_mac_cache); 186 kmem_cache_destroy(vnic_cache); 187 } 188 189 uint_t 190 vnic_dev_count(void) 191 { 192 return (vnic_count); 193 } 194 195 static int 196 vnic_mac_open(datalink_id_t linkid, vnic_mac_t **vmp) 197 { 198 int err; 199 vnic_mac_t *vnic_mac = NULL; 200 const mac_info_t *mip; 201 202 *vmp = NULL; 203 204 mutex_enter(&vnic_mac_lock); 205 206 err = mod_hash_find(vnic_mac_hash, (mod_hash_key_t)(uintptr_t)linkid, 207 (mod_hash_val_t *)&vnic_mac); 208 if (err == 0) { 209 
/* this MAC is already opened, increment reference count */ 210 VNIC_MAC_REFHOLD(vnic_mac); 211 mutex_exit(&vnic_mac_lock); 212 *vmp = vnic_mac; 213 return (0); 214 } 215 216 vnic_mac = kmem_cache_alloc(vnic_mac_cache, KM_SLEEP); 217 if ((err = mac_open_by_linkid(linkid, &vnic_mac->va_mh)) != 0) { 218 vnic_mac->va_mh = NULL; 219 goto bail; 220 } 221 222 /* 223 * For now, we do not support VNICs over legacy drivers. This will 224 * soon be changed. 225 */ 226 if (mac_is_legacy(vnic_mac->va_mh)) { 227 err = ENOTSUP; 228 goto bail; 229 } 230 231 /* only ethernet support, for now */ 232 mip = mac_info(vnic_mac->va_mh); 233 if (mip->mi_media != DL_ETHER) { 234 err = ENOTSUP; 235 goto bail; 236 } 237 if (mip->mi_media != mip->mi_nativemedia) { 238 err = ENOTSUP; 239 goto bail; 240 } 241 242 vnic_mac->va_linkid = linkid; 243 244 /* add entry to hash table */ 245 err = mod_hash_insert(vnic_mac_hash, (mod_hash_key_t)(uintptr_t)linkid, 246 (mod_hash_val_t)vnic_mac); 247 ASSERT(err == 0); 248 249 /* initialize the flow table associated with lower MAC */ 250 vnic_mac->va_addr_len = ETHERADDRL; 251 (void) vnic_classifier_flow_tab_init(vnic_mac, vnic_mac->va_addr_len, 252 KM_SLEEP); 253 254 vnic_mac->va_txinfo = mac_vnic_tx_get(vnic_mac->va_mh); 255 vnic_mac->va_notify_hdl = mac_notify_add(vnic_mac->va_mh, 256 vnic_notify_cb, vnic_mac); 257 258 VNIC_MAC_REFHOLD(vnic_mac); 259 *vmp = vnic_mac; 260 mutex_exit(&vnic_mac_lock); 261 return (0); 262 263 bail: 264 if (vnic_mac != NULL) { 265 if (vnic_mac->va_mh != NULL) 266 mac_close(vnic_mac->va_mh); 267 kmem_cache_free(vnic_mac_cache, vnic_mac); 268 } 269 mutex_exit(&vnic_mac_lock); 270 return (err); 271 } 272 273 /* 274 * Create a new flow for the active MAC client sharing the NIC 275 * with the VNICs. This allows the unicast packets for that NIC 276 * to be classified and passed up to the active MAC client. 
It 277 * also allows packets sent from a VNIC to the active link to 278 * be classified by the VNIC transmit function and delivered via 279 * the MAC module locally. Returns B_TRUE on success, B_FALSE on 280 * failure. 281 */ 282 static int 283 vnic_init_active_rx(vnic_mac_t *vnic_mac) 284 { 285 uchar_t nic_mac_addr[MAXMACADDRLEN]; 286 287 if (vnic_mac->va_active_flow != NULL) 288 return (B_TRUE); 289 290 mac_unicst_get(vnic_mac->va_mh, nic_mac_addr); 291 292 vnic_mac->va_active_flow = vnic_classifier_flow_create( 293 vnic_mac->va_addr_len, nic_mac_addr, NULL, B_TRUE, KM_SLEEP); 294 295 vnic_classifier_flow_add(vnic_mac, vnic_mac->va_active_flow, 296 (vnic_rx_fn_t)mac_active_rx, vnic_mac->va_mh, NULL); 297 return (B_TRUE); 298 } 299 300 static void 301 vnic_fini_active_rx(vnic_mac_t *vnic_mac) 302 { 303 if (vnic_mac->va_active_flow == NULL) 304 return; 305 306 vnic_classifier_flow_remove(vnic_mac, vnic_mac->va_active_flow); 307 vnic_classifier_flow_destroy(vnic_mac->va_active_flow); 308 vnic_mac->va_active_flow = NULL; 309 } 310 311 static void 312 vnic_update_active_rx(vnic_mac_t *vnic_mac) 313 { 314 if (vnic_mac->va_active_flow == NULL) 315 return; 316 317 vnic_fini_active_rx(vnic_mac); 318 (void) vnic_init_active_rx(vnic_mac); 319 } 320 321 /* 322 * Copy an mblk, preserving its hardware checksum flags. 323 */ 324 mblk_t * 325 vnic_copymsg_cksum(mblk_t *mp) 326 { 327 mblk_t *mp1; 328 uint32_t start, stuff, end, value, flags; 329 330 mp1 = copymsg(mp); 331 if (mp1 == NULL) 332 return (NULL); 333 334 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 335 (void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value, 336 flags, KM_NOSLEEP); 337 338 return (mp1); 339 } 340 341 /* 342 * Copy an mblk chain, presenting the hardware checksum flags of the 343 * individual mblks. 
344 */ 345 mblk_t * 346 vnic_copymsgchain_cksum(mblk_t *mp) 347 { 348 mblk_t *nmp = NULL; 349 mblk_t **nmpp = &nmp; 350 351 for (; mp != NULL; mp = mp->b_next) { 352 if ((*nmpp = vnic_copymsg_cksum(mp)) == NULL) { 353 freemsgchain(nmp); 354 return (NULL); 355 } 356 357 nmpp = &((*nmpp)->b_next); 358 } 359 360 return (nmp); 361 } 362 363 364 /* 365 * Process the specified mblk chain for proper handling of hardware 366 * checksum offload. This routine is invoked for loopback VNIC traffic. 367 * The function handles a NULL mblk chain passed as argument. 368 */ 369 mblk_t * 370 vnic_fix_cksum(mblk_t *mp_chain) 371 { 372 mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1; 373 uint32_t flags, start, stuff, end, value; 374 375 for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) { 376 uint16_t len; 377 uint32_t offset; 378 struct ether_header *ehp; 379 uint16_t sap; 380 381 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, 382 &flags); 383 if (flags == 0) 384 continue; 385 386 /* 387 * Since the processing of checksum offload for loopback 388 * traffic requires modification of the packet contents, 389 * ensure sure that we are always modifying our own copy. 390 */ 391 if (DB_REF(mp) > 1) { 392 mp1 = copymsg(mp); 393 if (mp1 == NULL) 394 continue; 395 mp1->b_next = mp->b_next; 396 mp->b_next = NULL; 397 freemsg(mp); 398 if (prev != NULL) 399 prev->b_next = mp1; 400 else 401 new_chain = mp1; 402 mp = mp1; 403 } 404 405 /* 406 * Ethernet, and optionally VLAN header. 
407 */ 408 /*LINTED*/ 409 ehp = (struct ether_header *)mp->b_rptr; 410 if (ntohs(ehp->ether_type) == VLAN_TPID) { 411 struct ether_vlan_header *evhp; 412 413 ASSERT(MBLKL(mp) >= 414 sizeof (struct ether_vlan_header)); 415 /*LINTED*/ 416 evhp = (struct ether_vlan_header *)mp->b_rptr; 417 sap = ntohs(evhp->ether_type); 418 offset = sizeof (struct ether_vlan_header); 419 } else { 420 sap = ntohs(ehp->ether_type); 421 offset = sizeof (struct ether_header); 422 } 423 424 if (MBLKL(mp) <= offset) { 425 offset -= MBLKL(mp); 426 if (mp->b_cont == NULL) { 427 /* corrupted packet, skip it */ 428 if (prev != NULL) 429 prev->b_next = mp->b_next; 430 else 431 new_chain = mp->b_next; 432 mp1 = mp->b_next; 433 mp->b_next = NULL; 434 freemsg(mp); 435 mp = mp1; 436 continue; 437 } 438 mp = mp->b_cont; 439 } 440 441 if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) { 442 ipha_t *ipha = NULL; 443 444 /* 445 * In order to compute the full and header 446 * checksums, we need to find and parse 447 * the IP and/or ULP headers. 448 */ 449 450 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 451 452 /* 453 * IP header. 454 */ 455 if (sap != ETHERTYPE_IP) 456 continue; 457 458 ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t)); 459 /*LINTED*/ 460 ipha = (ipha_t *)(mp->b_rptr + offset); 461 462 if (flags & HCK_FULLCKSUM) { 463 ipaddr_t src, dst; 464 uint32_t cksum; 465 uint16_t *up; 466 uint8_t proto; 467 468 /* 469 * Pointer to checksum field in ULP header. 470 */ 471 proto = ipha->ipha_protocol; 472 ASSERT(ipha->ipha_version_and_hdr_length == 473 IP_SIMPLE_HDR_VERSION); 474 if (proto == IPPROTO_TCP) { 475 /*LINTED*/ 476 up = IPH_TCPH_CHECKSUMP(ipha, 477 IP_SIMPLE_HDR_LENGTH); 478 } else { 479 ASSERT(proto == IPPROTO_UDP); 480 /*LINTED*/ 481 up = IPH_UDPH_CHECKSUMP(ipha, 482 IP_SIMPLE_HDR_LENGTH); 483 } 484 485 /* 486 * Pseudo-header checksum. 
487 */ 488 src = ipha->ipha_src; 489 dst = ipha->ipha_dst; 490 len = ntohs(ipha->ipha_length) - 491 IP_SIMPLE_HDR_LENGTH; 492 493 cksum = (dst >> 16) + (dst & 0xFFFF) + 494 (src >> 16) + (src & 0xFFFF); 495 cksum += htons(len); 496 497 /* 498 * The checksum value stored in the packet needs 499 * to be correct. Compute it here. 500 */ 501 *up = 0; 502 cksum += (((proto) == IPPROTO_UDP) ? 503 IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP); 504 cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH + 505 offset, cksum); 506 *(up) = (uint16_t)(cksum ? cksum : ~cksum); 507 508 flags |= HCK_FULLCKSUM_OK; 509 value = 0xffff; 510 } 511 512 if (flags & HCK_IPV4_HDRCKSUM) { 513 ASSERT(ipha != NULL); 514 ipha->ipha_hdr_checksum = 515 (uint16_t)ip_csum_hdr(ipha); 516 } 517 } 518 519 if (flags & HCK_PARTIALCKSUM) { 520 uint16_t *up, partial, cksum; 521 uchar_t *ipp; /* ptr to beginning of IP header */ 522 523 if (mp->b_cont != NULL) { 524 mblk_t *mp1; 525 526 mp1 = msgpullup(mp, offset + end); 527 if (mp1 == NULL) 528 continue; 529 mp1->b_next = mp->b_next; 530 mp->b_next = NULL; 531 freemsg(mp); 532 if (prev != NULL) 533 prev->b_next = mp1; 534 else 535 new_chain = mp1; 536 mp = mp1; 537 } 538 539 ipp = mp->b_rptr + offset; 540 /*LINTED*/ 541 up = (uint16_t *)((uchar_t *)ipp + stuff); 542 partial = *up; 543 *up = 0; 544 545 cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start, 546 end - start, partial); 547 cksum = ~cksum; 548 *up = cksum ? cksum : ~cksum; 549 550 /* 551 * Since we already computed the whole checksum, 552 * indicate to the stack that it has already 553 * been verified by the hardware. 
554 */ 555 flags &= ~HCK_PARTIALCKSUM; 556 flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK); 557 value = 0xffff; 558 } 559 560 (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, 561 value, flags, KM_NOSLEEP); 562 } 563 564 return (new_chain); 565 } 566 567 static void 568 vnic_mac_close(vnic_mac_t *vnic_mac) 569 { 570 mutex_enter(&vnic_mac_lock); 571 VNIC_MAC_REFRELE(vnic_mac); 572 mutex_exit(&vnic_mac_lock); 573 } 574 575 static void 576 vnic_mac_free(vnic_mac_t *vnic_mac) 577 { 578 mod_hash_val_t val; 579 580 ASSERT(MUTEX_HELD(&vnic_mac_lock)); 581 vnic_fini_active_rx(vnic_mac); 582 mac_notify_remove(vnic_mac->va_mh, vnic_mac->va_notify_hdl); 583 if (vnic_mac->va_mac_set) { 584 vnic_mac->va_mac_set = B_FALSE; 585 mac_vnic_clear(vnic_mac->va_mh); 586 } 587 vnic_classifier_flow_tab_fini(vnic_mac); 588 mac_close(vnic_mac->va_mh); 589 590 (void) mod_hash_remove(vnic_mac_hash, 591 (mod_hash_key_t)(uintptr_t)vnic_mac->va_linkid, &val); 592 ASSERT(vnic_mac == (vnic_mac_t *)val); 593 594 kmem_cache_free(vnic_mac_cache, vnic_mac); 595 } 596 597 /* 598 * Initial VNIC receive routine. Invoked for packets that are steered 599 * to a VNIC but the VNIC has not been started yet. 600 */ 601 /* ARGSUSED */ 602 static void 603 vnic_rx_initial(void *arg1, void *arg2, mblk_t *mp_chain) 604 { 605 vnic_t *vnic = arg1; 606 mblk_t *mp; 607 608 /* update stats */ 609 for (mp = mp_chain; mp != NULL; mp = mp->b_next) 610 vnic->vn_stat_ierrors++; 611 freemsgchain(mp_chain); 612 } 613 614 /* 615 * VNIC receive routine invoked after the classifier for the VNIC 616 * has been initialized and the VNIC has been started. 
617 */ 618 /* ARGSUSED */ 619 void 620 vnic_rx(void *arg1, void *arg2, mblk_t *mp_chain) 621 { 622 vnic_t *vnic = arg1; 623 mblk_t *mp; 624 625 /* update stats */ 626 for (mp = mp_chain; mp != NULL; mp = mp->b_next) { 627 vnic->vn_stat_ipackets++; 628 vnic->vn_stat_rbytes += msgdsize(mp); 629 } 630 631 /* pass packet up */ 632 mac_rx(vnic->vn_mh, NULL, mp_chain); 633 } 634 635 /* 636 * Routine to create a MAC-based VNIC. Adds the passed MAC address 637 * to an unused slot in the NIC if one is available. Otherwise it 638 * sets the NIC in promiscuous mode and assigns the MAC address to 639 * a Rx ring if available or a soft ring. 640 */ 641 static int 642 vnic_add_unicstaddr(vnic_t *vnic, mac_multi_addr_t *maddr) 643 { 644 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 645 int err; 646 647 if (mac_unicst_verify(vnic_mac->va_mh, maddr->mma_addr, 648 maddr->mma_addrlen) == B_FALSE) 649 return (EINVAL); 650 651 if (mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_MULTIADDRESS, 652 &(vnic->vn_mma_capab))) { 653 if (vnic->vn_maddr_naddrfree == 0) { 654 /* 655 * No free address slots available. 656 * Enable promiscuous mode. 657 */ 658 goto set_promisc; 659 } 660 661 err = vnic->vn_maddr_add(vnic->vn_maddr_handle, maddr); 662 if (err != 0) { 663 if (err == ENOSPC) { 664 /* 665 * There was a race to add addresses 666 * with other multiple address consumers, 667 * and we lost out. Use promisc mode. 668 */ 669 goto set_promisc; 670 } 671 672 return (err); 673 } 674 675 vnic->vn_slot_id = maddr->mma_slot; 676 vnic->vn_multi_mac = B_TRUE; 677 } else { 678 /* 679 * Either multiple MAC address support is not 680 * available or all available addresses have 681 * been used up. 682 */ 683 set_promisc: 684 if ((err = mac_promisc_set(vnic_mac->va_mh, B_TRUE, 685 MAC_DEVPROMISC)) != 0) { 686 return (err); 687 } 688 689 vnic->vn_promisc_mac = B_TRUE; 690 } 691 return (err); 692 } 693 694 /* 695 * VNIC is getting deleted. Remove the MAC address from the slot. 
696 * If promiscuous mode was being used, then unset the promiscuous mode. 697 */ 698 static int 699 vnic_remove_unicstaddr(vnic_t *vnic) 700 { 701 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 702 int err; 703 704 if (vnic->vn_multi_mac) { 705 ASSERT(vnic->vn_promisc_mac == B_FALSE); 706 err = vnic->vn_maddr_remove(vnic->vn_maddr_handle, 707 vnic->vn_slot_id); 708 vnic->vn_multi_mac = B_FALSE; 709 } 710 711 if (vnic->vn_promisc_mac) { 712 ASSERT(vnic->vn_multi_mac == B_FALSE); 713 err = mac_promisc_set(vnic_mac->va_mh, B_FALSE, MAC_DEVPROMISC); 714 vnic->vn_promisc_mac = B_FALSE; 715 } 716 717 return (err); 718 } 719 720 /* 721 * Create a new VNIC upon request from administrator. 722 * Returns 0 on success, an errno on failure. 723 */ 724 int 725 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, int mac_len, 726 uchar_t *mac_addr) 727 { 728 vnic_t *vnic = NULL; 729 mac_register_t *mac; 730 int err; 731 vnic_mac_t *vnic_mac; 732 mac_multi_addr_t maddr; 733 mac_txinfo_t tx_info; 734 735 if (mac_len != ETHERADDRL) { 736 /* currently only ethernet NICs are supported */ 737 return (EINVAL); 738 } 739 740 rw_enter(&vnic_lock, RW_WRITER); 741 742 /* does a VNIC with the same id already exist? 
*/ 743 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 744 (mod_hash_val_t *)&vnic); 745 if (err == 0) { 746 rw_exit(&vnic_lock); 747 return (EEXIST); 748 } 749 750 vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP); 751 if (vnic == NULL) { 752 rw_exit(&vnic_lock); 753 return (ENOMEM); 754 } 755 756 /* open underlying MAC */ 757 err = vnic_mac_open(linkid, &vnic_mac); 758 if (err != 0) { 759 kmem_cache_free(vnic_cache, vnic); 760 rw_exit(&vnic_lock); 761 return (err); 762 } 763 764 bzero(vnic, sizeof (*vnic)); 765 vnic->vn_id = vnic_id; 766 vnic->vn_vnic_mac = vnic_mac; 767 768 vnic->vn_started = B_FALSE; 769 vnic->vn_promisc = B_FALSE; 770 vnic->vn_multi_mac = B_FALSE; 771 vnic->vn_bcast_grp = B_FALSE; 772 773 /* set the VNIC MAC address */ 774 maddr.mma_addrlen = mac_len; 775 maddr.mma_slot = 0; 776 maddr.mma_flags = 0; 777 bcopy(mac_addr, maddr.mma_addr, mac_len); 778 if ((err = vnic_add_unicstaddr(vnic, &maddr)) != 0) 779 goto bail; 780 bcopy(mac_addr, vnic->vn_addr, mac_len); 781 782 /* set the initial VNIC capabilities */ 783 if (!mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_HCKSUM, 784 &vnic->vn_hcksum_txflags)) 785 vnic->vn_hcksum_txflags = 0; 786 787 /* register with the MAC module */ 788 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 789 goto bail; 790 791 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 792 mac->m_driver = vnic; 793 mac->m_dip = vnic_get_dip(); 794 mac->m_instance = (uint_t)-1; 795 mac->m_src_addr = vnic->vn_addr; 796 mac->m_callbacks = &vnic_m_callbacks; 797 798 mac_sdu_get(vnic_mac->va_mh, &mac->m_min_sdu, &mac->m_max_sdu); 799 800 /* 801 * As the current margin size of the underlying mac is used to 802 * determine the margin size of the VNIC itself, request the 803 * underlying mac not to change to a smaller margin size. 
804 */ 805 err = mac_margin_add(vnic_mac->va_mh, &(vnic->vn_margin), B_TRUE); 806 if (err != 0) 807 goto bail; 808 mac->m_margin = vnic->vn_margin; 809 err = mac_register(mac, &vnic->vn_mh); 810 mac_free(mac); 811 if (err != 0) { 812 VERIFY(mac_margin_remove(vnic_mac->va_mh, 813 vnic->vn_margin) == 0); 814 goto bail; 815 } 816 817 if ((err = dls_devnet_create(vnic->vn_mh, vnic->vn_id)) != 0) { 818 VERIFY(mac_margin_remove(vnic_mac->va_mh, 819 vnic->vn_margin) == 0); 820 (void) mac_unregister(vnic->vn_mh); 821 goto bail; 822 } 823 824 /* add new VNIC to hash table */ 825 err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id), 826 (mod_hash_val_t)vnic); 827 ASSERT(err == 0); 828 vnic_count++; 829 830 rw_exit(&vnic_lock); 831 832 /* Create a flow, initialized with the MAC address of the VNIC */ 833 if ((vnic->vn_flow_ent = vnic_classifier_flow_create(mac_len, mac_addr, 834 NULL, B_FALSE, KM_SLEEP)) == NULL) { 835 (void) vnic_dev_delete(vnic_id); 836 vnic = NULL; 837 err = ENOMEM; 838 goto bail_unlocked; 839 } 840 841 vnic_classifier_flow_add(vnic_mac, vnic->vn_flow_ent, vnic_rx_initial, 842 vnic, vnic); 843 844 /* setup VNIC to receive broadcast packets */ 845 err = vnic_bcast_add(vnic, vnic_brdcst_mac, MAC_ADDRTYPE_BROADCAST); 846 if (err != 0) { 847 (void) vnic_dev_delete(vnic_id); 848 vnic = NULL; 849 goto bail_unlocked; 850 } 851 vnic->vn_bcast_grp = B_TRUE; 852 853 mutex_enter(&vnic_mac_lock); 854 if (!vnic_mac->va_mac_set) { 855 /* 856 * We want to MAC layer to call the VNIC tx outbound 857 * routine, so that local broadcast packets sent by 858 * the active interface sharing the underlying NIC (if 859 * any), can be broadcast to every VNIC. 
860 */ 861 tx_info.mt_fn = vnic_active_tx; 862 tx_info.mt_arg = vnic_mac; 863 if (!mac_vnic_set(vnic_mac->va_mh, &tx_info, 864 vnic_m_capab_get, vnic)) { 865 mutex_exit(&vnic_mac_lock); 866 (void) vnic_dev_delete(vnic_id); 867 vnic = NULL; 868 err = EBUSY; 869 goto bail_unlocked; 870 } 871 vnic_mac->va_mac_set = B_TRUE; 872 } 873 mutex_exit(&vnic_mac_lock); 874 875 /* allow passing packets to NIC's active MAC client */ 876 if (!vnic_init_active_rx(vnic_mac)) { 877 (void) vnic_dev_delete(vnic_id); 878 vnic = NULL; 879 err = ENOMEM; 880 goto bail_unlocked; 881 } 882 883 return (0); 884 885 bail: 886 (void) vnic_remove_unicstaddr(vnic); 887 vnic_mac_close(vnic_mac); 888 rw_exit(&vnic_lock); 889 890 bail_unlocked: 891 if (vnic != NULL) { 892 kmem_cache_free(vnic_cache, vnic); 893 } 894 895 return (err); 896 } 897 898 /* 899 * Modify the properties of an existing VNIC. 900 */ 901 /* ARGSUSED */ 902 int 903 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask, 904 vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr) 905 { 906 vnic_t *vnic = NULL; 907 int rv = 0; 908 boolean_t notify_mac_addr = B_FALSE; 909 910 rw_enter(&vnic_lock, RW_WRITER); 911 912 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 913 (mod_hash_val_t *)&vnic) != 0) { 914 rw_exit(&vnic_lock); 915 return (ENOENT); 916 } 917 918 if (modify_mask & VNIC_IOC_MODIFY_ADDR) { 919 rv = vnic_modify_mac_addr(vnic, mac_len, mac_addr); 920 if (rv == 0) 921 notify_mac_addr = B_TRUE; 922 } 923 924 rw_exit(&vnic_lock); 925 926 if (notify_mac_addr) 927 mac_unicst_update(vnic->vn_mh, mac_addr); 928 929 return (rv); 930 } 931 932 int 933 vnic_dev_delete(datalink_id_t vnic_id) 934 { 935 vnic_t *vnic = NULL; 936 mod_hash_val_t val; 937 vnic_flow_t *flent; 938 datalink_id_t tmpid; 939 int rc; 940 vnic_mac_t *vnic_mac; 941 942 rw_enter(&vnic_lock, RW_WRITER); 943 944 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 945 (mod_hash_val_t *)&vnic) != 0) { 946 rw_exit(&vnic_lock); 947 return (ENOENT); 
948 } 949 950 if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid)) != 0) { 951 rw_exit(&vnic_lock); 952 return (rc); 953 } 954 955 ASSERT(vnic_id == tmpid); 956 957 /* 958 * We cannot unregister the MAC yet. Unregistering would 959 * free up mac_impl_t which should not happen at this time. 960 * Packets could be entering vnic_rx() through the 961 * flow entry and so mac_impl_t cannot be NULL. So disable 962 * mac_impl_t by calling mac_disable(). This will prevent any 963 * new claims on mac_impl_t. 964 */ 965 if (mac_disable(vnic->vn_mh) != 0) { 966 (void) dls_devnet_create(vnic->vn_mh, vnic_id); 967 rw_exit(&vnic_lock); 968 return (EBUSY); 969 } 970 971 (void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val); 972 ASSERT(vnic == (vnic_t *)val); 973 974 if (vnic->vn_bcast_grp) 975 (void) vnic_bcast_delete(vnic, vnic_brdcst_mac); 976 977 flent = vnic->vn_flow_ent; 978 if (flent != NULL) { 979 /* 980 * vnic_classifier_flow_destroy() ensures that the 981 * flow is no longer used. 982 */ 983 vnic_classifier_flow_remove(vnic->vn_vnic_mac, flent); 984 vnic_classifier_flow_destroy(flent); 985 } 986 987 rc = mac_margin_remove(vnic->vn_vnic_mac->va_mh, vnic->vn_margin); 988 ASSERT(rc == 0); 989 rc = mac_unregister(vnic->vn_mh); 990 ASSERT(rc == 0); 991 (void) vnic_remove_unicstaddr(vnic); 992 vnic_mac = vnic->vn_vnic_mac; 993 kmem_cache_free(vnic_cache, vnic); 994 vnic_count--; 995 rw_exit(&vnic_lock); 996 vnic_mac_close(vnic_mac); 997 return (0); 998 } 999 1000 /* 1001 * For the specified packet chain, return a sub-chain to be sent 1002 * and the transmit function to be used to send the packet. Also 1003 * return a pointer to the sub-chain of packets that should 1004 * be re-classified. If the function returns NULL, the packet 1005 * should be sent using the underlying NIC. 
1006 */ 1007 static vnic_flow_t * 1008 vnic_classify(vnic_mac_t *vnic_mac, mblk_t *mp, mblk_t **mp_chain_rest) 1009 { 1010 vnic_flow_t *flow_ent; 1011 1012 /* one packet at a time */ 1013 *mp_chain_rest = mp->b_next; 1014 mp->b_next = NULL; 1015 1016 /* do classification on the packet */ 1017 flow_ent = vnic_classifier_get_flow(vnic_mac, mp); 1018 1019 return (flow_ent); 1020 } 1021 1022 /* 1023 * Send a packet chain to a local VNIC or an active MAC client. 1024 */ 1025 static void 1026 vnic_local_tx(vnic_mac_t *vnic_mac, vnic_flow_t *flow_ent, mblk_t *mp_chain) 1027 { 1028 mblk_t *mp1; 1029 const vnic_flow_fn_info_t *fn_info; 1030 vnic_t *vnic; 1031 1032 if (!vnic_classifier_is_active(flow_ent) && 1033 mac_promisc_get(vnic_mac->va_mh, MAC_PROMISC)) { 1034 /* 1035 * If the MAC is in promiscous mode, 1036 * send a copy of the active client. 1037 */ 1038 if ((mp1 = vnic_copymsgchain_cksum(mp_chain)) == NULL) 1039 goto sendit; 1040 if ((mp1 = vnic_fix_cksum(mp1)) == NULL) 1041 goto sendit; 1042 mac_active_rx(vnic_mac->va_mh, NULL, mp1); 1043 } 1044 sendit: 1045 fn_info = vnic_classifier_get_fn_info(flow_ent); 1046 /* 1047 * If the vnic to which we would deliver this packet is in 1048 * promiscuous mode then it already received the packet via 1049 * vnic_promisc_rx(). 1050 * 1051 * XXX assumes that ff_arg2 is a vnic_t pointer if it is 1052 * non-NULL (currently always true). 1053 */ 1054 vnic = (vnic_t *)fn_info->ff_arg2; 1055 if ((vnic != NULL) && vnic->vn_promisc) 1056 freemsg(mp_chain); 1057 else if ((mp1 = vnic_fix_cksum(mp_chain)) != NULL) 1058 (fn_info->ff_fn)(fn_info->ff_arg1, fn_info->ff_arg2, mp1); 1059 } 1060 1061 /* 1062 * This function is invoked when a MAC client needs to send a packet 1063 * to a NIC which is shared by VNICs. It is passed to the MAC layer 1064 * by a call to mac_vnic_set() when the NIC is opened, and is returned 1065 * to MAC clients by mac_tx_get() when VNICs are present. 
*/
mblk_t *
vnic_active_tx(void *arg, mblk_t *mp_chain)
{
	vnic_mac_t *vnic_mac = arg;
	mblk_t *mp, *extra_mp = NULL;
	vnic_flow_t *flow_ent;
	void *flow_cookie;
	const mac_txinfo_t *mtp = vnic_mac->va_txinfo;

	/*
	 * Packets are handled one at a time; extra_mp always points to
	 * the part of the chain that has not been looked at yet.
	 */
	for (mp = mp_chain; mp != NULL; mp = extra_mp) {
		mblk_t *next;

		/* detach the packet from the rest of the chain */
		next = mp->b_next;
		mp->b_next = NULL;

		vnic_promisc_rx(vnic_mac, (vnic_t *)-1, mp);

		flow_ent = vnic_classify(vnic_mac, mp, &extra_mp);
		ASSERT(extra_mp == NULL);
		extra_mp = next;

		if (flow_ent != NULL) {
			flow_cookie = vnic_classifier_get_client_cookie(
			    flow_ent);
			if (flow_cookie != NULL) {
				/*
				 * Send a copy to every VNIC defined on the
				 * interface, as well as the underlying MAC.
				 */
				vnic_bcast_send(flow_cookie, (vnic_t *)-1, mp);
			} else {
				/*
				 * loopback the packet to a local VNIC or
				 * an active MAC client.
				 */
				vnic_local_tx(vnic_mac, flow_ent, mp);
			}
			VNIC_FLOW_REFRELE(flow_ent);
			/* the packet was consumed, nothing is left unsent */
			mp_chain = NULL;
		} else {
			/*
			 * Non-VNIC destination, send via the underlying
			 * NIC. In order to avoid a recursive call
			 * to this function, we ensured that mtp points
			 * to the underlying NIC transmit function
			 * by initializing through mac_vnic_tx_get().
			 */
			mp_chain = mtp->mt_fn(mtp->mt_arg, mp);
			/* stop at the first packet the NIC hands back */
			if (mp_chain != NULL)
				break;
		}
	}

	/* return the unsent packet followed by the unprocessed remainder */
	if ((mp_chain != NULL) && (extra_mp != NULL)) {
		ASSERT(mp_chain->b_next == NULL);
		mp_chain->b_next = extra_mp;
	}
	return (mp_chain);
}

/*
 * VNIC transmit function.
 *
 * arg is the vnic_t of the sending VNIC. Each packet of the chain is
 * classified: packets matching a local flow are delivered locally (or
 * broadcast), all others are passed to the transmit function found in
 * vn_txinfo. Returns NULL when everything was consumed, otherwise the
 * chain of packets that could not be sent.
 */
mblk_t *
vnic_m_tx(void *arg, mblk_t *mp_chain)
{
	vnic_t *vnic = arg;
	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
	mblk_t *mp, *extra_mp = NULL;
	vnic_flow_t *flow_ent;
	void *flow_cookie;

	/*
	 * Update stats.
	 */
	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
		vnic->vn_stat_opackets++;
		vnic->vn_stat_obytes += msgdsize(mp);
	}

	/*
	 * Packets are handled one at a time; extra_mp always points to
	 * the part of the chain that has not been looked at yet.
	 */
	for (mp = mp_chain; mp != NULL; mp = extra_mp) {
		mblk_t *next;

		/* detach the packet from the rest of the chain */
		next = mp->b_next;
		mp->b_next = NULL;

		vnic_promisc_rx(vnic->vn_vnic_mac, vnic, mp);

		flow_ent = vnic_classify(vnic->vn_vnic_mac, mp, &extra_mp);
		ASSERT(extra_mp == NULL);
		extra_mp = next;

		if (flow_ent != NULL) {
			flow_cookie = vnic_classifier_get_client_cookie(
			    flow_ent);
			if (flow_cookie != NULL) {
				/*
				 * The vnic_bcast_send function expects
				 * to receive the sender VNIC as value
				 * for arg2.
				 */
				vnic_bcast_send(flow_cookie, vnic, mp);
			} else {
				/*
				 * loopback the packet to a local VNIC or
				 * an active MAC client.
				 */
				vnic_local_tx(vnic_mac, flow_ent, mp);
			}
			VNIC_FLOW_REFRELE(flow_ent);
			/* the packet was consumed, nothing is left unsent */
			mp_chain = NULL;
		} else {
			/*
			 * Non-local destination, send via the underlying
			 * NIC.
			 */
			const mac_txinfo_t *mtp = vnic->vn_txinfo;
			mp_chain = mtp->mt_fn(mtp->mt_arg, mp);
			/* stop at the first packet the NIC hands back */
			if (mp_chain != NULL)
				break;
		}
	}

	/*
	 * update stats to account for unsent packets
	 *
	 * NOTE(review): the packets re-linked from extra_mp below were
	 * counted by the first loop of this function but are not
	 * subtracted here, because the loop stops right after linking
	 * them - confirm whether this is intended.
	 */
	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
		vnic->vn_stat_opackets--;
		vnic->vn_stat_obytes -= msgdsize(mp);
		vnic->vn_stat_oerrors++;
		/*
		 * link back in the last portion not counted due to bandwidth
		 * control.
		 */
		if (mp->b_next == NULL) {
			mp->b_next = extra_mp;
			break;
		}
	}

	return (mp_chain);
}

/*
 * m_resources entry point; intentionally empty.
 */
/* ARGSUSED */
static void
vnic_m_resources(void *arg)
{
	/* no resources to advertise */
}

/*
 * m_stat entry point. Link duplex and speed are read from the lower
 * MAC; the packet, byte and error counters come from the vnic_t.
 * Stores the value in *val and returns 0, or returns ENOTSUP for any
 * other statistic (leaving *val untouched).
 */
static int
vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnic_t *vnic = arg;
	int rval = 0;

	rw_enter(&vnic_lock, RW_READER);

	switch (stat) {
	case ETHER_STAT_LINK_DUPLEX:
		*val = mac_stat_get(vnic->vn_vnic_mac->va_mh,
		    ETHER_STAT_LINK_DUPLEX);
		break;
	case MAC_STAT_IFSPEED:
		*val = mac_stat_get(vnic->vn_vnic_mac->va_mh,
		    MAC_STAT_IFSPEED);
		break;
	case MAC_STAT_MULTIRCV:
		*val = vnic->vn_stat_multircv;
		break;
	case MAC_STAT_BRDCSTRCV:
		*val = vnic->vn_stat_brdcstrcv;
		break;
	case MAC_STAT_MULTIXMT:
		*val = vnic->vn_stat_multixmt;
		break;
	case MAC_STAT_BRDCSTXMT:
		*val = vnic->vn_stat_brdcstxmt;
		break;
	case MAC_STAT_IERRORS:
		*val = vnic->vn_stat_ierrors;
		break;
	case MAC_STAT_OERRORS:
		*val = vnic->vn_stat_oerrors;
		break;
	case MAC_STAT_RBYTES:
		*val = vnic->vn_stat_rbytes;
		break;
	case MAC_STAT_IPACKETS:
		*val = vnic->vn_stat_ipackets;
		break;
	case MAC_STAT_OBYTES:
		*val = vnic->vn_stat_obytes;
		break;
	case MAC_STAT_OPACKETS:
		*val = vnic->vn_stat_opackets;
		break;
	default:
		rval = ENOTSUP;
	}

	rw_exit(&vnic_lock);
	return (rval);
}

/*
 * Return information about the specified capability.
1272 */ 1273 /* ARGSUSED */ 1274 static boolean_t 1275 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 1276 { 1277 vnic_t *vnic = arg; 1278 1279 switch (cap) { 1280 case MAC_CAPAB_POLL: 1281 return (B_TRUE); 1282 case MAC_CAPAB_HCKSUM: { 1283 uint32_t *hcksum_txflags = cap_data; 1284 1285 *hcksum_txflags = vnic->vn_hcksum_txflags & 1286 (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM | 1287 HCKSUM_INET_PARTIAL); 1288 break; 1289 } 1290 default: 1291 return (B_FALSE); 1292 } 1293 return (B_TRUE); 1294 } 1295 1296 static int 1297 vnic_m_start(void *arg) 1298 { 1299 vnic_t *vnic = arg; 1300 mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh; 1301 int rc; 1302 1303 rc = mac_start(lower_mh); 1304 if (rc != 0) 1305 return (rc); 1306 1307 vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx, vnic, vnic); 1308 return (0); 1309 } 1310 1311 static void 1312 vnic_m_stop(void *arg) 1313 { 1314 vnic_t *vnic = arg; 1315 mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh; 1316 1317 vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx_initial, 1318 vnic, vnic); 1319 mac_stop(lower_mh); 1320 } 1321 1322 /* ARGSUSED */ 1323 static int 1324 vnic_m_promisc(void *arg, boolean_t on) 1325 { 1326 vnic_t *vnic = arg; 1327 1328 return (vnic_promisc_set(vnic, on)); 1329 } 1330 1331 static int 1332 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 1333 { 1334 vnic_t *vnic = arg; 1335 int rc = 0; 1336 1337 if (add) 1338 rc = vnic_bcast_add(vnic, addrp, MAC_ADDRTYPE_MULTICAST); 1339 else 1340 vnic_bcast_delete(vnic, addrp); 1341 1342 return (rc); 1343 } 1344 1345 static int 1346 vnic_m_unicst(void *arg, const uint8_t *mac_addr) 1347 { 1348 vnic_t *vnic = arg; 1349 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1350 int rv; 1351 1352 rw_enter(&vnic_lock, RW_WRITER); 1353 rv = vnic_modify_mac_addr(vnic, vnic_mac->va_addr_len, 1354 (uchar_t *)mac_addr); 1355 rw_exit(&vnic_lock); 1356 1357 if (rv == 0) 1358 mac_unicst_update(vnic->vn_mh, mac_addr); 1359 return (0); 1360 } 1361 
1362 int 1363 vnic_info(uint_t *nvnics, datalink_id_t vnic_id, datalink_id_t linkid, 1364 void *fn_arg, vnic_info_new_vnic_fn_t new_vnic_fn) 1365 { 1366 vnic_info_state_t state; 1367 int rc = 0; 1368 1369 rw_enter(&vnic_lock, RW_READER); 1370 1371 *nvnics = vnic_count; 1372 1373 bzero(&state, sizeof (state)); 1374 state.vs_vnic_id = vnic_id; 1375 state.vs_linkid = linkid; 1376 state.vs_new_vnic_fn = new_vnic_fn; 1377 state.vs_fn_arg = fn_arg; 1378 1379 mod_hash_walk(vnic_hash, vnic_info_walker, &state); 1380 1381 if ((rc = state.vs_rc) == 0 && vnic_id != DATALINK_ALL_LINKID && 1382 !state.vs_vnic_found) 1383 rc = ENOENT; 1384 1385 rw_exit(&vnic_lock); 1386 return (rc); 1387 } 1388 1389 /* 1390 * Walker invoked when building a list of vnics that must be passed 1391 * up to user space. 1392 */ 1393 /*ARGSUSED*/ 1394 static uint_t 1395 vnic_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 1396 { 1397 vnic_t *vnic; 1398 vnic_info_state_t *state = arg; 1399 1400 if (state->vs_rc != 0) 1401 return (MH_WALK_TERMINATE); /* terminate walk */ 1402 1403 vnic = (vnic_t *)val; 1404 1405 if (state->vs_vnic_id != DATALINK_ALL_LINKID && 1406 vnic->vn_id != state->vs_vnic_id) { 1407 goto bail; 1408 } 1409 1410 state->vs_vnic_found = B_TRUE; 1411 1412 state->vs_rc = state->vs_new_vnic_fn(state->vs_fn_arg, 1413 vnic->vn_id, vnic->vn_addr_type, vnic->vn_vnic_mac->va_addr_len, 1414 vnic->vn_addr, vnic->vn_vnic_mac->va_linkid); 1415 bail: 1416 return ((state->vs_rc == 0) ? MH_WALK_CONTINUE : MH_WALK_TERMINATE); 1417 } 1418 1419 /* 1420 * vnic_notify_cb() and vnic_notify_walker() below are used to 1421 * process events received from an underlying NIC and, if needed, 1422 * forward these events to the VNICs defined on top of that NIC. 
1423 */ 1424 1425 typedef struct vnic_notify_state { 1426 mac_notify_type_t vo_type; 1427 vnic_mac_t *vo_vnic_mac; 1428 } vnic_notify_state_t; 1429 1430 /* ARGSUSED */ 1431 static uint_t 1432 vnic_notify_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 1433 { 1434 vnic_t *vnic = (vnic_t *)val; 1435 vnic_notify_state_t *state = arg; 1436 1437 /* ignore VNICs that don't use the specified underlying MAC */ 1438 if (vnic->vn_vnic_mac != state->vo_vnic_mac) 1439 return (MH_WALK_CONTINUE); 1440 1441 switch (state->vo_type) { 1442 case MAC_NOTE_TX: 1443 mac_tx_update(vnic->vn_mh); 1444 break; 1445 case MAC_NOTE_LINK: 1446 /* 1447 * The VNIC link state must be up regardless of 1448 * the link state of the underlying NIC to maintain 1449 * connectivity between VNICs on the same host. 1450 */ 1451 mac_link_update(vnic->vn_mh, LINK_STATE_UP); 1452 break; 1453 case MAC_NOTE_UNICST: 1454 vnic_update_active_rx(vnic->vn_vnic_mac); 1455 break; 1456 case MAC_NOTE_VNIC: 1457 /* only for clients which share a NIC with a VNIC */ 1458 break; 1459 case MAC_NOTE_PROMISC: 1460 mutex_enter(&vnic_mac_lock); 1461 vnic->vn_vnic_mac->va_txinfo = mac_vnic_tx_get( 1462 vnic->vn_vnic_mac->va_mh); 1463 mutex_exit(&vnic_mac_lock); 1464 break; 1465 } 1466 1467 return (MH_WALK_CONTINUE); 1468 } 1469 1470 static void 1471 vnic_notify_cb(void *arg, mac_notify_type_t type) 1472 { 1473 vnic_mac_t *vnic = arg; 1474 vnic_notify_state_t state; 1475 1476 state.vo_type = type; 1477 state.vo_vnic_mac = vnic; 1478 1479 rw_enter(&vnic_lock, RW_READER); 1480 mod_hash_walk(vnic_hash, vnic_notify_walker, &state); 1481 rw_exit(&vnic_lock); 1482 } 1483 1484 static int 1485 vnic_modify_mac_addr(vnic_t *vnic, uint_t mac_len, uchar_t *mac_addr) 1486 { 1487 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1488 vnic_flow_t *vnic_flow = vnic->vn_flow_ent; 1489 1490 ASSERT(RW_WRITE_HELD(&vnic_lock)); 1491 1492 if (mac_len != vnic_mac->va_addr_len) 1493 return (EINVAL); 1494 1495 
vnic_classifier_flow_update_addr(vnic_flow, mac_addr); 1496 return (0); 1497 } 1498 1499 static int 1500 vnic_promisc_set(vnic_t *vnic, boolean_t on) 1501 { 1502 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1503 int r = -1; 1504 1505 if (vnic->vn_promisc == on) 1506 return (0); 1507 1508 if (on) { 1509 if ((r = mac_promisc_set(vnic_mac->va_mh, B_TRUE, 1510 MAC_DEVPROMISC)) != 0) { 1511 return (r); 1512 } 1513 1514 rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER); 1515 vnic->vn_promisc_next = vnic_mac->va_promisc; 1516 vnic_mac->va_promisc = vnic; 1517 vnic_mac->va_promisc_gen++; 1518 1519 vnic->vn_promisc = B_TRUE; 1520 rw_exit(&vnic_mac->va_promisc_lock); 1521 1522 return (0); 1523 } else { 1524 vnic_t *loop, *prev = NULL; 1525 1526 rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER); 1527 loop = vnic_mac->va_promisc; 1528 1529 while ((loop != NULL) && (loop != vnic)) { 1530 prev = loop; 1531 loop = loop->vn_promisc_next; 1532 } 1533 1534 if ((loop != NULL) && 1535 ((r = mac_promisc_set(vnic_mac->va_mh, B_FALSE, 1536 MAC_DEVPROMISC)) == 0)) { 1537 if (prev != NULL) 1538 prev->vn_promisc_next = loop->vn_promisc_next; 1539 else 1540 vnic_mac->va_promisc = loop->vn_promisc_next; 1541 vnic_mac->va_promisc_gen++; 1542 1543 vnic->vn_promisc = B_FALSE; 1544 } 1545 rw_exit(&vnic_mac->va_promisc_lock); 1546 1547 return (r); 1548 } 1549 } 1550 1551 void 1552 vnic_promisc_rx(vnic_mac_t *vnic_mac, vnic_t *sender, mblk_t *mp) 1553 { 1554 vnic_t *loop; 1555 vnic_flow_t *flow; 1556 const vnic_flow_fn_info_t *fn_info; 1557 mac_header_info_t hdr_info; 1558 boolean_t dst_must_match = B_TRUE; 1559 1560 ASSERT(mp->b_next == NULL); 1561 1562 rw_enter(&vnic_mac->va_promisc_lock, RW_READER); 1563 if (vnic_mac->va_promisc == NULL) 1564 goto done; 1565 1566 if (mac_header_info(vnic_mac->va_mh, mp, &hdr_info) != 0) 1567 goto done; 1568 1569 /* 1570 * If this is broadcast or multicast then the destination 1571 * address need not match for us to deliver it. 
1572 */ 1573 if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) || 1574 (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST)) 1575 dst_must_match = B_FALSE; 1576 1577 for (loop = vnic_mac->va_promisc; 1578 loop != NULL; 1579 loop = loop->vn_promisc_next) { 1580 if (loop == sender) 1581 continue; 1582 1583 if (dst_must_match && 1584 (bcmp(hdr_info.mhi_daddr, loop->vn_addr, 1585 sizeof (loop->vn_addr)) != 0)) 1586 continue; 1587 1588 flow = loop->vn_flow_ent; 1589 ASSERT(flow != NULL); 1590 1591 if (!flow->vf_is_active) { 1592 mblk_t *copy; 1593 uint64_t gen; 1594 1595 if ((copy = vnic_copymsg_cksum(mp)) == NULL) 1596 break; 1597 if ((sender != NULL) && 1598 ((copy = vnic_fix_cksum(copy)) == NULL)) 1599 break; 1600 1601 VNIC_FLOW_REFHOLD(flow); 1602 gen = vnic_mac->va_promisc_gen; 1603 rw_exit(&vnic_mac->va_promisc_lock); 1604 1605 fn_info = vnic_classifier_get_fn_info(flow); 1606 (fn_info->ff_fn)(fn_info->ff_arg1, 1607 fn_info->ff_arg2, copy); 1608 1609 VNIC_FLOW_REFRELE(flow); 1610 rw_enter(&vnic_mac->va_promisc_lock, RW_READER); 1611 if (vnic_mac->va_promisc_gen != gen) 1612 break; 1613 } 1614 } 1615 done: 1616 rw_exit(&vnic_mac->va_promisc_lock); 1617 } 1618