1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/sysmacros.h> 30 #include <sys/conf.h> 31 #include <sys/cmn_err.h> 32 #include <sys/list.h> 33 #include <sys/ksynch.h> 34 #include <sys/kmem.h> 35 #include <sys/stream.h> 36 #include <sys/modctl.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/atomic.h> 40 #include <sys/stat.h> 41 #include <sys/modhash.h> 42 #include <sys/strsubr.h> 43 #include <sys/strsun.h> 44 #include <sys/dlpi.h> 45 #include <sys/mac.h> 46 #include <sys/mac_ether.h> 47 #include <sys/pattr.h> 48 #if 0 49 #include <sys/vlan.h> 50 #endif 51 #include <sys/vnic.h> 52 #include <sys/vnic_impl.h> 53 #include <sys/gld.h> 54 #include <inet/ip.h> 55 #include <inet/ip_impl.h> 56 57 static int vnic_m_start(void *); 58 static void vnic_m_stop(void *); 59 static int vnic_m_promisc(void *, boolean_t); 60 static int vnic_m_multicst(void *, boolean_t, const uint8_t *); 61 static int vnic_m_unicst(void *, const uint8_t *); 62 static int vnic_m_stat(void *, uint_t, uint64_t *); 63 static void vnic_m_resources(void *); 64 static mblk_t *vnic_m_tx(void *, mblk_t *); 65 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *); 66 static void vnic_mac_free(vnic_mac_t *); 67 static uint_t vnic_info_walker(mod_hash_key_t, mod_hash_val_t *, void *); 68 static void vnic_notify_cb(void *, mac_notify_type_t); 69 static int vnic_modify_mac_addr(vnic_t *, uint_t, uchar_t *); 70 static mblk_t *vnic_active_tx(void *, mblk_t *); 71 static int vnic_promisc_set(vnic_t *, boolean_t); 72 73 static kmem_cache_t *vnic_cache; 74 static kmem_cache_t *vnic_mac_cache; 75 static krwlock_t vnic_lock; 76 static kmutex_t vnic_mac_lock; 77 static uint_t vnic_count; 78 79 /* hash of VNICs (vnic_t's), keyed by VNIC id */ 80 static mod_hash_t *vnic_hash; 81 #define VNIC_HASHSZ 64 82 #define VNIC_HASH_KEY(vnic_id) ((mod_hash_key_t)(uintptr_t)vnic_id) 83 84 /* 85 * Hash of underlying open MACs (vnic_mac_t's), keyed by the string 86 * "<device name><instance number>/<port number>". 87 */ 88 static mod_hash_t *vnic_mac_hash; 89 #define VNIC_MAC_HASHSZ 64 90 91 #define VNIC_MAC_REFHOLD(va) { \ 92 ASSERT(MUTEX_HELD(&vnic_mac_lock)); \ 93 (va)->va_refs++; \ 94 ASSERT((va)->va_refs != 0); \ 95 } 96 97 #define VNIC_MAC_REFRELE(va) { \ 98 ASSERT(MUTEX_HELD(&vnic_mac_lock)); \ 99 ASSERT((va)->va_refs != 0); \ 100 if (--((va)->va_refs) == 0) \ 101 vnic_mac_free(va); \ 102 } 103 104 static uchar_t vnic_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 105 106 /* used by vnic_walker */ 107 typedef struct vnic_info_state { 108 uint32_t vs_vnic_id; 109 char vs_dev_name[MAXNAMELEN]; 110 boolean_t vs_vnic_found; 111 vnic_info_new_vnic_fn_t vs_new_vnic_fn; 112 void *vs_fn_arg; 113 int vs_rc; 114 } vnic_info_state_t; 115 116 #define VNIC_M_CALLBACK_FLAGS (MC_RESOURCES | MC_GETCAPAB) 117 118 static mac_callbacks_t vnic_m_callbacks = { 119 VNIC_M_CALLBACK_FLAGS, 120 vnic_m_stat, 121 vnic_m_start, 122 vnic_m_stop, 123 vnic_m_promisc, 124 vnic_m_multicst, 125 vnic_m_unicst, 126 vnic_m_tx, 127 vnic_m_resources, 128 NULL, /* m_ioctl */ 129 vnic_m_capab_get 130 }; 131 132 /* ARGSUSED */ 133 static int 134 vnic_mac_ctor(void *buf, void *arg, int kmflag) 135 { 136 vnic_mac_t *vnic_mac = buf; 137 138 bzero(vnic_mac, sizeof (vnic_mac_t)); 139 rw_init(&vnic_mac->va_bcast_grp_lock, NULL, RW_DRIVER, NULL); 140 rw_init(&vnic_mac->va_promisc_lock, NULL, RW_DRIVER, NULL); 141 142 return (0); 143 } 144 145 /* ARGSUSED */ 146 static void 147 vnic_mac_dtor(void *buf, void *arg) 148 { 149 vnic_mac_t *vnic_mac = buf; 150 151 rw_destroy(&vnic_mac->va_promisc_lock); 152 rw_destroy(&vnic_mac->va_bcast_grp_lock); 153 } 154 155 void 156 vnic_dev_init(void) 157 { 158 vnic_cache = kmem_cache_create("vnic_cache", 159 sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 160 161 vnic_mac_cache = kmem_cache_create("vnic_mac_cache", 162 sizeof (vnic_mac_t), 0, vnic_mac_ctor, vnic_mac_dtor, 163 NULL, NULL, NULL, 0); 164 165 vnic_hash = mod_hash_create_idhash("vnic_hash", 166 VNIC_HASHSZ, mod_hash_null_valdtor); 167 168 vnic_mac_hash = mod_hash_create_strhash("vnic_mac_hash", 169 VNIC_MAC_HASHSZ, mod_hash_null_valdtor); 170 171 rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL); 172 173 mutex_init(&vnic_mac_lock, NULL, MUTEX_DEFAULT, NULL); 174 175 vnic_count = 0; 176 } 177 178 void 179 vnic_dev_fini(void) 180 { 181 ASSERT(vnic_count == 0); 182 183 mutex_destroy(&vnic_mac_lock); 184 rw_destroy(&vnic_lock); 185 mod_hash_destroy_strhash(vnic_mac_hash); 186 mod_hash_destroy_idhash(vnic_hash); 187 kmem_cache_destroy(vnic_mac_cache); 188 kmem_cache_destroy(vnic_cache); 189 } 190 191 uint_t 192 vnic_dev_count(void) 193 { 194 return (vnic_count); 195 } 196 197 static int 198 vnic_mac_open(const char *dev_name, vnic_mac_t **vmp) 199 { 200 char *str_key; 201 int err; 202 vnic_mac_t *vnic_mac = NULL; 203 char driver[MAXNAMELEN]; 204 uint_t ddi_instance; 205 const mac_info_t *mip; 206 207 *vmp = NULL; 208 209 if (ddi_parse(dev_name, driver, &ddi_instance) != DDI_SUCCESS) 210 return (EINVAL); 211 212 mutex_enter(&vnic_mac_lock); 213 214 err = mod_hash_find(vnic_mac_hash, (mod_hash_key_t)dev_name, 215 (mod_hash_val_t *)&vnic_mac); 216 if (err == 0) { 217 /* this MAC is already opened, increment reference count */ 218 VNIC_MAC_REFHOLD(vnic_mac); 219 mutex_exit(&vnic_mac_lock); 220 *vmp = vnic_mac; 221 return (0); 222 } 223 224 vnic_mac = kmem_cache_alloc(vnic_mac_cache, KM_SLEEP); 225 226 if ((err = mac_open(dev_name, ddi_instance, &vnic_mac->va_mh)) != 0) { 227 vnic_mac->va_mh = NULL; 228 goto bail; 229 } 230 231 /* only ethernet support, for now */ 232 mip = mac_info(vnic_mac->va_mh); 233 if (mip->mi_media != DL_ETHER) { 234 err = ENOTSUP; 235 goto bail; 236 } 237 if (mip->mi_media != mip->mi_nativemedia) { 238 err = ENOTSUP; 239 goto bail; 240 } 241 242 (void) strcpy(vnic_mac->va_dev_name, dev_name); 243 244 /* add entry to hash table */ 245 str_key = kmem_alloc(strlen(dev_name) + 1, KM_SLEEP); 246 (void) strcpy(str_key, dev_name); 247 err = mod_hash_insert(vnic_mac_hash, (mod_hash_key_t)str_key, 248 (mod_hash_val_t)vnic_mac); 249 ASSERT(err == 0); 250 251 /* initialize the flow table associated with lower MAC */ 252 vnic_mac->va_addr_len = ETHERADDRL; 253 (void) vnic_classifier_flow_tab_init(vnic_mac, vnic_mac->va_addr_len, 254 KM_SLEEP); 255 256 vnic_mac->va_txinfo = mac_vnic_tx_get(vnic_mac->va_mh); 257 vnic_mac->va_notify_hdl = mac_notify_add(vnic_mac->va_mh, 258 vnic_notify_cb, vnic_mac); 259 260 VNIC_MAC_REFHOLD(vnic_mac); 261 *vmp = vnic_mac; 262 mutex_exit(&vnic_mac_lock); 263 return (0); 264 265 bail: 266 if (vnic_mac != NULL) { 267 if (vnic_mac->va_mh != NULL) 268 mac_close(vnic_mac->va_mh); 269 kmem_cache_free(vnic_mac_cache, vnic_mac); 270 } 271 mutex_exit(&vnic_mac_lock); 272 return (err); 273 } 274 275 /* 276 * Create a new flow for the active MAC client sharing the NIC 277 * with the VNICs. This allows the unicast packets for that NIC 278 * to be classified and passed up to the active MAC client. It 279 * also allows packets sent from a VNIC to the active link to 280 * be classified by the VNIC transmit function and delivered via 281 * the MAC module locally. Returns B_TRUE on success, B_FALSE on 282 * failure. 283 */ 284 static int 285 vnic_init_active_rx(vnic_mac_t *vnic_mac) 286 { 287 uchar_t nic_mac_addr[MAXMACADDRLEN]; 288 289 if (vnic_mac->va_active_flow != NULL) 290 return (B_TRUE); 291 292 mac_unicst_get(vnic_mac->va_mh, nic_mac_addr); 293 294 vnic_mac->va_active_flow = vnic_classifier_flow_create( 295 vnic_mac->va_addr_len, nic_mac_addr, NULL, B_TRUE, KM_SLEEP); 296 297 vnic_classifier_flow_add(vnic_mac, vnic_mac->va_active_flow, 298 (vnic_rx_fn_t)mac_active_rx, vnic_mac->va_mh, NULL); 299 return (B_TRUE); 300 } 301 302 static void 303 vnic_fini_active_rx(vnic_mac_t *vnic_mac) 304 { 305 if (vnic_mac->va_active_flow == NULL) 306 return; 307 308 vnic_classifier_flow_remove(vnic_mac, vnic_mac->va_active_flow); 309 vnic_classifier_flow_destroy(vnic_mac->va_active_flow); 310 vnic_mac->va_active_flow = NULL; 311 } 312 313 static void 314 vnic_update_active_rx(vnic_mac_t *vnic_mac) 315 { 316 if (vnic_mac->va_active_flow == NULL) 317 return; 318 319 vnic_fini_active_rx(vnic_mac); 320 (void) vnic_init_active_rx(vnic_mac); 321 } 322 323 /* 324 * Copy an mblk, preserving its hardware checksum flags. 325 */ 326 mblk_t * 327 vnic_copymsg_cksum(mblk_t *mp) 328 { 329 mblk_t *mp1; 330 uint32_t start, stuff, end, value, flags; 331 332 mp1 = copymsg(mp); 333 if (mp1 == NULL) 334 return (NULL); 335 336 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 337 (void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value, 338 flags, KM_NOSLEEP); 339 340 return (mp1); 341 } 342 343 /* 344 * Copy an mblk chain, presenting the hardware checksum flags of the 345 * individual mblks. 346 */ 347 mblk_t * 348 vnic_copymsgchain_cksum(mblk_t *mp) 349 { 350 mblk_t *nmp = NULL; 351 mblk_t **nmpp = &nmp; 352 353 for (; mp != NULL; mp = mp->b_next) { 354 if ((*nmpp = vnic_copymsg_cksum(mp)) == NULL) { 355 freemsgchain(nmp); 356 return (NULL); 357 } 358 359 nmpp = &((*nmpp)->b_next); 360 } 361 362 return (nmp); 363 } 364 365 366 /* 367 * Process the specified mblk chain for proper handling of hardware 368 * checksum offload. This routine is invoked for loopback VNIC traffic. 369 * The function handles a NULL mblk chain passed as argument. 370 */ 371 mblk_t * 372 vnic_fix_cksum(mblk_t *mp_chain) 373 { 374 mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1; 375 uint32_t flags, start, stuff, end, value; 376 377 for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) { 378 uint16_t len; 379 uint32_t offset; 380 struct ether_header *ehp; 381 uint16_t sap; 382 383 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, 384 &flags); 385 if (flags == 0) 386 continue; 387 388 /* 389 * Since the processing of checksum offload for loopback 390 * traffic requires modification of the packet contents, 391 * ensure sure that we are always modifying our own copy. 392 */ 393 if (DB_REF(mp) > 1) { 394 mp1 = copymsg(mp); 395 if (mp1 == NULL) 396 continue; 397 mp1->b_next = mp->b_next; 398 mp->b_next = NULL; 399 freemsg(mp); 400 if (prev != NULL) 401 prev->b_next = mp1; 402 else 403 new_chain = mp1; 404 mp = mp1; 405 } 406 407 /* 408 * Ethernet, and optionally VLAN header. 409 */ 410 /*LINTED*/ 411 ehp = (struct ether_header *)mp->b_rptr; 412 if (ntohs(ehp->ether_type) == VLAN_TPID) { 413 struct ether_vlan_header *evhp; 414 415 ASSERT(MBLKL(mp) >= 416 sizeof (struct ether_vlan_header)); 417 /*LINTED*/ 418 evhp = (struct ether_vlan_header *)mp->b_rptr; 419 sap = ntohs(evhp->ether_type); 420 offset = sizeof (struct ether_vlan_header); 421 } else { 422 sap = ntohs(ehp->ether_type); 423 offset = sizeof (struct ether_header); 424 } 425 426 if (MBLKL(mp) <= offset) { 427 offset -= MBLKL(mp); 428 if (mp->b_cont == NULL) { 429 /* corrupted packet, skip it */ 430 if (prev != NULL) 431 prev->b_next = mp->b_next; 432 else 433 new_chain = mp->b_next; 434 mp1 = mp->b_next; 435 mp->b_next = NULL; 436 freemsg(mp); 437 mp = mp1; 438 continue; 439 } 440 mp = mp->b_cont; 441 } 442 443 if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) { 444 ipha_t *ipha = NULL; 445 446 /* 447 * In order to compute the full and header 448 * checksums, we need to find and parse 449 * the IP and/or ULP headers. 450 */ 451 452 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 453 454 /* 455 * IP header. 456 */ 457 if (sap != ETHERTYPE_IP) 458 continue; 459 460 ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t)); 461 /*LINTED*/ 462 ipha = (ipha_t *)(mp->b_rptr + offset); 463 464 if (flags & HCK_FULLCKSUM) { 465 ipaddr_t src, dst; 466 uint32_t cksum; 467 uint16_t *up; 468 uint8_t proto; 469 470 /* 471 * Pointer to checksum field in ULP header. 472 */ 473 proto = ipha->ipha_protocol; 474 ASSERT(ipha->ipha_version_and_hdr_length == 475 IP_SIMPLE_HDR_VERSION); 476 if (proto == IPPROTO_TCP) { 477 /*LINTED*/ 478 up = IPH_TCPH_CHECKSUMP(ipha, 479 IP_SIMPLE_HDR_LENGTH); 480 } else { 481 ASSERT(proto == IPPROTO_UDP); 482 /*LINTED*/ 483 up = IPH_UDPH_CHECKSUMP(ipha, 484 IP_SIMPLE_HDR_LENGTH); 485 } 486 487 /* 488 * Pseudo-header checksum. 489 */ 490 src = ipha->ipha_src; 491 dst = ipha->ipha_dst; 492 len = ntohs(ipha->ipha_length) - 493 IP_SIMPLE_HDR_LENGTH; 494 495 cksum = (dst >> 16) + (dst & 0xFFFF) + 496 (src >> 16) + (src & 0xFFFF); 497 cksum += htons(len); 498 499 /* 500 * The checksum value stored in the packet needs 501 * to be correct. Compute it here. 502 */ 503 *up = 0; 504 cksum += (((proto) == IPPROTO_UDP) ? 505 IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP); 506 cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH + 507 offset, cksum); 508 *(up) = (uint16_t)(cksum ? cksum : ~cksum); 509 510 flags |= HCK_FULLCKSUM_OK; 511 value = 0xffff; 512 } 513 514 if (flags & HCK_IPV4_HDRCKSUM) { 515 ASSERT(ipha != NULL); 516 ipha->ipha_hdr_checksum = 517 (uint16_t)ip_csum_hdr(ipha); 518 } 519 } 520 521 if (flags & HCK_PARTIALCKSUM) { 522 uint16_t *up, partial, cksum; 523 uchar_t *ipp; /* ptr to beginning of IP header */ 524 525 if (mp->b_cont != NULL) { 526 mblk_t *mp1; 527 528 mp1 = msgpullup(mp, offset + end); 529 if (mp1 == NULL) 530 continue; 531 mp1->b_next = mp->b_next; 532 mp->b_next = NULL; 533 freemsg(mp); 534 if (prev != NULL) 535 prev->b_next = mp1; 536 else 537 new_chain = mp1; 538 mp = mp1; 539 } 540 541 ipp = mp->b_rptr + offset; 542 /*LINTED*/ 543 up = (uint16_t *)((uchar_t *)ipp + stuff); 544 partial = *up; 545 *up = 0; 546 547 cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start, 548 end - start, partial); 549 cksum = ~cksum; 550 *up = cksum ? cksum : ~cksum; 551 552 /* 553 * Since we already computed the whole checksum, 554 * indicate to the stack that it has already 555 * been verified by the hardware. 556 */ 557 flags &= ~HCK_PARTIALCKSUM; 558 flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK); 559 value = 0xffff; 560 } 561 562 (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, 563 value, flags, KM_NOSLEEP); 564 } 565 566 return (new_chain); 567 } 568 569 static void 570 vnic_mac_close(vnic_mac_t *vnic_mac) 571 { 572 mutex_enter(&vnic_mac_lock); 573 VNIC_MAC_REFRELE(vnic_mac); 574 mutex_exit(&vnic_mac_lock); 575 } 576 577 static void 578 vnic_mac_free(vnic_mac_t *vnic_mac) 579 { 580 mod_hash_val_t val; 581 582 ASSERT(MUTEX_HELD(&vnic_mac_lock)); 583 vnic_fini_active_rx(vnic_mac); 584 mac_notify_remove(vnic_mac->va_mh, vnic_mac->va_notify_hdl); 585 if (vnic_mac->va_mac_set) { 586 vnic_mac->va_mac_set = B_FALSE; 587 mac_vnic_clear(vnic_mac->va_mh); 588 } 589 vnic_classifier_flow_tab_fini(vnic_mac); 590 mac_close(vnic_mac->va_mh); 591 592 (void) mod_hash_remove(vnic_mac_hash, 593 (mod_hash_key_t)vnic_mac->va_dev_name, &val); 594 ASSERT(vnic_mac == (vnic_mac_t *)val); 595 596 kmem_cache_free(vnic_mac_cache, vnic_mac); 597 } 598 599 /* 600 * Initial VNIC receive routine. Invoked for packets that are steered 601 * to a VNIC but the VNIC has not been started yet. 602 */ 603 /* ARGSUSED */ 604 static void 605 vnic_rx_initial(void *arg1, void *arg2, mblk_t *mp_chain) 606 { 607 vnic_t *vnic = arg1; 608 mblk_t *mp; 609 610 /* update stats */ 611 for (mp = mp_chain; mp != NULL; mp = mp->b_next) 612 vnic->vn_stat_ierrors++; 613 freemsgchain(mp_chain); 614 } 615 616 /* 617 * VNIC receive routine invoked after the classifier for the VNIC 618 * has been initialized and the VNIC has been started. 619 */ 620 /* ARGSUSED */ 621 void 622 vnic_rx(void *arg1, void *arg2, mblk_t *mp_chain) 623 { 624 vnic_t *vnic = arg1; 625 mblk_t *mp; 626 627 /* update stats */ 628 for (mp = mp_chain; mp != NULL; mp = mp->b_next) { 629 vnic->vn_stat_ipackets++; 630 vnic->vn_stat_rbytes += msgdsize(mp); 631 } 632 633 /* pass packet up */ 634 mac_rx(vnic->vn_mh, NULL, mp_chain); 635 } 636 637 /* 638 * Routine to create a MAC-based VNIC. Adds the passed MAC address 639 * to an unused slot in the NIC if one is available. Otherwise it 640 * sets the NIC in promiscuous mode and assigns the MAC address to 641 * a Rx ring if available or a soft ring. 642 */ 643 static int 644 vnic_add_unicstaddr(vnic_t *vnic, mac_multi_addr_t *maddr) 645 { 646 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 647 int err; 648 649 if (mac_unicst_verify(vnic_mac->va_mh, maddr->mma_addr, 650 maddr->mma_addrlen) == B_FALSE) 651 return (EINVAL); 652 653 if (mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_MULTIADDRESS, 654 &(vnic->vn_mma_capab))) { 655 if (vnic->vn_maddr_naddrfree == 0) { 656 /* 657 * No free address slots available. 658 * Enable promiscuous mode. 659 */ 660 goto set_promisc; 661 } 662 663 err = vnic->vn_maddr_add(vnic->vn_maddr_handle, maddr); 664 if (err != 0) { 665 if (err == ENOSPC) { 666 /* 667 * There was a race to add addresses 668 * with other multiple address consumers, 669 * and we lost out. Use promisc mode. 670 */ 671 goto set_promisc; 672 } 673 674 return (err); 675 } 676 677 vnic->vn_slot_id = maddr->mma_slot; 678 vnic->vn_multi_mac = B_TRUE; 679 } else { 680 /* 681 * Either multiple MAC address support is not 682 * available or all available addresses have 683 * been used up. 684 */ 685 set_promisc: 686 err = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC); 687 if (err != 0) { 688 return (err); 689 } 690 691 vnic->vn_promisc_mac = B_TRUE; 692 } 693 return (err); 694 } 695 696 /* 697 * VNIC is getting deleted. Remove the MAC address from the slot. 698 * If promiscuous mode was being used, then unset the promiscuous mode. 699 */ 700 static int 701 vnic_remove_unicstaddr(vnic_t *vnic) 702 { 703 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 704 int err; 705 706 if (vnic->vn_multi_mac) { 707 ASSERT(vnic->vn_promisc_mac == B_FALSE); 708 err = vnic->vn_maddr_remove(vnic->vn_maddr_handle, 709 vnic->vn_slot_id); 710 vnic->vn_multi_mac = B_FALSE; 711 } 712 713 if (vnic->vn_promisc_mac) { 714 ASSERT(vnic->vn_multi_mac == B_FALSE); 715 err = mac_promisc_set(vnic_mac->va_mh, B_FALSE, MAC_DEVPROMISC); 716 vnic->vn_promisc_mac = B_FALSE; 717 } 718 719 return (err); 720 } 721 722 /* 723 * Create a new VNIC upon request from administrator. 724 * Returns 0 on success, an errno on failure. 725 */ 726 int 727 vnic_dev_create(uint_t vnic_id, char *dev_name, int mac_len, uchar_t *mac_addr) 728 { 729 vnic_t *vnic = NULL; 730 mac_register_t *mac; 731 int err; 732 vnic_mac_t *vnic_mac; 733 const mac_info_t *lower_mac_info; 734 mac_multi_addr_t maddr; 735 mac_txinfo_t tx_info; 736 737 if (mac_len != ETHERADDRL) { 738 /* currently only ethernet NICs are supported */ 739 return (EINVAL); 740 } 741 742 rw_enter(&vnic_lock, RW_WRITER); 743 744 /* does a VNIC with the same id already exist? */ 745 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 746 (mod_hash_val_t *)&vnic); 747 if (err == 0) { 748 rw_exit(&vnic_lock); 749 return (EEXIST); 750 } 751 752 vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP); 753 if (vnic == NULL) { 754 rw_exit(&vnic_lock); 755 return (ENOMEM); 756 } 757 758 /* open underlying MAC */ 759 err = vnic_mac_open(dev_name, &vnic_mac); 760 if (err != 0) { 761 kmem_cache_free(vnic_cache, vnic); 762 rw_exit(&vnic_lock); 763 return (err); 764 } 765 766 bzero(vnic, sizeof (*vnic)); 767 vnic->vn_id = vnic_id; 768 vnic->vn_vnic_mac = vnic_mac; 769 770 vnic->vn_started = B_FALSE; 771 vnic->vn_promisc = B_FALSE; 772 vnic->vn_multi_mac = B_FALSE; 773 vnic->vn_bcast_grp = B_FALSE; 774 775 /* set the VNIC MAC address */ 776 maddr.mma_addrlen = mac_len; 777 maddr.mma_slot = 0; 778 maddr.mma_flags = 0; 779 bcopy(mac_addr, maddr.mma_addr, mac_len); 780 if ((err = vnic_add_unicstaddr(vnic, &maddr)) != 0) 781 goto bail; 782 bcopy(mac_addr, vnic->vn_addr, mac_len); 783 784 /* set the initial VNIC capabilities */ 785 if (!mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_HCKSUM, 786 &vnic->vn_hcksum_txflags)) 787 vnic->vn_hcksum_txflags = 0; 788 789 /* register with the MAC module */ 790 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 791 goto bail; 792 793 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 794 mac->m_driver = vnic; 795 mac->m_dip = vnic_get_dip(); 796 mac->m_instance = vnic_id; 797 mac->m_src_addr = vnic->vn_addr; 798 mac->m_callbacks = &vnic_m_callbacks; 799 800 lower_mac_info = mac_info(vnic_mac->va_mh); 801 mac->m_min_sdu = lower_mac_info->mi_sdu_min; 802 mac->m_max_sdu = lower_mac_info->mi_sdu_max; 803 804 err = mac_register(mac, &vnic->vn_mh); 805 mac_free(mac); 806 if (err != 0) 807 goto bail; 808 809 /* add new VNIC to hash table */ 810 err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id), 811 (mod_hash_val_t)vnic); 812 ASSERT(err == 0); 813 vnic_count++; 814 815 rw_exit(&vnic_lock); 816 817 /* Create a flow, initialized with the MAC address of the VNIC */ 818 if ((vnic->vn_flow_ent = vnic_classifier_flow_create(mac_len, mac_addr, 819 NULL, B_FALSE, KM_SLEEP)) == NULL) { 820 (void) vnic_dev_delete(vnic_id); 821 vnic = NULL; 822 err = ENOMEM; 823 goto bail_unlocked; 824 } 825 826 vnic_classifier_flow_add(vnic_mac, vnic->vn_flow_ent, vnic_rx_initial, 827 vnic, vnic); 828 829 /* setup VNIC to receive broadcast packets */ 830 err = vnic_bcast_add(vnic, vnic_brdcst_mac, MAC_ADDRTYPE_BROADCAST); 831 if (err != 0) { 832 (void) vnic_dev_delete(vnic_id); 833 vnic = NULL; 834 goto bail_unlocked; 835 } 836 vnic->vn_bcast_grp = B_TRUE; 837 838 mutex_enter(&vnic_mac_lock); 839 if (!vnic_mac->va_mac_set) { 840 /* 841 * We want to MAC layer to call the VNIC tx outbound 842 * routine, so that local broadcast packets sent by 843 * the active interface sharing the underlying NIC (if 844 * any), can be broadcast to every VNIC. 845 */ 846 tx_info.mt_fn = vnic_active_tx; 847 tx_info.mt_arg = vnic_mac; 848 if (!mac_vnic_set(vnic_mac->va_mh, &tx_info, 849 vnic_m_capab_get, vnic)) { 850 mutex_exit(&vnic_mac_lock); 851 (void) vnic_dev_delete(vnic_id); 852 vnic = NULL; 853 err = EBUSY; 854 goto bail_unlocked; 855 } 856 vnic_mac->va_mac_set = B_TRUE; 857 } 858 mutex_exit(&vnic_mac_lock); 859 860 /* allow passing packets to NIC's active MAC client */ 861 if (!vnic_init_active_rx(vnic_mac)) { 862 (void) vnic_dev_delete(vnic_id); 863 vnic = NULL; 864 err = ENOMEM; 865 goto bail_unlocked; 866 } 867 868 return (0); 869 870 bail: 871 (void) vnic_remove_unicstaddr(vnic); 872 vnic_mac_close(vnic_mac); 873 rw_exit(&vnic_lock); 874 875 bail_unlocked: 876 if (vnic != NULL) { 877 kmem_cache_free(vnic_cache, vnic); 878 } 879 880 return (err); 881 } 882 883 /* 884 * Modify the properties of an existing VNIC. 885 */ 886 /* ARGSUSED */ 887 int 888 vnic_dev_modify(uint_t vnic_id, uint_t modify_mask, 889 vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr) 890 { 891 vnic_t *vnic = NULL; 892 int rv = 0; 893 boolean_t notify_mac_addr = B_FALSE; 894 895 rw_enter(&vnic_lock, RW_WRITER); 896 897 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 898 (mod_hash_val_t *)&vnic) != 0) { 899 rw_exit(&vnic_lock); 900 return (ENOENT); 901 } 902 903 if (modify_mask & VNIC_IOC_MODIFY_ADDR) { 904 rv = vnic_modify_mac_addr(vnic, mac_len, mac_addr); 905 if (rv == 0) 906 notify_mac_addr = B_TRUE; 907 } 908 909 rw_exit(&vnic_lock); 910 911 if (notify_mac_addr) 912 mac_unicst_update(vnic->vn_mh, mac_addr); 913 914 return (rv); 915 } 916 917 int 918 vnic_dev_delete(uint_t vnic_id) 919 { 920 vnic_t *vnic = NULL; 921 mod_hash_val_t val; 922 vnic_flow_t *flent; 923 int rc; 924 925 rw_enter(&vnic_lock, RW_WRITER); 926 927 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 928 (mod_hash_val_t *)&vnic) != 0) { 929 rw_exit(&vnic_lock); 930 return (ENOENT); 931 } 932 933 /* 934 * We cannot unregister the MAC yet. Unregistering would 935 * free up mac_impl_t which should not happen at this time. 936 * Packets could be entering vnic_rx() through the 937 * flow entry and so mac_impl_t cannot be NULL. So disable 938 * mac_impl_t by calling mac_disable(). This will prevent any 939 * new claims on mac_impl_t. 940 */ 941 if (mac_disable(vnic->vn_mh) != 0) { 942 rw_exit(&vnic_lock); 943 return (EBUSY); 944 } 945 946 (void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val); 947 ASSERT(vnic == (vnic_t *)val); 948 949 if (vnic->vn_bcast_grp) 950 (void) vnic_bcast_delete(vnic, vnic_brdcst_mac); 951 952 flent = vnic->vn_flow_ent; 953 if (flent != NULL) { 954 /* 955 * vnic_classifier_flow_destroy() ensures that the 956 * flow is no longer used. 957 */ 958 vnic_classifier_flow_remove(vnic->vn_vnic_mac, flent); 959 vnic_classifier_flow_destroy(flent); 960 } 961 962 rc = mac_unregister(vnic->vn_mh); 963 ASSERT(rc == 0); 964 (void) vnic_remove_unicstaddr(vnic); 965 vnic_mac_close(vnic->vn_vnic_mac); 966 kmem_cache_free(vnic_cache, vnic); 967 vnic_count--; 968 rw_exit(&vnic_lock); 969 return (0); 970 } 971 972 /* 973 * For the specified packet chain, return a sub-chain to be sent 974 * and the transmit function to be used to send the packet. Also 975 * return a pointer to the sub-chain of packets that should 976 * be re-classified. If the function returns NULL, the packet 977 * should be sent using the underlying NIC. 978 */ 979 static vnic_flow_t * 980 vnic_classify(vnic_mac_t *vnic_mac, mblk_t *mp, mblk_t **mp_chain_rest) 981 { 982 vnic_flow_t *flow_ent; 983 984 /* one packet at a time */ 985 *mp_chain_rest = mp->b_next; 986 mp->b_next = NULL; 987 988 /* do classification on the packet */ 989 flow_ent = vnic_classifier_get_flow(vnic_mac, mp); 990 991 return (flow_ent); 992 } 993 994 /* 995 * Send a packet chain to a local VNIC or an active MAC client. 996 */ 997 static void 998 vnic_local_tx(vnic_mac_t *vnic_mac, vnic_flow_t *flow_ent, mblk_t *mp_chain) 999 { 1000 mblk_t *mp1; 1001 const vnic_flow_fn_info_t *fn_info; 1002 vnic_t *vnic; 1003 1004 if (!vnic_classifier_is_active(flow_ent) && 1005 mac_promisc_get(vnic_mac->va_mh, MAC_PROMISC)) { 1006 /* 1007 * If the MAC is in promiscous mode, 1008 * send a copy of the active client. 1009 */ 1010 if ((mp1 = vnic_copymsgchain_cksum(mp_chain)) == NULL) 1011 goto sendit; 1012 if ((mp1 = vnic_fix_cksum(mp1)) == NULL) 1013 goto sendit; 1014 mac_active_rx(vnic_mac->va_mh, NULL, mp1); 1015 } 1016 sendit: 1017 fn_info = vnic_classifier_get_fn_info(flow_ent); 1018 /* 1019 * If the vnic to which we would deliver this packet is in 1020 * promiscuous mode then it already received the packet via 1021 * vnic_promisc_rx(). 1022 * 1023 * XXX assumes that ff_arg2 is a vnic_t pointer if it is 1024 * non-NULL (currently always true). 1025 */ 1026 vnic = (vnic_t *)fn_info->ff_arg2; 1027 if ((vnic != NULL) && vnic->vn_promisc) 1028 freemsg(mp_chain); 1029 else if ((mp1 = vnic_fix_cksum(mp_chain)) != NULL) 1030 (fn_info->ff_fn)(fn_info->ff_arg1, fn_info->ff_arg2, mp1); 1031 } 1032 1033 /* 1034 * This function is invoked when a MAC client needs to send a packet 1035 * to a NIC which is shared by VNICs. It is passed to the MAC layer 1036 * by a call to mac_vnic_set() when the NIC is opened, and is returned 1037 * to MAC clients by mac_tx_get() when VNICs are present. 1038 */ 1039 mblk_t * 1040 vnic_active_tx(void *arg, mblk_t *mp_chain) 1041 { 1042 vnic_mac_t *vnic_mac = arg; 1043 mblk_t *mp, *extra_mp = NULL; 1044 vnic_flow_t *flow_ent; 1045 void *flow_cookie; 1046 const mac_txinfo_t *mtp = vnic_mac->va_txinfo; 1047 1048 for (mp = mp_chain; mp != NULL; mp = extra_mp) { 1049 mblk_t *next; 1050 1051 next = mp->b_next; 1052 mp->b_next = NULL; 1053 1054 vnic_promisc_rx(vnic_mac, (vnic_t *)-1, mp); 1055 1056 flow_ent = vnic_classify(vnic_mac, mp, &extra_mp); 1057 ASSERT(extra_mp == NULL); 1058 extra_mp = next; 1059 1060 if (flow_ent != NULL) { 1061 flow_cookie = vnic_classifier_get_client_cookie( 1062 flow_ent); 1063 if (flow_cookie != NULL) { 1064 /* 1065 * Send a copy to every VNIC defined on the 1066 * interface, as well as the underlying MAC. 1067 */ 1068 vnic_bcast_send(flow_cookie, (vnic_t *)-1, mp); 1069 } else { 1070 /* 1071 * loopback the packet to a local VNIC or 1072 * an active MAC client. 1073 */ 1074 vnic_local_tx(vnic_mac, flow_ent, mp); 1075 } 1076 VNIC_FLOW_REFRELE(flow_ent); 1077 mp_chain = NULL; 1078 } else { 1079 /* 1080 * Non-VNIC destination, send via the underlying 1081 * NIC. In order to avoid a recursive call 1082 * to this function, we ensured that mtp points 1083 * to the unerlying NIC transmit function 1084 * by inilizating through mac_vnic_tx_get(). 1085 */ 1086 mp_chain = mtp->mt_fn(mtp->mt_arg, mp); 1087 if (mp_chain != NULL) 1088 break; 1089 } 1090 } 1091 1092 if ((mp_chain != NULL) && (extra_mp != NULL)) { 1093 ASSERT(mp_chain->b_next == NULL); 1094 mp_chain->b_next = extra_mp; 1095 } 1096 return (mp_chain); 1097 } 1098 1099 /* 1100 * VNIC transmit function. 1101 */ 1102 mblk_t * 1103 vnic_m_tx(void *arg, mblk_t *mp_chain) 1104 { 1105 vnic_t *vnic = arg; 1106 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1107 mblk_t *mp, *extra_mp = NULL; 1108 vnic_flow_t *flow_ent; 1109 void *flow_cookie; 1110 1111 /* 1112 * Update stats. 1113 */ 1114 for (mp = mp_chain; mp != NULL; mp = mp->b_next) { 1115 vnic->vn_stat_opackets++; 1116 vnic->vn_stat_obytes += msgdsize(mp); 1117 } 1118 1119 for (mp = mp_chain; mp != NULL; mp = extra_mp) { 1120 mblk_t *next; 1121 1122 next = mp->b_next; 1123 mp->b_next = NULL; 1124 1125 vnic_promisc_rx(vnic->vn_vnic_mac, vnic, mp); 1126 1127 flow_ent = vnic_classify(vnic->vn_vnic_mac, mp, &extra_mp); 1128 ASSERT(extra_mp == NULL); 1129 extra_mp = next; 1130 1131 if (flow_ent != NULL) { 1132 flow_cookie = vnic_classifier_get_client_cookie( 1133 flow_ent); 1134 if (flow_cookie != NULL) { 1135 /* 1136 * The vnic_bcast_send function expects 1137 * to receive the sender VNIC as value 1138 * for arg2. 1139 */ 1140 vnic_bcast_send(flow_cookie, vnic, mp); 1141 } else { 1142 /* 1143 * loopback the packet to a local VNIC or 1144 * an active MAC client. 1145 */ 1146 vnic_local_tx(vnic_mac, flow_ent, mp); 1147 } 1148 VNIC_FLOW_REFRELE(flow_ent); 1149 mp_chain = NULL; 1150 } else { 1151 /* 1152 * Non-local destination, send via the underlying 1153 * NIC. 1154 */ 1155 const mac_txinfo_t *mtp = vnic->vn_txinfo; 1156 mp_chain = mtp->mt_fn(mtp->mt_arg, mp); 1157 if (mp_chain != NULL) 1158 break; 1159 } 1160 } 1161 1162 /* update stats to account for unsent packets */ 1163 for (mp = mp_chain; mp != NULL; mp = mp->b_next) { 1164 vnic->vn_stat_opackets--; 1165 vnic->vn_stat_obytes -= msgdsize(mp); 1166 vnic->vn_stat_oerrors++; 1167 /* 1168 * link back in the last portion not counted due to bandwidth 1169 * control. 1170 */ 1171 if (mp->b_next == NULL) { 1172 mp->b_next = extra_mp; 1173 break; 1174 } 1175 } 1176 1177 return (mp_chain); 1178 } 1179 1180 /* ARGSUSED */ 1181 static void 1182 vnic_m_resources(void *arg) 1183 { 1184 /* no resources to advertise */ 1185 } 1186 1187 static int 1188 vnic_m_stat(void *arg, uint_t stat, uint64_t *val) 1189 { 1190 vnic_t *vnic = arg; 1191 int rval = 0; 1192 1193 rw_enter(&vnic_lock, RW_READER); 1194 1195 switch (stat) { 1196 case ETHER_STAT_LINK_DUPLEX: 1197 *val = mac_stat_get(vnic->vn_vnic_mac->va_mh, 1198 ETHER_STAT_LINK_DUPLEX); 1199 break; 1200 case MAC_STAT_IFSPEED: 1201 *val = mac_stat_get(vnic->vn_vnic_mac->va_mh, 1202 MAC_STAT_IFSPEED); 1203 break; 1204 case MAC_STAT_MULTIRCV: 1205 *val = vnic->vn_stat_multircv; 1206 break; 1207 case MAC_STAT_BRDCSTRCV: 1208 *val = vnic->vn_stat_brdcstrcv; 1209 break; 1210 case MAC_STAT_MULTIXMT: 1211 *val = vnic->vn_stat_multixmt; 1212 break; 1213 case MAC_STAT_BRDCSTXMT: 1214 *val = vnic->vn_stat_brdcstxmt; 1215 break; 1216 case MAC_STAT_IERRORS: 1217 *val = vnic->vn_stat_ierrors; 1218 break; 1219 case MAC_STAT_OERRORS: 1220 *val = vnic->vn_stat_oerrors; 1221 break; 1222 case MAC_STAT_RBYTES: 1223 *val = vnic->vn_stat_rbytes; 1224 break; 1225 case MAC_STAT_IPACKETS: 1226 *val = vnic->vn_stat_ipackets; 1227 break; 1228 case MAC_STAT_OBYTES: 1229 *val = vnic->vn_stat_obytes; 1230 break; 1231 case MAC_STAT_OPACKETS: 1232 *val = vnic->vn_stat_opackets; 1233 break; 1234 default: 1235 rval = ENOTSUP; 1236 } 1237 1238 rw_exit(&vnic_lock); 1239 return (rval); 1240 } 1241 1242 /* 1243 * Return information about the specified capability. 1244 */ 1245 /* ARGSUSED */ 1246 static boolean_t 1247 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 1248 { 1249 vnic_t *vnic = arg; 1250 1251 switch (cap) { 1252 case MAC_CAPAB_POLL: 1253 return (B_TRUE); 1254 case MAC_CAPAB_HCKSUM: { 1255 uint32_t *hcksum_txflags = cap_data; 1256 1257 *hcksum_txflags = vnic->vn_hcksum_txflags & 1258 (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM | 1259 HCKSUM_INET_PARTIAL); 1260 break; 1261 } 1262 default: 1263 return (B_FALSE); 1264 } 1265 return (B_TRUE); 1266 } 1267 1268 static int 1269 vnic_m_start(void *arg) 1270 { 1271 vnic_t *vnic = arg; 1272 mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh; 1273 int rc; 1274 1275 rc = mac_start(lower_mh); 1276 if (rc != 0) 1277 return (rc); 1278 1279 vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx, vnic, vnic); 1280 return (0); 1281 } 1282 1283 static void 1284 vnic_m_stop(void *arg) 1285 { 1286 vnic_t *vnic = arg; 1287 mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh; 1288 1289 vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx_initial, 1290 vnic, vnic); 1291 mac_stop(lower_mh); 1292 } 1293 1294 /* ARGSUSED */ 1295 static int 1296 vnic_m_promisc(void *arg, boolean_t on) 1297 { 1298 vnic_t *vnic = arg; 1299 1300 return (vnic_promisc_set(vnic, on)); 1301 } 1302 1303 static int 1304 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 1305 { 1306 vnic_t *vnic = arg; 1307 int rc = 0; 1308 1309 if (add) 1310 rc = vnic_bcast_add(vnic, addrp, MAC_ADDRTYPE_MULTICAST); 1311 else 1312 vnic_bcast_delete(vnic, addrp); 1313 1314 return (rc); 1315 } 1316 1317 static int 1318 vnic_m_unicst(void *arg, const uint8_t *mac_addr) 1319 { 1320 vnic_t *vnic = arg; 1321 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1322 int rv; 1323 1324 rw_enter(&vnic_lock, RW_WRITER); 1325 rv = vnic_modify_mac_addr(vnic, vnic_mac->va_addr_len, 1326 (uchar_t *)mac_addr); 1327 rw_exit(&vnic_lock); 1328 1329 if (rv == 0) 1330 mac_unicst_update(vnic->vn_mh, mac_addr); 1331 return (0); 1332 } 1333 1334 int 1335 vnic_info(uint_t *nvnics, uint32_t vnic_id, char *dev_name, void *fn_arg, 1336 vnic_info_new_vnic_fn_t new_vnic_fn) 1337 { 1338 vnic_info_state_t state; 1339 int rc = 0; 1340 1341 rw_enter(&vnic_lock, RW_READER); 1342 1343 *nvnics = vnic_count; 1344 1345 bzero(&state, sizeof (state)); 1346 state.vs_vnic_id = vnic_id; 1347 bcopy(state.vs_dev_name, dev_name, MAXNAMELEN); 1348 state.vs_new_vnic_fn = new_vnic_fn; 1349 state.vs_fn_arg = fn_arg; 1350 1351 mod_hash_walk(vnic_hash, vnic_info_walker, &state); 1352 1353 if ((rc = state.vs_rc) == 0 && vnic_id != 0 && 1354 state.vs_vnic_found) 1355 rc = ENOENT; 1356 1357 rw_exit(&vnic_lock); 1358 return (rc); 1359 } 1360 1361 /* 1362 * Walker invoked when building a list of vnics that must be passed 1363 * up to user space. 1364 */ 1365 /*ARGSUSED*/ 1366 static uint_t 1367 vnic_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 1368 { 1369 vnic_t *vnic; 1370 vnic_info_state_t *state = arg; 1371 1372 if (state->vs_rc != 0) 1373 return (MH_WALK_TERMINATE); /* terminate walk */ 1374 1375 vnic = (vnic_t *)val; 1376 1377 if (state->vs_vnic_id != 0 && vnic->vn_id != state->vs_vnic_id) 1378 goto bail; 1379 1380 state->vs_vnic_found = B_TRUE; 1381 1382 state->vs_rc = state->vs_new_vnic_fn(state->vs_fn_arg, 1383 vnic->vn_id, vnic->vn_addr_type, vnic->vn_vnic_mac->va_addr_len, 1384 vnic->vn_addr, vnic->vn_vnic_mac->va_dev_name); 1385 bail: 1386 return ((state->vs_rc == 0) ? MH_WALK_CONTINUE : MH_WALK_TERMINATE); 1387 } 1388 1389 /* 1390 * vnic_notify_cb() and vnic_notify_walker() below are used to 1391 * process events received from an underlying NIC and, if needed, 1392 * forward these events to the VNICs defined on top of that NIC. 1393 */ 1394 1395 typedef struct vnic_notify_state { 1396 mac_notify_type_t vo_type; 1397 vnic_mac_t *vo_vnic_mac; 1398 } vnic_notify_state_t; 1399 1400 /* ARGSUSED */ 1401 static uint_t 1402 vnic_notify_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 1403 { 1404 vnic_t *vnic = (vnic_t *)val; 1405 vnic_notify_state_t *state = arg; 1406 1407 /* ignore VNICs that don't use the specified underlying MAC */ 1408 if (vnic->vn_vnic_mac != state->vo_vnic_mac) 1409 return (MH_WALK_CONTINUE); 1410 1411 switch (state->vo_type) { 1412 case MAC_NOTE_TX: 1413 mac_tx_update(vnic->vn_mh); 1414 break; 1415 case MAC_NOTE_LINK: 1416 /* 1417 * The VNIC link state must be up regardless of 1418 * the link state of the underlying NIC to maintain 1419 * connectivity between VNICs on the same host. 1420 */ 1421 mac_link_update(vnic->vn_mh, LINK_STATE_UP); 1422 break; 1423 case MAC_NOTE_UNICST: 1424 vnic_update_active_rx(vnic->vn_vnic_mac); 1425 break; 1426 case MAC_NOTE_VNIC: 1427 /* only for clients which share a NIC with a VNIC */ 1428 break; 1429 case MAC_NOTE_PROMISC: 1430 mutex_enter(&vnic_mac_lock); 1431 vnic->vn_vnic_mac->va_txinfo = mac_vnic_tx_get( 1432 vnic->vn_vnic_mac->va_mh); 1433 mutex_exit(&vnic_mac_lock); 1434 break; 1435 } 1436 1437 return (MH_WALK_CONTINUE); 1438 } 1439 1440 static void 1441 vnic_notify_cb(void *arg, mac_notify_type_t type) 1442 { 1443 vnic_mac_t *vnic = arg; 1444 vnic_notify_state_t state; 1445 1446 state.vo_type = type; 1447 state.vo_vnic_mac = vnic; 1448 1449 rw_enter(&vnic_lock, RW_READER); 1450 mod_hash_walk(vnic_hash, vnic_notify_walker, &state); 1451 rw_exit(&vnic_lock); 1452 } 1453 1454 static int 1455 vnic_modify_mac_addr(vnic_t *vnic, uint_t mac_len, uchar_t *mac_addr) 1456 { 1457 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1458 vnic_flow_t *vnic_flow = vnic->vn_flow_ent; 1459 1460 ASSERT(RW_WRITE_HELD(&vnic_lock)); 1461 1462 if (mac_len != vnic_mac->va_addr_len) 1463 return (EINVAL); 1464 1465 vnic_classifier_flow_update_addr(vnic_flow, mac_addr); 1466 return (0); 1467 } 1468 1469 static int 1470 vnic_promisc_set(vnic_t *vnic, boolean_t on) 1471 { 1472 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1473 int r = -1; 1474 1475 if (vnic->vn_promisc == on) 1476 return (0); 1477 1478 if (on) { 1479 r = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC); 1480 if (r != 0) 1481 return (r); 1482 1483 rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER); 1484 vnic->vn_promisc_next = vnic_mac->va_promisc; 1485 vnic_mac->va_promisc = vnic; 1486 vnic_mac->va_promisc_gen++; 1487 1488 vnic->vn_promisc = B_TRUE; 1489 rw_exit(&vnic_mac->va_promisc_lock); 1490 1491 return (0); 1492 } else { 1493 vnic_t *loop, *prev = NULL; 1494 1495 rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER); 1496 loop = vnic_mac->va_promisc; 1497 1498 while ((loop != NULL) && (loop != vnic)) { 1499 prev = loop; 1500 loop = loop->vn_promisc_next; 1501 } 1502 1503 if ((loop != NULL) && 1504 ((r = mac_promisc_set(vnic_mac->va_mh, B_FALSE, 1505 MAC_DEVPROMISC)) == 0)) { 1506 if (prev != NULL) 1507 prev->vn_promisc_next = loop->vn_promisc_next; 1508 else 1509 vnic_mac->va_promisc = loop->vn_promisc_next; 1510 vnic_mac->va_promisc_gen++; 1511 1512 vnic->vn_promisc = B_FALSE; 1513 } 1514 rw_exit(&vnic_mac->va_promisc_lock); 1515 1516 return (r); 1517 } 1518 } 1519 1520 void 1521 vnic_promisc_rx(vnic_mac_t *vnic_mac, vnic_t *sender, mblk_t *mp) 1522 { 1523 vnic_t *loop; 1524 vnic_flow_t *flow; 1525 const vnic_flow_fn_info_t *fn_info; 1526 mac_header_info_t hdr_info; 1527 boolean_t dst_must_match = B_TRUE; 1528 1529 ASSERT(mp->b_next == NULL); 1530 1531 rw_enter(&vnic_mac->va_promisc_lock, RW_READER); 1532 if (vnic_mac->va_promisc == NULL) 1533 goto done; 1534 1535 if (mac_header_info(vnic_mac->va_mh, mp, &hdr_info) != 0) 1536 goto done; 1537 1538 /* 1539 * If this is broadcast or multicast then the destination 1540 * address need not match for us to deliver it. 1541 */ 1542 if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) || 1543 (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST)) 1544 dst_must_match = B_FALSE; 1545 1546 for (loop = vnic_mac->va_promisc; 1547 loop != NULL; 1548 loop = loop->vn_promisc_next) { 1549 if (loop == sender) 1550 continue; 1551 1552 if (dst_must_match && 1553 (bcmp(hdr_info.mhi_daddr, loop->vn_addr, 1554 sizeof (loop->vn_addr)) != 0)) 1555 continue; 1556 1557 flow = loop->vn_flow_ent; 1558 ASSERT(flow != NULL); 1559 1560 if (!flow->vf_is_active) { 1561 mblk_t *copy; 1562 uint64_t gen; 1563 1564 if ((copy = vnic_copymsg_cksum(mp)) == NULL) 1565 break; 1566 if ((sender != NULL) && 1567 ((copy = vnic_fix_cksum(copy)) == NULL)) 1568 break; 1569 1570 VNIC_FLOW_REFHOLD(flow); 1571 gen = vnic_mac->va_promisc_gen; 1572 rw_exit(&vnic_mac->va_promisc_lock); 1573 1574 fn_info = vnic_classifier_get_fn_info(flow); 1575 (fn_info->ff_fn)(fn_info->ff_arg1, 1576 fn_info->ff_arg2, copy); 1577 1578 VNIC_FLOW_REFRELE(flow); 1579 rw_enter(&vnic_mac->va_promisc_lock, RW_READER); 1580 if (vnic_mac->va_promisc_gen != gen) 1581 break; 1582 } 1583 } 1584 done: 1585 rw_exit(&vnic_mac->va_promisc_lock); 1586 } 1587