1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/sysmacros.h> 30 #include <sys/conf.h> 31 #include <sys/cmn_err.h> 32 #include <sys/list.h> 33 #include <sys/ksynch.h> 34 #include <sys/kmem.h> 35 #include <sys/stream.h> 36 #include <sys/modctl.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/atomic.h> 40 #include <sys/stat.h> 41 #include <sys/modhash.h> 42 #include <sys/strsubr.h> 43 #include <sys/strsun.h> 44 #include <sys/dlpi.h> 45 #include <sys/mac.h> 46 #include <sys/mac_ether.h> 47 #include <sys/pattr.h> 48 #if 0 49 #include <sys/vlan.h> 50 #endif 51 #include <sys/vnic.h> 52 #include <sys/vnic_impl.h> 53 #include <sys/gld.h> 54 #include <inet/ip.h> 55 #include <inet/ip_impl.h> 56 57 static int vnic_m_start(void *); 58 static void vnic_m_stop(void *); 59 static int vnic_m_promisc(void *, boolean_t); 60 static int vnic_m_multicst(void *, boolean_t, const uint8_t *); 61 static int vnic_m_unicst(void *, const uint8_t *); 62 static int vnic_m_stat(void *, uint_t, uint64_t *); 63 static void vnic_m_resources(void *); 64 static mblk_t *vnic_m_tx(void *, mblk_t *); 65 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *); 66 static void vnic_mac_free(vnic_mac_t *); 67 static uint_t vnic_info_walker(mod_hash_key_t, mod_hash_val_t *, void *); 68 static void vnic_notify_cb(void *, mac_notify_type_t); 69 static int vnic_modify_mac_addr(vnic_t *, uint_t, uchar_t *); 70 static mblk_t *vnic_active_tx(void *, mblk_t *); 71 static int vnic_promisc_set(vnic_t *, boolean_t); 72 73 static kmem_cache_t *vnic_cache; 74 static kmem_cache_t *vnic_mac_cache; 75 static krwlock_t vnic_lock; 76 static kmutex_t vnic_mac_lock; 77 static uint_t vnic_count; 78 79 /* hash of VNICs (vnic_t's), keyed by VNIC id */ 80 static mod_hash_t *vnic_hash; 81 #define VNIC_HASHSZ 64 82 #define VNIC_HASH_KEY(vnic_id) ((mod_hash_key_t)(uintptr_t)vnic_id) 83 84 /* 85 * Hash of underlying open MACs (vnic_mac_t's), keyed by the string 86 * "<device name><instance number>/<port number>". 87 */ 88 static mod_hash_t *vnic_mac_hash; 89 #define VNIC_MAC_HASHSZ 64 90 91 #define VNIC_MAC_REFHOLD(va) { \ 92 ASSERT(MUTEX_HELD(&vnic_mac_lock)); \ 93 (va)->va_refs++; \ 94 ASSERT((va)->va_refs != 0); \ 95 } 96 97 #define VNIC_MAC_REFRELE(va) { \ 98 ASSERT(MUTEX_HELD(&vnic_mac_lock)); \ 99 ASSERT((va)->va_refs != 0); \ 100 if (--((va)->va_refs) == 0) \ 101 vnic_mac_free(va); \ 102 } 103 104 static uchar_t vnic_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 105 106 /* used by vnic_walker */ 107 typedef struct vnic_info_state { 108 uint32_t vs_vnic_id; 109 char vs_dev_name[MAXNAMELEN]; 110 boolean_t vs_vnic_found; 111 vnic_info_new_vnic_fn_t vs_new_vnic_fn; 112 void *vs_fn_arg; 113 int vs_rc; 114 } vnic_info_state_t; 115 116 #define VNIC_M_CALLBACK_FLAGS (MC_RESOURCES | MC_GETCAPAB) 117 118 static mac_callbacks_t vnic_m_callbacks = { 119 VNIC_M_CALLBACK_FLAGS, 120 vnic_m_stat, 121 vnic_m_start, 122 vnic_m_stop, 123 vnic_m_promisc, 124 vnic_m_multicst, 125 vnic_m_unicst, 126 vnic_m_tx, 127 vnic_m_resources, 128 NULL, /* m_ioctl */ 129 vnic_m_capab_get 130 }; 131 132 /* ARGSUSED */ 133 static int 134 vnic_mac_ctor(void *buf, void *arg, int kmflag) 135 { 136 vnic_mac_t *vnic_mac = buf; 137 138 bzero(vnic_mac, sizeof (vnic_mac_t)); 139 rw_init(&vnic_mac->va_bcast_grp_lock, NULL, RW_DRIVER, NULL); 140 rw_init(&vnic_mac->va_promisc_lock, NULL, RW_DRIVER, NULL); 141 142 return (0); 143 } 144 145 /* ARGSUSED */ 146 static void 147 vnic_mac_dtor(void *buf, void *arg) 148 { 149 vnic_mac_t *vnic_mac = buf; 150 151 rw_destroy(&vnic_mac->va_promisc_lock); 152 rw_destroy(&vnic_mac->va_bcast_grp_lock); 153 } 154 155 void 156 vnic_dev_init(void) 157 { 158 vnic_cache = kmem_cache_create("vnic_cache", 159 sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 160 161 vnic_mac_cache = kmem_cache_create("vnic_mac_cache", 162 sizeof (vnic_mac_t), 0, vnic_mac_ctor, vnic_mac_dtor, 163 NULL, NULL, NULL, 0); 164 165 vnic_hash = mod_hash_create_idhash("vnic_hash", 166 VNIC_HASHSZ, mod_hash_null_valdtor); 167 168 vnic_mac_hash = mod_hash_create_strhash("vnic_mac_hash", 169 VNIC_MAC_HASHSZ, mod_hash_null_valdtor); 170 171 rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL); 172 173 mutex_init(&vnic_mac_lock, NULL, MUTEX_DEFAULT, NULL); 174 175 vnic_count = 0; 176 } 177 178 void 179 vnic_dev_fini(void) 180 { 181 ASSERT(vnic_count == 0); 182 183 mutex_destroy(&vnic_mac_lock); 184 rw_destroy(&vnic_lock); 185 mod_hash_destroy_strhash(vnic_mac_hash); 186 mod_hash_destroy_idhash(vnic_hash); 187 kmem_cache_destroy(vnic_mac_cache); 188 kmem_cache_destroy(vnic_cache); 189 } 190 191 uint_t 192 vnic_dev_count(void) 193 { 194 return (vnic_count); 195 } 196 197 static int 198 vnic_mac_open(const char *dev_name, vnic_mac_t **vmp) 199 { 200 char *str_key; 201 int err; 202 vnic_mac_t *vnic_mac = NULL; 203 char driver[MAXNAMELEN]; 204 uint_t ddi_instance; 205 const mac_info_t *mip; 206 207 *vmp = NULL; 208 209 if (ddi_parse(dev_name, driver, &ddi_instance) != DDI_SUCCESS) 210 return (EINVAL); 211 212 mutex_enter(&vnic_mac_lock); 213 214 err = mod_hash_find(vnic_mac_hash, (mod_hash_key_t)dev_name, 215 (mod_hash_val_t *)&vnic_mac); 216 if (err == 0) { 217 /* this MAC is already opened, increment reference count */ 218 VNIC_MAC_REFHOLD(vnic_mac); 219 mutex_exit(&vnic_mac_lock); 220 *vmp = vnic_mac; 221 return (0); 222 } 223 224 vnic_mac = kmem_cache_alloc(vnic_mac_cache, KM_SLEEP); 225 226 if ((err = mac_open(dev_name, ddi_instance, &vnic_mac->va_mh)) != 0) { 227 vnic_mac->va_mh = NULL; 228 goto bail; 229 } 230 231 /* only ethernet support, for now */ 232 mip = mac_info(vnic_mac->va_mh); 233 if (mip->mi_media != DL_ETHER) { 234 err = ENOTSUP; 235 goto bail; 236 } 237 if (mip->mi_media != mip->mi_nativemedia) { 238 err = ENOTSUP; 239 goto bail; 240 } 241 242 (void) strcpy(vnic_mac->va_dev_name, dev_name); 243 244 /* add entry to hash table */ 245 str_key = kmem_alloc(strlen(dev_name) + 1, KM_SLEEP); 246 (void) strcpy(str_key, dev_name); 247 err = mod_hash_insert(vnic_mac_hash, (mod_hash_key_t)str_key, 248 (mod_hash_val_t)vnic_mac); 249 ASSERT(err == 0); 250 251 /* initialize the flow table associated with lower MAC */ 252 vnic_mac->va_addr_len = ETHERADDRL; 253 (void) vnic_classifier_flow_tab_init(vnic_mac, vnic_mac->va_addr_len, 254 KM_SLEEP); 255 256 vnic_mac->va_txinfo = mac_vnic_tx_get(vnic_mac->va_mh); 257 vnic_mac->va_notify_hdl = mac_notify_add(vnic_mac->va_mh, 258 vnic_notify_cb, vnic_mac); 259 260 VNIC_MAC_REFHOLD(vnic_mac); 261 *vmp = vnic_mac; 262 mutex_exit(&vnic_mac_lock); 263 return (0); 264 265 bail: 266 if (vnic_mac != NULL) { 267 if (vnic_mac->va_mh != NULL) 268 mac_close(vnic_mac->va_mh); 269 kmem_cache_free(vnic_mac_cache, vnic_mac); 270 } 271 mutex_exit(&vnic_mac_lock); 272 return (err); 273 } 274 275 /* 276 * Create a new flow for the active MAC client sharing the NIC 277 * with the VNICs. This allows the unicast packets for that NIC 278 * to be classified and passed up to the active MAC client. It 279 * also allows packets sent from a VNIC to the active link to 280 * be classified by the VNIC transmit function and delivered via 281 * the MAC module locally. Returns B_TRUE on success, B_FALSE on 282 * failure. 283 */ 284 static int 285 vnic_init_active_rx(vnic_mac_t *vnic_mac) 286 { 287 uchar_t nic_mac_addr[MAXMACADDRLEN]; 288 289 if (vnic_mac->va_active_flow != NULL) 290 return (B_TRUE); 291 292 mac_unicst_get(vnic_mac->va_mh, nic_mac_addr); 293 294 vnic_mac->va_active_flow = vnic_classifier_flow_create( 295 vnic_mac->va_addr_len, nic_mac_addr, NULL, B_TRUE, KM_SLEEP); 296 297 vnic_classifier_flow_add(vnic_mac, vnic_mac->va_active_flow, 298 (vnic_rx_fn_t)mac_active_rx, vnic_mac->va_mh, NULL); 299 return (B_TRUE); 300 } 301 302 static void 303 vnic_fini_active_rx(vnic_mac_t *vnic_mac) 304 { 305 if (vnic_mac->va_active_flow == NULL) 306 return; 307 308 vnic_classifier_flow_remove(vnic_mac, vnic_mac->va_active_flow); 309 vnic_classifier_flow_destroy(vnic_mac->va_active_flow); 310 vnic_mac->va_active_flow = NULL; 311 } 312 313 static void 314 vnic_update_active_rx(vnic_mac_t *vnic_mac) 315 { 316 if (vnic_mac->va_active_flow == NULL) 317 return; 318 319 vnic_fini_active_rx(vnic_mac); 320 (void) vnic_init_active_rx(vnic_mac); 321 } 322 323 /* 324 * Copy an mblk, preserving its hardware checksum flags. 325 */ 326 mblk_t * 327 vnic_copymsg_cksum(mblk_t *mp) 328 { 329 mblk_t *mp1; 330 uint32_t start, stuff, end, value, flags; 331 332 mp1 = copymsg(mp); 333 if (mp1 == NULL) 334 return (NULL); 335 336 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 337 (void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value, 338 flags, KM_NOSLEEP); 339 340 return (mp1); 341 } 342 343 /* 344 * Copy an mblk chain, presenting the hardware checksum flags of the 345 * individual mblks. 346 */ 347 mblk_t * 348 vnic_copymsgchain_cksum(mblk_t *mp) 349 { 350 mblk_t *nmp = NULL; 351 mblk_t **nmpp = &nmp; 352 353 for (; mp != NULL; mp = mp->b_next) { 354 if ((*nmpp = vnic_copymsg_cksum(mp)) == NULL) { 355 freemsgchain(nmp); 356 return (NULL); 357 } 358 359 nmpp = &((*nmpp)->b_next); 360 } 361 362 return (nmp); 363 } 364 365 366 /* 367 * Process the specified mblk chain for proper handling of hardware 368 * checksum offload. This routine is invoked for loopback VNIC traffic. 369 * The function handles a NULL mblk chain passed as argument. 370 */ 371 mblk_t * 372 vnic_fix_cksum(mblk_t *mp_chain) 373 { 374 mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1; 375 uint32_t flags, start, stuff, end, value; 376 377 for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) { 378 uint16_t len; 379 uint32_t offset; 380 struct ether_header *ehp; 381 uint16_t sap; 382 383 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, 384 &flags); 385 if (flags == 0) 386 continue; 387 388 /* 389 * Since the processing of checksum offload for loopback 390 * traffic requires modification of the packet contents, 391 * ensure sure that we are always modifying our own copy. 392 */ 393 if (DB_REF(mp) > 1) { 394 mp1 = copymsg(mp); 395 if (mp1 == NULL) 396 continue; 397 mp1->b_next = mp->b_next; 398 mp->b_next = NULL; 399 freemsg(mp); 400 if (prev != NULL) 401 prev->b_next = mp1; 402 else 403 new_chain = mp1; 404 mp = mp1; 405 } 406 407 /* 408 * Ethernet, and optionally VLAN header. 409 */ 410 /*LINTED*/ 411 ehp = (struct ether_header *)mp->b_rptr; 412 if (ntohs(ehp->ether_type) == VLAN_TPID) { 413 struct ether_vlan_header *evhp; 414 415 ASSERT(MBLKL(mp) >= 416 sizeof (struct ether_vlan_header)); 417 /*LINTED*/ 418 evhp = (struct ether_vlan_header *)mp->b_rptr; 419 sap = ntohs(evhp->ether_type); 420 offset = sizeof (struct ether_vlan_header); 421 } else { 422 sap = ntohs(ehp->ether_type); 423 offset = sizeof (struct ether_header); 424 } 425 426 if (MBLKL(mp) <= offset) { 427 offset -= MBLKL(mp); 428 if (mp->b_cont == NULL) { 429 /* corrupted packet, skip it */ 430 if (prev != NULL) 431 prev->b_next = mp->b_next; 432 else 433 new_chain = mp->b_next; 434 mp1 = mp->b_next; 435 mp->b_next = NULL; 436 freemsg(mp); 437 mp = mp1; 438 continue; 439 } 440 mp = mp->b_cont; 441 } 442 443 if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) { 444 ipha_t *ipha = NULL; 445 446 /* 447 * In order to compute the full and header 448 * checksums, we need to find and parse 449 * the IP and/or ULP headers. 450 */ 451 452 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 453 454 /* 455 * IP header. 456 */ 457 if (sap != ETHERTYPE_IP) 458 continue; 459 460 ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t)); 461 /*LINTED*/ 462 ipha = (ipha_t *)(mp->b_rptr + offset); 463 464 if (flags & HCK_FULLCKSUM) { 465 ipaddr_t src, dst; 466 uint32_t cksum; 467 uint16_t *up; 468 uint8_t proto; 469 470 /* 471 * Pointer to checksum field in ULP header. 472 */ 473 proto = ipha->ipha_protocol; 474 ASSERT(ipha->ipha_version_and_hdr_length == 475 IP_SIMPLE_HDR_VERSION); 476 if (proto == IPPROTO_TCP) { 477 /*LINTED*/ 478 up = IPH_TCPH_CHECKSUMP(ipha, 479 IP_SIMPLE_HDR_LENGTH); 480 } else { 481 ASSERT(proto == IPPROTO_UDP); 482 /*LINTED*/ 483 up = IPH_UDPH_CHECKSUMP(ipha, 484 IP_SIMPLE_HDR_LENGTH); 485 } 486 487 /* 488 * Pseudo-header checksum. 489 */ 490 src = ipha->ipha_src; 491 dst = ipha->ipha_dst; 492 len = ntohs(ipha->ipha_length) - 493 IP_SIMPLE_HDR_LENGTH; 494 495 cksum = (dst >> 16) + (dst & 0xFFFF) + 496 (src >> 16) + (src & 0xFFFF); 497 cksum += htons(len); 498 499 /* 500 * The checksum value stored in the packet needs 501 * to be correct. Compute it here. 502 */ 503 *up = 0; 504 cksum += (((proto) == IPPROTO_UDP) ? 505 IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP); 506 cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH + 507 offset, cksum); 508 *(up) = (uint16_t)(cksum ? cksum : ~cksum); 509 510 flags |= HCK_FULLCKSUM_OK; 511 value = 0xffff; 512 } 513 514 if (flags & HCK_IPV4_HDRCKSUM) { 515 ASSERT(ipha != NULL); 516 ipha->ipha_hdr_checksum = 517 (uint16_t)ip_csum_hdr(ipha); 518 } 519 } 520 521 if (flags & HCK_PARTIALCKSUM) { 522 uint16_t *up, partial, cksum; 523 uchar_t *ipp; /* ptr to beginning of IP header */ 524 525 if (mp->b_cont != NULL) { 526 mblk_t *mp1; 527 528 mp1 = msgpullup(mp, offset + end); 529 if (mp1 == NULL) 530 continue; 531 mp1->b_next = mp->b_next; 532 mp->b_next = NULL; 533 freemsg(mp); 534 if (prev != NULL) 535 prev->b_next = mp1; 536 else 537 new_chain = mp1; 538 mp = mp1; 539 } 540 541 ipp = mp->b_rptr + offset; 542 /*LINTED*/ 543 up = (uint16_t *)((uchar_t *)ipp + stuff); 544 partial = *up; 545 *up = 0; 546 547 cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start, 548 end - start, partial); 549 cksum = ~cksum; 550 *up = cksum ? cksum : ~cksum; 551 552 /* 553 * Since we already computed the whole checksum, 554 * indicate to the stack that it has already 555 * been verified by the hardware. 556 */ 557 flags &= ~HCK_PARTIALCKSUM; 558 flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK); 559 value = 0xffff; 560 } 561 562 (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, 563 value, flags, KM_NOSLEEP); 564 } 565 566 return (new_chain); 567 } 568 569 static void 570 vnic_mac_close(vnic_mac_t *vnic_mac) 571 { 572 mutex_enter(&vnic_mac_lock); 573 VNIC_MAC_REFRELE(vnic_mac); 574 mutex_exit(&vnic_mac_lock); 575 } 576 577 static void 578 vnic_mac_free(vnic_mac_t *vnic_mac) 579 { 580 mod_hash_val_t val; 581 582 ASSERT(MUTEX_HELD(&vnic_mac_lock)); 583 vnic_fini_active_rx(vnic_mac); 584 mac_notify_remove(vnic_mac->va_mh, vnic_mac->va_notify_hdl); 585 if (vnic_mac->va_mac_set) { 586 vnic_mac->va_mac_set = B_FALSE; 587 mac_vnic_clear(vnic_mac->va_mh); 588 } 589 vnic_classifier_flow_tab_fini(vnic_mac); 590 mac_close(vnic_mac->va_mh); 591 592 (void) mod_hash_remove(vnic_mac_hash, 593 (mod_hash_key_t)vnic_mac->va_dev_name, &val); 594 ASSERT(vnic_mac == (vnic_mac_t *)val); 595 596 kmem_cache_free(vnic_mac_cache, vnic_mac); 597 } 598 599 /* 600 * Initial VNIC receive routine. Invoked for packets that are steered 601 * to a VNIC but the VNIC has not been started yet. 602 */ 603 /* ARGSUSED */ 604 static void 605 vnic_rx_initial(void *arg1, void *arg2, mblk_t *mp_chain) 606 { 607 vnic_t *vnic = arg1; 608 mblk_t *mp; 609 610 /* update stats */ 611 for (mp = mp_chain; mp != NULL; mp = mp->b_next) 612 vnic->vn_stat_ierrors++; 613 freemsgchain(mp_chain); 614 } 615 616 /* 617 * VNIC receive routine invoked after the classifier for the VNIC 618 * has been initialized and the VNIC has been started. 619 */ 620 /* ARGSUSED */ 621 void 622 vnic_rx(void *arg1, void *arg2, mblk_t *mp_chain) 623 { 624 vnic_t *vnic = arg1; 625 mblk_t *mp; 626 627 /* update stats */ 628 for (mp = mp_chain; mp != NULL; mp = mp->b_next) { 629 vnic->vn_stat_ipackets++; 630 vnic->vn_stat_rbytes += msgdsize(mp); 631 } 632 633 /* pass packet up */ 634 mac_rx(vnic->vn_mh, NULL, mp_chain); 635 } 636 637 /* 638 * Routine to create a MAC-based VNIC. Adds the passed MAC address 639 * to an unused slot in the NIC if one is available. Otherwise it 640 * sets the NIC in promiscuous mode and assigns the MAC address to 641 * a Rx ring if available or a soft ring. 642 */ 643 static int 644 vnic_add_unicstaddr(vnic_t *vnic, mac_multi_addr_t *maddr) 645 { 646 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 647 int err; 648 649 if (mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_MULTIADDRESS, 650 &(vnic->vn_mma_capab))) { 651 if (vnic->vn_maddr_naddrfree == 0) { 652 /* 653 * No free address slots available. 654 * Enable promiscuous mode. 655 */ 656 goto set_promisc; 657 } 658 659 err = vnic->vn_maddr_add(vnic->vn_maddr_handle, maddr); 660 if (err != 0) { 661 if (err == ENOSPC) { 662 /* 663 * There was a race to add addresses 664 * with other multiple address consumers, 665 * and we lost out. Use promisc mode. 666 */ 667 goto set_promisc; 668 } 669 670 return (err); 671 } 672 673 vnic->vn_slot_id = maddr->mma_slot; 674 vnic->vn_multi_mac = B_TRUE; 675 } else { 676 /* 677 * Either multiple MAC address support is not 678 * available or all available addresses have 679 * been used up. 680 */ 681 set_promisc: 682 err = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC); 683 if (err != 0) { 684 return (err); 685 } 686 687 vnic->vn_promisc_mac = B_TRUE; 688 } 689 return (err); 690 } 691 692 /* 693 * VNIC is getting deleted. Remove the MAC address from the slot. 694 * If promiscuous mode was being used, then unset the promiscuous mode. 695 */ 696 static int 697 vnic_remove_unicstaddr(vnic_t *vnic) 698 { 699 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 700 int err; 701 702 if (vnic->vn_multi_mac) { 703 ASSERT(vnic->vn_promisc_mac == B_FALSE); 704 err = vnic->vn_maddr_remove(vnic->vn_maddr_handle, 705 vnic->vn_slot_id); 706 vnic->vn_multi_mac = B_FALSE; 707 } 708 709 if (vnic->vn_promisc_mac) { 710 ASSERT(vnic->vn_multi_mac == B_FALSE); 711 err = mac_promisc_set(vnic_mac->va_mh, B_FALSE, MAC_DEVPROMISC); 712 vnic->vn_promisc_mac = B_FALSE; 713 } 714 715 return (err); 716 } 717 718 /* 719 * Create a new VNIC upon request from administrator. 720 * Returns 0 on success, an errno on failure. 721 */ 722 int 723 vnic_dev_create(uint_t vnic_id, char *dev_name, int mac_len, uchar_t *mac_addr) 724 { 725 vnic_t *vnic = NULL; 726 mac_register_t *mac; 727 int err; 728 vnic_mac_t *vnic_mac; 729 const mac_info_t *lower_mac_info; 730 mac_multi_addr_t maddr; 731 mac_txinfo_t tx_info; 732 733 if (mac_len != ETHERADDRL) { 734 /* currently only ethernet NICs are supported */ 735 return (EINVAL); 736 } 737 738 rw_enter(&vnic_lock, RW_WRITER); 739 740 /* does a VNIC with the same id already exist? */ 741 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 742 (mod_hash_val_t *)&vnic); 743 if (err == 0) { 744 rw_exit(&vnic_lock); 745 return (EEXIST); 746 } 747 748 vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP); 749 if (vnic == NULL) { 750 rw_exit(&vnic_lock); 751 return (ENOMEM); 752 } 753 754 /* open underlying MAC */ 755 err = vnic_mac_open(dev_name, &vnic_mac); 756 if (err != 0) { 757 kmem_cache_free(vnic_cache, vnic); 758 rw_exit(&vnic_lock); 759 return (err); 760 } 761 762 bzero(vnic, sizeof (*vnic)); 763 vnic->vn_id = vnic_id; 764 vnic->vn_vnic_mac = vnic_mac; 765 766 vnic->vn_started = B_FALSE; 767 vnic->vn_promisc = B_FALSE; 768 vnic->vn_multi_mac = B_FALSE; 769 vnic->vn_bcast_grp = B_FALSE; 770 771 /* set the VNIC MAC address */ 772 maddr.mma_addrlen = mac_len; 773 maddr.mma_slot = 0; 774 maddr.mma_flags = 0; 775 bcopy(mac_addr, maddr.mma_addr, mac_len); 776 if ((err = vnic_add_unicstaddr(vnic, &maddr)) != 0) 777 goto bail; 778 bcopy(mac_addr, vnic->vn_addr, mac_len); 779 780 /* set the initial VNIC capabilities */ 781 if (!mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_HCKSUM, 782 &vnic->vn_hcksum_txflags)) 783 vnic->vn_hcksum_txflags = 0; 784 785 /* register with the MAC module */ 786 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 787 goto bail; 788 789 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 790 mac->m_driver = vnic; 791 mac->m_dip = vnic_get_dip(); 792 mac->m_instance = vnic_id; 793 mac->m_src_addr = vnic->vn_addr; 794 mac->m_callbacks = &vnic_m_callbacks; 795 796 lower_mac_info = mac_info(vnic_mac->va_mh); 797 mac->m_min_sdu = lower_mac_info->mi_sdu_min; 798 mac->m_max_sdu = lower_mac_info->mi_sdu_max; 799 800 err = mac_register(mac, &vnic->vn_mh); 801 mac_free(mac); 802 if (err != 0) 803 goto bail; 804 805 /* add new VNIC to hash table */ 806 err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id), 807 (mod_hash_val_t)vnic); 808 ASSERT(err == 0); 809 vnic_count++; 810 811 rw_exit(&vnic_lock); 812 813 /* Create a flow, initialized with the MAC address of the VNIC */ 814 if ((vnic->vn_flow_ent = vnic_classifier_flow_create(mac_len, mac_addr, 815 NULL, B_FALSE, KM_SLEEP)) == NULL) { 816 (void) vnic_dev_delete(vnic_id); 817 vnic = NULL; 818 err = ENOMEM; 819 goto bail_unlocked; 820 } 821 822 vnic_classifier_flow_add(vnic_mac, vnic->vn_flow_ent, vnic_rx_initial, 823 vnic, vnic); 824 825 /* setup VNIC to receive broadcast packets */ 826 err = vnic_bcast_add(vnic, vnic_brdcst_mac, MAC_ADDRTYPE_BROADCAST); 827 if (err != 0) { 828 (void) vnic_dev_delete(vnic_id); 829 vnic = NULL; 830 goto bail_unlocked; 831 } 832 vnic->vn_bcast_grp = B_TRUE; 833 834 mutex_enter(&vnic_mac_lock); 835 if (!vnic_mac->va_mac_set) { 836 /* 837 * We want to MAC layer to call the VNIC tx outbound 838 * routine, so that local broadcast packets sent by 839 * the active interface sharing the underlying NIC (if 840 * any), can be broadcast to every VNIC. 841 */ 842 tx_info.mt_fn = vnic_active_tx; 843 tx_info.mt_arg = vnic_mac; 844 if (!mac_vnic_set(vnic_mac->va_mh, &tx_info, 845 vnic_m_capab_get, vnic)) { 846 mutex_exit(&vnic_mac_lock); 847 (void) vnic_dev_delete(vnic_id); 848 vnic = NULL; 849 err = EBUSY; 850 goto bail_unlocked; 851 } 852 vnic_mac->va_mac_set = B_TRUE; 853 } 854 mutex_exit(&vnic_mac_lock); 855 856 /* allow passing packets to NIC's active MAC client */ 857 if (!vnic_init_active_rx(vnic_mac)) { 858 (void) vnic_dev_delete(vnic_id); 859 vnic = NULL; 860 err = ENOMEM; 861 goto bail_unlocked; 862 } 863 864 return (0); 865 866 bail: 867 (void) vnic_remove_unicstaddr(vnic); 868 vnic_mac_close(vnic_mac); 869 rw_exit(&vnic_lock); 870 871 bail_unlocked: 872 if (vnic != NULL) { 873 kmem_cache_free(vnic_cache, vnic); 874 } 875 876 return (err); 877 } 878 879 /* 880 * Modify the properties of an existing VNIC. 881 */ 882 /* ARGSUSED */ 883 int 884 vnic_dev_modify(uint_t vnic_id, uint_t modify_mask, 885 vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr) 886 { 887 vnic_t *vnic = NULL; 888 int rv = 0; 889 boolean_t notify_mac_addr = B_FALSE; 890 891 rw_enter(&vnic_lock, RW_WRITER); 892 893 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 894 (mod_hash_val_t *)&vnic) != 0) { 895 rw_exit(&vnic_lock); 896 return (ENOENT); 897 } 898 899 if (modify_mask & VNIC_IOC_MODIFY_ADDR) { 900 rv = vnic_modify_mac_addr(vnic, mac_len, mac_addr); 901 if (rv == 0) 902 notify_mac_addr = B_TRUE; 903 } 904 905 rw_exit(&vnic_lock); 906 907 if (notify_mac_addr) 908 mac_unicst_update(vnic->vn_mh, mac_addr); 909 910 return (rv); 911 } 912 913 int 914 vnic_dev_delete(uint_t vnic_id) 915 { 916 vnic_t *vnic = NULL; 917 mod_hash_val_t val; 918 vnic_flow_t *flent; 919 int rc; 920 921 rw_enter(&vnic_lock, RW_WRITER); 922 923 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 924 (mod_hash_val_t *)&vnic) != 0) { 925 rw_exit(&vnic_lock); 926 return (ENOENT); 927 } 928 929 /* 930 * We cannot unregister the MAC yet. Unregistering would 931 * free up mac_impl_t which should not happen at this time. 932 * Packets could be entering vnic_rx() through the 933 * flow entry and so mac_impl_t cannot be NULL. So disable 934 * mac_impl_t by calling mac_disable(). This will prevent any 935 * new claims on mac_impl_t. 936 */ 937 if (mac_disable(vnic->vn_mh) != 0) { 938 rw_exit(&vnic_lock); 939 return (EBUSY); 940 } 941 942 (void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val); 943 ASSERT(vnic == (vnic_t *)val); 944 945 if (vnic->vn_bcast_grp) 946 (void) vnic_bcast_delete(vnic, vnic_brdcst_mac); 947 948 flent = vnic->vn_flow_ent; 949 if (flent != NULL) { 950 /* 951 * vnic_classifier_flow_destroy() ensures that the 952 * flow is no longer used. 953 */ 954 vnic_classifier_flow_remove(vnic->vn_vnic_mac, flent); 955 vnic_classifier_flow_destroy(flent); 956 } 957 958 rc = mac_unregister(vnic->vn_mh); 959 ASSERT(rc == 0); 960 (void) vnic_remove_unicstaddr(vnic); 961 vnic_mac_close(vnic->vn_vnic_mac); 962 kmem_cache_free(vnic_cache, vnic); 963 vnic_count--; 964 rw_exit(&vnic_lock); 965 return (0); 966 } 967 968 /* 969 * For the specified packet chain, return a sub-chain to be sent 970 * and the transmit function to be used to send the packet. Also 971 * return a pointer to the sub-chain of packets that should 972 * be re-classified. If the function returns NULL, the packet 973 * should be sent using the underlying NIC. 974 */ 975 static vnic_flow_t * 976 vnic_classify(vnic_mac_t *vnic_mac, mblk_t *mp, mblk_t **mp_chain_rest) 977 { 978 vnic_flow_t *flow_ent; 979 980 /* one packet at a time */ 981 *mp_chain_rest = mp->b_next; 982 mp->b_next = NULL; 983 984 /* do classification on the packet */ 985 flow_ent = vnic_classifier_get_flow(vnic_mac, mp); 986 987 return (flow_ent); 988 } 989 990 /* 991 * Send a packet chain to a local VNIC or an active MAC client. 992 */ 993 static void 994 vnic_local_tx(vnic_mac_t *vnic_mac, vnic_flow_t *flow_ent, mblk_t *mp_chain) 995 { 996 mblk_t *mp1; 997 const vnic_flow_fn_info_t *fn_info; 998 vnic_t *vnic; 999 1000 if (!vnic_classifier_is_active(flow_ent) && 1001 mac_promisc_get(vnic_mac->va_mh, MAC_PROMISC)) { 1002 /* 1003 * If the MAC is in promiscous mode, 1004 * send a copy of the active client. 1005 */ 1006 if ((mp1 = vnic_copymsgchain_cksum(mp_chain)) == NULL) 1007 goto sendit; 1008 if ((mp1 = vnic_fix_cksum(mp1)) == NULL) 1009 goto sendit; 1010 mac_active_rx(vnic_mac->va_mh, NULL, mp1); 1011 } 1012 sendit: 1013 fn_info = vnic_classifier_get_fn_info(flow_ent); 1014 /* 1015 * If the vnic to which we would deliver this packet is in 1016 * promiscuous mode then it already received the packet via 1017 * vnic_promisc_rx(). 1018 * 1019 * XXX assumes that ff_arg2 is a vnic_t pointer if it is 1020 * non-NULL (currently always true). 1021 */ 1022 vnic = (vnic_t *)fn_info->ff_arg2; 1023 if ((vnic != NULL) && vnic->vn_promisc) 1024 freemsg(mp_chain); 1025 else if ((mp1 = vnic_fix_cksum(mp_chain)) != NULL) 1026 (fn_info->ff_fn)(fn_info->ff_arg1, fn_info->ff_arg2, mp1); 1027 } 1028 1029 /* 1030 * This function is invoked when a MAC client needs to send a packet 1031 * to a NIC which is shared by VNICs. It is passed to the MAC layer 1032 * by a call to mac_vnic_set() when the NIC is opened, and is returned 1033 * to MAC clients by mac_tx_get() when VNICs are present. 1034 */ 1035 mblk_t * 1036 vnic_active_tx(void *arg, mblk_t *mp_chain) 1037 { 1038 vnic_mac_t *vnic_mac = arg; 1039 mblk_t *mp, *extra_mp = NULL; 1040 vnic_flow_t *flow_ent; 1041 void *flow_cookie; 1042 const mac_txinfo_t *mtp = vnic_mac->va_txinfo; 1043 1044 for (mp = mp_chain; mp != NULL; mp = extra_mp) { 1045 mblk_t *next; 1046 1047 next = mp->b_next; 1048 mp->b_next = NULL; 1049 1050 vnic_promisc_rx(vnic_mac, (vnic_t *)-1, mp); 1051 1052 flow_ent = vnic_classify(vnic_mac, mp, &extra_mp); 1053 ASSERT(extra_mp == NULL); 1054 extra_mp = next; 1055 1056 if (flow_ent != NULL) { 1057 flow_cookie = vnic_classifier_get_client_cookie( 1058 flow_ent); 1059 if (flow_cookie != NULL) { 1060 /* 1061 * Send a copy to every VNIC defined on the 1062 * interface, as well as the underlying MAC. 1063 */ 1064 vnic_bcast_send(flow_cookie, (vnic_t *)-1, mp); 1065 } else { 1066 /* 1067 * loopback the packet to a local VNIC or 1068 * an active MAC client. 1069 */ 1070 vnic_local_tx(vnic_mac, flow_ent, mp); 1071 } 1072 VNIC_FLOW_REFRELE(flow_ent); 1073 mp_chain = NULL; 1074 } else { 1075 /* 1076 * Non-VNIC destination, send via the underlying 1077 * NIC. In order to avoid a recursive call 1078 * to this function, we ensured that mtp points 1079 * to the unerlying NIC transmit function 1080 * by inilizating through mac_vnic_tx_get(). 1081 */ 1082 mp_chain = mtp->mt_fn(mtp->mt_arg, mp); 1083 if (mp_chain != NULL) 1084 break; 1085 } 1086 } 1087 1088 if ((mp_chain != NULL) && (extra_mp != NULL)) { 1089 ASSERT(mp_chain->b_next == NULL); 1090 mp_chain->b_next = extra_mp; 1091 } 1092 return (mp_chain); 1093 } 1094 1095 /* 1096 * VNIC transmit function. 1097 */ 1098 mblk_t * 1099 vnic_m_tx(void *arg, mblk_t *mp_chain) 1100 { 1101 vnic_t *vnic = arg; 1102 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1103 mblk_t *mp, *extra_mp = NULL; 1104 vnic_flow_t *flow_ent; 1105 void *flow_cookie; 1106 1107 /* 1108 * Update stats. 1109 */ 1110 for (mp = mp_chain; mp != NULL; mp = mp->b_next) { 1111 vnic->vn_stat_opackets++; 1112 vnic->vn_stat_obytes += msgdsize(mp); 1113 } 1114 1115 for (mp = mp_chain; mp != NULL; mp = extra_mp) { 1116 mblk_t *next; 1117 1118 next = mp->b_next; 1119 mp->b_next = NULL; 1120 1121 vnic_promisc_rx(vnic->vn_vnic_mac, vnic, mp); 1122 1123 flow_ent = vnic_classify(vnic->vn_vnic_mac, mp, &extra_mp); 1124 ASSERT(extra_mp == NULL); 1125 extra_mp = next; 1126 1127 if (flow_ent != NULL) { 1128 flow_cookie = vnic_classifier_get_client_cookie( 1129 flow_ent); 1130 if (flow_cookie != NULL) { 1131 /* 1132 * The vnic_bcast_send function expects 1133 * to receive the sender VNIC as value 1134 * for arg2. 1135 */ 1136 vnic_bcast_send(flow_cookie, vnic, mp); 1137 } else { 1138 /* 1139 * loopback the packet to a local VNIC or 1140 * an active MAC client. 1141 */ 1142 vnic_local_tx(vnic_mac, flow_ent, mp); 1143 } 1144 VNIC_FLOW_REFRELE(flow_ent); 1145 mp_chain = NULL; 1146 } else { 1147 /* 1148 * Non-local destination, send via the underlying 1149 * NIC. 1150 */ 1151 const mac_txinfo_t *mtp = vnic->vn_txinfo; 1152 mp_chain = mtp->mt_fn(mtp->mt_arg, mp); 1153 if (mp_chain != NULL) 1154 break; 1155 } 1156 } 1157 1158 /* update stats to account for unsent packets */ 1159 for (mp = mp_chain; mp != NULL; mp = mp->b_next) { 1160 vnic->vn_stat_opackets--; 1161 vnic->vn_stat_obytes -= msgdsize(mp); 1162 vnic->vn_stat_oerrors++; 1163 /* 1164 * link back in the last portion not counted due to bandwidth 1165 * control. 1166 */ 1167 if (mp->b_next == NULL) { 1168 mp->b_next = extra_mp; 1169 break; 1170 } 1171 } 1172 1173 return (mp_chain); 1174 } 1175 1176 /* ARGSUSED */ 1177 static void 1178 vnic_m_resources(void *arg) 1179 { 1180 /* no resources to advertise */ 1181 } 1182 1183 static int 1184 vnic_m_stat(void *arg, uint_t stat, uint64_t *val) 1185 { 1186 vnic_t *vnic = arg; 1187 int rval = 0; 1188 1189 rw_enter(&vnic_lock, RW_READER); 1190 1191 switch (stat) { 1192 case ETHER_STAT_LINK_DUPLEX: 1193 *val = mac_stat_get(vnic->vn_vnic_mac->va_mh, 1194 ETHER_STAT_LINK_DUPLEX); 1195 break; 1196 case MAC_STAT_IFSPEED: 1197 *val = mac_stat_get(vnic->vn_vnic_mac->va_mh, 1198 MAC_STAT_IFSPEED); 1199 break; 1200 case MAC_STAT_MULTIRCV: 1201 *val = vnic->vn_stat_multircv; 1202 break; 1203 case MAC_STAT_BRDCSTRCV: 1204 *val = vnic->vn_stat_brdcstrcv; 1205 break; 1206 case MAC_STAT_MULTIXMT: 1207 *val = vnic->vn_stat_multixmt; 1208 break; 1209 case MAC_STAT_BRDCSTXMT: 1210 *val = vnic->vn_stat_brdcstxmt; 1211 break; 1212 case MAC_STAT_IERRORS: 1213 *val = vnic->vn_stat_ierrors; 1214 break; 1215 case MAC_STAT_OERRORS: 1216 *val = vnic->vn_stat_oerrors; 1217 break; 1218 case MAC_STAT_RBYTES: 1219 *val = vnic->vn_stat_rbytes; 1220 break; 1221 case MAC_STAT_IPACKETS: 1222 *val = vnic->vn_stat_ipackets; 1223 break; 1224 case MAC_STAT_OBYTES: 1225 *val = vnic->vn_stat_obytes; 1226 break; 1227 case MAC_STAT_OPACKETS: 1228 *val = vnic->vn_stat_opackets; 1229 break; 1230 default: 1231 rval = ENOTSUP; 1232 } 1233 1234 rw_exit(&vnic_lock); 1235 return (rval); 1236 } 1237 1238 /* 1239 * Return information about the specified capability. 1240 */ 1241 /* ARGSUSED */ 1242 static boolean_t 1243 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 1244 { 1245 vnic_t *vnic = arg; 1246 1247 switch (cap) { 1248 case MAC_CAPAB_POLL: 1249 return (B_TRUE); 1250 case MAC_CAPAB_HCKSUM: { 1251 uint32_t *hcksum_txflags = cap_data; 1252 1253 *hcksum_txflags = vnic->vn_hcksum_txflags & 1254 (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM | 1255 HCKSUM_INET_PARTIAL); 1256 break; 1257 } 1258 default: 1259 return (B_FALSE); 1260 } 1261 return (B_TRUE); 1262 } 1263 1264 static int 1265 vnic_m_start(void *arg) 1266 { 1267 vnic_t *vnic = arg; 1268 mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh; 1269 int rc; 1270 1271 rc = mac_start(lower_mh); 1272 if (rc != 0) 1273 return (rc); 1274 1275 vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx, vnic, vnic); 1276 return (0); 1277 } 1278 1279 static void 1280 vnic_m_stop(void *arg) 1281 { 1282 vnic_t *vnic = arg; 1283 mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh; 1284 1285 vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx_initial, 1286 vnic, vnic); 1287 mac_stop(lower_mh); 1288 } 1289 1290 /* ARGSUSED */ 1291 static int 1292 vnic_m_promisc(void *arg, boolean_t on) 1293 { 1294 vnic_t *vnic = arg; 1295 1296 return (vnic_promisc_set(vnic, on)); 1297 } 1298 1299 static int 1300 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 1301 { 1302 vnic_t *vnic = arg; 1303 int rc = 0; 1304 1305 if (add) 1306 rc = vnic_bcast_add(vnic, addrp, MAC_ADDRTYPE_MULTICAST); 1307 else 1308 vnic_bcast_delete(vnic, addrp); 1309 1310 return (rc); 1311 } 1312 1313 static int 1314 vnic_m_unicst(void *arg, const uint8_t *mac_addr) 1315 { 1316 vnic_t *vnic = arg; 1317 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1318 int rv; 1319 1320 rw_enter(&vnic_lock, RW_WRITER); 1321 rv = vnic_modify_mac_addr(vnic, vnic_mac->va_addr_len, 1322 (uchar_t *)mac_addr); 1323 rw_exit(&vnic_lock); 1324 1325 if (rv == 0) 1326 mac_unicst_update(vnic->vn_mh, mac_addr); 1327 return (0); 1328 } 1329 1330 int 1331 vnic_info(uint_t *nvnics, uint32_t vnic_id, char *dev_name, void *fn_arg, 1332 vnic_info_new_vnic_fn_t new_vnic_fn) 1333 { 1334 vnic_info_state_t state; 1335 int rc = 0; 1336 1337 rw_enter(&vnic_lock, RW_READER); 1338 1339 *nvnics = vnic_count; 1340 1341 bzero(&state, sizeof (state)); 1342 state.vs_vnic_id = vnic_id; 1343 bcopy(state.vs_dev_name, dev_name, MAXNAMELEN); 1344 state.vs_new_vnic_fn = new_vnic_fn; 1345 state.vs_fn_arg = fn_arg; 1346 1347 mod_hash_walk(vnic_hash, vnic_info_walker, &state); 1348 1349 if ((rc = state.vs_rc) == 0 && vnic_id != 0 && 1350 state.vs_vnic_found) 1351 rc = ENOENT; 1352 1353 rw_exit(&vnic_lock); 1354 return (rc); 1355 } 1356 1357 /* 1358 * Walker invoked when building a list of vnics that must be passed 1359 * up to user space. 1360 */ 1361 /*ARGSUSED*/ 1362 static uint_t 1363 vnic_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 1364 { 1365 vnic_t *vnic; 1366 vnic_info_state_t *state = arg; 1367 1368 if (state->vs_rc != 0) 1369 return (MH_WALK_TERMINATE); /* terminate walk */ 1370 1371 vnic = (vnic_t *)val; 1372 1373 if (state->vs_vnic_id != 0 && vnic->vn_id != state->vs_vnic_id) 1374 goto bail; 1375 1376 state->vs_vnic_found = B_TRUE; 1377 1378 state->vs_rc = state->vs_new_vnic_fn(state->vs_fn_arg, 1379 vnic->vn_id, vnic->vn_addr_type, vnic->vn_vnic_mac->va_addr_len, 1380 vnic->vn_addr, vnic->vn_vnic_mac->va_dev_name); 1381 bail: 1382 return ((state->vs_rc == 0) ? MH_WALK_CONTINUE : MH_WALK_TERMINATE); 1383 } 1384 1385 /* 1386 * vnic_notify_cb() and vnic_notify_walker() below are used to 1387 * process events received from an underlying NIC and, if needed, 1388 * forward these events to the VNICs defined on top of that NIC. 1389 */ 1390 1391 typedef struct vnic_notify_state { 1392 mac_notify_type_t vo_type; 1393 vnic_mac_t *vo_vnic_mac; 1394 } vnic_notify_state_t; 1395 1396 /* ARGSUSED */ 1397 static uint_t 1398 vnic_notify_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 1399 { 1400 vnic_t *vnic = (vnic_t *)val; 1401 vnic_notify_state_t *state = arg; 1402 1403 /* ignore VNICs that don't use the specified underlying MAC */ 1404 if (vnic->vn_vnic_mac != state->vo_vnic_mac) 1405 return (MH_WALK_CONTINUE); 1406 1407 switch (state->vo_type) { 1408 case MAC_NOTE_TX: 1409 mac_tx_update(vnic->vn_mh); 1410 break; 1411 case MAC_NOTE_LINK: 1412 /* 1413 * The VNIC link state must be up regardless of 1414 * the link state of the underlying NIC to maintain 1415 * connectivity between VNICs on the same host. 1416 */ 1417 mac_link_update(vnic->vn_mh, LINK_STATE_UP); 1418 break; 1419 case MAC_NOTE_UNICST: 1420 vnic_update_active_rx(vnic->vn_vnic_mac); 1421 break; 1422 case MAC_NOTE_VNIC: 1423 /* only for clients which share a NIC with a VNIC */ 1424 break; 1425 case MAC_NOTE_PROMISC: 1426 mutex_enter(&vnic_mac_lock); 1427 vnic->vn_vnic_mac->va_txinfo = mac_vnic_tx_get( 1428 vnic->vn_vnic_mac->va_mh); 1429 mutex_exit(&vnic_mac_lock); 1430 break; 1431 } 1432 1433 return (MH_WALK_CONTINUE); 1434 } 1435 1436 static void 1437 vnic_notify_cb(void *arg, mac_notify_type_t type) 1438 { 1439 vnic_mac_t *vnic = arg; 1440 vnic_notify_state_t state; 1441 1442 state.vo_type = type; 1443 state.vo_vnic_mac = vnic; 1444 1445 rw_enter(&vnic_lock, RW_READER); 1446 mod_hash_walk(vnic_hash, vnic_notify_walker, &state); 1447 rw_exit(&vnic_lock); 1448 } 1449 1450 static int 1451 vnic_modify_mac_addr(vnic_t *vnic, uint_t mac_len, uchar_t *mac_addr) 1452 { 1453 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1454 vnic_flow_t *vnic_flow = vnic->vn_flow_ent; 1455 1456 ASSERT(RW_WRITE_HELD(&vnic_lock)); 1457 1458 if (mac_len != vnic_mac->va_addr_len) 1459 return (EINVAL); 1460 1461 vnic_classifier_flow_update_addr(vnic_flow, mac_addr); 1462 return (0); 1463 } 1464 1465 static int 1466 vnic_promisc_set(vnic_t *vnic, boolean_t on) 1467 { 1468 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1469 int r = -1; 1470 1471 if (vnic->vn_promisc == on) 1472 return (0); 1473 1474 if (on) { 1475 r = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC); 1476 if (r != 0) 1477 return (r); 1478 1479 rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER); 1480 vnic->vn_promisc_next = vnic_mac->va_promisc; 1481 vnic_mac->va_promisc = vnic; 1482 vnic_mac->va_promisc_gen++; 1483 1484 vnic->vn_promisc = B_TRUE; 1485 rw_exit(&vnic_mac->va_promisc_lock); 1486 1487 return (0); 1488 } else { 1489 vnic_t *loop, *prev = NULL; 1490 1491 rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER); 1492 loop = vnic_mac->va_promisc; 1493 1494 while ((loop != NULL) && (loop != vnic)) { 1495 prev = loop; 1496 loop = loop->vn_promisc_next; 1497 } 1498 1499 if ((loop != NULL) && 1500 ((r = mac_promisc_set(vnic_mac->va_mh, B_FALSE, 1501 MAC_DEVPROMISC)) == 0)) { 1502 if (prev != NULL) 1503 prev->vn_promisc_next = loop->vn_promisc_next; 1504 else 1505 vnic_mac->va_promisc = loop->vn_promisc_next; 1506 vnic_mac->va_promisc_gen++; 1507 1508 vnic->vn_promisc = B_FALSE; 1509 } 1510 rw_exit(&vnic_mac->va_promisc_lock); 1511 1512 return (r); 1513 } 1514 } 1515 1516 void 1517 vnic_promisc_rx(vnic_mac_t *vnic_mac, vnic_t *sender, mblk_t *mp) 1518 { 1519 vnic_t *loop; 1520 vnic_flow_t *flow; 1521 const vnic_flow_fn_info_t *fn_info; 1522 mac_header_info_t hdr_info; 1523 boolean_t dst_must_match = B_TRUE; 1524 1525 ASSERT(mp->b_next == NULL); 1526 1527 rw_enter(&vnic_mac->va_promisc_lock, RW_READER); 1528 if (vnic_mac->va_promisc == NULL) 1529 goto done; 1530 1531 if (mac_header_info(vnic_mac->va_mh, mp, &hdr_info) != 0) 1532 goto done; 1533 1534 /* 1535 * If this is broadcast or multicast then the destination 1536 * address need not match for us to deliver it. 1537 */ 1538 if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) || 1539 (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST)) 1540 dst_must_match = B_FALSE; 1541 1542 for (loop = vnic_mac->va_promisc; 1543 loop != NULL; 1544 loop = loop->vn_promisc_next) { 1545 mblk_t *copy; 1546 uint64_t gen; 1547 1548 if (loop == sender) 1549 continue; 1550 1551 if (dst_must_match && 1552 (bcmp(hdr_info.mhi_daddr, loop->vn_addr, 1553 sizeof (loop->vn_addr)) != 0)) 1554 continue; 1555 1556 flow = loop->vn_flow_ent; 1557 ASSERT(flow != NULL); 1558 1559 if (!flow->vf_is_active) { 1560 VNIC_FLOW_REFHOLD(flow); 1561 gen = vnic_mac->va_promisc_gen; 1562 rw_exit(&vnic_mac->va_promisc_lock); 1563 1564 if ((copy = vnic_copymsg_cksum(mp)) != NULL) { 1565 fn_info = vnic_classifier_get_fn_info(flow); 1566 (fn_info->ff_fn)(fn_info->ff_arg1, 1567 fn_info->ff_arg2, copy); 1568 } 1569 1570 VNIC_FLOW_REFRELE(flow); 1571 rw_enter(&vnic_mac->va_promisc_lock, RW_READER); 1572 if (vnic_mac->va_promisc_gen != gen) 1573 break; 1574 } 1575 } 1576 done: 1577 rw_exit(&vnic_mac->va_promisc_lock); 1578 } 1579