1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/sysmacros.h> 30 #include <sys/conf.h> 31 #include <sys/cmn_err.h> 32 #include <sys/list.h> 33 #include <sys/ksynch.h> 34 #include <sys/kmem.h> 35 #include <sys/stream.h> 36 #include <sys/modctl.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/atomic.h> 40 #include <sys/stat.h> 41 #include <sys/modhash.h> 42 #include <sys/strsubr.h> 43 #include <sys/strsun.h> 44 #include <sys/dlpi.h> 45 #include <sys/mac.h> 46 #include <sys/mac_ether.h> 47 #include <sys/pattr.h> 48 #if 0 49 #include <sys/vlan.h> 50 #endif 51 #include <sys/vnic.h> 52 #include <sys/vnic_impl.h> 53 #include <sys/gld.h> 54 #include <inet/ip.h> 55 #include <inet/ip_impl.h> 56 57 static int vnic_m_start(void *); 58 static void vnic_m_stop(void *); 59 static int vnic_m_promisc(void *, boolean_t); 60 static int vnic_m_multicst(void *, boolean_t, const uint8_t *); 61 static int vnic_m_unicst(void *, const uint8_t *); 62 static int vnic_m_stat(void *, uint_t, uint64_t *); 63 static void vnic_m_resources(void *); 64 static mblk_t *vnic_m_tx(void *, mblk_t *); 65 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *); 66 static void vnic_mac_free(vnic_mac_t *); 67 static uint_t vnic_info_walker(mod_hash_key_t, mod_hash_val_t *, void *); 68 static void vnic_notify_cb(void *, mac_notify_type_t); 69 static int vnic_modify_mac_addr(vnic_t *, uint_t, uchar_t *); 70 static mblk_t *vnic_active_tx(void *, mblk_t *); 71 static int vnic_promisc_set(vnic_t *, boolean_t); 72 73 static kmem_cache_t *vnic_cache; 74 static kmem_cache_t *vnic_mac_cache; 75 static krwlock_t vnic_lock; 76 static kmutex_t vnic_mac_lock; 77 static uint_t vnic_count; 78 79 /* hash of VNICs (vnic_t's), keyed by VNIC id */ 80 static mod_hash_t *vnic_hash; 81 #define VNIC_HASHSZ 64 82 #define VNIC_HASH_KEY(vnic_id) ((mod_hash_key_t)(uintptr_t)vnic_id) 83 84 /* 85 * Hash of underlying open MACs (vnic_mac_t's), keyed by the string 86 * "<device name><instance number>/<port number>". 87 */ 88 static mod_hash_t *vnic_mac_hash; 89 #define VNIC_MAC_HASHSZ 64 90 91 #define VNIC_MAC_REFHOLD(va) { \ 92 ASSERT(MUTEX_HELD(&vnic_mac_lock)); \ 93 (va)->va_refs++; \ 94 ASSERT((va)->va_refs != 0); \ 95 } 96 97 #define VNIC_MAC_REFRELE(va) { \ 98 ASSERT(MUTEX_HELD(&vnic_mac_lock)); \ 99 ASSERT((va)->va_refs != 0); \ 100 if (--((va)->va_refs) == 0) \ 101 vnic_mac_free(va); \ 102 } 103 104 static uchar_t vnic_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 105 106 /* used by vnic_walker */ 107 typedef struct vnic_info_state { 108 uint32_t vs_vnic_id; 109 char vs_dev_name[MAXNAMELEN]; 110 boolean_t vs_vnic_found; 111 vnic_info_new_vnic_fn_t vs_new_vnic_fn; 112 void *vs_fn_arg; 113 int vs_rc; 114 } vnic_info_state_t; 115 116 #define VNIC_M_CALLBACK_FLAGS (MC_RESOURCES | MC_GETCAPAB) 117 118 static mac_callbacks_t vnic_m_callbacks = { 119 VNIC_M_CALLBACK_FLAGS, 120 vnic_m_stat, 121 vnic_m_start, 122 vnic_m_stop, 123 vnic_m_promisc, 124 vnic_m_multicst, 125 vnic_m_unicst, 126 vnic_m_tx, 127 vnic_m_resources, 128 NULL, /* m_ioctl */ 129 vnic_m_capab_get 130 }; 131 132 /* ARGSUSED */ 133 static int 134 vnic_mac_ctor(void *buf, void *arg, int kmflag) 135 { 136 vnic_mac_t *vnic_mac = buf; 137 138 bzero(vnic_mac, sizeof (vnic_mac_t)); 139 rw_init(&vnic_mac->va_bcast_grp_lock, NULL, RW_DRIVER, NULL); 140 rw_init(&vnic_mac->va_promisc_lock, NULL, RW_DRIVER, NULL); 141 142 return (0); 143 } 144 145 /* ARGSUSED */ 146 static void 147 vnic_mac_dtor(void *buf, void *arg) 148 { 149 vnic_mac_t *vnic_mac = buf; 150 151 rw_destroy(&vnic_mac->va_promisc_lock); 152 rw_destroy(&vnic_mac->va_bcast_grp_lock); 153 } 154 155 void 156 vnic_dev_init(void) 157 { 158 vnic_cache = kmem_cache_create("vnic_cache", 159 sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 160 161 vnic_mac_cache = kmem_cache_create("vnic_mac_cache", 162 sizeof (vnic_mac_t), 0, vnic_mac_ctor, vnic_mac_dtor, 163 NULL, NULL, NULL, 0); 164 165 vnic_hash = mod_hash_create_idhash("vnic_hash", 166 VNIC_HASHSZ, mod_hash_null_valdtor); 167 168 vnic_mac_hash = mod_hash_create_strhash("vnic_mac_hash", 169 VNIC_MAC_HASHSZ, mod_hash_null_valdtor); 170 171 rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL); 172 173 mutex_init(&vnic_mac_lock, NULL, MUTEX_DEFAULT, NULL); 174 175 vnic_count = 0; 176 } 177 178 void 179 vnic_dev_fini(void) 180 { 181 ASSERT(vnic_count == 0); 182 183 mutex_destroy(&vnic_mac_lock); 184 rw_destroy(&vnic_lock); 185 mod_hash_destroy_strhash(vnic_mac_hash); 186 mod_hash_destroy_idhash(vnic_hash); 187 kmem_cache_destroy(vnic_mac_cache); 188 kmem_cache_destroy(vnic_cache); 189 } 190 191 uint_t 192 vnic_dev_count(void) 193 { 194 return (vnic_count); 195 } 196 197 static int 198 vnic_mac_open(const char *dev_name, vnic_mac_t **vmp) 199 { 200 char *str_key; 201 int err; 202 vnic_mac_t *vnic_mac = NULL; 203 char driver[MAXNAMELEN]; 204 uint_t ddi_instance; 205 const mac_info_t *mip; 206 207 *vmp = NULL; 208 209 if (ddi_parse(dev_name, driver, &ddi_instance) != DDI_SUCCESS) 210 return (EINVAL); 211 212 mutex_enter(&vnic_mac_lock); 213 214 err = mod_hash_find(vnic_mac_hash, (mod_hash_key_t)dev_name, 215 (mod_hash_val_t *)&vnic_mac); 216 if (err == 0) { 217 /* this MAC is already opened, increment reference count */ 218 VNIC_MAC_REFHOLD(vnic_mac); 219 mutex_exit(&vnic_mac_lock); 220 *vmp = vnic_mac; 221 return (0); 222 } 223 224 vnic_mac = kmem_cache_alloc(vnic_mac_cache, KM_SLEEP); 225 226 if ((err = mac_open(dev_name, ddi_instance, &vnic_mac->va_mh)) != 0) { 227 vnic_mac->va_mh = NULL; 228 goto bail; 229 } 230 231 /* only ethernet support, for now */ 232 mip = mac_info(vnic_mac->va_mh); 233 if (mip->mi_media != DL_ETHER) { 234 err = ENOTSUP; 235 goto bail; 236 } 237 if (mip->mi_media != mip->mi_nativemedia) { 238 err = ENOTSUP; 239 goto bail; 240 } 241 242 (void) strcpy(vnic_mac->va_dev_name, dev_name); 243 244 /* add entry to hash table */ 245 str_key = kmem_alloc(strlen(dev_name) + 1, KM_SLEEP); 246 (void) strcpy(str_key, dev_name); 247 err = mod_hash_insert(vnic_mac_hash, (mod_hash_key_t)str_key, 248 (mod_hash_val_t)vnic_mac); 249 ASSERT(err == 0); 250 251 /* initialize the flow table associated with lower MAC */ 252 vnic_mac->va_addr_len = ETHERADDRL; 253 (void) vnic_classifier_flow_tab_init(vnic_mac, vnic_mac->va_addr_len, 254 KM_SLEEP); 255 256 vnic_mac->va_txinfo = mac_vnic_tx_get(vnic_mac->va_mh); 257 vnic_mac->va_notify_hdl = mac_notify_add(vnic_mac->va_mh, 258 vnic_notify_cb, vnic_mac); 259 260 VNIC_MAC_REFHOLD(vnic_mac); 261 *vmp = vnic_mac; 262 mutex_exit(&vnic_mac_lock); 263 return (0); 264 265 bail: 266 if (vnic_mac != NULL) { 267 if (vnic_mac->va_mh != NULL) 268 mac_close(vnic_mac->va_mh); 269 kmem_cache_free(vnic_mac_cache, vnic_mac); 270 } 271 mutex_exit(&vnic_mac_lock); 272 return (err); 273 } 274 275 /* 276 * Create a new flow for the active MAC client sharing the NIC 277 * with the VNICs. This allows the unicast packets for that NIC 278 * to be classified and passed up to the active MAC client. It 279 * also allows packets sent from a VNIC to the active link to 280 * be classified by the VNIC transmit function and delivered via 281 * the MAC module locally. Returns B_TRUE on success, B_FALSE on 282 * failure. 283 */ 284 static int 285 vnic_init_active_rx(vnic_mac_t *vnic_mac) 286 { 287 uchar_t nic_mac_addr[MAXMACADDRLEN]; 288 289 if (vnic_mac->va_active_flow != NULL) 290 return (B_TRUE); 291 292 mac_unicst_get(vnic_mac->va_mh, nic_mac_addr); 293 294 vnic_mac->va_active_flow = vnic_classifier_flow_create( 295 vnic_mac->va_addr_len, nic_mac_addr, NULL, B_TRUE, KM_SLEEP); 296 297 vnic_classifier_flow_add(vnic_mac, vnic_mac->va_active_flow, 298 (vnic_rx_fn_t)mac_active_rx, vnic_mac->va_mh, NULL); 299 return (B_TRUE); 300 } 301 302 static void 303 vnic_fini_active_rx(vnic_mac_t *vnic_mac) 304 { 305 if (vnic_mac->va_active_flow == NULL) 306 return; 307 308 vnic_classifier_flow_remove(vnic_mac, vnic_mac->va_active_flow); 309 vnic_classifier_flow_destroy(vnic_mac->va_active_flow); 310 vnic_mac->va_active_flow = NULL; 311 } 312 313 static void 314 vnic_update_active_rx(vnic_mac_t *vnic_mac) 315 { 316 if (vnic_mac->va_active_flow == NULL) 317 return; 318 319 vnic_fini_active_rx(vnic_mac); 320 (void) vnic_init_active_rx(vnic_mac); 321 } 322 323 /* 324 * Copy an mblk, preserving its hardware checksum flags. 325 */ 326 mblk_t * 327 vnic_copymsg_cksum(mblk_t *mp) 328 { 329 mblk_t *mp1; 330 uint32_t start, stuff, end, value, flags; 331 332 mp1 = copymsg(mp); 333 if (mp1 == NULL) 334 return (NULL); 335 336 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 337 (void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value, 338 flags, KM_NOSLEEP); 339 340 return (mp1); 341 } 342 343 /* 344 * Copy an mblk chain, presenting the hardware checksum flags of the 345 * individual mblks. 346 */ 347 mblk_t * 348 vnic_copymsgchain_cksum(mblk_t *mp) 349 { 350 mblk_t *nmp = NULL; 351 mblk_t **nmpp = &nmp; 352 353 for (; mp != NULL; mp = mp->b_next) { 354 if ((*nmpp = vnic_copymsg_cksum(mp)) == NULL) { 355 freemsgchain(nmp); 356 return (NULL); 357 } 358 359 nmpp = &((*nmpp)->b_next); 360 } 361 362 return (nmp); 363 } 364 365 366 /* 367 * Process the specified mblk chain for proper handling of hardware 368 * checksum offload. This routine is invoked for loopback VNIC traffic. 369 * The function handles a NULL mblk chain passed as argument. 370 */ 371 mblk_t * 372 vnic_fix_cksum(mblk_t *mp_chain) 373 { 374 mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1; 375 uint32_t flags, start, stuff, end, value; 376 377 for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) { 378 uint16_t len; 379 uint32_t offset; 380 struct ether_header *ehp; 381 uint16_t sap; 382 383 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, 384 &flags); 385 if (flags == 0) 386 continue; 387 388 /* 389 * Since the processing of checksum offload for loopback 390 * traffic requires modification of the packet contents, 391 * ensure sure that we are always modifying our own copy. 392 */ 393 if (DB_REF(mp) > 1) { 394 mp1 = copymsg(mp); 395 if (mp1 == NULL) 396 continue; 397 mp1->b_next = mp->b_next; 398 mp->b_next = NULL; 399 freemsg(mp); 400 if (prev != NULL) 401 prev->b_next = mp1; 402 else 403 new_chain = mp1; 404 mp = mp1; 405 } 406 407 /* 408 * Ethernet, and optionally VLAN header. 409 */ 410 /*LINTED*/ 411 ehp = (struct ether_header *)mp->b_rptr; 412 if (ntohs(ehp->ether_type) == VLAN_TPID) { 413 struct ether_vlan_header *evhp; 414 415 ASSERT(MBLKL(mp) >= 416 sizeof (struct ether_vlan_header)); 417 /*LINTED*/ 418 evhp = (struct ether_vlan_header *)mp->b_rptr; 419 sap = ntohs(evhp->ether_type); 420 offset = sizeof (struct ether_vlan_header); 421 } else { 422 sap = ntohs(ehp->ether_type); 423 offset = sizeof (struct ether_header); 424 } 425 426 if (MBLKL(mp) <= offset) { 427 offset -= MBLKL(mp); 428 if (mp->b_cont == NULL) { 429 /* corrupted packet, skip it */ 430 if (prev != NULL) 431 prev->b_next = mp->b_next; 432 else 433 new_chain = mp->b_next; 434 mp1 = mp->b_next; 435 mp->b_next = NULL; 436 freemsg(mp); 437 mp = mp1; 438 continue; 439 } 440 mp = mp->b_cont; 441 } 442 443 if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) { 444 ipha_t *ipha = NULL; 445 446 /* 447 * In order to compute the full and header 448 * checksums, we need to find and parse 449 * the IP and/or ULP headers. 450 */ 451 452 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 453 454 /* 455 * IP header. 456 */ 457 if (sap != ETHERTYPE_IP) 458 continue; 459 460 ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t)); 461 /*LINTED*/ 462 ipha = (ipha_t *)(mp->b_rptr + offset); 463 464 if (flags & HCK_FULLCKSUM) { 465 ipaddr_t src, dst; 466 uint32_t cksum; 467 uint16_t *up; 468 uint8_t proto; 469 470 /* 471 * Pointer to checksum field in ULP header. 472 */ 473 proto = ipha->ipha_protocol; 474 ASSERT(ipha->ipha_version_and_hdr_length == 475 IP_SIMPLE_HDR_VERSION); 476 if (proto == IPPROTO_TCP) { 477 /*LINTED*/ 478 up = IPH_TCPH_CHECKSUMP(ipha, 479 IP_SIMPLE_HDR_LENGTH); 480 } else { 481 ASSERT(proto == IPPROTO_UDP); 482 /*LINTED*/ 483 up = IPH_UDPH_CHECKSUMP(ipha, 484 IP_SIMPLE_HDR_LENGTH); 485 } 486 487 /* 488 * Pseudo-header checksum. 489 */ 490 src = ipha->ipha_src; 491 dst = ipha->ipha_dst; 492 len = ntohs(ipha->ipha_length) - 493 IP_SIMPLE_HDR_LENGTH; 494 495 cksum = (dst >> 16) + (dst & 0xFFFF) + 496 (src >> 16) + (src & 0xFFFF); 497 cksum += htons(len); 498 499 /* 500 * The checksum value stored in the packet needs 501 * to be correct. Compute it here. 502 */ 503 *up = 0; 504 cksum += (((proto) == IPPROTO_UDP) ? 505 IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP); 506 cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH + 507 offset, cksum); 508 *(up) = (uint16_t)(cksum ? cksum : ~cksum); 509 510 flags |= HCK_FULLCKSUM_OK; 511 value = 0xffff; 512 } 513 514 if (flags & HCK_IPV4_HDRCKSUM) { 515 ASSERT(ipha != NULL); 516 ipha->ipha_hdr_checksum = 517 (uint16_t)ip_csum_hdr(ipha); 518 } 519 } 520 521 if (flags & HCK_PARTIALCKSUM) { 522 uint16_t *up, partial, cksum; 523 uchar_t *ipp; /* ptr to beginning of IP header */ 524 525 if (mp->b_cont != NULL) { 526 mblk_t *mp1; 527 528 mp1 = msgpullup(mp, offset + end); 529 if (mp1 == NULL) 530 continue; 531 mp1->b_next = mp->b_next; 532 mp->b_next = NULL; 533 freemsg(mp); 534 if (prev != NULL) 535 prev->b_next = mp1; 536 else 537 new_chain = mp1; 538 mp = mp1; 539 } 540 541 ipp = mp->b_rptr + offset; 542 /*LINTED*/ 543 up = (uint16_t *)((uchar_t *)ipp + stuff); 544 partial = *up; 545 *up = 0; 546 547 cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start, 548 end - start, partial); 549 cksum = ~cksum; 550 *up = cksum ? cksum : ~cksum; 551 552 /* 553 * Since we already computed the whole checksum, 554 * indicate to the stack that it has already 555 * been verified by the hardware. 556 */ 557 flags &= ~HCK_PARTIALCKSUM; 558 flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK); 559 value = 0xffff; 560 } 561 562 (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, 563 value, flags, KM_NOSLEEP); 564 } 565 566 return (new_chain); 567 } 568 569 static void 570 vnic_mac_close(vnic_mac_t *vnic_mac) 571 { 572 mutex_enter(&vnic_mac_lock); 573 VNIC_MAC_REFRELE(vnic_mac); 574 mutex_exit(&vnic_mac_lock); 575 } 576 577 static void 578 vnic_mac_free(vnic_mac_t *vnic_mac) 579 { 580 mod_hash_val_t val; 581 582 ASSERT(MUTEX_HELD(&vnic_mac_lock)); 583 vnic_fini_active_rx(vnic_mac); 584 mac_notify_remove(vnic_mac->va_mh, vnic_mac->va_notify_hdl); 585 if (vnic_mac->va_mac_set) { 586 vnic_mac->va_mac_set = B_FALSE; 587 mac_vnic_clear(vnic_mac->va_mh); 588 } 589 vnic_classifier_flow_tab_fini(vnic_mac); 590 mac_close(vnic_mac->va_mh); 591 592 (void) mod_hash_remove(vnic_mac_hash, 593 (mod_hash_key_t)vnic_mac->va_dev_name, &val); 594 ASSERT(vnic_mac == (vnic_mac_t *)val); 595 596 kmem_cache_free(vnic_mac_cache, vnic_mac); 597 } 598 599 /* 600 * Initial VNIC receive routine. Invoked for packets that are steered 601 * to a VNIC but the VNIC has not been started yet. 602 */ 603 /* ARGSUSED */ 604 static void 605 vnic_rx_initial(void *arg1, void *arg2, mblk_t *mp_chain) 606 { 607 vnic_t *vnic = arg1; 608 mblk_t *mp; 609 610 /* update stats */ 611 for (mp = mp_chain; mp != NULL; mp = mp->b_next) 612 vnic->vn_stat_ierrors++; 613 freemsgchain(mp_chain); 614 } 615 616 /* 617 * VNIC receive routine invoked after the classifier for the VNIC 618 * has been initialized and the VNIC has been started. 619 */ 620 /* ARGSUSED */ 621 void 622 vnic_rx(void *arg1, void *arg2, mblk_t *mp_chain) 623 { 624 vnic_t *vnic = arg1; 625 mblk_t *mp; 626 627 /* update stats */ 628 for (mp = mp_chain; mp != NULL; mp = mp->b_next) { 629 vnic->vn_stat_ipackets++; 630 vnic->vn_stat_rbytes += msgdsize(mp); 631 } 632 633 /* pass packet up */ 634 mac_rx(vnic->vn_mh, NULL, mp_chain); 635 } 636 637 /* 638 * Routine to create a MAC-based VNIC. Adds the passed MAC address 639 * to an unused slot in the NIC if one is available. Otherwise it 640 * sets the NIC in promiscuous mode and assigns the MAC address to 641 * a Rx ring if available or a soft ring. 642 */ 643 static int 644 vnic_add_unicstaddr(vnic_t *vnic, mac_multi_addr_t *maddr) 645 { 646 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 647 int err; 648 649 if (mac_unicst_verify(vnic_mac->va_mh, maddr->mma_addr, 650 maddr->mma_addrlen) == B_FALSE) 651 return (EINVAL); 652 653 if (mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_MULTIADDRESS, 654 &(vnic->vn_mma_capab))) { 655 if (vnic->vn_maddr_naddrfree == 0) { 656 /* 657 * No free address slots available. 658 * Enable promiscuous mode. 659 */ 660 goto set_promisc; 661 } 662 663 err = vnic->vn_maddr_add(vnic->vn_maddr_handle, maddr); 664 if (err != 0) { 665 if (err == ENOSPC) { 666 /* 667 * There was a race to add addresses 668 * with other multiple address consumers, 669 * and we lost out. Use promisc mode. 670 */ 671 goto set_promisc; 672 } 673 674 return (err); 675 } 676 677 vnic->vn_slot_id = maddr->mma_slot; 678 vnic->vn_multi_mac = B_TRUE; 679 } else { 680 /* 681 * Either multiple MAC address support is not 682 * available or all available addresses have 683 * been used up. 684 */ 685 set_promisc: 686 err = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC); 687 if (err != 0) { 688 return (err); 689 } 690 691 vnic->vn_promisc_mac = B_TRUE; 692 } 693 return (err); 694 } 695 696 /* 697 * VNIC is getting deleted. Remove the MAC address from the slot. 698 * If promiscuous mode was being used, then unset the promiscuous mode. 699 */ 700 static int 701 vnic_remove_unicstaddr(vnic_t *vnic) 702 { 703 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 704 int err; 705 706 if (vnic->vn_multi_mac) { 707 ASSERT(vnic->vn_promisc_mac == B_FALSE); 708 err = vnic->vn_maddr_remove(vnic->vn_maddr_handle, 709 vnic->vn_slot_id); 710 vnic->vn_multi_mac = B_FALSE; 711 } 712 713 if (vnic->vn_promisc_mac) { 714 ASSERT(vnic->vn_multi_mac == B_FALSE); 715 err = mac_promisc_set(vnic_mac->va_mh, B_FALSE, MAC_DEVPROMISC); 716 vnic->vn_promisc_mac = B_FALSE; 717 } 718 719 return (err); 720 } 721 722 /* 723 * Create a new VNIC upon request from administrator. 724 * Returns 0 on success, an errno on failure. 725 */ 726 int 727 vnic_dev_create(uint_t vnic_id, char *dev_name, int mac_len, uchar_t *mac_addr) 728 { 729 vnic_t *vnic = NULL; 730 mac_register_t *mac; 731 int err; 732 vnic_mac_t *vnic_mac; 733 const mac_info_t *lower_mac_info; 734 mac_multi_addr_t maddr; 735 mac_txinfo_t tx_info; 736 737 if (mac_len != ETHERADDRL) { 738 /* currently only ethernet NICs are supported */ 739 return (EINVAL); 740 } 741 742 rw_enter(&vnic_lock, RW_WRITER); 743 744 /* does a VNIC with the same id already exist? */ 745 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 746 (mod_hash_val_t *)&vnic); 747 if (err == 0) { 748 rw_exit(&vnic_lock); 749 return (EEXIST); 750 } 751 752 vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP); 753 if (vnic == NULL) { 754 rw_exit(&vnic_lock); 755 return (ENOMEM); 756 } 757 758 /* open underlying MAC */ 759 err = vnic_mac_open(dev_name, &vnic_mac); 760 if (err != 0) { 761 kmem_cache_free(vnic_cache, vnic); 762 rw_exit(&vnic_lock); 763 return (err); 764 } 765 766 bzero(vnic, sizeof (*vnic)); 767 vnic->vn_id = vnic_id; 768 vnic->vn_vnic_mac = vnic_mac; 769 770 vnic->vn_started = B_FALSE; 771 vnic->vn_promisc = B_FALSE; 772 vnic->vn_multi_mac = B_FALSE; 773 vnic->vn_bcast_grp = B_FALSE; 774 775 /* set the VNIC MAC address */ 776 maddr.mma_addrlen = mac_len; 777 maddr.mma_slot = 0; 778 maddr.mma_flags = 0; 779 bcopy(mac_addr, maddr.mma_addr, mac_len); 780 if ((err = vnic_add_unicstaddr(vnic, &maddr)) != 0) 781 goto bail; 782 bcopy(mac_addr, vnic->vn_addr, mac_len); 783 784 /* set the initial VNIC capabilities */ 785 if (!mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_HCKSUM, 786 &vnic->vn_hcksum_txflags)) 787 vnic->vn_hcksum_txflags = 0; 788 789 /* register with the MAC module */ 790 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 791 goto bail; 792 793 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 794 mac->m_driver = vnic; 795 mac->m_dip = vnic_get_dip(); 796 mac->m_instance = vnic_id; 797 mac->m_src_addr = vnic->vn_addr; 798 mac->m_callbacks = &vnic_m_callbacks; 799 800 lower_mac_info = mac_info(vnic_mac->va_mh); 801 mac->m_min_sdu = lower_mac_info->mi_sdu_min; 802 mac->m_max_sdu = lower_mac_info->mi_sdu_max; 803 804 err = mac_register(mac, &vnic->vn_mh); 805 mac_free(mac); 806 if (err != 0) 807 goto bail; 808 809 /* add new VNIC to hash table */ 810 err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id), 811 (mod_hash_val_t)vnic); 812 ASSERT(err == 0); 813 vnic_count++; 814 815 rw_exit(&vnic_lock); 816 817 /* Create a flow, initialized with the MAC address of the VNIC */ 818 if ((vnic->vn_flow_ent = vnic_classifier_flow_create(mac_len, mac_addr, 819 NULL, B_FALSE, KM_SLEEP)) == NULL) { 820 (void) vnic_dev_delete(vnic_id); 821 vnic = NULL; 822 err = ENOMEM; 823 goto bail_unlocked; 824 } 825 826 vnic_classifier_flow_add(vnic_mac, vnic->vn_flow_ent, vnic_rx_initial, 827 vnic, vnic); 828 829 /* setup VNIC to receive broadcast packets */ 830 err = vnic_bcast_add(vnic, vnic_brdcst_mac, MAC_ADDRTYPE_BROADCAST); 831 if (err != 0) { 832 (void) vnic_dev_delete(vnic_id); 833 vnic = NULL; 834 goto bail_unlocked; 835 } 836 vnic->vn_bcast_grp = B_TRUE; 837 838 mutex_enter(&vnic_mac_lock); 839 if (!vnic_mac->va_mac_set) { 840 /* 841 * We want to MAC layer to call the VNIC tx outbound 842 * routine, so that local broadcast packets sent by 843 * the active interface sharing the underlying NIC (if 844 * any), can be broadcast to every VNIC. 845 */ 846 tx_info.mt_fn = vnic_active_tx; 847 tx_info.mt_arg = vnic_mac; 848 if (!mac_vnic_set(vnic_mac->va_mh, &tx_info, 849 vnic_m_capab_get, vnic)) { 850 mutex_exit(&vnic_mac_lock); 851 (void) vnic_dev_delete(vnic_id); 852 vnic = NULL; 853 err = EBUSY; 854 goto bail_unlocked; 855 } 856 vnic_mac->va_mac_set = B_TRUE; 857 } 858 mutex_exit(&vnic_mac_lock); 859 860 /* allow passing packets to NIC's active MAC client */ 861 if (!vnic_init_active_rx(vnic_mac)) { 862 (void) vnic_dev_delete(vnic_id); 863 vnic = NULL; 864 err = ENOMEM; 865 goto bail_unlocked; 866 } 867 868 return (0); 869 870 bail: 871 (void) vnic_remove_unicstaddr(vnic); 872 vnic_mac_close(vnic_mac); 873 rw_exit(&vnic_lock); 874 875 bail_unlocked: 876 if (vnic != NULL) { 877 kmem_cache_free(vnic_cache, vnic); 878 } 879 880 return (err); 881 } 882 883 /* 884 * Modify the properties of an existing VNIC. 885 */ 886 /* ARGSUSED */ 887 int 888 vnic_dev_modify(uint_t vnic_id, uint_t modify_mask, 889 vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr) 890 { 891 vnic_t *vnic = NULL; 892 int rv = 0; 893 boolean_t notify_mac_addr = B_FALSE; 894 895 rw_enter(&vnic_lock, RW_WRITER); 896 897 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 898 (mod_hash_val_t *)&vnic) != 0) { 899 rw_exit(&vnic_lock); 900 return (ENOENT); 901 } 902 903 if (modify_mask & VNIC_IOC_MODIFY_ADDR) { 904 rv = vnic_modify_mac_addr(vnic, mac_len, mac_addr); 905 if (rv == 0) 906 notify_mac_addr = B_TRUE; 907 } 908 909 rw_exit(&vnic_lock); 910 911 if (notify_mac_addr) 912 mac_unicst_update(vnic->vn_mh, mac_addr); 913 914 return (rv); 915 } 916 917 int 918 vnic_dev_delete(uint_t vnic_id) 919 { 920 vnic_t *vnic = NULL; 921 mod_hash_val_t val; 922 vnic_flow_t *flent; 923 int rc; 924 vnic_mac_t *vnic_mac; 925 926 rw_enter(&vnic_lock, RW_WRITER); 927 928 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 929 (mod_hash_val_t *)&vnic) != 0) { 930 rw_exit(&vnic_lock); 931 return (ENOENT); 932 } 933 934 /* 935 * We cannot unregister the MAC yet. Unregistering would 936 * free up mac_impl_t which should not happen at this time. 937 * Packets could be entering vnic_rx() through the 938 * flow entry and so mac_impl_t cannot be NULL. So disable 939 * mac_impl_t by calling mac_disable(). This will prevent any 940 * new claims on mac_impl_t. 941 */ 942 if (mac_disable(vnic->vn_mh) != 0) { 943 rw_exit(&vnic_lock); 944 return (EBUSY); 945 } 946 947 (void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val); 948 ASSERT(vnic == (vnic_t *)val); 949 950 if (vnic->vn_bcast_grp) 951 (void) vnic_bcast_delete(vnic, vnic_brdcst_mac); 952 953 flent = vnic->vn_flow_ent; 954 if (flent != NULL) { 955 /* 956 * vnic_classifier_flow_destroy() ensures that the 957 * flow is no longer used. 958 */ 959 vnic_classifier_flow_remove(vnic->vn_vnic_mac, flent); 960 vnic_classifier_flow_destroy(flent); 961 } 962 963 rc = mac_unregister(vnic->vn_mh); 964 ASSERT(rc == 0); 965 (void) vnic_remove_unicstaddr(vnic); 966 vnic_mac = vnic->vn_vnic_mac; 967 kmem_cache_free(vnic_cache, vnic); 968 vnic_count--; 969 rw_exit(&vnic_lock); 970 vnic_mac_close(vnic_mac); 971 return (0); 972 } 973 974 /* 975 * For the specified packet chain, return a sub-chain to be sent 976 * and the transmit function to be used to send the packet. Also 977 * return a pointer to the sub-chain of packets that should 978 * be re-classified. If the function returns NULL, the packet 979 * should be sent using the underlying NIC. 980 */ 981 static vnic_flow_t * 982 vnic_classify(vnic_mac_t *vnic_mac, mblk_t *mp, mblk_t **mp_chain_rest) 983 { 984 vnic_flow_t *flow_ent; 985 986 /* one packet at a time */ 987 *mp_chain_rest = mp->b_next; 988 mp->b_next = NULL; 989 990 /* do classification on the packet */ 991 flow_ent = vnic_classifier_get_flow(vnic_mac, mp); 992 993 return (flow_ent); 994 } 995 996 /* 997 * Send a packet chain to a local VNIC or an active MAC client. 998 */ 999 static void 1000 vnic_local_tx(vnic_mac_t *vnic_mac, vnic_flow_t *flow_ent, mblk_t *mp_chain) 1001 { 1002 mblk_t *mp1; 1003 const vnic_flow_fn_info_t *fn_info; 1004 vnic_t *vnic; 1005 1006 if (!vnic_classifier_is_active(flow_ent) && 1007 mac_promisc_get(vnic_mac->va_mh, MAC_PROMISC)) { 1008 /* 1009 * If the MAC is in promiscous mode, 1010 * send a copy of the active client. 1011 */ 1012 if ((mp1 = vnic_copymsgchain_cksum(mp_chain)) == NULL) 1013 goto sendit; 1014 if ((mp1 = vnic_fix_cksum(mp1)) == NULL) 1015 goto sendit; 1016 mac_active_rx(vnic_mac->va_mh, NULL, mp1); 1017 } 1018 sendit: 1019 fn_info = vnic_classifier_get_fn_info(flow_ent); 1020 /* 1021 * If the vnic to which we would deliver this packet is in 1022 * promiscuous mode then it already received the packet via 1023 * vnic_promisc_rx(). 1024 * 1025 * XXX assumes that ff_arg2 is a vnic_t pointer if it is 1026 * non-NULL (currently always true). 1027 */ 1028 vnic = (vnic_t *)fn_info->ff_arg2; 1029 if ((vnic != NULL) && vnic->vn_promisc) 1030 freemsg(mp_chain); 1031 else if ((mp1 = vnic_fix_cksum(mp_chain)) != NULL) 1032 (fn_info->ff_fn)(fn_info->ff_arg1, fn_info->ff_arg2, mp1); 1033 } 1034 1035 /* 1036 * This function is invoked when a MAC client needs to send a packet 1037 * to a NIC which is shared by VNICs. It is passed to the MAC layer 1038 * by a call to mac_vnic_set() when the NIC is opened, and is returned 1039 * to MAC clients by mac_tx_get() when VNICs are present. 1040 */ 1041 mblk_t * 1042 vnic_active_tx(void *arg, mblk_t *mp_chain) 1043 { 1044 vnic_mac_t *vnic_mac = arg; 1045 mblk_t *mp, *extra_mp = NULL; 1046 vnic_flow_t *flow_ent; 1047 void *flow_cookie; 1048 const mac_txinfo_t *mtp = vnic_mac->va_txinfo; 1049 1050 for (mp = mp_chain; mp != NULL; mp = extra_mp) { 1051 mblk_t *next; 1052 1053 next = mp->b_next; 1054 mp->b_next = NULL; 1055 1056 vnic_promisc_rx(vnic_mac, (vnic_t *)-1, mp); 1057 1058 flow_ent = vnic_classify(vnic_mac, mp, &extra_mp); 1059 ASSERT(extra_mp == NULL); 1060 extra_mp = next; 1061 1062 if (flow_ent != NULL) { 1063 flow_cookie = vnic_classifier_get_client_cookie( 1064 flow_ent); 1065 if (flow_cookie != NULL) { 1066 /* 1067 * Send a copy to every VNIC defined on the 1068 * interface, as well as the underlying MAC. 1069 */ 1070 vnic_bcast_send(flow_cookie, (vnic_t *)-1, mp); 1071 } else { 1072 /* 1073 * loopback the packet to a local VNIC or 1074 * an active MAC client. 1075 */ 1076 vnic_local_tx(vnic_mac, flow_ent, mp); 1077 } 1078 VNIC_FLOW_REFRELE(flow_ent); 1079 mp_chain = NULL; 1080 } else { 1081 /* 1082 * Non-VNIC destination, send via the underlying 1083 * NIC. In order to avoid a recursive call 1084 * to this function, we ensured that mtp points 1085 * to the unerlying NIC transmit function 1086 * by inilizating through mac_vnic_tx_get(). 1087 */ 1088 mp_chain = mtp->mt_fn(mtp->mt_arg, mp); 1089 if (mp_chain != NULL) 1090 break; 1091 } 1092 } 1093 1094 if ((mp_chain != NULL) && (extra_mp != NULL)) { 1095 ASSERT(mp_chain->b_next == NULL); 1096 mp_chain->b_next = extra_mp; 1097 } 1098 return (mp_chain); 1099 } 1100 1101 /* 1102 * VNIC transmit function. 1103 */ 1104 mblk_t * 1105 vnic_m_tx(void *arg, mblk_t *mp_chain) 1106 { 1107 vnic_t *vnic = arg; 1108 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1109 mblk_t *mp, *extra_mp = NULL; 1110 vnic_flow_t *flow_ent; 1111 void *flow_cookie; 1112 1113 /* 1114 * Update stats. 1115 */ 1116 for (mp = mp_chain; mp != NULL; mp = mp->b_next) { 1117 vnic->vn_stat_opackets++; 1118 vnic->vn_stat_obytes += msgdsize(mp); 1119 } 1120 1121 for (mp = mp_chain; mp != NULL; mp = extra_mp) { 1122 mblk_t *next; 1123 1124 next = mp->b_next; 1125 mp->b_next = NULL; 1126 1127 vnic_promisc_rx(vnic->vn_vnic_mac, vnic, mp); 1128 1129 flow_ent = vnic_classify(vnic->vn_vnic_mac, mp, &extra_mp); 1130 ASSERT(extra_mp == NULL); 1131 extra_mp = next; 1132 1133 if (flow_ent != NULL) { 1134 flow_cookie = vnic_classifier_get_client_cookie( 1135 flow_ent); 1136 if (flow_cookie != NULL) { 1137 /* 1138 * The vnic_bcast_send function expects 1139 * to receive the sender VNIC as value 1140 * for arg2. 1141 */ 1142 vnic_bcast_send(flow_cookie, vnic, mp); 1143 } else { 1144 /* 1145 * loopback the packet to a local VNIC or 1146 * an active MAC client. 1147 */ 1148 vnic_local_tx(vnic_mac, flow_ent, mp); 1149 } 1150 VNIC_FLOW_REFRELE(flow_ent); 1151 mp_chain = NULL; 1152 } else { 1153 /* 1154 * Non-local destination, send via the underlying 1155 * NIC. 1156 */ 1157 const mac_txinfo_t *mtp = vnic->vn_txinfo; 1158 mp_chain = mtp->mt_fn(mtp->mt_arg, mp); 1159 if (mp_chain != NULL) 1160 break; 1161 } 1162 } 1163 1164 /* update stats to account for unsent packets */ 1165 for (mp = mp_chain; mp != NULL; mp = mp->b_next) { 1166 vnic->vn_stat_opackets--; 1167 vnic->vn_stat_obytes -= msgdsize(mp); 1168 vnic->vn_stat_oerrors++; 1169 /* 1170 * link back in the last portion not counted due to bandwidth 1171 * control. 1172 */ 1173 if (mp->b_next == NULL) { 1174 mp->b_next = extra_mp; 1175 break; 1176 } 1177 } 1178 1179 return (mp_chain); 1180 } 1181 1182 /* ARGSUSED */ 1183 static void 1184 vnic_m_resources(void *arg) 1185 { 1186 /* no resources to advertise */ 1187 } 1188 1189 static int 1190 vnic_m_stat(void *arg, uint_t stat, uint64_t *val) 1191 { 1192 vnic_t *vnic = arg; 1193 int rval = 0; 1194 1195 rw_enter(&vnic_lock, RW_READER); 1196 1197 switch (stat) { 1198 case ETHER_STAT_LINK_DUPLEX: 1199 *val = mac_stat_get(vnic->vn_vnic_mac->va_mh, 1200 ETHER_STAT_LINK_DUPLEX); 1201 break; 1202 case MAC_STAT_IFSPEED: 1203 *val = mac_stat_get(vnic->vn_vnic_mac->va_mh, 1204 MAC_STAT_IFSPEED); 1205 break; 1206 case MAC_STAT_MULTIRCV: 1207 *val = vnic->vn_stat_multircv; 1208 break; 1209 case MAC_STAT_BRDCSTRCV: 1210 *val = vnic->vn_stat_brdcstrcv; 1211 break; 1212 case MAC_STAT_MULTIXMT: 1213 *val = vnic->vn_stat_multixmt; 1214 break; 1215 case MAC_STAT_BRDCSTXMT: 1216 *val = vnic->vn_stat_brdcstxmt; 1217 break; 1218 case MAC_STAT_IERRORS: 1219 *val = vnic->vn_stat_ierrors; 1220 break; 1221 case MAC_STAT_OERRORS: 1222 *val = vnic->vn_stat_oerrors; 1223 break; 1224 case MAC_STAT_RBYTES: 1225 *val = vnic->vn_stat_rbytes; 1226 break; 1227 case MAC_STAT_IPACKETS: 1228 *val = vnic->vn_stat_ipackets; 1229 break; 1230 case MAC_STAT_OBYTES: 1231 *val = vnic->vn_stat_obytes; 1232 break; 1233 case MAC_STAT_OPACKETS: 1234 *val = vnic->vn_stat_opackets; 1235 break; 1236 default: 1237 rval = ENOTSUP; 1238 } 1239 1240 rw_exit(&vnic_lock); 1241 return (rval); 1242 } 1243 1244 /* 1245 * Return information about the specified capability. 1246 */ 1247 /* ARGSUSED */ 1248 static boolean_t 1249 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 1250 { 1251 vnic_t *vnic = arg; 1252 1253 switch (cap) { 1254 case MAC_CAPAB_POLL: 1255 return (B_TRUE); 1256 case MAC_CAPAB_HCKSUM: { 1257 uint32_t *hcksum_txflags = cap_data; 1258 1259 *hcksum_txflags = vnic->vn_hcksum_txflags & 1260 (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM | 1261 HCKSUM_INET_PARTIAL); 1262 break; 1263 } 1264 default: 1265 return (B_FALSE); 1266 } 1267 return (B_TRUE); 1268 } 1269 1270 static int 1271 vnic_m_start(void *arg) 1272 { 1273 vnic_t *vnic = arg; 1274 mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh; 1275 int rc; 1276 1277 rc = mac_start(lower_mh); 1278 if (rc != 0) 1279 return (rc); 1280 1281 vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx, vnic, vnic); 1282 return (0); 1283 } 1284 1285 static void 1286 vnic_m_stop(void *arg) 1287 { 1288 vnic_t *vnic = arg; 1289 mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh; 1290 1291 vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx_initial, 1292 vnic, vnic); 1293 mac_stop(lower_mh); 1294 } 1295 1296 /* ARGSUSED */ 1297 static int 1298 vnic_m_promisc(void *arg, boolean_t on) 1299 { 1300 vnic_t *vnic = arg; 1301 1302 return (vnic_promisc_set(vnic, on)); 1303 } 1304 1305 static int 1306 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 1307 { 1308 vnic_t *vnic = arg; 1309 int rc = 0; 1310 1311 if (add) 1312 rc = vnic_bcast_add(vnic, addrp, MAC_ADDRTYPE_MULTICAST); 1313 else 1314 vnic_bcast_delete(vnic, addrp); 1315 1316 return (rc); 1317 } 1318 1319 static int 1320 vnic_m_unicst(void *arg, const uint8_t *mac_addr) 1321 { 1322 vnic_t *vnic = arg; 1323 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1324 int rv; 1325 1326 rw_enter(&vnic_lock, RW_WRITER); 1327 rv = vnic_modify_mac_addr(vnic, vnic_mac->va_addr_len, 1328 (uchar_t *)mac_addr); 1329 rw_exit(&vnic_lock); 1330 1331 if (rv == 0) 1332 mac_unicst_update(vnic->vn_mh, mac_addr); 1333 return (0); 1334 } 1335 1336 int 1337 vnic_info(uint_t *nvnics, uint32_t vnic_id, char *dev_name, void *fn_arg, 1338 vnic_info_new_vnic_fn_t new_vnic_fn) 1339 { 1340 vnic_info_state_t state; 1341 int rc = 0; 1342 1343 rw_enter(&vnic_lock, RW_READER); 1344 1345 *nvnics = vnic_count; 1346 1347 bzero(&state, sizeof (state)); 1348 state.vs_vnic_id = vnic_id; 1349 bcopy(state.vs_dev_name, dev_name, MAXNAMELEN); 1350 state.vs_new_vnic_fn = new_vnic_fn; 1351 state.vs_fn_arg = fn_arg; 1352 1353 mod_hash_walk(vnic_hash, vnic_info_walker, &state); 1354 1355 if ((rc = state.vs_rc) == 0 && vnic_id != 0 && 1356 state.vs_vnic_found) 1357 rc = ENOENT; 1358 1359 rw_exit(&vnic_lock); 1360 return (rc); 1361 } 1362 1363 /* 1364 * Walker invoked when building a list of vnics that must be passed 1365 * up to user space. 1366 */ 1367 /*ARGSUSED*/ 1368 static uint_t 1369 vnic_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 1370 { 1371 vnic_t *vnic; 1372 vnic_info_state_t *state = arg; 1373 1374 if (state->vs_rc != 0) 1375 return (MH_WALK_TERMINATE); /* terminate walk */ 1376 1377 vnic = (vnic_t *)val; 1378 1379 if (state->vs_vnic_id != 0 && vnic->vn_id != state->vs_vnic_id) 1380 goto bail; 1381 1382 state->vs_vnic_found = B_TRUE; 1383 1384 state->vs_rc = state->vs_new_vnic_fn(state->vs_fn_arg, 1385 vnic->vn_id, vnic->vn_addr_type, vnic->vn_vnic_mac->va_addr_len, 1386 vnic->vn_addr, vnic->vn_vnic_mac->va_dev_name); 1387 bail: 1388 return ((state->vs_rc == 0) ? MH_WALK_CONTINUE : MH_WALK_TERMINATE); 1389 } 1390 1391 /* 1392 * vnic_notify_cb() and vnic_notify_walker() below are used to 1393 * process events received from an underlying NIC and, if needed, 1394 * forward these events to the VNICs defined on top of that NIC. 1395 */ 1396 1397 typedef struct vnic_notify_state { 1398 mac_notify_type_t vo_type; 1399 vnic_mac_t *vo_vnic_mac; 1400 } vnic_notify_state_t; 1401 1402 /* ARGSUSED */ 1403 static uint_t 1404 vnic_notify_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 1405 { 1406 vnic_t *vnic = (vnic_t *)val; 1407 vnic_notify_state_t *state = arg; 1408 1409 /* ignore VNICs that don't use the specified underlying MAC */ 1410 if (vnic->vn_vnic_mac != state->vo_vnic_mac) 1411 return (MH_WALK_CONTINUE); 1412 1413 switch (state->vo_type) { 1414 case MAC_NOTE_TX: 1415 mac_tx_update(vnic->vn_mh); 1416 break; 1417 case MAC_NOTE_LINK: 1418 /* 1419 * The VNIC link state must be up regardless of 1420 * the link state of the underlying NIC to maintain 1421 * connectivity between VNICs on the same host. 1422 */ 1423 mac_link_update(vnic->vn_mh, LINK_STATE_UP); 1424 break; 1425 case MAC_NOTE_UNICST: 1426 vnic_update_active_rx(vnic->vn_vnic_mac); 1427 break; 1428 case MAC_NOTE_VNIC: 1429 /* only for clients which share a NIC with a VNIC */ 1430 break; 1431 case MAC_NOTE_PROMISC: 1432 mutex_enter(&vnic_mac_lock); 1433 vnic->vn_vnic_mac->va_txinfo = mac_vnic_tx_get( 1434 vnic->vn_vnic_mac->va_mh); 1435 mutex_exit(&vnic_mac_lock); 1436 break; 1437 } 1438 1439 return (MH_WALK_CONTINUE); 1440 } 1441 1442 static void 1443 vnic_notify_cb(void *arg, mac_notify_type_t type) 1444 { 1445 vnic_mac_t *vnic = arg; 1446 vnic_notify_state_t state; 1447 1448 state.vo_type = type; 1449 state.vo_vnic_mac = vnic; 1450 1451 rw_enter(&vnic_lock, RW_READER); 1452 mod_hash_walk(vnic_hash, vnic_notify_walker, &state); 1453 rw_exit(&vnic_lock); 1454 } 1455 1456 static int 1457 vnic_modify_mac_addr(vnic_t *vnic, uint_t mac_len, uchar_t *mac_addr) 1458 { 1459 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1460 vnic_flow_t *vnic_flow = vnic->vn_flow_ent; 1461 1462 ASSERT(RW_WRITE_HELD(&vnic_lock)); 1463 1464 if (mac_len != vnic_mac->va_addr_len) 1465 return (EINVAL); 1466 1467 vnic_classifier_flow_update_addr(vnic_flow, mac_addr); 1468 return (0); 1469 } 1470 1471 static int 1472 vnic_promisc_set(vnic_t *vnic, boolean_t on) 1473 { 1474 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1475 int r = -1; 1476 1477 if (vnic->vn_promisc == on) 1478 return (0); 1479 1480 if (on) { 1481 r = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC); 1482 if (r != 0) 1483 return (r); 1484 1485 rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER); 1486 vnic->vn_promisc_next = vnic_mac->va_promisc; 1487 vnic_mac->va_promisc = vnic; 1488 vnic_mac->va_promisc_gen++; 1489 1490 vnic->vn_promisc = B_TRUE; 1491 rw_exit(&vnic_mac->va_promisc_lock); 1492 1493 return (0); 1494 } else { 1495 vnic_t *loop, *prev = NULL; 1496 1497 rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER); 1498 loop = vnic_mac->va_promisc; 1499 1500 while ((loop != NULL) && (loop != vnic)) { 1501 prev = loop; 1502 loop = loop->vn_promisc_next; 1503 } 1504 1505 if ((loop != NULL) && 1506 ((r = mac_promisc_set(vnic_mac->va_mh, B_FALSE, 1507 MAC_DEVPROMISC)) == 0)) { 1508 if (prev != NULL) 1509 prev->vn_promisc_next = loop->vn_promisc_next; 1510 else 1511 vnic_mac->va_promisc = loop->vn_promisc_next; 1512 vnic_mac->va_promisc_gen++; 1513 1514 vnic->vn_promisc = B_FALSE; 1515 } 1516 rw_exit(&vnic_mac->va_promisc_lock); 1517 1518 return (r); 1519 } 1520 } 1521 1522 void 1523 vnic_promisc_rx(vnic_mac_t *vnic_mac, vnic_t *sender, mblk_t *mp) 1524 { 1525 vnic_t *loop; 1526 vnic_flow_t *flow; 1527 const vnic_flow_fn_info_t *fn_info; 1528 mac_header_info_t hdr_info; 1529 boolean_t dst_must_match = B_TRUE; 1530 1531 ASSERT(mp->b_next == NULL); 1532 1533 rw_enter(&vnic_mac->va_promisc_lock, RW_READER); 1534 if (vnic_mac->va_promisc == NULL) 1535 goto done; 1536 1537 if (mac_header_info(vnic_mac->va_mh, mp, &hdr_info) != 0) 1538 goto done; 1539 1540 /* 1541 * If this is broadcast or multicast then the destination 1542 * address need not match for us to deliver it. 1543 */ 1544 if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) || 1545 (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST)) 1546 dst_must_match = B_FALSE; 1547 1548 for (loop = vnic_mac->va_promisc; 1549 loop != NULL; 1550 loop = loop->vn_promisc_next) { 1551 if (loop == sender) 1552 continue; 1553 1554 if (dst_must_match && 1555 (bcmp(hdr_info.mhi_daddr, loop->vn_addr, 1556 sizeof (loop->vn_addr)) != 0)) 1557 continue; 1558 1559 flow = loop->vn_flow_ent; 1560 ASSERT(flow != NULL); 1561 1562 if (!flow->vf_is_active) { 1563 mblk_t *copy; 1564 uint64_t gen; 1565 1566 if ((copy = vnic_copymsg_cksum(mp)) == NULL) 1567 break; 1568 if ((sender != NULL) && 1569 ((copy = vnic_fix_cksum(copy)) == NULL)) 1570 break; 1571 1572 VNIC_FLOW_REFHOLD(flow); 1573 gen = vnic_mac->va_promisc_gen; 1574 rw_exit(&vnic_mac->va_promisc_lock); 1575 1576 fn_info = vnic_classifier_get_fn_info(flow); 1577 (fn_info->ff_fn)(fn_info->ff_arg1, 1578 fn_info->ff_arg2, copy); 1579 1580 VNIC_FLOW_REFRELE(flow); 1581 rw_enter(&vnic_mac->va_promisc_lock, RW_READER); 1582 if (vnic_mac->va_promisc_gen != gen) 1583 break; 1584 } 1585 } 1586 done: 1587 rw_exit(&vnic_mac->va_promisc_lock); 1588 } 1589