/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/list.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/stream.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/stat.h>
#include <sys/modhash.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/pattr.h>
#if 0
#include <sys/vlan.h>
#endif
#include <sys/vnic.h>
#include <sys/vnic_impl.h>
#include <sys/gld.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>

/* MAC layer callback entry points and helpers local to this file */
static int vnic_m_start(void *);
static void vnic_m_stop(void *);
static int vnic_m_promisc(void *, boolean_t);
static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
static int vnic_m_unicst(void *, const uint8_t *);
static int vnic_m_stat(void *, uint_t, uint64_t *);
static void vnic_m_resources(void *);
static mblk_t *vnic_m_tx(void *, mblk_t *);
static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
static void vnic_mac_free(vnic_mac_t *);
static uint_t vnic_info_walker(mod_hash_key_t, mod_hash_val_t *, void *);
static void vnic_notify_cb(void *, mac_notify_type_t);
static int vnic_modify_mac_addr(vnic_t *, uint_t, uchar_t *);
static mblk_t *vnic_active_tx(void *, mblk_t *);
static int vnic_promisc_set(vnic_t *, boolean_t);

/* kmem caches for vnic_t and vnic_mac_t allocations */
static kmem_cache_t *vnic_cache;
static kmem_cache_t *vnic_mac_cache;
/* protects vnic_hash and vnic_count */
static krwlock_t vnic_lock;
/* protects vnic_mac_hash and the va_refs counts of its entries */
static kmutex_t vnic_mac_lock;
static uint_t vnic_count;

/* hash of VNICs (vnic_t's), keyed by VNIC id */
static mod_hash_t *vnic_hash;
#define	VNIC_HASHSZ	64
#define	VNIC_HASH_KEY(vnic_id)	((mod_hash_key_t)(uintptr_t)vnic_id)

/*
 * Hash of underlying open MACs (vnic_mac_t's), keyed by the string
 * "<device name><instance number>/<port number>".
 */
static mod_hash_t *vnic_mac_hash;
#define	VNIC_MAC_HASHSZ	64

/* Take a reference on an open lower MAC; caller holds vnic_mac_lock. */
#define	VNIC_MAC_REFHOLD(va) {			\
	ASSERT(MUTEX_HELD(&vnic_mac_lock));	\
	(va)->va_refs++;			\
	ASSERT((va)->va_refs != 0);		\
}

/*
 * Release a reference on an open lower MAC; the last release frees
 * the vnic_mac_t. Caller holds vnic_mac_lock.
 */
#define	VNIC_MAC_REFRELE(va) {			\
	ASSERT(MUTEX_HELD(&vnic_mac_lock));	\
	ASSERT((va)->va_refs != 0);		\
	if (--((va)->va_refs) == 0)		\
		vnic_mac_free(va);		\
}

static uchar_t vnic_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

/* used by vnic_walker */
typedef struct vnic_info_state {
	uint32_t	vs_vnic_id;	/* 0 means "match all VNICs" */
	char		vs_dev_name[MAXNAMELEN];
	boolean_t	vs_vnic_found;
	vnic_info_new_vnic_fn_t	vs_new_vnic_fn;
	void		*vs_fn_arg;
	int		vs_rc;
} vnic_info_state_t;

#define	VNIC_M_CALLBACK_FLAGS	(MC_RESOURCES | MC_GETCAPAB)

static mac_callbacks_t vnic_m_callbacks = {
	VNIC_M_CALLBACK_FLAGS,
	vnic_m_stat,
	vnic_m_start,
	vnic_m_stop,
	vnic_m_promisc,
	vnic_m_multicst,
	vnic_m_unicst,
	vnic_m_tx,
	vnic_m_resources,
	NULL,			/* m_ioctl */
	vnic_m_capab_get
};

/*
 * kmem cache constructor for vnic_mac_t: zero the object and set up
 * the locks that live for the lifetime of the cached buffer.
 */
/* ARGSUSED */
static int
vnic_mac_ctor(void *buf, void *arg, int kmflag)
{
	vnic_mac_t *vnic_mac = buf;

	bzero(vnic_mac, sizeof (vnic_mac_t));
	rw_init(&vnic_mac->va_bcast_grp_lock, NULL, RW_DRIVER, NULL);
	rw_init(&vnic_mac->va_promisc_lock, NULL, RW_DRIVER, NULL);

	return (0);
}

/* kmem cache destructor for vnic_mac_t; tears down the ctor's locks. */
/* ARGSUSED */
static void
vnic_mac_dtor(void *buf, void *arg)
{
	vnic_mac_t *vnic_mac = buf;

	rw_destroy(&vnic_mac->va_promisc_lock);
	rw_destroy(&vnic_mac->va_bcast_grp_lock);
}

/*
 * Module initialization: create the kmem caches, both hash tables,
 * and the global locks. Called once at load time.
 */
void
vnic_dev_init(void)
{
	vnic_cache = kmem_cache_create("vnic_cache",
	    sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	vnic_mac_cache = kmem_cache_create("vnic_mac_cache",
	    sizeof (vnic_mac_t), 0, vnic_mac_ctor, vnic_mac_dtor,
	    NULL, NULL, NULL, 0);

	vnic_hash = mod_hash_create_idhash("vnic_hash",
	    VNIC_HASHSZ, mod_hash_null_valdtor);

	vnic_mac_hash = mod_hash_create_strhash("vnic_mac_hash",
	    VNIC_MAC_HASHSZ, mod_hash_null_valdtor);

	rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);

	mutex_init(&vnic_mac_lock, NULL, MUTEX_DEFAULT, NULL);

	vnic_count = 0;
}

/* Module teardown; all VNICs must already have been deleted. */
void
vnic_dev_fini(void)
{
	ASSERT(vnic_count == 0);

	mutex_destroy(&vnic_mac_lock);
	rw_destroy(&vnic_lock);
	mod_hash_destroy_strhash(vnic_mac_hash);
	mod_hash_destroy_idhash(vnic_hash);
	kmem_cache_destroy(vnic_mac_cache);
	kmem_cache_destroy(vnic_cache);
}

/* Return the number of VNICs currently defined. */
uint_t
vnic_dev_count(void)
{
	return (vnic_count);
}

/*
 * Open the underlying MAC named by dev_name, or take an additional
 * reference if it is already open. On success, stores the (held)
 * vnic_mac_t in *vmp and returns 0; returns an errno on failure.
 */
static int
vnic_mac_open(const char *dev_name, vnic_mac_t **vmp)
{
	char *str_key;
	int err;
	vnic_mac_t *vnic_mac = NULL;
	const mac_info_t *mip;

	*vmp = NULL;

	mutex_enter(&vnic_mac_lock);

	err = mod_hash_find(vnic_mac_hash, (mod_hash_key_t)dev_name,
	    (mod_hash_val_t *)&vnic_mac);
	if (err == 0) {
		/* this MAC is already opened, increment reference count */
		VNIC_MAC_REFHOLD(vnic_mac);
		mutex_exit(&vnic_mac_lock);
		*vmp = vnic_mac;
		return (0);
	}

	vnic_mac = kmem_cache_alloc(vnic_mac_cache, KM_SLEEP);

	if ((err = mac_open(dev_name, &vnic_mac->va_mh)) != 0) {
		vnic_mac->va_mh = NULL;
		goto bail;
	}

	/* only ethernet support, for now */
	mip = mac_info(vnic_mac->va_mh);
	if (mip->mi_media != DL_ETHER) {
		err = ENOTSUP;
		goto bail;
	}
	if (mip->mi_media != mip->mi_nativemedia) {
		err = ENOTSUP;
		goto bail;
	}

	(void) strcpy(vnic_mac->va_dev_name, dev_name);

	/* add entry to hash table */
	str_key = kmem_alloc(strlen(dev_name) + 1, KM_SLEEP);
	(void) strcpy(str_key, dev_name);
	err = mod_hash_insert(vnic_mac_hash, (mod_hash_key_t)str_key,
	    (mod_hash_val_t)vnic_mac);
	ASSERT(err == 0);

	/* initialize the flow table associated with lower MAC */
	vnic_mac->va_addr_len = ETHERADDRL;
	(void) vnic_classifier_flow_tab_init(vnic_mac, vnic_mac->va_addr_len,
	    KM_SLEEP);

	vnic_mac->va_txinfo = mac_vnic_tx_get(vnic_mac->va_mh);
	vnic_mac->va_notify_hdl = mac_notify_add(vnic_mac->va_mh,
	    vnic_notify_cb, vnic_mac);

	VNIC_MAC_REFHOLD(vnic_mac);
	*vmp = vnic_mac;
	mutex_exit(&vnic_mac_lock);
	return (0);

bail:
	if (vnic_mac != NULL) {
		if (vnic_mac->va_mh != NULL)
			mac_close(vnic_mac->va_mh);
		kmem_cache_free(vnic_mac_cache, vnic_mac);
	}
	mutex_exit(&vnic_mac_lock);
	return (err);
}

/*
 * Create a new flow for the active MAC client sharing the NIC
 * with the VNICs. This allows the unicast packets for that NIC
 * to be classified and passed up to the active MAC client. It
 * also allows packets sent from a VNIC to the active link to
 * be classified by the VNIC transmit function and delivered via
 * the MAC module locally. Returns B_TRUE on success, B_FALSE on
 * failure.
 */
static int
vnic_init_active_rx(vnic_mac_t *vnic_mac)
{
	uchar_t nic_mac_addr[MAXMACADDRLEN];

	/* already set up? nothing to do */
	if (vnic_mac->va_active_flow != NULL)
		return (B_TRUE);

	mac_unicst_get(vnic_mac->va_mh, nic_mac_addr);

	vnic_mac->va_active_flow = vnic_classifier_flow_create(
	    vnic_mac->va_addr_len, nic_mac_addr, NULL, B_TRUE, KM_SLEEP);

	/* deliver matching packets to the active client via mac_active_rx */
	vnic_classifier_flow_add(vnic_mac, vnic_mac->va_active_flow,
	    (vnic_rx_fn_t)mac_active_rx, vnic_mac->va_mh, NULL);
	return (B_TRUE);
}

/* Remove and destroy the active MAC client's flow, if any. */
static void
vnic_fini_active_rx(vnic_mac_t *vnic_mac)
{
	if (vnic_mac->va_active_flow == NULL)
		return;

	vnic_classifier_flow_remove(vnic_mac, vnic_mac->va_active_flow);
	vnic_classifier_flow_destroy(vnic_mac->va_active_flow);
	vnic_mac->va_active_flow = NULL;
}

/*
 * Re-create the active flow, e.g. after the underlying NIC's unicast
 * address changed. No-op when no active flow currently exists.
 */
static void
vnic_update_active_rx(vnic_mac_t *vnic_mac)
{
	if (vnic_mac->va_active_flow == NULL)
		return;

	vnic_fini_active_rx(vnic_mac);
	(void) vnic_init_active_rx(vnic_mac);
}

/*
 * Copy an mblk, preserving its hardware checksum flags.
 * Returns NULL if the copy could not be allocated.
 */
mblk_t *
vnic_copymsg_cksum(mblk_t *mp)
{
	mblk_t *mp1;
	uint32_t start, stuff, end, value, flags;

	mp1 = copymsg(mp);
	if (mp1 == NULL)
		return (NULL);

	/* carry the offload metadata from the original over to the copy */
	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
	(void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value,
	    flags, KM_NOSLEEP);

	return (mp1);
}

/*
 * Copy an mblk chain, preserving the hardware checksum flags of the
 * individual mblks.
 */
mblk_t *
vnic_copymsgchain_cksum(mblk_t *mp)
{
	mblk_t *nmp = NULL;
	mblk_t **nmpp = &nmp;

	for (; mp != NULL; mp = mp->b_next) {
		/* on any allocation failure, free what was copied so far */
		if ((*nmpp = vnic_copymsg_cksum(mp)) == NULL) {
			freemsgchain(nmp);
			return (NULL);
		}

		nmpp = &((*nmpp)->b_next);
	}

	return (nmp);
}


/*
 * Process the specified mblk chain for proper handling of hardware
 * checksum offload. This routine is invoked for loopback VNIC traffic.
 * The function handles a NULL mblk chain passed as argument.
 */
mblk_t *
vnic_fix_cksum(mblk_t *mp_chain)
{
	mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1;
	uint32_t flags, start, stuff, end, value;

	for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) {
		uint16_t len;
		uint32_t offset;
		struct ether_header *ehp;
		uint16_t sap;

		hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value,
		    &flags);
		if (flags == 0)
			continue;

		/*
		 * Since the processing of checksum offload for loopback
		 * traffic requires modification of the packet contents,
		 * ensure that we are always modifying our own copy.
		 */
		if (DB_REF(mp) > 1) {
			mp1 = copymsg(mp);
			if (mp1 == NULL)
				continue;
			/* splice the copy into the chain in place of mp */
			mp1->b_next = mp->b_next;
			mp->b_next = NULL;
			freemsg(mp);
			if (prev != NULL)
				prev->b_next = mp1;
			else
				new_chain = mp1;
			mp = mp1;
		}

		/*
		 * Ethernet, and optionally VLAN header.
		 */
		/*LINTED*/
		ehp = (struct ether_header *)mp->b_rptr;
		if (ntohs(ehp->ether_type) == VLAN_TPID) {
			struct ether_vlan_header *evhp;

			ASSERT(MBLKL(mp) >=
			    sizeof (struct ether_vlan_header));
			/*LINTED*/
			evhp = (struct ether_vlan_header *)mp->b_rptr;
			sap = ntohs(evhp->ether_type);
			offset = sizeof (struct ether_vlan_header);
		} else {
			sap = ntohs(ehp->ether_type);
			offset = sizeof (struct ether_header);
		}

		/* step past leading mblks that hold only link-layer bytes */
		if (MBLKL(mp) <= offset) {
			offset -= MBLKL(mp);
			if (mp->b_cont == NULL) {
				/* corrupted packet, skip it */
				if (prev != NULL)
					prev->b_next = mp->b_next;
				else
					new_chain = mp->b_next;
				mp1 = mp->b_next;
				mp->b_next = NULL;
				freemsg(mp);
				mp = mp1;
				continue;
			}
			mp = mp->b_cont;
		}

		if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) {
			ipha_t *ipha = NULL;

			/*
			 * In order to compute the full and header
			 * checksums, we need to find and parse
			 * the IP and/or ULP headers.
			 */

			sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;

			/*
			 * IP header.
			 */
			if (sap != ETHERTYPE_IP)
				continue;

			ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t));
			/*LINTED*/
			ipha = (ipha_t *)(mp->b_rptr + offset);

			if (flags & HCK_FULLCKSUM) {
				ipaddr_t src, dst;
				uint32_t cksum;
				uint16_t *up;
				uint8_t proto;

				/*
				 * Pointer to checksum field in ULP header.
				 */
				proto = ipha->ipha_protocol;
				ASSERT(ipha->ipha_version_and_hdr_length ==
				    IP_SIMPLE_HDR_VERSION);
				if (proto == IPPROTO_TCP) {
					/*LINTED*/
					up = IPH_TCPH_CHECKSUMP(ipha,
					    IP_SIMPLE_HDR_LENGTH);
				} else {
					ASSERT(proto == IPPROTO_UDP);
					/*LINTED*/
					up = IPH_UDPH_CHECKSUMP(ipha,
					    IP_SIMPLE_HDR_LENGTH);
				}

				/*
				 * Pseudo-header checksum.
				 */
				src = ipha->ipha_src;
				dst = ipha->ipha_dst;
				len = ntohs(ipha->ipha_length) -
				    IP_SIMPLE_HDR_LENGTH;

				cksum = (dst >> 16) + (dst & 0xFFFF) +
				    (src >> 16) + (src & 0xFFFF);
				cksum += htons(len);

				/*
				 * The checksum value stored in the packet needs
				 * to be correct. Compute it here.
				 */
				*up = 0;
				cksum += (((proto) == IPPROTO_UDP) ?
				    IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP);
				cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH +
				    offset, cksum);
				*(up) = (uint16_t)(cksum ? cksum : ~cksum);

				flags |= HCK_FULLCKSUM_OK;
				value = 0xffff;
			}

			if (flags & HCK_IPV4_HDRCKSUM) {
				ASSERT(ipha != NULL);
				ipha->ipha_hdr_checksum =
				    (uint16_t)ip_csum_hdr(ipha);
			}
		}

		if (flags & HCK_PARTIALCKSUM) {
			uint16_t *up, partial, cksum;
			uchar_t *ipp; /* ptr to beginning of IP header */

			/* need the checksummed span in one contiguous mblk */
			if (mp->b_cont != NULL) {
				mblk_t *mp1;

				mp1 = msgpullup(mp, offset + end);
				if (mp1 == NULL)
					continue;
				mp1->b_next = mp->b_next;
				mp->b_next = NULL;
				freemsg(mp);
				if (prev != NULL)
					prev->b_next = mp1;
				else
					new_chain = mp1;
				mp = mp1;
			}

			ipp = mp->b_rptr + offset;
			/*LINTED*/
			up = (uint16_t *)((uchar_t *)ipp + stuff);
			partial = *up;
			*up = 0;

			cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start,
			    end - start, partial);
			cksum = ~cksum;
			*up = cksum ? cksum : ~cksum;

			/*
			 * Since we already computed the whole checksum,
			 * indicate to the stack that it has already
			 * been verified by the hardware.
			 */
			flags &= ~HCK_PARTIALCKSUM;
			flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK);
			value = 0xffff;
		}

		(void) hcksum_assoc(mp, NULL, NULL, start, stuff, end,
		    value, flags, KM_NOSLEEP);
	}

	return (new_chain);
}

/* Drop one reference on the lower MAC; last release frees it. */
static void
vnic_mac_close(vnic_mac_t *vnic_mac)
{
	mutex_enter(&vnic_mac_lock);
	VNIC_MAC_REFRELE(vnic_mac);
	mutex_exit(&vnic_mac_lock);
}

/*
 * Free an open lower MAC whose last reference went away: tear down the
 * active flow, notification callback, flow table, close the MAC, and
 * remove it from vnic_mac_hash. Called with vnic_mac_lock held.
 */
static void
vnic_mac_free(vnic_mac_t *vnic_mac)
{
	mod_hash_val_t val;

	ASSERT(MUTEX_HELD(&vnic_mac_lock));
	vnic_fini_active_rx(vnic_mac);
	mac_notify_remove(vnic_mac->va_mh, vnic_mac->va_notify_hdl);
	if (vnic_mac->va_mac_set) {
		vnic_mac->va_mac_set = B_FALSE;
		mac_vnic_clear(vnic_mac->va_mh);
	}
	vnic_classifier_flow_tab_fini(vnic_mac);
	mac_close(vnic_mac->va_mh);

	(void) mod_hash_remove(vnic_mac_hash,
	    (mod_hash_key_t)vnic_mac->va_dev_name, &val);
	ASSERT(vnic_mac == (vnic_mac_t *)val);

	kmem_cache_free(vnic_mac_cache, vnic_mac);
}

/*
 * Initial VNIC receive routine. Invoked for packets that are steered
 * to a VNIC but the VNIC has not been started yet.
 */
/* ARGSUSED */
static void
vnic_rx_initial(void *arg1, void *arg2, mblk_t *mp_chain)
{
	vnic_t *vnic = arg1;
	mblk_t *mp;

	/* update stats; packets arriving before start count as input errors */
	for (mp = mp_chain; mp != NULL; mp = mp->b_next)
		vnic->vn_stat_ierrors++;
	freemsgchain(mp_chain);
}

/*
 * VNIC receive routine invoked after the classifier for the VNIC
 * has been initialized and the VNIC has been started.
 */
/* ARGSUSED */
void
vnic_rx(void *arg1, void *arg2, mblk_t *mp_chain)
{
	vnic_t *vnic = arg1;
	mblk_t *mp;

	/* update stats */
	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
		vnic->vn_stat_ipackets++;
		vnic->vn_stat_rbytes += msgdsize(mp);
	}

	/* pass packet up */
	mac_rx(vnic->vn_mh, NULL, mp_chain);
}

/*
 * Routine to create a MAC-based VNIC. Adds the passed MAC address
 * to an unused slot in the NIC if one is available. Otherwise it
 * sets the NIC in promiscuous mode and assigns the MAC address to
 * a Rx ring if available or a soft ring.
 */
static int
vnic_add_unicstaddr(vnic_t *vnic, mac_multi_addr_t *maddr)
{
	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
	int err;

	/* the address must be a valid unicast address for the lower MAC */
	if (mac_unicst_verify(vnic_mac->va_mh, maddr->mma_addr,
	    maddr->mma_addrlen) == B_FALSE)
		return (EINVAL);

	if (mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_MULTIADDRESS,
	    &(vnic->vn_mma_capab))) {
		if (vnic->vn_maddr_naddrfree == 0) {
			/*
			 * No free address slots available.
			 * Enable promiscuous mode.
			 */
			goto set_promisc;
		}

		err = vnic->vn_maddr_add(vnic->vn_maddr_handle, maddr);
		if (err != 0) {
			if (err == ENOSPC) {
				/*
				 * There was a race to add addresses
				 * with other multiple address consumers,
				 * and we lost out. Use promisc mode.
				 */
				goto set_promisc;
			}

			return (err);
		}

		vnic->vn_slot_id = maddr->mma_slot;
		vnic->vn_multi_mac = B_TRUE;
	} else {
		/*
		 * Either multiple MAC address support is not
		 * available or all available addresses have
		 * been used up.
		 */
	set_promisc:
		err = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC);
		if (err != 0) {
			return (err);
		}

		vnic->vn_promisc_mac = B_TRUE;
	}
	return (err);
}

/*
 * VNIC is getting deleted. Remove the MAC address from the slot.
693 * If promiscuous mode was being used, then unset the promiscuous mode. 694 */ 695 static int 696 vnic_remove_unicstaddr(vnic_t *vnic) 697 { 698 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 699 int err; 700 701 if (vnic->vn_multi_mac) { 702 ASSERT(vnic->vn_promisc_mac == B_FALSE); 703 err = vnic->vn_maddr_remove(vnic->vn_maddr_handle, 704 vnic->vn_slot_id); 705 vnic->vn_multi_mac = B_FALSE; 706 } 707 708 if (vnic->vn_promisc_mac) { 709 ASSERT(vnic->vn_multi_mac == B_FALSE); 710 err = mac_promisc_set(vnic_mac->va_mh, B_FALSE, MAC_DEVPROMISC); 711 vnic->vn_promisc_mac = B_FALSE; 712 } 713 714 return (err); 715 } 716 717 /* 718 * Create a new VNIC upon request from administrator. 719 * Returns 0 on success, an errno on failure. 720 */ 721 int 722 vnic_dev_create(uint_t vnic_id, char *dev_name, int mac_len, uchar_t *mac_addr) 723 { 724 vnic_t *vnic = NULL; 725 mac_register_t *mac; 726 int err; 727 vnic_mac_t *vnic_mac; 728 const mac_info_t *lower_mac_info; 729 mac_multi_addr_t maddr; 730 mac_txinfo_t tx_info; 731 732 if (mac_len != ETHERADDRL) { 733 /* currently only ethernet NICs are supported */ 734 return (EINVAL); 735 } 736 737 rw_enter(&vnic_lock, RW_WRITER); 738 739 /* does a VNIC with the same id already exist? 
*/ 740 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 741 (mod_hash_val_t *)&vnic); 742 if (err == 0) { 743 rw_exit(&vnic_lock); 744 return (EEXIST); 745 } 746 747 vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP); 748 if (vnic == NULL) { 749 rw_exit(&vnic_lock); 750 return (ENOMEM); 751 } 752 753 /* open underlying MAC */ 754 err = vnic_mac_open(dev_name, &vnic_mac); 755 if (err != 0) { 756 kmem_cache_free(vnic_cache, vnic); 757 rw_exit(&vnic_lock); 758 return (err); 759 } 760 761 bzero(vnic, sizeof (*vnic)); 762 vnic->vn_id = vnic_id; 763 vnic->vn_vnic_mac = vnic_mac; 764 765 vnic->vn_started = B_FALSE; 766 vnic->vn_promisc = B_FALSE; 767 vnic->vn_multi_mac = B_FALSE; 768 vnic->vn_bcast_grp = B_FALSE; 769 770 /* set the VNIC MAC address */ 771 maddr.mma_addrlen = mac_len; 772 maddr.mma_slot = 0; 773 maddr.mma_flags = 0; 774 bcopy(mac_addr, maddr.mma_addr, mac_len); 775 if ((err = vnic_add_unicstaddr(vnic, &maddr)) != 0) 776 goto bail; 777 bcopy(mac_addr, vnic->vn_addr, mac_len); 778 779 /* set the initial VNIC capabilities */ 780 if (!mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_HCKSUM, 781 &vnic->vn_hcksum_txflags)) 782 vnic->vn_hcksum_txflags = 0; 783 784 /* register with the MAC module */ 785 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 786 goto bail; 787 788 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 789 mac->m_driver = vnic; 790 mac->m_dip = vnic_get_dip(); 791 mac->m_instance = vnic_id; 792 mac->m_src_addr = vnic->vn_addr; 793 mac->m_callbacks = &vnic_m_callbacks; 794 795 lower_mac_info = mac_info(vnic_mac->va_mh); 796 mac->m_min_sdu = lower_mac_info->mi_sdu_min; 797 mac->m_max_sdu = lower_mac_info->mi_sdu_max; 798 799 err = mac_register(mac, &vnic->vn_mh); 800 mac_free(mac); 801 if (err != 0) 802 goto bail; 803 804 /* add new VNIC to hash table */ 805 err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id), 806 (mod_hash_val_t)vnic); 807 ASSERT(err == 0); 808 vnic_count++; 809 810 rw_exit(&vnic_lock); 811 812 /* Create a flow, initialized with the 
MAC address of the VNIC */ 813 if ((vnic->vn_flow_ent = vnic_classifier_flow_create(mac_len, mac_addr, 814 NULL, B_FALSE, KM_SLEEP)) == NULL) { 815 (void) vnic_dev_delete(vnic_id); 816 vnic = NULL; 817 err = ENOMEM; 818 goto bail_unlocked; 819 } 820 821 vnic_classifier_flow_add(vnic_mac, vnic->vn_flow_ent, vnic_rx_initial, 822 vnic, vnic); 823 824 /* setup VNIC to receive broadcast packets */ 825 err = vnic_bcast_add(vnic, vnic_brdcst_mac, MAC_ADDRTYPE_BROADCAST); 826 if (err != 0) { 827 (void) vnic_dev_delete(vnic_id); 828 vnic = NULL; 829 goto bail_unlocked; 830 } 831 vnic->vn_bcast_grp = B_TRUE; 832 833 mutex_enter(&vnic_mac_lock); 834 if (!vnic_mac->va_mac_set) { 835 /* 836 * We want to MAC layer to call the VNIC tx outbound 837 * routine, so that local broadcast packets sent by 838 * the active interface sharing the underlying NIC (if 839 * any), can be broadcast to every VNIC. 840 */ 841 tx_info.mt_fn = vnic_active_tx; 842 tx_info.mt_arg = vnic_mac; 843 if (!mac_vnic_set(vnic_mac->va_mh, &tx_info, 844 vnic_m_capab_get, vnic)) { 845 mutex_exit(&vnic_mac_lock); 846 (void) vnic_dev_delete(vnic_id); 847 vnic = NULL; 848 err = EBUSY; 849 goto bail_unlocked; 850 } 851 vnic_mac->va_mac_set = B_TRUE; 852 } 853 mutex_exit(&vnic_mac_lock); 854 855 /* allow passing packets to NIC's active MAC client */ 856 if (!vnic_init_active_rx(vnic_mac)) { 857 (void) vnic_dev_delete(vnic_id); 858 vnic = NULL; 859 err = ENOMEM; 860 goto bail_unlocked; 861 } 862 863 return (0); 864 865 bail: 866 (void) vnic_remove_unicstaddr(vnic); 867 vnic_mac_close(vnic_mac); 868 rw_exit(&vnic_lock); 869 870 bail_unlocked: 871 if (vnic != NULL) { 872 kmem_cache_free(vnic_cache, vnic); 873 } 874 875 return (err); 876 } 877 878 /* 879 * Modify the properties of an existing VNIC. 
 */
/* ARGSUSED */
int
vnic_dev_modify(uint_t vnic_id, uint_t modify_mask,
    vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr)
{
	vnic_t *vnic = NULL;
	int rv = 0;
	boolean_t notify_mac_addr = B_FALSE;

	rw_enter(&vnic_lock, RW_WRITER);

	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
	    (mod_hash_val_t *)&vnic) != 0) {
		rw_exit(&vnic_lock);
		return (ENOENT);
	}

	if (modify_mask & VNIC_IOC_MODIFY_ADDR) {
		rv = vnic_modify_mac_addr(vnic, mac_len, mac_addr);
		if (rv == 0)
			notify_mac_addr = B_TRUE;
	}

	rw_exit(&vnic_lock);

	/* notify the MAC layer outside of vnic_lock */
	if (notify_mac_addr)
		mac_unicst_update(vnic->vn_mh, mac_addr);

	return (rv);
}

/*
 * Delete the VNIC identified by vnic_id. Returns 0 on success,
 * ENOENT if no such VNIC exists, EBUSY if the MAC cannot be disabled.
 */
int
vnic_dev_delete(uint_t vnic_id)
{
	vnic_t *vnic = NULL;
	mod_hash_val_t val;
	vnic_flow_t *flent;
	int rc;
	vnic_mac_t *vnic_mac;

	rw_enter(&vnic_lock, RW_WRITER);

	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
	    (mod_hash_val_t *)&vnic) != 0) {
		rw_exit(&vnic_lock);
		return (ENOENT);
	}

	/*
	 * We cannot unregister the MAC yet. Unregistering would
	 * free up mac_impl_t which should not happen at this time.
	 * Packets could be entering vnic_rx() through the
	 * flow entry and so mac_impl_t cannot be NULL. So disable
	 * mac_impl_t by calling mac_disable(). This will prevent any
	 * new claims on mac_impl_t.
	 */
	if (mac_disable(vnic->vn_mh) != 0) {
		rw_exit(&vnic_lock);
		return (EBUSY);
	}

	(void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
	ASSERT(vnic == (vnic_t *)val);

	if (vnic->vn_bcast_grp)
		(void) vnic_bcast_delete(vnic, vnic_brdcst_mac);

	flent = vnic->vn_flow_ent;
	if (flent != NULL) {
		/*
		 * vnic_classifier_flow_destroy() ensures that the
		 * flow is no longer used.
		 */
		vnic_classifier_flow_remove(vnic->vn_vnic_mac, flent);
		vnic_classifier_flow_destroy(flent);
	}

	rc = mac_unregister(vnic->vn_mh);
	ASSERT(rc == 0);
	(void) vnic_remove_unicstaddr(vnic);
	vnic_mac = vnic->vn_vnic_mac;
	kmem_cache_free(vnic_cache, vnic);
	vnic_count--;
	rw_exit(&vnic_lock);
	/* drop the lower MAC reference outside of vnic_lock */
	vnic_mac_close(vnic_mac);
	return (0);
}

/*
 * For the specified packet chain, return a sub-chain to be sent
 * and the transmit function to be used to send the packet. Also
 * return a pointer to the sub-chain of packets that should
 * be re-classified. If the function returns NULL, the packet
 * should be sent using the underlying NIC.
 */
static vnic_flow_t *
vnic_classify(vnic_mac_t *vnic_mac, mblk_t *mp, mblk_t **mp_chain_rest)
{
	vnic_flow_t *flow_ent;

	/* one packet at a time */
	*mp_chain_rest = mp->b_next;
	mp->b_next = NULL;

	/* do classification on the packet */
	flow_ent = vnic_classifier_get_flow(vnic_mac, mp);

	return (flow_ent);
}

/*
 * Send a packet chain to a local VNIC or an active MAC client.
 */
static void
vnic_local_tx(vnic_mac_t *vnic_mac, vnic_flow_t *flow_ent, mblk_t *mp_chain)
{
	mblk_t *mp1;
	const vnic_flow_fn_info_t *fn_info;
	vnic_t *vnic;

	if (!vnic_classifier_is_active(flow_ent) &&
	    mac_promisc_get(vnic_mac->va_mh, MAC_PROMISC)) {
		/*
		 * If the MAC is in promiscuous mode,
		 * send a copy to the active client.
		 */
		if ((mp1 = vnic_copymsgchain_cksum(mp_chain)) == NULL)
			goto sendit;
		if ((mp1 = vnic_fix_cksum(mp1)) == NULL)
			goto sendit;
		mac_active_rx(vnic_mac->va_mh, NULL, mp1);
	}
sendit:
	fn_info = vnic_classifier_get_fn_info(flow_ent);
	/*
	 * If the vnic to which we would deliver this packet is in
	 * promiscuous mode then it already received the packet via
	 * vnic_promisc_rx().
	 *
	 * XXX assumes that ff_arg2 is a vnic_t pointer if it is
	 * non-NULL (currently always true).
	 */
	vnic = (vnic_t *)fn_info->ff_arg2;
	if ((vnic != NULL) && vnic->vn_promisc)
		freemsg(mp_chain);
	else if ((mp1 = vnic_fix_cksum(mp_chain)) != NULL)
		(fn_info->ff_fn)(fn_info->ff_arg1, fn_info->ff_arg2, mp1);
}

/*
 * This function is invoked when a MAC client needs to send a packet
 * to a NIC which is shared by VNICs. It is passed to the MAC layer
 * by a call to mac_vnic_set() when the NIC is opened, and is returned
 * to MAC clients by mac_tx_get() when VNICs are present.
 */
mblk_t *
vnic_active_tx(void *arg, mblk_t *mp_chain)
{
	vnic_mac_t *vnic_mac = arg;
	mblk_t *mp, *extra_mp = NULL;
	vnic_flow_t *flow_ent;
	void *flow_cookie;
	const mac_txinfo_t *mtp = vnic_mac->va_txinfo;

	for (mp = mp_chain; mp != NULL; mp = extra_mp) {
		mblk_t *next;

		next = mp->b_next;
		mp->b_next = NULL;

		/* (vnic_t *)-1 identifies the active (non-VNIC) sender */
		vnic_promisc_rx(vnic_mac, (vnic_t *)-1, mp);

		flow_ent = vnic_classify(vnic_mac, mp, &extra_mp);
		ASSERT(extra_mp == NULL);
		extra_mp = next;

		if (flow_ent != NULL) {
			flow_cookie = vnic_classifier_get_client_cookie(
			    flow_ent);
			if (flow_cookie != NULL) {
				/*
				 * Send a copy to every VNIC defined on the
				 * interface, as well as the underlying MAC.
				 */
				vnic_bcast_send(flow_cookie, (vnic_t *)-1, mp);
			} else {
				/*
				 * loopback the packet to a local VNIC or
				 * an active MAC client.
				 */
				vnic_local_tx(vnic_mac, flow_ent, mp);
			}
			VNIC_FLOW_REFRELE(flow_ent);
			mp_chain = NULL;
		} else {
			/*
			 * Non-VNIC destination, send via the underlying
			 * NIC. In order to avoid a recursive call
			 * to this function, we ensured that mtp points
			 * to the underlying NIC transmit function
			 * by initializing through mac_vnic_tx_get().
			 */
			mp_chain = mtp->mt_fn(mtp->mt_arg, mp);
			if (mp_chain != NULL)
				break;
		}
	}

	/* re-attach the unclassified remainder to the unsent sub-chain */
	if ((mp_chain != NULL) && (extra_mp != NULL)) {
		ASSERT(mp_chain->b_next == NULL);
		mp_chain->b_next = extra_mp;
	}
	return (mp_chain);
}

/*
 * VNIC transmit function.
 */
mblk_t *
vnic_m_tx(void *arg, mblk_t *mp_chain)
{
	vnic_t *vnic = arg;
	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
	mblk_t *mp, *extra_mp = NULL;
	vnic_flow_t *flow_ent;
	void *flow_cookie;

	/*
	 * Update stats.
	 */
	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
		vnic->vn_stat_opackets++;
		vnic->vn_stat_obytes += msgdsize(mp);
	}

	for (mp = mp_chain; mp != NULL; mp = extra_mp) {
		mblk_t *next;

		next = mp->b_next;
		mp->b_next = NULL;

		vnic_promisc_rx(vnic->vn_vnic_mac, vnic, mp);

		flow_ent = vnic_classify(vnic->vn_vnic_mac, mp, &extra_mp);
		ASSERT(extra_mp == NULL);
		extra_mp = next;

		if (flow_ent != NULL) {
			flow_cookie = vnic_classifier_get_client_cookie(
			    flow_ent);
			if (flow_cookie != NULL) {
				/*
				 * The vnic_bcast_send function expects
				 * to receive the sender VNIC as value
				 * for arg2.
				 */
				vnic_bcast_send(flow_cookie, vnic, mp);
			} else {
				/*
				 * loopback the packet to a local VNIC or
				 * an active MAC client.
				 */
				vnic_local_tx(vnic_mac, flow_ent, mp);
			}
			VNIC_FLOW_REFRELE(flow_ent);
			mp_chain = NULL;
		} else {
			/*
			 * Non-local destination, send via the underlying
			 */
			const mac_txinfo_t *mtp = vnic->vn_txinfo;
			mp_chain = mtp->mt_fn(mtp->mt_arg, mp);
			if (mp_chain != NULL)
				break;
		}
	}

	/* update stats to account for unsent packets */
	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
		vnic->vn_stat_opackets--;
		vnic->vn_stat_obytes -= msgdsize(mp);
		vnic->vn_stat_oerrors++;
		/*
		 * link back in the last portion not counted due to bandwidth
		 * control.
		 */
		if (mp->b_next == NULL) {
			mp->b_next = extra_mp;
			break;
		}
	}

	return (mp_chain);
}

/* m_resources callback: VNICs have no rx resources to advertise. */
/* ARGSUSED */
static void
vnic_m_resources(void *arg)
{
	/* no resources to advertise */
}

/*
 * m_stat callback. Link duplex and speed come from the underlying
 * MAC; packet/byte/error counters are maintained per VNIC.
 */
static int
vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnic_t *vnic = arg;
	int rval = 0;

	rw_enter(&vnic_lock, RW_READER);

	switch (stat) {
	case ETHER_STAT_LINK_DUPLEX:
		*val = mac_stat_get(vnic->vn_vnic_mac->va_mh,
		    ETHER_STAT_LINK_DUPLEX);
		break;
	case MAC_STAT_IFSPEED:
		*val = mac_stat_get(vnic->vn_vnic_mac->va_mh,
		    MAC_STAT_IFSPEED);
		break;
	case MAC_STAT_MULTIRCV:
		*val = vnic->vn_stat_multircv;
		break;
	case MAC_STAT_BRDCSTRCV:
		*val = vnic->vn_stat_brdcstrcv;
		break;
	case MAC_STAT_MULTIXMT:
		*val = vnic->vn_stat_multixmt;
		break;
	case MAC_STAT_BRDCSTXMT:
		*val = vnic->vn_stat_brdcstxmt;
		break;
	case MAC_STAT_IERRORS:
		*val = vnic->vn_stat_ierrors;
		break;
	case MAC_STAT_OERRORS:
		*val = vnic->vn_stat_oerrors;
		break;
	case MAC_STAT_RBYTES:
		*val = vnic->vn_stat_rbytes;
		break;
	case MAC_STAT_IPACKETS:
		*val = vnic->vn_stat_ipackets;
		break;
	case MAC_STAT_OBYTES:
		*val = vnic->vn_stat_obytes;
		break;
	case MAC_STAT_OPACKETS:
		*val = vnic->vn_stat_opackets;
		break;
	default:
		rval = ENOTSUP;
	}

	rw_exit(&vnic_lock);
	return (rval);
}

/*
 * Return information about the specified capability.
 */
/* ARGSUSED */
static boolean_t
vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
{
	vnic_t *vnic = arg;

	switch (cap) {
	case MAC_CAPAB_POLL:
		return (B_TRUE);
	case MAC_CAPAB_HCKSUM: {
		uint32_t *hcksum_txflags = cap_data;

		/* advertise only the tx offloads the lower MAC supports */
		*hcksum_txflags = vnic->vn_hcksum_txflags &
		    (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
		    HCKSUM_INET_PARTIAL);
		break;
	}
	default:
		return (B_FALSE);
	}
	return (B_TRUE);
}

/*
 * m_start callback: start the underlying MAC, then switch the VNIC's
 * flow from the packet-dropping vnic_rx_initial() to vnic_rx().
 */
static int
vnic_m_start(void *arg)
{
	vnic_t *vnic = arg;
	mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh;
	int rc;

	rc = mac_start(lower_mh);
	if (rc != 0)
		return (rc);

	vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx, vnic, vnic);
	return (0);
}

/* m_stop callback: revert the flow to vnic_rx_initial(), stop lower MAC. */
static void
vnic_m_stop(void *arg)
{
	vnic_t *vnic = arg;
	mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh;

	vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx_initial,
	    vnic, vnic);
	mac_stop(lower_mh);
}

/* m_promisc callback. */
/* ARGSUSED */
static int
vnic_m_promisc(void *arg, boolean_t on)
{
	vnic_t *vnic = arg;

	return (vnic_promisc_set(vnic, on));
}

/*
 * m_multicst callback: multicast membership is implemented via the
 * VNIC broadcast groups.
 */
static int
vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
{
	vnic_t *vnic = arg;
	int rc = 0;

	if (add)
		rc = vnic_bcast_add(vnic, addrp, MAC_ADDRTYPE_MULTICAST);
	else
		vnic_bcast_delete(vnic, addrp);

	return (rc);
}

/* m_unicst callback: change the VNIC's unicast MAC address. */
static int
vnic_m_unicst(void *arg, const uint8_t *mac_addr)
{
	vnic_t *vnic = arg;
	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
	int rv;

	rw_enter(&vnic_lock, RW_WRITER);
	rv = vnic_modify_mac_addr(vnic, vnic_mac->va_addr_len,
	    (uchar_t *)mac_addr);
	rw_exit(&vnic_lock);
1326 if (rv == 0) 1327 mac_unicst_update(vnic->vn_mh, mac_addr); 1328 return (0); 1329 } 1330 1331 int 1332 vnic_info(uint_t *nvnics, uint32_t vnic_id, char *dev_name, void *fn_arg, 1333 vnic_info_new_vnic_fn_t new_vnic_fn) 1334 { 1335 vnic_info_state_t state; 1336 int rc = 0; 1337 1338 rw_enter(&vnic_lock, RW_READER); 1339 1340 *nvnics = vnic_count; 1341 1342 bzero(&state, sizeof (state)); 1343 state.vs_vnic_id = vnic_id; 1344 bcopy(state.vs_dev_name, dev_name, MAXNAMELEN); 1345 state.vs_new_vnic_fn = new_vnic_fn; 1346 state.vs_fn_arg = fn_arg; 1347 1348 mod_hash_walk(vnic_hash, vnic_info_walker, &state); 1349 1350 if ((rc = state.vs_rc) == 0 && vnic_id != 0 && 1351 !state.vs_vnic_found) 1352 rc = ENOENT; 1353 1354 rw_exit(&vnic_lock); 1355 return (rc); 1356 } 1357 1358 /* 1359 * Walker invoked when building a list of vnics that must be passed 1360 * up to user space. 1361 */ 1362 /*ARGSUSED*/ 1363 static uint_t 1364 vnic_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 1365 { 1366 vnic_t *vnic; 1367 vnic_info_state_t *state = arg; 1368 1369 if (state->vs_rc != 0) 1370 return (MH_WALK_TERMINATE); /* terminate walk */ 1371 1372 vnic = (vnic_t *)val; 1373 1374 if (state->vs_vnic_id != 0 && vnic->vn_id != state->vs_vnic_id) 1375 goto bail; 1376 1377 state->vs_vnic_found = B_TRUE; 1378 1379 state->vs_rc = state->vs_new_vnic_fn(state->vs_fn_arg, 1380 vnic->vn_id, vnic->vn_addr_type, vnic->vn_vnic_mac->va_addr_len, 1381 vnic->vn_addr, vnic->vn_vnic_mac->va_dev_name); 1382 bail: 1383 return ((state->vs_rc == 0) ? MH_WALK_CONTINUE : MH_WALK_TERMINATE); 1384 } 1385 1386 /* 1387 * vnic_notify_cb() and vnic_notify_walker() below are used to 1388 * process events received from an underlying NIC and, if needed, 1389 * forward these events to the VNICs defined on top of that NIC. 
1390 */ 1391 1392 typedef struct vnic_notify_state { 1393 mac_notify_type_t vo_type; 1394 vnic_mac_t *vo_vnic_mac; 1395 } vnic_notify_state_t; 1396 1397 /* ARGSUSED */ 1398 static uint_t 1399 vnic_notify_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 1400 { 1401 vnic_t *vnic = (vnic_t *)val; 1402 vnic_notify_state_t *state = arg; 1403 1404 /* ignore VNICs that don't use the specified underlying MAC */ 1405 if (vnic->vn_vnic_mac != state->vo_vnic_mac) 1406 return (MH_WALK_CONTINUE); 1407 1408 switch (state->vo_type) { 1409 case MAC_NOTE_TX: 1410 mac_tx_update(vnic->vn_mh); 1411 break; 1412 case MAC_NOTE_LINK: 1413 /* 1414 * The VNIC link state must be up regardless of 1415 * the link state of the underlying NIC to maintain 1416 * connectivity between VNICs on the same host. 1417 */ 1418 mac_link_update(vnic->vn_mh, LINK_STATE_UP); 1419 break; 1420 case MAC_NOTE_UNICST: 1421 vnic_update_active_rx(vnic->vn_vnic_mac); 1422 break; 1423 case MAC_NOTE_VNIC: 1424 /* only for clients which share a NIC with a VNIC */ 1425 break; 1426 case MAC_NOTE_PROMISC: 1427 mutex_enter(&vnic_mac_lock); 1428 vnic->vn_vnic_mac->va_txinfo = mac_vnic_tx_get( 1429 vnic->vn_vnic_mac->va_mh); 1430 mutex_exit(&vnic_mac_lock); 1431 break; 1432 } 1433 1434 return (MH_WALK_CONTINUE); 1435 } 1436 1437 static void 1438 vnic_notify_cb(void *arg, mac_notify_type_t type) 1439 { 1440 vnic_mac_t *vnic = arg; 1441 vnic_notify_state_t state; 1442 1443 state.vo_type = type; 1444 state.vo_vnic_mac = vnic; 1445 1446 rw_enter(&vnic_lock, RW_READER); 1447 mod_hash_walk(vnic_hash, vnic_notify_walker, &state); 1448 rw_exit(&vnic_lock); 1449 } 1450 1451 static int 1452 vnic_modify_mac_addr(vnic_t *vnic, uint_t mac_len, uchar_t *mac_addr) 1453 { 1454 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1455 vnic_flow_t *vnic_flow = vnic->vn_flow_ent; 1456 1457 ASSERT(RW_WRITE_HELD(&vnic_lock)); 1458 1459 if (mac_len != vnic_mac->va_addr_len) 1460 return (EINVAL); 1461 1462 
vnic_classifier_flow_update_addr(vnic_flow, mac_addr); 1463 return (0); 1464 } 1465 1466 static int 1467 vnic_promisc_set(vnic_t *vnic, boolean_t on) 1468 { 1469 vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 1470 int r = -1; 1471 1472 if (vnic->vn_promisc == on) 1473 return (0); 1474 1475 if (on) { 1476 r = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC); 1477 if (r != 0) 1478 return (r); 1479 1480 rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER); 1481 vnic->vn_promisc_next = vnic_mac->va_promisc; 1482 vnic_mac->va_promisc = vnic; 1483 vnic_mac->va_promisc_gen++; 1484 1485 vnic->vn_promisc = B_TRUE; 1486 rw_exit(&vnic_mac->va_promisc_lock); 1487 1488 return (0); 1489 } else { 1490 vnic_t *loop, *prev = NULL; 1491 1492 rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER); 1493 loop = vnic_mac->va_promisc; 1494 1495 while ((loop != NULL) && (loop != vnic)) { 1496 prev = loop; 1497 loop = loop->vn_promisc_next; 1498 } 1499 1500 if ((loop != NULL) && 1501 ((r = mac_promisc_set(vnic_mac->va_mh, B_FALSE, 1502 MAC_DEVPROMISC)) == 0)) { 1503 if (prev != NULL) 1504 prev->vn_promisc_next = loop->vn_promisc_next; 1505 else 1506 vnic_mac->va_promisc = loop->vn_promisc_next; 1507 vnic_mac->va_promisc_gen++; 1508 1509 vnic->vn_promisc = B_FALSE; 1510 } 1511 rw_exit(&vnic_mac->va_promisc_lock); 1512 1513 return (r); 1514 } 1515 } 1516 1517 void 1518 vnic_promisc_rx(vnic_mac_t *vnic_mac, vnic_t *sender, mblk_t *mp) 1519 { 1520 vnic_t *loop; 1521 vnic_flow_t *flow; 1522 const vnic_flow_fn_info_t *fn_info; 1523 mac_header_info_t hdr_info; 1524 boolean_t dst_must_match = B_TRUE; 1525 1526 ASSERT(mp->b_next == NULL); 1527 1528 rw_enter(&vnic_mac->va_promisc_lock, RW_READER); 1529 if (vnic_mac->va_promisc == NULL) 1530 goto done; 1531 1532 if (mac_header_info(vnic_mac->va_mh, mp, &hdr_info) != 0) 1533 goto done; 1534 1535 /* 1536 * If this is broadcast or multicast then the destination 1537 * address need not match for us to deliver it. 
1538 */ 1539 if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) || 1540 (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST)) 1541 dst_must_match = B_FALSE; 1542 1543 for (loop = vnic_mac->va_promisc; 1544 loop != NULL; 1545 loop = loop->vn_promisc_next) { 1546 if (loop == sender) 1547 continue; 1548 1549 if (dst_must_match && 1550 (bcmp(hdr_info.mhi_daddr, loop->vn_addr, 1551 sizeof (loop->vn_addr)) != 0)) 1552 continue; 1553 1554 flow = loop->vn_flow_ent; 1555 ASSERT(flow != NULL); 1556 1557 if (!flow->vf_is_active) { 1558 mblk_t *copy; 1559 uint64_t gen; 1560 1561 if ((copy = vnic_copymsg_cksum(mp)) == NULL) 1562 break; 1563 if ((sender != NULL) && 1564 ((copy = vnic_fix_cksum(copy)) == NULL)) 1565 break; 1566 1567 VNIC_FLOW_REFHOLD(flow); 1568 gen = vnic_mac->va_promisc_gen; 1569 rw_exit(&vnic_mac->va_promisc_lock); 1570 1571 fn_info = vnic_classifier_get_fn_info(flow); 1572 (fn_info->ff_fn)(fn_info->ff_arg1, 1573 fn_info->ff_arg2, copy); 1574 1575 VNIC_FLOW_REFRELE(flow); 1576 rw_enter(&vnic_mac->va_promisc_lock, RW_READER); 1577 if (vnic_mac->va_promisc_gen != gen) 1578 break; 1579 } 1580 } 1581 done: 1582 rw_exit(&vnic_mac->va_promisc_lock); 1583 } 1584