1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/param.h> 30 #include <sys/stream.h> 31 #include <sys/kmem.h> 32 #include <sys/conf.h> 33 #include <sys/devops.h> 34 #include <sys/ksynch.h> 35 #include <sys/stat.h> 36 #include <sys/modctl.h> 37 #include <sys/modhash.h> 38 #include <sys/debug.h> 39 #include <sys/ethernet.h> 40 #include <sys/dlpi.h> 41 #include <net/if.h> 42 #include <sys/mac.h> 43 #include <sys/mac_ether.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/strsun.h> 47 #include <sys/note.h> 48 #include <sys/atomic.h> 49 #include <sys/vnet.h> 50 #include <sys/vlan.h> 51 #include <sys/vnet_mailbox.h> 52 #include <sys/vnet_common.h> 53 #include <sys/dds.h> 54 #include <sys/strsubr.h> 55 #include <sys/taskq.h> 56 57 /* 58 * Function prototypes. 59 */ 60 61 /* DDI entrypoints */ 62 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 63 static int vnetattach(dev_info_t *, ddi_attach_cmd_t); 64 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t); 65 66 /* MAC entrypoints */ 67 static int vnet_m_stat(void *, uint_t, uint64_t *); 68 static int vnet_m_start(void *); 69 static void vnet_m_stop(void *); 70 static int vnet_m_promisc(void *, boolean_t); 71 static int vnet_m_multicst(void *, boolean_t, const uint8_t *); 72 static int vnet_m_unicst(void *, const uint8_t *); 73 mblk_t *vnet_m_tx(void *, mblk_t *); 74 75 /* vnet internal functions */ 76 static int vnet_mac_register(vnet_t *); 77 static int vnet_read_mac_address(vnet_t *vnetp); 78 79 /* Forwarding database (FDB) routines */ 80 static void vnet_fdb_create(vnet_t *vnetp); 81 static void vnet_fdb_destroy(vnet_t *vnetp); 82 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp); 83 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val); 84 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp); 85 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp); 86 87 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp); 88 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp); 89 static void vnet_tx_update(vio_net_handle_t vrh); 90 static void vnet_res_start_task(void *arg); 91 static void vnet_start_resources(vnet_t *vnetp); 92 static void vnet_stop_resources(vnet_t *vnetp); 93 static void vnet_dispatch_res_task(vnet_t *vnetp); 94 static void vnet_res_start_task(void *arg); 95 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err); 96 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu); 97 98 /* Exported to to vnet_dds */ 99 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg); 100 101 /* Externs that are imported from vnet_gen */ 102 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip, 103 const uint8_t *macaddr, void **vgenhdl); 104 extern int vgen_uninit(void *arg); 105 extern int vgen_dds_tx(void *arg, void *dmsg); 106 107 /* Externs that are imported from vnet_dds */ 108 extern void vdds_mod_init(void); 109 extern void vdds_mod_fini(void); 110 extern int vdds_init(vnet_t *vnetp); 111 extern void vdds_cleanup(vnet_t *vnetp); 112 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg); 113 extern void vdds_cleanup_hybrid_res(void *arg); 114 115 #define VNET_FDBE_REFHOLD(p) \ 116 { \ 117 atomic_inc_32(&(p)->refcnt); \ 118 ASSERT((p)->refcnt != 0); \ 119 } 120 121 #define VNET_FDBE_REFRELE(p) \ 122 { \ 123 ASSERT((p)->refcnt != 0); \ 124 atomic_dec_32(&(p)->refcnt); \ 125 } 126 127 static mac_callbacks_t vnet_m_callbacks = { 128 0, 129 vnet_m_stat, 130 vnet_m_start, 131 vnet_m_stop, 132 vnet_m_promisc, 133 vnet_m_multicst, 134 vnet_m_unicst, 135 vnet_m_tx, 136 NULL, 137 NULL, 138 NULL 139 }; 140 141 /* 142 * Linked list of "vnet_t" structures - one per instance. 143 */ 144 static vnet_t *vnet_headp = NULL; 145 static krwlock_t vnet_rw; 146 147 /* Tunables */ 148 uint32_t vnet_ntxds = VNET_NTXDS; /* power of 2 transmit descriptors */ 149 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */ 150 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT; /* tx timeout in msec */ 151 uint32_t vnet_ldc_mtu = VNET_LDC_MTU; /* ldc mtu */ 152 153 /* 154 * Set this to non-zero to enable additional internal receive buffer pools 155 * based on the MTU of the device for better performance at the cost of more 156 * memory consumption. This is turned off by default, to use allocb(9F) for 157 * receive buffer allocations of sizes > 2K. 158 */ 159 boolean_t vnet_jumbo_rxpools = B_FALSE; 160 161 /* # of chains in fdb hash table */ 162 uint32_t vnet_fdb_nchains = VNET_NFDB_HASH; 163 164 /* Internal tunables */ 165 uint32_t vnet_ethermtu = 1500; /* mtu of the device */ 166 167 /* 168 * Default vlan id. This is only used internally when the "default-vlan-id" 169 * property is not present in the MD device node. Therefore, this should not be 170 * used as a tunable; if this value is changed, the corresponding variable 171 * should be updated to the same value in vsw and also other vnets connected to 172 * the same vsw. 173 */ 174 uint16_t vnet_default_vlan_id = 1; 175 176 /* delay in usec to wait for all references on a fdb entry to be dropped */ 177 uint32_t vnet_fdbe_refcnt_delay = 10; 178 179 static struct ether_addr etherbroadcastaddr = { 180 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 181 }; 182 183 184 /* 185 * Property names 186 */ 187 static char macaddr_propname[] = "local-mac-address"; 188 189 /* 190 * This is the string displayed by modinfo(1m). 191 */ 192 static char vnet_ident[] = "vnet driver"; 193 extern struct mod_ops mod_driverops; 194 static struct cb_ops cb_vnetops = { 195 nulldev, /* cb_open */ 196 nulldev, /* cb_close */ 197 nodev, /* cb_strategy */ 198 nodev, /* cb_print */ 199 nodev, /* cb_dump */ 200 nodev, /* cb_read */ 201 nodev, /* cb_write */ 202 nodev, /* cb_ioctl */ 203 nodev, /* cb_devmap */ 204 nodev, /* cb_mmap */ 205 nodev, /* cb_segmap */ 206 nochpoll, /* cb_chpoll */ 207 ddi_prop_op, /* cb_prop_op */ 208 NULL, /* cb_stream */ 209 (int)(D_MP) /* cb_flag */ 210 }; 211 212 static struct dev_ops vnetops = { 213 DEVO_REV, /* devo_rev */ 214 0, /* devo_refcnt */ 215 NULL, /* devo_getinfo */ 216 nulldev, /* devo_identify */ 217 nulldev, /* devo_probe */ 218 vnetattach, /* devo_attach */ 219 vnetdetach, /* devo_detach */ 220 nodev, /* devo_reset */ 221 &cb_vnetops, /* devo_cb_ops */ 222 (struct bus_ops *)NULL, /* devo_bus_ops */ 223 NULL, /* devo_power */ 224 ddi_quiesce_not_supported, /* devo_quiesce */ 225 }; 226 227 static struct modldrv modldrv = { 228 &mod_driverops, /* Type of module. This one is a driver */ 229 vnet_ident, /* ID string */ 230 &vnetops /* driver specific ops */ 231 }; 232 233 static struct modlinkage modlinkage = { 234 MODREV_1, (void *)&modldrv, NULL 235 }; 236 237 #ifdef DEBUG 238 239 /* 240 * Print debug messages - set to 0xf to enable all msgs 241 */ 242 int vnet_dbglevel = 0x8; 243 244 static void 245 debug_printf(const char *fname, void *arg, const char *fmt, ...) 246 { 247 char buf[512]; 248 va_list ap; 249 vnet_t *vnetp = (vnet_t *)arg; 250 char *bufp = buf; 251 252 if (vnetp == NULL) { 253 (void) sprintf(bufp, "%s: ", fname); 254 bufp += strlen(bufp); 255 } else { 256 (void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname); 257 bufp += strlen(bufp); 258 } 259 va_start(ap, fmt); 260 (void) vsprintf(bufp, fmt, ap); 261 va_end(ap); 262 cmn_err(CE_CONT, "%s\n", buf); 263 } 264 265 #endif 266 267 /* _init(9E): initialize the loadable module */ 268 int 269 _init(void) 270 { 271 int status; 272 273 DBG1(NULL, "enter\n"); 274 275 mac_init_ops(&vnetops, "vnet"); 276 status = mod_install(&modlinkage); 277 if (status != 0) { 278 mac_fini_ops(&vnetops); 279 } 280 vdds_mod_init(); 281 DBG1(NULL, "exit(%d)\n", status); 282 return (status); 283 } 284 285 /* _fini(9E): prepare the module for unloading. */ 286 int 287 _fini(void) 288 { 289 int status; 290 291 DBG1(NULL, "enter\n"); 292 293 status = mod_remove(&modlinkage); 294 if (status != 0) 295 return (status); 296 mac_fini_ops(&vnetops); 297 vdds_mod_fini(); 298 299 DBG1(NULL, "exit(%d)\n", status); 300 return (status); 301 } 302 303 /* _info(9E): return information about the loadable module */ 304 int 305 _info(struct modinfo *modinfop) 306 { 307 return (mod_info(&modlinkage, modinfop)); 308 } 309 310 /* 311 * attach(9E): attach a device to the system. 312 * called once for each instance of the device on the system. 313 */ 314 static int 315 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd) 316 { 317 vnet_t *vnetp; 318 int status; 319 int instance; 320 uint64_t reg; 321 char qname[TASKQ_NAMELEN]; 322 enum { AST_init = 0x0, AST_vnet_alloc = 0x1, 323 AST_mac_alloc = 0x2, AST_read_macaddr = 0x4, 324 AST_vgen_init = 0x8, AST_fdbh_alloc = 0x10, 325 AST_vdds_init = 0x20, AST_taskq_create = 0x40, 326 AST_vnet_list = 0x80 } attach_state; 327 328 attach_state = AST_init; 329 330 switch (cmd) { 331 case DDI_ATTACH: 332 break; 333 case DDI_RESUME: 334 case DDI_PM_RESUME: 335 default: 336 goto vnet_attach_fail; 337 } 338 339 instance = ddi_get_instance(dip); 340 DBG1(NULL, "instance(%d) enter\n", instance); 341 342 /* allocate vnet_t and mac_t structures */ 343 vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP); 344 vnetp->dip = dip; 345 vnetp->instance = instance; 346 rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL); 347 rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL); 348 attach_state |= AST_vnet_alloc; 349 350 status = vdds_init(vnetp); 351 if (status != 0) { 352 goto vnet_attach_fail; 353 } 354 attach_state |= AST_vdds_init; 355 356 /* setup links to vnet_t from both devinfo and mac_t */ 357 ddi_set_driver_private(dip, (caddr_t)vnetp); 358 359 /* read the mac address */ 360 status = vnet_read_mac_address(vnetp); 361 if (status != DDI_SUCCESS) { 362 goto vnet_attach_fail; 363 } 364 attach_state |= AST_read_macaddr; 365 366 reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 367 DDI_PROP_DONTPASS, "reg", -1); 368 if (reg == -1) { 369 goto vnet_attach_fail; 370 } 371 vnetp->reg = reg; 372 373 vnet_fdb_create(vnetp); 374 attach_state |= AST_fdbh_alloc; 375 376 (void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance); 377 if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1, 378 TASKQ_DEFAULTPRI, 0)) == NULL) { 379 cmn_err(CE_WARN, "!vnet%d: Unable to create task queue", 380 instance); 381 goto vnet_attach_fail; 382 } 383 attach_state |= AST_taskq_create; 384 385 /* add to the list of vnet devices */ 386 WRITE_ENTER(&vnet_rw); 387 vnetp->nextp = vnet_headp; 388 vnet_headp = vnetp; 389 RW_EXIT(&vnet_rw); 390 391 attach_state |= AST_vnet_list; 392 393 /* 394 * Initialize the generic vnet plugin which provides 395 * communication via sun4v LDC (logical domain channel) based 396 * resources. It will register the LDC resources as and when 397 * they become available. 398 */ 399 status = vgen_init(vnetp, reg, vnetp->dip, 400 (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl); 401 if (status != DDI_SUCCESS) { 402 DERR(vnetp, "vgen_init() failed\n"); 403 goto vnet_attach_fail; 404 } 405 attach_state |= AST_vgen_init; 406 407 /* register with MAC layer */ 408 status = vnet_mac_register(vnetp); 409 if (status != DDI_SUCCESS) { 410 goto vnet_attach_fail; 411 } 412 413 DBG1(NULL, "instance(%d) exit\n", instance); 414 return (DDI_SUCCESS); 415 416 vnet_attach_fail: 417 418 if (attach_state & AST_vnet_list) { 419 vnet_t **vnetpp; 420 /* unlink from instance(vnet_t) list */ 421 WRITE_ENTER(&vnet_rw); 422 for (vnetpp = &vnet_headp; *vnetpp; 423 vnetpp = &(*vnetpp)->nextp) { 424 if (*vnetpp == vnetp) { 425 *vnetpp = vnetp->nextp; 426 break; 427 } 428 } 429 RW_EXIT(&vnet_rw); 430 } 431 432 if (attach_state & AST_vdds_init) { 433 vdds_cleanup(vnetp); 434 } 435 if (attach_state & AST_taskq_create) { 436 ddi_taskq_destroy(vnetp->taskqp); 437 } 438 if (attach_state & AST_fdbh_alloc) { 439 vnet_fdb_destroy(vnetp); 440 } 441 if (attach_state & AST_vgen_init) { 442 (void) vgen_uninit(vnetp->vgenhdl); 443 } 444 if (attach_state & AST_vnet_alloc) { 445 rw_destroy(&vnetp->vrwlock); 446 rw_destroy(&vnetp->vsw_fp_rw); 447 KMEM_FREE(vnetp); 448 } 449 return (DDI_FAILURE); 450 } 451 452 /* 453 * detach(9E): detach a device from the system. 454 */ 455 static int 456 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd) 457 { 458 vnet_t *vnetp; 459 vnet_t **vnetpp; 460 int instance; 461 int rv; 462 463 instance = ddi_get_instance(dip); 464 DBG1(NULL, "instance(%d) enter\n", instance); 465 466 vnetp = ddi_get_driver_private(dip); 467 if (vnetp == NULL) { 468 goto vnet_detach_fail; 469 } 470 471 switch (cmd) { 472 case DDI_DETACH: 473 break; 474 case DDI_SUSPEND: 475 case DDI_PM_SUSPEND: 476 default: 477 goto vnet_detach_fail; 478 } 479 480 (void) vdds_cleanup(vnetp); 481 rv = vgen_uninit(vnetp->vgenhdl); 482 if (rv != DDI_SUCCESS) { 483 goto vnet_detach_fail; 484 } 485 486 /* 487 * Unregister from the MAC subsystem. This can fail, in 488 * particular if there are DLPI style-2 streams still open - 489 * in which case we just return failure. 490 */ 491 if (mac_unregister(vnetp->mh) != 0) 492 goto vnet_detach_fail; 493 494 /* unlink from instance(vnet_t) list */ 495 WRITE_ENTER(&vnet_rw); 496 for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) { 497 if (*vnetpp == vnetp) { 498 *vnetpp = vnetp->nextp; 499 break; 500 } 501 } 502 RW_EXIT(&vnet_rw); 503 504 ddi_taskq_destroy(vnetp->taskqp); 505 /* destroy fdb */ 506 vnet_fdb_destroy(vnetp); 507 508 rw_destroy(&vnetp->vrwlock); 509 rw_destroy(&vnetp->vsw_fp_rw); 510 KMEM_FREE(vnetp); 511 512 return (DDI_SUCCESS); 513 514 vnet_detach_fail: 515 return (DDI_FAILURE); 516 } 517 518 /* enable the device for transmit/receive */ 519 static int 520 vnet_m_start(void *arg) 521 { 522 vnet_t *vnetp = arg; 523 524 DBG1(vnetp, "enter\n"); 525 526 WRITE_ENTER(&vnetp->vrwlock); 527 vnetp->flags |= VNET_STARTED; 528 vnet_start_resources(vnetp); 529 RW_EXIT(&vnetp->vrwlock); 530 531 DBG1(vnetp, "exit\n"); 532 return (VNET_SUCCESS); 533 534 } 535 536 /* stop transmit/receive for the device */ 537 static void 538 vnet_m_stop(void *arg) 539 { 540 vnet_t *vnetp = arg; 541 542 DBG1(vnetp, "enter\n"); 543 544 WRITE_ENTER(&vnetp->vrwlock); 545 if (vnetp->flags & VNET_STARTED) { 546 vnet_stop_resources(vnetp); 547 vnetp->flags &= ~VNET_STARTED; 548 } 549 RW_EXIT(&vnetp->vrwlock); 550 551 DBG1(vnetp, "exit\n"); 552 } 553 554 /* set the unicast mac address of the device */ 555 static int 556 vnet_m_unicst(void *arg, const uint8_t *macaddr) 557 { 558 _NOTE(ARGUNUSED(macaddr)) 559 560 vnet_t *vnetp = arg; 561 562 DBG1(vnetp, "enter\n"); 563 /* 564 * NOTE: setting mac address dynamically is not supported. 565 */ 566 DBG1(vnetp, "exit\n"); 567 568 return (VNET_FAILURE); 569 } 570 571 /* enable/disable a multicast address */ 572 static int 573 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 574 { 575 _NOTE(ARGUNUSED(add, mca)) 576 577 vnet_t *vnetp = arg; 578 vnet_res_t *vresp; 579 mac_register_t *macp; 580 mac_callbacks_t *cbp; 581 int rv = VNET_SUCCESS; 582 583 DBG1(vnetp, "enter\n"); 584 585 READ_ENTER(&vnetp->vrwlock); 586 for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) { 587 if (vresp->type == VIO_NET_RES_LDC_SERVICE) { 588 macp = &vresp->macreg; 589 cbp = macp->m_callbacks; 590 rv = cbp->mc_multicst(macp->m_driver, add, mca); 591 } 592 } 593 RW_EXIT(&vnetp->vrwlock); 594 595 DBG1(vnetp, "exit(%d)\n", rv); 596 return (rv); 597 } 598 599 /* set or clear promiscuous mode on the device */ 600 static int 601 vnet_m_promisc(void *arg, boolean_t on) 602 { 603 _NOTE(ARGUNUSED(on)) 604 605 vnet_t *vnetp = arg; 606 DBG1(vnetp, "enter\n"); 607 /* 608 * NOTE: setting promiscuous mode is not supported, just return success. 609 */ 610 DBG1(vnetp, "exit\n"); 611 return (VNET_SUCCESS); 612 } 613 614 /* 615 * Transmit a chain of packets. This function provides switching functionality 616 * based on the destination mac address to reach other guests (within ldoms) or 617 * external hosts. 618 */ 619 mblk_t * 620 vnet_m_tx(void *arg, mblk_t *mp) 621 { 622 vnet_t *vnetp; 623 vnet_res_t *vresp; 624 mblk_t *next; 625 mblk_t *resid_mp; 626 mac_register_t *macp; 627 struct ether_header *ehp; 628 boolean_t is_unicast; 629 boolean_t is_pvid; /* non-default pvid ? */ 630 boolean_t hres; /* Hybrid resource ? */ 631 632 vnetp = (vnet_t *)arg; 633 DBG1(vnetp, "enter\n"); 634 ASSERT(mp != NULL); 635 636 is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE; 637 638 while (mp != NULL) { 639 640 next = mp->b_next; 641 mp->b_next = NULL; 642 643 /* 644 * Find fdb entry for the destination 645 * and hold a reference to it. 646 */ 647 ehp = (struct ether_header *)mp->b_rptr; 648 vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost); 649 if (vresp != NULL) { 650 651 /* 652 * Destination found in FDB. 653 * The destination is a vnet device within ldoms 654 * and directly reachable, invoke the tx function 655 * in the fdb entry. 656 */ 657 macp = &vresp->macreg; 658 resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp); 659 660 /* tx done; now release ref on fdb entry */ 661 VNET_FDBE_REFRELE(vresp); 662 663 if (resid_mp != NULL) { 664 /* m_tx failed */ 665 mp->b_next = next; 666 break; 667 } 668 } else { 669 is_unicast = !(IS_BROADCAST(ehp) || 670 (IS_MULTICAST(ehp))); 671 /* 672 * Destination is not in FDB. 673 * If the destination is broadcast or multicast, 674 * then forward the packet to vswitch. 675 * If a Hybrid resource avilable, then send the 676 * unicast packet via hybrid resource, otherwise 677 * forward it to vswitch. 678 */ 679 READ_ENTER(&vnetp->vsw_fp_rw); 680 681 if ((is_unicast) && (vnetp->hio_fp != NULL)) { 682 vresp = vnetp->hio_fp; 683 hres = B_TRUE; 684 } else { 685 vresp = vnetp->vsw_fp; 686 hres = B_FALSE; 687 } 688 if (vresp == NULL) { 689 /* 690 * no fdb entry to vsw? drop the packet. 691 */ 692 RW_EXIT(&vnetp->vsw_fp_rw); 693 freemsg(mp); 694 mp = next; 695 continue; 696 } 697 698 /* ref hold the fdb entry to vsw */ 699 VNET_FDBE_REFHOLD(vresp); 700 701 RW_EXIT(&vnetp->vsw_fp_rw); 702 703 /* 704 * In the case of a hybrid resource we need to insert 705 * the tag for the pvid case here; unlike packets that 706 * are destined to a vnet/vsw in which case the vgen 707 * layer does the tagging before sending it over ldc. 708 */ 709 if (hres == B_TRUE) { 710 /* 711 * Determine if the frame being transmitted 712 * over the hybrid resource is untagged. If so, 713 * insert the tag before transmitting. 714 */ 715 if (is_pvid == B_TRUE && 716 ehp->ether_type != htons(ETHERTYPE_VLAN)) { 717 718 mp = vnet_vlan_insert_tag(mp, 719 vnetp->pvid); 720 if (mp == NULL) { 721 VNET_FDBE_REFRELE(vresp); 722 mp = next; 723 continue; 724 } 725 726 } 727 } 728 729 macp = &vresp->macreg; 730 resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp); 731 732 /* tx done; now release ref on fdb entry */ 733 VNET_FDBE_REFRELE(vresp); 734 735 if (resid_mp != NULL) { 736 /* m_tx failed */ 737 mp->b_next = next; 738 break; 739 } 740 } 741 742 mp = next; 743 } 744 745 DBG1(vnetp, "exit\n"); 746 return (mp); 747 } 748 749 /* get statistics from the device */ 750 int 751 vnet_m_stat(void *arg, uint_t stat, uint64_t *val) 752 { 753 vnet_t *vnetp = arg; 754 vnet_res_t *vresp; 755 mac_register_t *macp; 756 mac_callbacks_t *cbp; 757 uint64_t val_total = 0; 758 759 DBG1(vnetp, "enter\n"); 760 761 /* 762 * get the specified statistic from each transport and return the 763 * aggregate val. This obviously only works for counters. 764 */ 765 if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) || 766 (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) { 767 return (ENOTSUP); 768 } 769 770 READ_ENTER(&vnetp->vrwlock); 771 for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) { 772 macp = &vresp->macreg; 773 cbp = macp->m_callbacks; 774 if (cbp->mc_getstat(macp->m_driver, stat, val) == 0) 775 val_total += *val; 776 } 777 RW_EXIT(&vnetp->vrwlock); 778 779 *val = val_total; 780 781 DBG1(vnetp, "exit\n"); 782 return (0); 783 } 784 785 /* wrapper function for mac_register() */ 786 static int 787 vnet_mac_register(vnet_t *vnetp) 788 { 789 mac_register_t *macp; 790 int err; 791 792 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 793 return (DDI_FAILURE); 794 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 795 macp->m_driver = vnetp; 796 macp->m_dip = vnetp->dip; 797 macp->m_src_addr = vnetp->curr_macaddr; 798 macp->m_callbacks = &vnet_m_callbacks; 799 macp->m_min_sdu = 0; 800 macp->m_max_sdu = vnetp->mtu; 801 macp->m_margin = VLAN_TAGSZ; 802 803 /* 804 * Finally, we're ready to register ourselves with the MAC layer 805 * interface; if this succeeds, we're all ready to start() 806 */ 807 err = mac_register(macp, &vnetp->mh); 808 mac_free(macp); 809 return (err == 0 ? DDI_SUCCESS : DDI_FAILURE); 810 } 811 812 /* read the mac address of the device */ 813 static int 814 vnet_read_mac_address(vnet_t *vnetp) 815 { 816 uchar_t *macaddr; 817 uint32_t size; 818 int rv; 819 820 rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip, 821 DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size); 822 if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) { 823 DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n", 824 macaddr_propname, rv); 825 return (DDI_FAILURE); 826 } 827 bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL); 828 bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL); 829 ddi_prop_free(macaddr); 830 831 return (DDI_SUCCESS); 832 } 833 834 static void 835 vnet_fdb_create(vnet_t *vnetp) 836 { 837 char hashname[MAXNAMELEN]; 838 839 (void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash", 840 vnetp->instance); 841 vnetp->fdb_nchains = vnet_fdb_nchains; 842 vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains, 843 mod_hash_null_valdtor, sizeof (void *)); 844 } 845 846 static void 847 vnet_fdb_destroy(vnet_t *vnetp) 848 { 849 /* destroy fdb-hash-table */ 850 if (vnetp->fdb_hashp != NULL) { 851 mod_hash_destroy_hash(vnetp->fdb_hashp); 852 vnetp->fdb_hashp = NULL; 853 vnetp->fdb_nchains = 0; 854 } 855 } 856 857 /* 858 * Add an entry into the fdb. 859 */ 860 void 861 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp) 862 { 863 uint64_t addr = 0; 864 int rv; 865 866 KEY_HASH(addr, vresp->rem_macaddr); 867 868 /* 869 * If the entry being added corresponds to LDC_SERVICE resource, 870 * that is, vswitch connection, it is added to the hash and also 871 * the entry is cached, an additional reference count reflects 872 * this. The HYBRID resource is not added to the hash, but only 873 * cached, as it is only used for sending out packets for unknown 874 * unicast destinations. 875 */ 876 (vresp->type == VIO_NET_RES_LDC_SERVICE) ? 877 (vresp->refcnt = 1) : (vresp->refcnt = 0); 878 879 /* 880 * Note: duplicate keys will be rejected by mod_hash. 881 */ 882 if (vresp->type != VIO_NET_RES_HYBRID) { 883 rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr, 884 (mod_hash_val_t)vresp); 885 if (rv != 0) { 886 DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr); 887 return; 888 } 889 } 890 891 if (vresp->type == VIO_NET_RES_LDC_SERVICE) { 892 /* Cache the fdb entry to vsw-port */ 893 WRITE_ENTER(&vnetp->vsw_fp_rw); 894 if (vnetp->vsw_fp == NULL) 895 vnetp->vsw_fp = vresp; 896 RW_EXIT(&vnetp->vsw_fp_rw); 897 } else if (vresp->type == VIO_NET_RES_HYBRID) { 898 /* Cache the fdb entry to hybrid resource */ 899 WRITE_ENTER(&vnetp->vsw_fp_rw); 900 if (vnetp->hio_fp == NULL) 901 vnetp->hio_fp = vresp; 902 RW_EXIT(&vnetp->vsw_fp_rw); 903 } 904 } 905 906 /* 907 * Remove an entry from fdb. 908 */ 909 static void 910 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp) 911 { 912 uint64_t addr = 0; 913 int rv; 914 uint32_t refcnt; 915 vnet_res_t *tmp; 916 917 KEY_HASH(addr, vresp->rem_macaddr); 918 919 /* 920 * Remove the entry from fdb hash table. 921 * This prevents further references to this fdb entry. 922 */ 923 if (vresp->type != VIO_NET_RES_HYBRID) { 924 rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr, 925 (mod_hash_val_t *)&tmp); 926 if (rv != 0) { 927 /* 928 * As the resources are added to the hash only 929 * after they are started, this can occur if 930 * a resource unregisters before it is ever started. 931 */ 932 return; 933 } 934 } 935 936 if (vresp->type == VIO_NET_RES_LDC_SERVICE) { 937 WRITE_ENTER(&vnetp->vsw_fp_rw); 938 939 ASSERT(tmp == vnetp->vsw_fp); 940 vnetp->vsw_fp = NULL; 941 942 RW_EXIT(&vnetp->vsw_fp_rw); 943 } else if (vresp->type == VIO_NET_RES_HYBRID) { 944 WRITE_ENTER(&vnetp->vsw_fp_rw); 945 946 vnetp->hio_fp = NULL; 947 948 RW_EXIT(&vnetp->vsw_fp_rw); 949 } 950 951 /* 952 * If there are threads already ref holding before the entry was 953 * removed from hash table, then wait for ref count to drop to zero. 954 */ 955 (vresp->type == VIO_NET_RES_LDC_SERVICE) ? 956 (refcnt = 1) : (refcnt = 0); 957 while (vresp->refcnt > refcnt) { 958 delay(drv_usectohz(vnet_fdbe_refcnt_delay)); 959 } 960 } 961 962 /* 963 * Search fdb for a given mac address. If an entry is found, hold 964 * a reference to it and return the entry; else returns NULL. 965 */ 966 static vnet_res_t * 967 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp) 968 { 969 uint64_t key = 0; 970 vnet_res_t *vresp; 971 int rv; 972 973 KEY_HASH(key, addrp->ether_addr_octet); 974 975 rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key, 976 (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb); 977 978 if (rv != 0) 979 return (NULL); 980 981 return (vresp); 982 } 983 984 /* 985 * Callback function provided to mod_hash_find_cb(). After finding the fdb 986 * entry corresponding to the key (macaddr), this callback will be invoked by 987 * mod_hash_find_cb() to atomically increment the reference count on the fdb 988 * entry before returning the found entry. 989 */ 990 static void 991 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val) 992 { 993 _NOTE(ARGUNUSED(key)) 994 VNET_FDBE_REFHOLD((vnet_res_t *)val); 995 } 996 997 /* 998 * Frames received that are tagged with the pvid of the vnet device must be 999 * untagged before sending up the stack. This function walks the chain of rx 1000 * frames, untags any such frames and returns the updated chain. 1001 * 1002 * Arguments: 1003 * pvid: pvid of the vnet device for which packets are being received 1004 * mp: head of pkt chain to be validated and untagged 1005 * 1006 * Returns: 1007 * mp: head of updated chain of packets 1008 */ 1009 static void 1010 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp) 1011 { 1012 struct ether_vlan_header *evhp; 1013 mblk_t *bp; 1014 mblk_t *bpt; 1015 mblk_t *bph; 1016 mblk_t *bpn; 1017 1018 bpn = bph = bpt = NULL; 1019 1020 for (bp = *mp; bp != NULL; bp = bpn) { 1021 1022 bpn = bp->b_next; 1023 bp->b_next = bp->b_prev = NULL; 1024 1025 evhp = (struct ether_vlan_header *)bp->b_rptr; 1026 1027 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN && 1028 VLAN_ID(ntohs(evhp->ether_tci)) == pvid) { 1029 1030 bp = vnet_vlan_remove_tag(bp); 1031 if (bp == NULL) { 1032 continue; 1033 } 1034 1035 } 1036 1037 /* build a chain of processed packets */ 1038 if (bph == NULL) { 1039 bph = bpt = bp; 1040 } else { 1041 bpt->b_next = bp; 1042 bpt = bp; 1043 } 1044 1045 } 1046 1047 *mp = bph; 1048 } 1049 1050 static void 1051 vnet_rx(vio_net_handle_t vrh, mblk_t *mp) 1052 { 1053 vnet_res_t *vresp = (vnet_res_t *)vrh; 1054 vnet_t *vnetp = vresp->vnetp; 1055 1056 if ((vnetp == NULL) || (vnetp->mh == 0)) { 1057 freemsgchain(mp); 1058 return; 1059 } 1060 1061 /* 1062 * Packets received over a hybrid resource need additional processing 1063 * to remove the tag, for the pvid case. The underlying resource is 1064 * not aware of the vnet's pvid and thus packets are received with the 1065 * vlan tag in the header; unlike packets that are received over a ldc 1066 * channel in which case the peer vnet/vsw would have already removed 1067 * the tag. 1068 */ 1069 if (vresp->type == VIO_NET_RES_HYBRID && 1070 vnetp->pvid != vnetp->default_vlan_id) { 1071 1072 vnet_rx_frames_untag(vnetp->pvid, &mp); 1073 if (mp == NULL) { 1074 return; 1075 } 1076 } 1077 1078 mac_rx(vnetp->mh, NULL, mp); 1079 } 1080 1081 void 1082 vnet_tx_update(vio_net_handle_t vrh) 1083 { 1084 vnet_res_t *vresp = (vnet_res_t *)vrh; 1085 vnet_t *vnetp = vresp->vnetp; 1086 1087 if ((vnetp != NULL) && (vnetp->mh != NULL)) { 1088 mac_tx_update(vnetp->mh); 1089 } 1090 } 1091 1092 /* 1093 * Update the new mtu of vnet into the mac layer. First check if the device has 1094 * been plumbed and if so fail the mtu update. Returns 0 on success. 1095 */ 1096 int 1097 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu) 1098 { 1099 int rv; 1100 1101 if (vnetp == NULL || vnetp->mh == NULL) { 1102 return (EINVAL); 1103 } 1104 1105 WRITE_ENTER(&vnetp->vrwlock); 1106 1107 if (vnetp->flags & VNET_STARTED) { 1108 RW_EXIT(&vnetp->vrwlock); 1109 cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu " 1110 "update as the device is plumbed\n", 1111 vnetp->instance); 1112 return (EBUSY); 1113 } 1114 1115 /* update mtu in the mac layer */ 1116 rv = mac_maxsdu_update(vnetp->mh, mtu); 1117 if (rv != 0) { 1118 RW_EXIT(&vnetp->vrwlock); 1119 cmn_err(CE_NOTE, 1120 "!vnet%d: Unable to update mtu with mac layer\n", 1121 vnetp->instance); 1122 return (EIO); 1123 } 1124 1125 vnetp->mtu = mtu; 1126 1127 RW_EXIT(&vnetp->vrwlock); 1128 1129 return (0); 1130 } 1131 1132 /* 1133 * vio_net_resource_reg -- An interface called to register a resource 1134 * with vnet. 1135 * macp -- a GLDv3 mac_register that has all the details of 1136 * a resource and its callbacks etc. 1137 * type -- resource type. 1138 * local_macaddr -- resource's MAC address. This is used to 1139 * associate a resource with a corresponding vnet. 1140 * remote_macaddr -- remote side MAC address. This is ignored for 1141 * the Hybrid resources. 1142 * vhp -- A handle returned to the caller. 1143 * vcb -- A set of callbacks provided to the callers. 1144 */ 1145 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type, 1146 ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp, 1147 vio_net_callbacks_t *vcb) 1148 { 1149 vnet_t *vnetp; 1150 vnet_res_t *vresp; 1151 1152 vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP); 1153 ether_copy(local_macaddr, vresp->local_macaddr); 1154 ether_copy(rem_macaddr, vresp->rem_macaddr); 1155 vresp->type = type; 1156 bcopy(macp, &vresp->macreg, sizeof (mac_register_t)); 1157 1158 DBG1(NULL, "Resource Registerig type=0%X\n", type); 1159 1160 READ_ENTER(&vnet_rw); 1161 vnetp = vnet_headp; 1162 while (vnetp != NULL) { 1163 if (VNET_MATCH_RES(vresp, vnetp)) { 1164 WRITE_ENTER(&vnetp->vrwlock); 1165 vresp->vnetp = vnetp; 1166 vresp->nextp = vnetp->vres_list; 1167 vnetp->vres_list = vresp; 1168 RW_EXIT(&vnetp->vrwlock); 1169 break; 1170 } 1171 vnetp = vnetp->nextp; 1172 } 1173 RW_EXIT(&vnet_rw); 1174 if (vresp->vnetp == NULL) { 1175 DWARN(NULL, "No vnet instance"); 1176 kmem_free(vresp, sizeof (vnet_res_t)); 1177 return (ENXIO); 1178 } 1179 1180 *vhp = vresp; 1181 vcb->vio_net_rx_cb = vnet_rx; 1182 vcb->vio_net_tx_update = vnet_tx_update; 1183 vcb->vio_net_report_err = vnet_handle_res_err; 1184 1185 /* Dispatch a task to start resources */ 1186 vnet_dispatch_res_task(vnetp); 1187 return (0); 1188 } 1189 1190 /* 1191 * vio_net_resource_unreg -- An interface to unregister a resource. 1192 */ 1193 void 1194 vio_net_resource_unreg(vio_net_handle_t vhp) 1195 { 1196 vnet_res_t *vresp = (vnet_res_t *)vhp; 1197 vnet_t *vnetp = vresp->vnetp; 1198 vnet_res_t *vrp; 1199 1200 DBG1(NULL, "Resource Registerig hdl=0x%p", vhp); 1201 1202 ASSERT(vnetp != NULL); 1203 vnet_fdbe_del(vnetp, vresp); 1204 1205 WRITE_ENTER(&vnetp->vrwlock); 1206 if (vresp == vnetp->vres_list) { 1207 vnetp->vres_list = vresp->nextp; 1208 } else { 1209 vrp = vnetp->vres_list; 1210 while (vrp->nextp != NULL) { 1211 if (vrp->nextp == vresp) { 1212 vrp->nextp = vresp->nextp; 1213 break; 1214 } 1215 vrp = vrp->nextp; 1216 } 1217 } 1218 vresp->vnetp = NULL; 1219 vresp->nextp = NULL; 1220 RW_EXIT(&vnetp->vrwlock); 1221 KMEM_FREE(vresp); 1222 } 1223 1224 /* 1225 * vnet_dds_rx -- an interface called by vgen to DDS messages. 1226 */ 1227 void 1228 vnet_dds_rx(void *arg, void *dmsg) 1229 { 1230 vnet_t *vnetp = arg; 1231 vdds_process_dds_msg(vnetp, dmsg); 1232 } 1233 1234 /* 1235 * vnet_send_dds_msg -- An interface provided to DDS to send 1236 * DDS messages. This simply sends meessages via vgen. 1237 */ 1238 int 1239 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg) 1240 { 1241 int rv; 1242 1243 if (vnetp->vgenhdl != NULL) { 1244 rv = vgen_dds_tx(vnetp->vgenhdl, dmsg); 1245 } 1246 return (rv); 1247 } 1248 1249 /* 1250 * vnet_handle_res_err -- A callback function called by a resource 1251 * to report an error. For example, vgen can call to report 1252 * an LDC down/reset event. This will trigger cleanup of associated 1253 * Hybrid resource. 1254 */ 1255 /* ARGSUSED */ 1256 static void 1257 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err) 1258 { 1259 vnet_res_t *vresp = (vnet_res_t *)vrh; 1260 vnet_t *vnetp = vresp->vnetp; 1261 int rv; 1262 1263 if (vnetp == NULL) { 1264 return; 1265 } 1266 if ((vresp->type != VIO_NET_RES_LDC_SERVICE) && 1267 (vresp->type != VIO_NET_RES_HYBRID)) { 1268 return; 1269 } 1270 rv = ddi_taskq_dispatch(vnetp->taskqp, vdds_cleanup_hybrid_res, 1271 vnetp, DDI_NOSLEEP); 1272 if (rv != DDI_SUCCESS) { 1273 cmn_err(CE_WARN, 1274 "vnet%d:Failed to dispatch task to cleanup hybrid resource", 1275 vnetp->instance); 1276 } 1277 } 1278 1279 /* 1280 * vnet_dispatch_res_task -- A function to dispatch tasks start resources. 1281 */ 1282 static void 1283 vnet_dispatch_res_task(vnet_t *vnetp) 1284 { 1285 int rv; 1286 1287 WRITE_ENTER(&vnetp->vrwlock); 1288 if (vnetp->flags & VNET_STARTED) { 1289 rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task, 1290 vnetp, DDI_NOSLEEP); 1291 if (rv != DDI_SUCCESS) { 1292 cmn_err(CE_WARN, 1293 "vnet%d:Can't dispatch start resource task", 1294 vnetp->instance); 1295 } 1296 } 1297 RW_EXIT(&vnetp->vrwlock); 1298 } 1299 1300 /* 1301 * vnet_res_start_task -- A taskq callback function that starts a resource. 1302 */ 1303 static void 1304 vnet_res_start_task(void *arg) 1305 { 1306 vnet_t *vnetp = arg; 1307 1308 WRITE_ENTER(&vnetp->vrwlock); 1309 if (vnetp->flags & VNET_STARTED) { 1310 vnet_start_resources(vnetp); 1311 } 1312 RW_EXIT(&vnetp->vrwlock); 1313 } 1314 1315 /* 1316 * vnet_start_resources -- starts all resources associated with 1317 * a vnet. 1318 */ 1319 static void 1320 vnet_start_resources(vnet_t *vnetp) 1321 { 1322 mac_register_t *macp; 1323 mac_callbacks_t *cbp; 1324 vnet_res_t *vresp; 1325 int rv; 1326 1327 DBG1(vnetp, "enter\n"); 1328 1329 for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) { 1330 /* skip if it is already started */ 1331 if (vresp->flags & VNET_STARTED) { 1332 continue; 1333 } 1334 macp = &vresp->macreg; 1335 cbp = macp->m_callbacks; 1336 rv = cbp->mc_start(macp->m_driver); 1337 if (rv == 0) { 1338 /* 1339 * Successfully started the resource, so now 1340 * add it to the fdb. 1341 */ 1342 vresp->flags |= VNET_STARTED; 1343 vnet_fdbe_add(vnetp, vresp); 1344 } 1345 } 1346 1347 DBG1(vnetp, "exit\n"); 1348 1349 } 1350 1351 /* 1352 * vnet_stop_resources -- stop all resources associated with a vnet. 1353 */ 1354 static void 1355 vnet_stop_resources(vnet_t *vnetp) 1356 { 1357 vnet_res_t *vresp; 1358 vnet_res_t *nvresp; 1359 mac_register_t *macp; 1360 mac_callbacks_t *cbp; 1361 1362 DBG1(vnetp, "enter\n"); 1363 1364 for (vresp = vnetp->vres_list; vresp != NULL; ) { 1365 nvresp = vresp->nextp; 1366 if (vresp->flags & VNET_STARTED) { 1367 macp = &vresp->macreg; 1368 cbp = macp->m_callbacks; 1369 cbp->mc_stop(macp->m_driver); 1370 vresp->flags &= ~VNET_STARTED; 1371 } 1372 vresp = nvresp; 1373 } 1374 DBG1(vnetp, "exit\n"); 1375 } 1376