1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/param.h> 30 #include <sys/stream.h> 31 #include <sys/kmem.h> 32 #include <sys/conf.h> 33 #include <sys/devops.h> 34 #include <sys/ksynch.h> 35 #include <sys/stat.h> 36 #include <sys/modctl.h> 37 #include <sys/modhash.h> 38 #include <sys/debug.h> 39 #include <sys/ethernet.h> 40 #include <sys/dlpi.h> 41 #include <net/if.h> 42 #include <sys/mac_provider.h> 43 #include <sys/mac_client.h> 44 #include <sys/mac_client_priv.h> 45 #include <sys/mac_ether.h> 46 #include <sys/ddi.h> 47 #include <sys/sunddi.h> 48 #include <sys/strsun.h> 49 #include <sys/note.h> 50 #include <sys/atomic.h> 51 #include <sys/vnet.h> 52 #include <sys/vlan.h> 53 #include <sys/vnet_mailbox.h> 54 #include <sys/vnet_common.h> 55 #include <sys/dds.h> 56 #include <sys/strsubr.h> 57 #include <sys/taskq.h> 58 59 /* 60 * Function prototypes. 61 */ 62 63 /* DDI entrypoints */ 64 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 65 static int vnetattach(dev_info_t *, ddi_attach_cmd_t); 66 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t); 67 68 /* MAC entrypoints */ 69 static int vnet_m_stat(void *, uint_t, uint64_t *); 70 static int vnet_m_start(void *); 71 static void vnet_m_stop(void *); 72 static int vnet_m_promisc(void *, boolean_t); 73 static int vnet_m_multicst(void *, boolean_t, const uint8_t *); 74 static int vnet_m_unicst(void *, const uint8_t *); 75 mblk_t *vnet_m_tx(void *, mblk_t *); 76 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp); 77 #ifdef VNET_IOC_DEBUG 78 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp); 79 #endif 80 static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data); 81 static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, 82 const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle); 83 static void vnet_get_group(void *arg, mac_ring_type_t type, const int index, 84 mac_group_info_t *infop, mac_group_handle_t handle); 85 static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); 86 static void vnet_rx_ring_stop(mac_ring_driver_t rdriver); 87 static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); 88 static void vnet_tx_ring_stop(mac_ring_driver_t rdriver); 89 static int vnet_ring_enable_intr(void *arg); 90 static int vnet_ring_disable_intr(void *arg); 91 static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup); 92 static int vnet_addmac(void *arg, const uint8_t *mac_addr); 93 static int vnet_remmac(void *arg, const uint8_t *mac_addr); 94 95 /* vnet internal functions */ 96 static int vnet_unattach(vnet_t *vnetp); 97 static void vnet_ring_grp_init(vnet_t *vnetp); 98 static void vnet_ring_grp_uninit(vnet_t *vnetp); 99 static int vnet_mac_register(vnet_t *); 100 static int vnet_read_mac_address(vnet_t *vnetp); 101 static int vnet_bind_vgenring(vnet_res_t *vresp); 102 static void vnet_unbind_vgenring(vnet_res_t *vresp); 103 static int vnet_bind_hwrings(vnet_t *vnetp); 104 static void vnet_unbind_hwrings(vnet_t *vnetp); 105 static int vnet_bind_rings(vnet_res_t *vresp); 106 static void vnet_unbind_rings(vnet_res_t *vresp); 107 static int vnet_hio_stat(void *, uint_t, uint64_t *); 108 static int vnet_hio_start(void *); 109 static void vnet_hio_stop(void *); 110 static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type); 111 mblk_t *vnet_hio_tx(void *, mblk_t *); 112 113 /* Forwarding database (FDB) routines */ 114 static void vnet_fdb_create(vnet_t *vnetp); 115 static void vnet_fdb_destroy(vnet_t *vnetp); 116 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp); 117 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val); 118 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp); 119 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp); 120 121 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp); 122 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp); 123 static void vnet_tx_update(vio_net_handle_t vrh); 124 static void vnet_res_start_task(void *arg); 125 static void vnet_start_resources(vnet_t *vnetp); 126 static void vnet_stop_resources(vnet_t *vnetp); 127 static void vnet_dispatch_res_task(vnet_t *vnetp); 128 static void vnet_res_start_task(void *arg); 129 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err); 130 static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp); 131 static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp); 132 133 /* Exported to vnet_gen */ 134 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu); 135 void vnet_link_update(vnet_t *vnetp, link_state_t link_state); 136 void vnet_dds_cleanup_hio(vnet_t *vnetp); 137 138 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name, 139 vnet_res_t *vresp); 140 static int vnet_hio_update_kstats(kstat_t *ksp, int rw); 141 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp); 142 static void vnet_hio_destroy_kstats(kstat_t *ksp); 143 144 /* Exported to to vnet_dds */ 145 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg); 146 int vnet_hio_mac_init(vnet_t *vnetp, char *ifname); 147 void vnet_hio_mac_cleanup(vnet_t *vnetp); 148 149 /* Externs that are imported from vnet_gen */ 150 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip, 151 const uint8_t *macaddr, void **vgenhdl); 152 extern int vgen_init_mdeg(void *arg); 153 extern void vgen_uninit(void *arg); 154 extern int vgen_dds_tx(void *arg, void *dmsg); 155 extern void vgen_mod_init(void); 156 extern int vgen_mod_cleanup(void); 157 extern void vgen_mod_fini(void); 158 extern int vgen_enable_intr(void *arg); 159 extern int vgen_disable_intr(void *arg); 160 extern mblk_t *vgen_poll(void *arg, int bytes_to_pickup); 161 162 /* Externs that are imported from vnet_dds */ 163 extern void vdds_mod_init(void); 164 extern void vdds_mod_fini(void); 165 extern int vdds_init(vnet_t *vnetp); 166 extern void vdds_cleanup(vnet_t *vnetp); 167 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg); 168 extern void vdds_cleanup_hybrid_res(void *arg); 169 extern void vdds_cleanup_hio(vnet_t *vnetp); 170 171 /* Externs imported from mac_impl */ 172 extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); 173 174 #define DRV_NAME "vnet" 175 #define VNET_FDBE_REFHOLD(p) \ 176 { \ 177 atomic_inc_32(&(p)->refcnt); \ 178 ASSERT((p)->refcnt != 0); \ 179 } 180 181 #define VNET_FDBE_REFRELE(p) \ 182 { \ 183 ASSERT((p)->refcnt != 0); \ 184 atomic_dec_32(&(p)->refcnt); \ 185 } 186 187 #ifdef VNET_IOC_DEBUG 188 #define VNET_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB) 189 #else 190 #define VNET_M_CALLBACK_FLAGS (MC_GETCAPAB) 191 #endif 192 193 static mac_callbacks_t vnet_m_callbacks = { 194 VNET_M_CALLBACK_FLAGS, 195 vnet_m_stat, 196 vnet_m_start, 197 vnet_m_stop, 198 vnet_m_promisc, 199 vnet_m_multicst, 200 NULL, /* m_unicst entry must be NULL while rx rings are exposed */ 201 NULL, /* m_tx entry must be NULL while tx rings are exposed */ 202 vnet_m_ioctl, 203 vnet_m_capab, 204 NULL 205 }; 206 207 static mac_callbacks_t vnet_hio_res_callbacks = { 208 0, 209 vnet_hio_stat, 210 vnet_hio_start, 211 vnet_hio_stop, 212 NULL, 213 NULL, 214 NULL, 215 vnet_hio_tx, 216 NULL, 217 NULL, 218 NULL 219 }; 220 221 /* 222 * Linked list of "vnet_t" structures - one per instance. 223 */ 224 static vnet_t *vnet_headp = NULL; 225 static krwlock_t vnet_rw; 226 227 /* Tunables */ 228 uint32_t vnet_ntxds = VNET_NTXDS; /* power of 2 transmit descriptors */ 229 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */ 230 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT; /* tx timeout in msec */ 231 uint32_t vnet_ldc_mtu = VNET_LDC_MTU; /* ldc mtu */ 232 233 /* Configure tx serialization in mac layer for the vnet device */ 234 boolean_t vnet_mac_tx_serialize = B_TRUE; 235 236 /* 237 * Set this to non-zero to enable additional internal receive buffer pools 238 * based on the MTU of the device for better performance at the cost of more 239 * memory consumption. This is turned off by default, to use allocb(9F) for 240 * receive buffer allocations of sizes > 2K. 241 */ 242 boolean_t vnet_jumbo_rxpools = B_FALSE; 243 244 /* # of chains in fdb hash table */ 245 uint32_t vnet_fdb_nchains = VNET_NFDB_HASH; 246 247 /* Internal tunables */ 248 uint32_t vnet_ethermtu = 1500; /* mtu of the device */ 249 250 /* 251 * Default vlan id. This is only used internally when the "default-vlan-id" 252 * property is not present in the MD device node. Therefore, this should not be 253 * used as a tunable; if this value is changed, the corresponding variable 254 * should be updated to the same value in vsw and also other vnets connected to 255 * the same vsw. 256 */ 257 uint16_t vnet_default_vlan_id = 1; 258 259 /* delay in usec to wait for all references on a fdb entry to be dropped */ 260 uint32_t vnet_fdbe_refcnt_delay = 10; 261 262 static struct ether_addr etherbroadcastaddr = { 263 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 264 }; 265 266 /* mac_open() retry delay in usec */ 267 uint32_t vnet_mac_open_delay = 100; /* 0.1 ms */ 268 269 /* max # of mac_open() retries */ 270 uint32_t vnet_mac_open_retries = 100; 271 272 /* 273 * Property names 274 */ 275 static char macaddr_propname[] = "local-mac-address"; 276 277 /* 278 * This is the string displayed by modinfo(1m). 279 */ 280 static char vnet_ident[] = "vnet driver"; 281 extern struct mod_ops mod_driverops; 282 static struct cb_ops cb_vnetops = { 283 nulldev, /* cb_open */ 284 nulldev, /* cb_close */ 285 nodev, /* cb_strategy */ 286 nodev, /* cb_print */ 287 nodev, /* cb_dump */ 288 nodev, /* cb_read */ 289 nodev, /* cb_write */ 290 nodev, /* cb_ioctl */ 291 nodev, /* cb_devmap */ 292 nodev, /* cb_mmap */ 293 nodev, /* cb_segmap */ 294 nochpoll, /* cb_chpoll */ 295 ddi_prop_op, /* cb_prop_op */ 296 NULL, /* cb_stream */ 297 (int)(D_MP) /* cb_flag */ 298 }; 299 300 static struct dev_ops vnetops = { 301 DEVO_REV, /* devo_rev */ 302 0, /* devo_refcnt */ 303 NULL, /* devo_getinfo */ 304 nulldev, /* devo_identify */ 305 nulldev, /* devo_probe */ 306 vnetattach, /* devo_attach */ 307 vnetdetach, /* devo_detach */ 308 nodev, /* devo_reset */ 309 &cb_vnetops, /* devo_cb_ops */ 310 (struct bus_ops *)NULL, /* devo_bus_ops */ 311 NULL, /* devo_power */ 312 ddi_quiesce_not_supported, /* devo_quiesce */ 313 }; 314 315 static struct modldrv modldrv = { 316 &mod_driverops, /* Type of module. This one is a driver */ 317 vnet_ident, /* ID string */ 318 &vnetops /* driver specific ops */ 319 }; 320 321 static struct modlinkage modlinkage = { 322 MODREV_1, (void *)&modldrv, NULL 323 }; 324 325 #ifdef DEBUG 326 327 /* 328 * Print debug messages - set to 0xf to enable all msgs 329 */ 330 int vnet_dbglevel = 0x8; 331 332 static void 333 debug_printf(const char *fname, void *arg, const char *fmt, ...) 334 { 335 char buf[512]; 336 va_list ap; 337 vnet_t *vnetp = (vnet_t *)arg; 338 char *bufp = buf; 339 340 if (vnetp == NULL) { 341 (void) sprintf(bufp, "%s: ", fname); 342 bufp += strlen(bufp); 343 } else { 344 (void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname); 345 bufp += strlen(bufp); 346 } 347 va_start(ap, fmt); 348 (void) vsprintf(bufp, fmt, ap); 349 va_end(ap); 350 cmn_err(CE_CONT, "%s\n", buf); 351 } 352 353 #endif 354 355 /* _init(9E): initialize the loadable module */ 356 int 357 _init(void) 358 { 359 int status; 360 361 DBG1(NULL, "enter\n"); 362 363 mac_init_ops(&vnetops, "vnet"); 364 status = mod_install(&modlinkage); 365 if (status != 0) { 366 mac_fini_ops(&vnetops); 367 } 368 vdds_mod_init(); 369 vgen_mod_init(); 370 DBG1(NULL, "exit(%d)\n", status); 371 return (status); 372 } 373 374 /* _fini(9E): prepare the module for unloading. */ 375 int 376 _fini(void) 377 { 378 int status; 379 380 DBG1(NULL, "enter\n"); 381 382 status = vgen_mod_cleanup(); 383 if (status != 0) 384 return (status); 385 386 status = mod_remove(&modlinkage); 387 if (status != 0) 388 return (status); 389 mac_fini_ops(&vnetops); 390 vgen_mod_fini(); 391 vdds_mod_fini(); 392 393 DBG1(NULL, "exit(%d)\n", status); 394 return (status); 395 } 396 397 /* _info(9E): return information about the loadable module */ 398 int 399 _info(struct modinfo *modinfop) 400 { 401 return (mod_info(&modlinkage, modinfop)); 402 } 403 404 /* 405 * attach(9E): attach a device to the system. 406 * called once for each instance of the device on the system. 407 */ 408 static int 409 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd) 410 { 411 vnet_t *vnetp; 412 int status; 413 int instance; 414 uint64_t reg; 415 char qname[TASKQ_NAMELEN]; 416 vnet_attach_progress_t attach_progress; 417 418 attach_progress = AST_init; 419 420 switch (cmd) { 421 case DDI_ATTACH: 422 break; 423 case DDI_RESUME: 424 case DDI_PM_RESUME: 425 default: 426 goto vnet_attach_fail; 427 } 428 429 instance = ddi_get_instance(dip); 430 DBG1(NULL, "instance(%d) enter\n", instance); 431 432 /* allocate vnet_t and mac_t structures */ 433 vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP); 434 vnetp->dip = dip; 435 vnetp->instance = instance; 436 rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL); 437 rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL); 438 attach_progress |= AST_vnet_alloc; 439 440 vnet_ring_grp_init(vnetp); 441 attach_progress |= AST_ring_init; 442 443 status = vdds_init(vnetp); 444 if (status != 0) { 445 goto vnet_attach_fail; 446 } 447 attach_progress |= AST_vdds_init; 448 449 /* setup links to vnet_t from both devinfo and mac_t */ 450 ddi_set_driver_private(dip, (caddr_t)vnetp); 451 452 /* read the mac address */ 453 status = vnet_read_mac_address(vnetp); 454 if (status != DDI_SUCCESS) { 455 goto vnet_attach_fail; 456 } 457 attach_progress |= AST_read_macaddr; 458 459 reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 460 DDI_PROP_DONTPASS, "reg", -1); 461 if (reg == -1) { 462 goto vnet_attach_fail; 463 } 464 vnetp->reg = reg; 465 466 vnet_fdb_create(vnetp); 467 attach_progress |= AST_fdbh_alloc; 468 469 (void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance); 470 if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1, 471 TASKQ_DEFAULTPRI, 0)) == NULL) { 472 cmn_err(CE_WARN, "!vnet%d: Unable to create task queue", 473 instance); 474 goto vnet_attach_fail; 475 } 476 attach_progress |= AST_taskq_create; 477 478 /* add to the list of vnet devices */ 479 WRITE_ENTER(&vnet_rw); 480 vnetp->nextp = vnet_headp; 481 vnet_headp = vnetp; 482 RW_EXIT(&vnet_rw); 483 484 attach_progress |= AST_vnet_list; 485 486 /* 487 * Initialize the generic vnet plugin which provides communication via 488 * sun4v LDC (logical domain channel) based resources. This involves 2 489 * steps; first, vgen_init() is invoked to read the various properties 490 * of the vnet device from its MD node (including its mtu which is 491 * needed to mac_register()) and obtain a handle to the vgen layer. 492 * After mac_register() is done and we have a mac handle, we then 493 * invoke vgen_init_mdeg() which registers with the the MD event 494 * generator (mdeg) framework to allow LDC resource notifications. 495 * Note: this sequence also allows us to report the correct default # 496 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked 497 * in the context of mac_register(); and avoids conflicting with 498 * dynamic pseudo rx rings which get added/removed as a result of mdeg 499 * events in vgen. 500 */ 501 status = vgen_init(vnetp, reg, vnetp->dip, 502 (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl); 503 if (status != DDI_SUCCESS) { 504 DERR(vnetp, "vgen_init() failed\n"); 505 goto vnet_attach_fail; 506 } 507 attach_progress |= AST_vgen_init; 508 509 status = vnet_mac_register(vnetp); 510 if (status != DDI_SUCCESS) { 511 goto vnet_attach_fail; 512 } 513 vnetp->link_state = LINK_STATE_UNKNOWN; 514 attach_progress |= AST_macreg; 515 516 status = vgen_init_mdeg(vnetp->vgenhdl); 517 if (status != DDI_SUCCESS) { 518 goto vnet_attach_fail; 519 } 520 attach_progress |= AST_init_mdeg; 521 522 vnetp->attach_progress = attach_progress; 523 524 DBG1(NULL, "instance(%d) exit\n", instance); 525 return (DDI_SUCCESS); 526 527 vnet_attach_fail: 528 vnetp->attach_progress = attach_progress; 529 status = vnet_unattach(vnetp); 530 ASSERT(status == 0); 531 return (DDI_FAILURE); 532 } 533 534 /* 535 * detach(9E): detach a device from the system. 536 */ 537 static int 538 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd) 539 { 540 vnet_t *vnetp; 541 int instance; 542 543 instance = ddi_get_instance(dip); 544 DBG1(NULL, "instance(%d) enter\n", instance); 545 546 vnetp = ddi_get_driver_private(dip); 547 if (vnetp == NULL) { 548 goto vnet_detach_fail; 549 } 550 551 switch (cmd) { 552 case DDI_DETACH: 553 break; 554 case DDI_SUSPEND: 555 case DDI_PM_SUSPEND: 556 default: 557 goto vnet_detach_fail; 558 } 559 560 if (vnet_unattach(vnetp) != 0) { 561 goto vnet_detach_fail; 562 } 563 564 return (DDI_SUCCESS); 565 566 vnet_detach_fail: 567 return (DDI_FAILURE); 568 } 569 570 /* 571 * Common routine to handle vnetattach() failure and vnetdetach(). Note that 572 * the only reason this function could fail is if mac_unregister() fails. 573 * Otherwise, this function must ensure that all resources are freed and return 574 * success. 575 */ 576 static int 577 vnet_unattach(vnet_t *vnetp) 578 { 579 vnet_attach_progress_t attach_progress; 580 581 attach_progress = vnetp->attach_progress; 582 583 /* 584 * Disable the mac device in the gldv3 subsystem. This can fail, in 585 * particular if there are still any open references to this mac 586 * device; in which case we just return failure without continuing to 587 * detach further. 588 * If it succeeds, we then invoke vgen_uninit() which should unregister 589 * any pseudo rings registered with the mac layer. Note we keep the 590 * AST_macreg flag on, so we can unregister with the mac layer at 591 * the end of this routine. 592 */ 593 if (attach_progress & AST_macreg) { 594 if (mac_disable(vnetp->mh) != 0) { 595 return (1); 596 } 597 } 598 599 /* 600 * Now that we have disabled the device, we must finish all other steps 601 * and successfully return from this function; otherwise we will end up 602 * leaving the device in a broken/unusable state. 603 * 604 * First, release any hybrid resources assigned to this vnet device. 605 */ 606 if (attach_progress & AST_vdds_init) { 607 vdds_cleanup(vnetp); 608 attach_progress &= ~AST_vdds_init; 609 } 610 611 /* 612 * Uninit vgen. This stops further mdeg callbacks to this vnet 613 * device and/or its ports; and detaches any existing ports. 614 */ 615 if (attach_progress & (AST_vgen_init|AST_init_mdeg)) { 616 vgen_uninit(vnetp->vgenhdl); 617 attach_progress &= ~AST_vgen_init; 618 attach_progress &= ~AST_init_mdeg; 619 } 620 621 /* Destroy the taskq. */ 622 if (attach_progress & AST_taskq_create) { 623 ddi_taskq_destroy(vnetp->taskqp); 624 attach_progress &= ~AST_taskq_create; 625 } 626 627 /* Destroy fdb. */ 628 if (attach_progress & AST_fdbh_alloc) { 629 vnet_fdb_destroy(vnetp); 630 attach_progress &= ~AST_fdbh_alloc; 631 } 632 633 /* Remove from the device list */ 634 if (attach_progress & AST_vnet_list) { 635 vnet_t **vnetpp; 636 /* unlink from instance(vnet_t) list */ 637 WRITE_ENTER(&vnet_rw); 638 for (vnetpp = &vnet_headp; *vnetpp; 639 vnetpp = &(*vnetpp)->nextp) { 640 if (*vnetpp == vnetp) { 641 *vnetpp = vnetp->nextp; 642 break; 643 } 644 } 645 RW_EXIT(&vnet_rw); 646 attach_progress &= ~AST_vnet_list; 647 } 648 649 if (attach_progress & AST_ring_init) { 650 vnet_ring_grp_uninit(vnetp); 651 attach_progress &= ~AST_ring_init; 652 } 653 654 if (attach_progress & AST_macreg) { 655 VERIFY(mac_unregister(vnetp->mh) == 0); 656 vnetp->mh = NULL; 657 attach_progress &= ~AST_macreg; 658 } 659 660 if (attach_progress & AST_vnet_alloc) { 661 rw_destroy(&vnetp->vrwlock); 662 rw_destroy(&vnetp->vsw_fp_rw); 663 attach_progress &= ~AST_vnet_list; 664 KMEM_FREE(vnetp); 665 } 666 667 return (0); 668 } 669 670 /* enable the device for transmit/receive */ 671 static int 672 vnet_m_start(void *arg) 673 { 674 vnet_t *vnetp = arg; 675 676 DBG1(vnetp, "enter\n"); 677 678 WRITE_ENTER(&vnetp->vrwlock); 679 vnetp->flags |= VNET_STARTED; 680 vnet_start_resources(vnetp); 681 RW_EXIT(&vnetp->vrwlock); 682 683 DBG1(vnetp, "exit\n"); 684 return (VNET_SUCCESS); 685 686 } 687 688 /* stop transmit/receive for the device */ 689 static void 690 vnet_m_stop(void *arg) 691 { 692 vnet_t *vnetp = arg; 693 694 DBG1(vnetp, "enter\n"); 695 696 WRITE_ENTER(&vnetp->vrwlock); 697 if (vnetp->flags & VNET_STARTED) { 698 /* 699 * Set the flags appropriately; this should prevent starting of 700 * any new resources that are added(see vnet_res_start_task()), 701 * while we release the vrwlock in vnet_stop_resources() before 702 * stopping each resource. 703 */ 704 vnetp->flags &= ~VNET_STARTED; 705 vnetp->flags |= VNET_STOPPING; 706 vnet_stop_resources(vnetp); 707 vnetp->flags &= ~VNET_STOPPING; 708 } 709 RW_EXIT(&vnetp->vrwlock); 710 711 DBG1(vnetp, "exit\n"); 712 } 713 714 /* set the unicast mac address of the device */ 715 static int 716 vnet_m_unicst(void *arg, const uint8_t *macaddr) 717 { 718 _NOTE(ARGUNUSED(macaddr)) 719 720 vnet_t *vnetp = arg; 721 722 DBG1(vnetp, "enter\n"); 723 /* 724 * NOTE: setting mac address dynamically is not supported. 725 */ 726 DBG1(vnetp, "exit\n"); 727 728 return (VNET_FAILURE); 729 } 730 731 /* enable/disable a multicast address */ 732 static int 733 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 734 { 735 _NOTE(ARGUNUSED(add, mca)) 736 737 vnet_t *vnetp = arg; 738 vnet_res_t *vresp; 739 mac_register_t *macp; 740 mac_callbacks_t *cbp; 741 int rv = VNET_SUCCESS; 742 743 DBG1(vnetp, "enter\n"); 744 745 READ_ENTER(&vnetp->vrwlock); 746 for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) { 747 if (vresp->type == VIO_NET_RES_LDC_SERVICE) { 748 macp = &vresp->macreg; 749 cbp = macp->m_callbacks; 750 rv = cbp->mc_multicst(macp->m_driver, add, mca); 751 } 752 } 753 RW_EXIT(&vnetp->vrwlock); 754 755 DBG1(vnetp, "exit(%d)\n", rv); 756 return (rv); 757 } 758 759 /* set or clear promiscuous mode on the device */ 760 static int 761 vnet_m_promisc(void *arg, boolean_t on) 762 { 763 _NOTE(ARGUNUSED(on)) 764 765 vnet_t *vnetp = arg; 766 DBG1(vnetp, "enter\n"); 767 /* 768 * NOTE: setting promiscuous mode is not supported, just return success. 769 */ 770 DBG1(vnetp, "exit\n"); 771 return (VNET_SUCCESS); 772 } 773 774 /* 775 * Transmit a chain of packets. This function provides switching functionality 776 * based on the destination mac address to reach other guests (within ldoms) or 777 * external hosts. 778 */ 779 mblk_t * 780 vnet_tx_ring_send(void *arg, mblk_t *mp) 781 { 782 vnet_pseudo_tx_ring_t *tx_ringp; 783 vnet_t *vnetp; 784 vnet_res_t *vresp; 785 mblk_t *next; 786 mblk_t *resid_mp; 787 mac_register_t *macp; 788 struct ether_header *ehp; 789 boolean_t is_unicast; 790 boolean_t is_pvid; /* non-default pvid ? */ 791 boolean_t hres; /* Hybrid resource ? */ 792 void *tx_arg; 793 794 tx_ringp = (vnet_pseudo_tx_ring_t *)arg; 795 vnetp = (vnet_t *)tx_ringp->vnetp; 796 DBG1(vnetp, "enter\n"); 797 ASSERT(mp != NULL); 798 799 is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE; 800 801 while (mp != NULL) { 802 803 next = mp->b_next; 804 mp->b_next = NULL; 805 806 /* 807 * Find fdb entry for the destination 808 * and hold a reference to it. 809 */ 810 ehp = (struct ether_header *)mp->b_rptr; 811 vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost); 812 if (vresp != NULL) { 813 814 /* 815 * Destination found in FDB. 816 * The destination is a vnet device within ldoms 817 * and directly reachable, invoke the tx function 818 * in the fdb entry. 819 */ 820 macp = &vresp->macreg; 821 resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp); 822 823 /* tx done; now release ref on fdb entry */ 824 VNET_FDBE_REFRELE(vresp); 825 826 if (resid_mp != NULL) { 827 /* m_tx failed */ 828 mp->b_next = next; 829 break; 830 } 831 } else { 832 is_unicast = !(IS_BROADCAST(ehp) || 833 (IS_MULTICAST(ehp))); 834 /* 835 * Destination is not in FDB. 836 * If the destination is broadcast or multicast, 837 * then forward the packet to vswitch. 838 * If a Hybrid resource avilable, then send the 839 * unicast packet via hybrid resource, otherwise 840 * forward it to vswitch. 841 */ 842 READ_ENTER(&vnetp->vsw_fp_rw); 843 844 if ((is_unicast) && (vnetp->hio_fp != NULL)) { 845 vresp = vnetp->hio_fp; 846 hres = B_TRUE; 847 } else { 848 vresp = vnetp->vsw_fp; 849 hres = B_FALSE; 850 } 851 if (vresp == NULL) { 852 /* 853 * no fdb entry to vsw? drop the packet. 854 */ 855 RW_EXIT(&vnetp->vsw_fp_rw); 856 freemsg(mp); 857 mp = next; 858 continue; 859 } 860 861 /* ref hold the fdb entry to vsw */ 862 VNET_FDBE_REFHOLD(vresp); 863 864 RW_EXIT(&vnetp->vsw_fp_rw); 865 866 /* 867 * In the case of a hybrid resource we need to insert 868 * the tag for the pvid case here; unlike packets that 869 * are destined to a vnet/vsw in which case the vgen 870 * layer does the tagging before sending it over ldc. 871 */ 872 if (hres == B_TRUE) { 873 /* 874 * Determine if the frame being transmitted 875 * over the hybrid resource is untagged. If so, 876 * insert the tag before transmitting. 877 */ 878 if (is_pvid == B_TRUE && 879 ehp->ether_type != htons(ETHERTYPE_VLAN)) { 880 881 mp = vnet_vlan_insert_tag(mp, 882 vnetp->pvid); 883 if (mp == NULL) { 884 VNET_FDBE_REFRELE(vresp); 885 mp = next; 886 continue; 887 } 888 889 } 890 891 macp = &vresp->macreg; 892 tx_arg = tx_ringp; 893 } else { 894 macp = &vresp->macreg; 895 tx_arg = macp->m_driver; 896 } 897 resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp); 898 899 /* tx done; now release ref on fdb entry */ 900 VNET_FDBE_REFRELE(vresp); 901 902 if (resid_mp != NULL) { 903 /* m_tx failed */ 904 mp->b_next = next; 905 break; 906 } 907 } 908 909 mp = next; 910 } 911 912 DBG1(vnetp, "exit\n"); 913 return (mp); 914 } 915 916 /* get statistics from the device */ 917 int 918 vnet_m_stat(void *arg, uint_t stat, uint64_t *val) 919 { 920 vnet_t *vnetp = arg; 921 vnet_res_t *vresp; 922 mac_register_t *macp; 923 mac_callbacks_t *cbp; 924 uint64_t val_total = 0; 925 926 DBG1(vnetp, "enter\n"); 927 928 /* 929 * get the specified statistic from each transport and return the 930 * aggregate val. This obviously only works for counters. 931 */ 932 if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) || 933 (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) { 934 return (ENOTSUP); 935 } 936 937 READ_ENTER(&vnetp->vrwlock); 938 for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) { 939 macp = &vresp->macreg; 940 cbp = macp->m_callbacks; 941 if (cbp->mc_getstat(macp->m_driver, stat, val) == 0) 942 val_total += *val; 943 } 944 RW_EXIT(&vnetp->vrwlock); 945 946 *val = val_total; 947 948 DBG1(vnetp, "exit\n"); 949 return (0); 950 } 951 952 static void 953 vnet_ring_grp_init(vnet_t *vnetp) 954 { 955 vnet_pseudo_rx_group_t *rx_grp; 956 vnet_pseudo_rx_ring_t *rx_ringp; 957 vnet_pseudo_tx_group_t *tx_grp; 958 vnet_pseudo_tx_ring_t *tx_ringp; 959 int i; 960 961 tx_grp = &vnetp->tx_grp[0]; 962 tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) * 963 VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP); 964 for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) { 965 tx_ringp[i].state |= VNET_TXRING_SHARED; 966 } 967 tx_grp->rings = tx_ringp; 968 tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS; 969 970 rx_grp = &vnetp->rx_grp[0]; 971 rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP; 972 rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL); 973 rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) * 974 rx_grp->max_ring_cnt, KM_SLEEP); 975 976 /* 977 * Setup the first 3 Pseudo RX Rings that are reserved; 978 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource. 979 */ 980 rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE; 981 rx_ringp[0].index = 0; 982 rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID; 983 rx_ringp[1].index = 1; 984 rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID; 985 rx_ringp[2].index = 2; 986 987 rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; 988 rx_grp->rings = rx_ringp; 989 990 for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; 991 i < rx_grp->max_ring_cnt; i++) { 992 rx_ringp = &rx_grp->rings[i]; 993 rx_ringp->state = VNET_RXRING_FREE; 994 rx_ringp->index = i; 995 } 996 } 997 998 static void 999 vnet_ring_grp_uninit(vnet_t *vnetp) 1000 { 1001 vnet_pseudo_rx_group_t *rx_grp; 1002 vnet_pseudo_tx_group_t *tx_grp; 1003 1004 tx_grp = &vnetp->tx_grp[0]; 1005 if (tx_grp->rings != NULL) { 1006 ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS); 1007 kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) * 1008 tx_grp->ring_cnt); 1009 tx_grp->rings = NULL; 1010 } 1011 1012 rx_grp = &vnetp->rx_grp[0]; 1013 if (rx_grp->rings != NULL) { 1014 ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP); 1015 ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT); 1016 kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) * 1017 rx_grp->max_ring_cnt); 1018 rx_grp->rings = NULL; 1019 } 1020 } 1021 1022 static vnet_pseudo_rx_ring_t * 1023 vnet_alloc_pseudo_rx_ring(vnet_t *vnetp) 1024 { 1025 vnet_pseudo_rx_group_t *rx_grp; 1026 vnet_pseudo_rx_ring_t *rx_ringp; 1027 int index; 1028 1029 rx_grp = &vnetp->rx_grp[0]; 1030 WRITE_ENTER(&rx_grp->lock); 1031 1032 if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) { 1033 /* no rings available */ 1034 RW_EXIT(&rx_grp->lock); 1035 return (NULL); 1036 } 1037 1038 for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; 1039 index < rx_grp->max_ring_cnt; index++) { 1040 rx_ringp = &rx_grp->rings[index]; 1041 if (rx_ringp->state == VNET_RXRING_FREE) { 1042 rx_ringp->state |= VNET_RXRING_INUSE; 1043 rx_grp->ring_cnt++; 1044 break; 1045 } 1046 } 1047 1048 RW_EXIT(&rx_grp->lock); 1049 return (rx_ringp); 1050 } 1051 1052 static void 1053 vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp) 1054 { 1055 vnet_pseudo_rx_group_t *rx_grp; 1056 1057 ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT); 1058 rx_grp = &vnetp->rx_grp[0]; 1059 WRITE_ENTER(&rx_grp->lock); 1060 1061 if (ringp->state != VNET_RXRING_FREE) { 1062 ringp->state = VNET_RXRING_FREE; 1063 ringp->handle = NULL; 1064 rx_grp->ring_cnt--; 1065 } 1066 1067 RW_EXIT(&rx_grp->lock); 1068 } 1069 1070 /* wrapper function for mac_register() */ 1071 static int 1072 vnet_mac_register(vnet_t *vnetp) 1073 { 1074 mac_register_t *macp; 1075 int err; 1076 1077 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1078 return (DDI_FAILURE); 1079 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1080 macp->m_driver = vnetp; 1081 macp->m_dip = vnetp->dip; 1082 macp->m_src_addr = vnetp->curr_macaddr; 1083 macp->m_callbacks = &vnet_m_callbacks; 1084 macp->m_min_sdu = 0; 1085 macp->m_max_sdu = vnetp->mtu; 1086 macp->m_margin = VLAN_TAGSZ; 1087 1088 /* 1089 * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to 1090 * workaround tx lock contention issues in nxge. 1091 */ 1092 macp->m_v12n = MAC_VIRT_LEVEL1; 1093 if (vnet_mac_tx_serialize == B_TRUE) { 1094 macp->m_v12n |= MAC_VIRT_SERIALIZE; 1095 } 1096 1097 /* 1098 * Finally, we're ready to register ourselves with the MAC layer 1099 * interface; if this succeeds, we're all ready to start() 1100 */ 1101 err = mac_register(macp, &vnetp->mh); 1102 mac_free(macp); 1103 return (err == 0 ? DDI_SUCCESS : DDI_FAILURE); 1104 } 1105 1106 /* read the mac address of the device */ 1107 static int 1108 vnet_read_mac_address(vnet_t *vnetp) 1109 { 1110 uchar_t *macaddr; 1111 uint32_t size; 1112 int rv; 1113 1114 rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip, 1115 DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size); 1116 if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) { 1117 DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n", 1118 macaddr_propname, rv); 1119 return (DDI_FAILURE); 1120 } 1121 bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL); 1122 bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL); 1123 ddi_prop_free(macaddr); 1124 1125 return (DDI_SUCCESS); 1126 } 1127 1128 static void 1129 vnet_fdb_create(vnet_t *vnetp) 1130 { 1131 char hashname[MAXNAMELEN]; 1132 1133 (void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash", 1134 vnetp->instance); 1135 vnetp->fdb_nchains = vnet_fdb_nchains; 1136 vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains, 1137 mod_hash_null_valdtor, sizeof (void *)); 1138 } 1139 1140 static void 1141 vnet_fdb_destroy(vnet_t *vnetp) 1142 { 1143 /* destroy fdb-hash-table */ 1144 if (vnetp->fdb_hashp != NULL) { 1145 mod_hash_destroy_hash(vnetp->fdb_hashp); 1146 vnetp->fdb_hashp = NULL; 1147 vnetp->fdb_nchains = 0; 1148 } 1149 } 1150 1151 /* 1152 * Add an entry into the fdb. 1153 */ 1154 void 1155 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp) 1156 { 1157 uint64_t addr = 0; 1158 int rv; 1159 1160 KEY_HASH(addr, vresp->rem_macaddr); 1161 1162 /* 1163 * If the entry being added corresponds to LDC_SERVICE resource, 1164 * that is, vswitch connection, it is added to the hash and also 1165 * the entry is cached, an additional reference count reflects 1166 * this. The HYBRID resource is not added to the hash, but only 1167 * cached, as it is only used for sending out packets for unknown 1168 * unicast destinations. 1169 */ 1170 (vresp->type == VIO_NET_RES_LDC_SERVICE) ? 1171 (vresp->refcnt = 1) : (vresp->refcnt = 0); 1172 1173 /* 1174 * Note: duplicate keys will be rejected by mod_hash. 1175 */ 1176 if (vresp->type != VIO_NET_RES_HYBRID) { 1177 rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr, 1178 (mod_hash_val_t)vresp); 1179 if (rv != 0) { 1180 DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr); 1181 return; 1182 } 1183 } 1184 1185 if (vresp->type == VIO_NET_RES_LDC_SERVICE) { 1186 /* Cache the fdb entry to vsw-port */ 1187 WRITE_ENTER(&vnetp->vsw_fp_rw); 1188 if (vnetp->vsw_fp == NULL) 1189 vnetp->vsw_fp = vresp; 1190 RW_EXIT(&vnetp->vsw_fp_rw); 1191 } else if (vresp->type == VIO_NET_RES_HYBRID) { 1192 /* Cache the fdb entry to hybrid resource */ 1193 WRITE_ENTER(&vnetp->vsw_fp_rw); 1194 if (vnetp->hio_fp == NULL) 1195 vnetp->hio_fp = vresp; 1196 RW_EXIT(&vnetp->vsw_fp_rw); 1197 } 1198 } 1199 1200 /* 1201 * Remove an entry from fdb. 1202 */ 1203 static void 1204 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp) 1205 { 1206 uint64_t addr = 0; 1207 int rv; 1208 uint32_t refcnt; 1209 vnet_res_t *tmp; 1210 1211 KEY_HASH(addr, vresp->rem_macaddr); 1212 1213 /* 1214 * Remove the entry from fdb hash table. 1215 * This prevents further references to this fdb entry. 1216 */ 1217 if (vresp->type != VIO_NET_RES_HYBRID) { 1218 rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr, 1219 (mod_hash_val_t *)&tmp); 1220 if (rv != 0) { 1221 /* 1222 * As the resources are added to the hash only 1223 * after they are started, this can occur if 1224 * a resource unregisters before it is ever started. 1225 */ 1226 return; 1227 } 1228 } 1229 1230 if (vresp->type == VIO_NET_RES_LDC_SERVICE) { 1231 WRITE_ENTER(&vnetp->vsw_fp_rw); 1232 1233 ASSERT(tmp == vnetp->vsw_fp); 1234 vnetp->vsw_fp = NULL; 1235 1236 RW_EXIT(&vnetp->vsw_fp_rw); 1237 } else if (vresp->type == VIO_NET_RES_HYBRID) { 1238 WRITE_ENTER(&vnetp->vsw_fp_rw); 1239 1240 vnetp->hio_fp = NULL; 1241 1242 RW_EXIT(&vnetp->vsw_fp_rw); 1243 } 1244 1245 /* 1246 * If there are threads already ref holding before the entry was 1247 * removed from hash table, then wait for ref count to drop to zero. 1248 */ 1249 (vresp->type == VIO_NET_RES_LDC_SERVICE) ? 1250 (refcnt = 1) : (refcnt = 0); 1251 while (vresp->refcnt > refcnt) { 1252 delay(drv_usectohz(vnet_fdbe_refcnt_delay)); 1253 } 1254 } 1255 1256 /* 1257 * Search fdb for a given mac address. If an entry is found, hold 1258 * a reference to it and return the entry; else returns NULL. 1259 */ 1260 static vnet_res_t * 1261 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp) 1262 { 1263 uint64_t key = 0; 1264 vnet_res_t *vresp; 1265 int rv; 1266 1267 KEY_HASH(key, addrp->ether_addr_octet); 1268 1269 rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key, 1270 (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb); 1271 1272 if (rv != 0) 1273 return (NULL); 1274 1275 return (vresp); 1276 } 1277 1278 /* 1279 * Callback function provided to mod_hash_find_cb(). After finding the fdb 1280 * entry corresponding to the key (macaddr), this callback will be invoked by 1281 * mod_hash_find_cb() to atomically increment the reference count on the fdb 1282 * entry before returning the found entry. 1283 */ 1284 static void 1285 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val) 1286 { 1287 _NOTE(ARGUNUSED(key)) 1288 VNET_FDBE_REFHOLD((vnet_res_t *)val); 1289 } 1290 1291 /* 1292 * Frames received that are tagged with the pvid of the vnet device must be 1293 * untagged before sending up the stack. This function walks the chain of rx 1294 * frames, untags any such frames and returns the updated chain. 1295 * 1296 * Arguments: 1297 * pvid: pvid of the vnet device for which packets are being received 1298 * mp: head of pkt chain to be validated and untagged 1299 * 1300 * Returns: 1301 * mp: head of updated chain of packets 1302 */ 1303 static void 1304 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp) 1305 { 1306 struct ether_vlan_header *evhp; 1307 mblk_t *bp; 1308 mblk_t *bpt; 1309 mblk_t *bph; 1310 mblk_t *bpn; 1311 1312 bpn = bph = bpt = NULL; 1313 1314 for (bp = *mp; bp != NULL; bp = bpn) { 1315 1316 bpn = bp->b_next; 1317 bp->b_next = bp->b_prev = NULL; 1318 1319 evhp = (struct ether_vlan_header *)bp->b_rptr; 1320 1321 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN && 1322 VLAN_ID(ntohs(evhp->ether_tci)) == pvid) { 1323 1324 bp = vnet_vlan_remove_tag(bp); 1325 if (bp == NULL) { 1326 continue; 1327 } 1328 1329 } 1330 1331 /* build a chain of processed packets */ 1332 if (bph == NULL) { 1333 bph = bpt = bp; 1334 } else { 1335 bpt->b_next = bp; 1336 bpt = bp; 1337 } 1338 1339 } 1340 1341 *mp = bph; 1342 } 1343 1344 static void 1345 vnet_rx(vio_net_handle_t vrh, mblk_t *mp) 1346 { 1347 vnet_res_t *vresp = (vnet_res_t *)vrh; 1348 vnet_t *vnetp = vresp->vnetp; 1349 vnet_pseudo_rx_ring_t *ringp; 1350 1351 if ((vnetp == NULL) || (vnetp->mh == 0)) { 1352 freemsgchain(mp); 1353 return; 1354 } 1355 1356 ringp = vresp->rx_ringp; 1357 mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num); 1358 } 1359 1360 void 1361 vnet_tx_update(vio_net_handle_t vrh) 1362 { 1363 vnet_res_t *vresp = (vnet_res_t *)vrh; 1364 vnet_t *vnetp = vresp->vnetp; 1365 vnet_pseudo_tx_ring_t *tx_ringp; 1366 vnet_pseudo_tx_group_t *tx_grp; 1367 int i; 1368 1369 if (vnetp == NULL || vnetp->mh == NULL) { 1370 return; 1371 } 1372 1373 /* 1374 * Currently, the tx hwring API (used to access rings that belong to 1375 * a Hybrid IO resource) does not provide us a per ring flow ctrl 1376 * update; also the pseudo rings are shared by the ports/ldcs in the 1377 * vgen layer. Thus we can't figure out which pseudo ring is being 1378 * re-enabled for transmits. To work around this, when we get a tx 1379 * restart notification from below, we simply propagate that to all 1380 * the tx pseudo rings registered with the mac layer above. 1381 * 1382 * There are a couple of side effects with this approach, but they are 1383 * not harmful, as outlined below: 1384 * 1385 * A) We might send an invalid ring_update() for a ring that is not 1386 * really flow controlled. This will not have any effect in the mac 1387 * layer and packets will continue to be transmitted on that ring. 1388 * 1389 * B) We might end up clearing the flow control in the mac layer for 1390 * a ring that is still flow controlled in the underlying resource. 1391 * This will result in the mac layer restarting transmit, only to be 1392 * flow controlled again on that ring. 1393 */ 1394 tx_grp = &vnetp->tx_grp[0]; 1395 for (i = 0; i < tx_grp->ring_cnt; i++) { 1396 tx_ringp = &tx_grp->rings[i]; 1397 mac_tx_ring_update(vnetp->mh, tx_ringp->handle); 1398 } 1399 } 1400 1401 /* 1402 * Update the new mtu of vnet into the mac layer. First check if the device has 1403 * been plumbed and if so fail the mtu update. Returns 0 on success. 1404 */ 1405 int 1406 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu) 1407 { 1408 int rv; 1409 1410 if (vnetp == NULL || vnetp->mh == NULL) { 1411 return (EINVAL); 1412 } 1413 1414 WRITE_ENTER(&vnetp->vrwlock); 1415 1416 if (vnetp->flags & VNET_STARTED) { 1417 RW_EXIT(&vnetp->vrwlock); 1418 cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu " 1419 "update as the device is plumbed\n", 1420 vnetp->instance); 1421 return (EBUSY); 1422 } 1423 1424 /* update mtu in the mac layer */ 1425 rv = mac_maxsdu_update(vnetp->mh, mtu); 1426 if (rv != 0) { 1427 RW_EXIT(&vnetp->vrwlock); 1428 cmn_err(CE_NOTE, 1429 "!vnet%d: Unable to update mtu with mac layer\n", 1430 vnetp->instance); 1431 return (EIO); 1432 } 1433 1434 vnetp->mtu = mtu; 1435 1436 RW_EXIT(&vnetp->vrwlock); 1437 1438 return (0); 1439 } 1440 1441 /* 1442 * Update the link state of vnet to the mac layer. 1443 */ 1444 void 1445 vnet_link_update(vnet_t *vnetp, link_state_t link_state) 1446 { 1447 if (vnetp == NULL || vnetp->mh == NULL) { 1448 return; 1449 } 1450 1451 WRITE_ENTER(&vnetp->vrwlock); 1452 if (vnetp->link_state == link_state) { 1453 RW_EXIT(&vnetp->vrwlock); 1454 return; 1455 } 1456 vnetp->link_state = link_state; 1457 RW_EXIT(&vnetp->vrwlock); 1458 1459 mac_link_update(vnetp->mh, link_state); 1460 } 1461 1462 /* 1463 * vio_net_resource_reg -- An interface called to register a resource 1464 * with vnet. 1465 * macp -- a GLDv3 mac_register that has all the details of 1466 * a resource and its callbacks etc. 1467 * type -- resource type. 1468 * local_macaddr -- resource's MAC address. This is used to 1469 * associate a resource with a corresponding vnet. 1470 * remote_macaddr -- remote side MAC address. This is ignored for 1471 * the Hybrid resources. 1472 * vhp -- A handle returned to the caller. 1473 * vcb -- A set of callbacks provided to the callers. 1474 */ 1475 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type, 1476 ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp, 1477 vio_net_callbacks_t *vcb) 1478 { 1479 vnet_t *vnetp; 1480 vnet_res_t *vresp; 1481 1482 vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP); 1483 ether_copy(local_macaddr, vresp->local_macaddr); 1484 ether_copy(rem_macaddr, vresp->rem_macaddr); 1485 vresp->type = type; 1486 bcopy(macp, &vresp->macreg, sizeof (mac_register_t)); 1487 1488 DBG1(NULL, "Resource Registerig type=0%X\n", type); 1489 1490 READ_ENTER(&vnet_rw); 1491 vnetp = vnet_headp; 1492 while (vnetp != NULL) { 1493 if (VNET_MATCH_RES(vresp, vnetp)) { 1494 vresp->vnetp = vnetp; 1495 1496 /* Setup kstats for hio resource */ 1497 if (vresp->type == VIO_NET_RES_HYBRID) { 1498 vresp->ksp = vnet_hio_setup_kstats(DRV_NAME, 1499 "hio", vresp); 1500 if (vresp->ksp == NULL) { 1501 cmn_err(CE_NOTE, "!vnet%d: Cannot " 1502 "create kstats for hio resource", 1503 vnetp->instance); 1504 } 1505 } 1506 vnet_add_resource(vnetp, vresp); 1507 break; 1508 } 1509 vnetp = vnetp->nextp; 1510 } 1511 RW_EXIT(&vnet_rw); 1512 if (vresp->vnetp == NULL) { 1513 DWARN(NULL, "No vnet instance"); 1514 kmem_free(vresp, sizeof (vnet_res_t)); 1515 return (ENXIO); 1516 } 1517 1518 *vhp = vresp; 1519 vcb->vio_net_rx_cb = vnet_rx; 1520 vcb->vio_net_tx_update = vnet_tx_update; 1521 vcb->vio_net_report_err = vnet_handle_res_err; 1522 1523 /* Bind the resource to pseudo ring(s) */ 1524 if (vnet_bind_rings(vresp) != 0) { 1525 (void) vnet_rem_resource(vnetp, vresp); 1526 vnet_hio_destroy_kstats(vresp->ksp); 1527 KMEM_FREE(vresp); 1528 return (1); 1529 } 1530 1531 /* Dispatch a task to start resources */ 1532 vnet_dispatch_res_task(vnetp); 1533 return (0); 1534 } 1535 1536 /* 1537 * vio_net_resource_unreg -- An interface to unregister a resource. 1538 */ 1539 void 1540 vio_net_resource_unreg(vio_net_handle_t vhp) 1541 { 1542 vnet_res_t *vresp = (vnet_res_t *)vhp; 1543 vnet_t *vnetp = vresp->vnetp; 1544 1545 DBG1(NULL, "Resource Registerig hdl=0x%p", vhp); 1546 1547 ASSERT(vnetp != NULL); 1548 /* 1549 * Remove the resource from fdb; this ensures 1550 * there are no references to the resource. 1551 */ 1552 vnet_fdbe_del(vnetp, vresp); 1553 1554 vnet_unbind_rings(vresp); 1555 1556 /* Now remove the resource from the list */ 1557 (void) vnet_rem_resource(vnetp, vresp); 1558 1559 vnet_hio_destroy_kstats(vresp->ksp); 1560 KMEM_FREE(vresp); 1561 } 1562 1563 static void 1564 vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp) 1565 { 1566 WRITE_ENTER(&vnetp->vrwlock); 1567 vresp->nextp = vnetp->vres_list; 1568 vnetp->vres_list = vresp; 1569 RW_EXIT(&vnetp->vrwlock); 1570 } 1571 1572 static vnet_res_t * 1573 vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp) 1574 { 1575 vnet_res_t *vrp; 1576 1577 WRITE_ENTER(&vnetp->vrwlock); 1578 if (vresp == vnetp->vres_list) { 1579 vnetp->vres_list = vresp->nextp; 1580 } else { 1581 vrp = vnetp->vres_list; 1582 while (vrp->nextp != NULL) { 1583 if (vrp->nextp == vresp) { 1584 vrp->nextp = vresp->nextp; 1585 break; 1586 } 1587 vrp = vrp->nextp; 1588 } 1589 } 1590 vresp->vnetp = NULL; 1591 vresp->nextp = NULL; 1592 1593 RW_EXIT(&vnetp->vrwlock); 1594 1595 return (vresp); 1596 } 1597 1598 /* 1599 * vnet_dds_rx -- an interface called by vgen to DDS messages. 1600 */ 1601 void 1602 vnet_dds_rx(void *arg, void *dmsg) 1603 { 1604 vnet_t *vnetp = arg; 1605 vdds_process_dds_msg(vnetp, dmsg); 1606 } 1607 1608 /* 1609 * vnet_send_dds_msg -- An interface provided to DDS to send 1610 * DDS messages. This simply sends meessages via vgen. 1611 */ 1612 int 1613 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg) 1614 { 1615 int rv; 1616 1617 if (vnetp->vgenhdl != NULL) { 1618 rv = vgen_dds_tx(vnetp->vgenhdl, dmsg); 1619 } 1620 return (rv); 1621 } 1622 1623 /* 1624 * vnet_cleanup_hio -- an interface called by vgen to cleanup hio resources. 1625 */ 1626 void 1627 vnet_dds_cleanup_hio(vnet_t *vnetp) 1628 { 1629 vdds_cleanup_hio(vnetp); 1630 } 1631 1632 /* 1633 * vnet_handle_res_err -- A callback function called by a resource 1634 * to report an error. For example, vgen can call to report 1635 * an LDC down/reset event. This will trigger cleanup of associated 1636 * Hybrid resource. 1637 */ 1638 /* ARGSUSED */ 1639 static void 1640 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err) 1641 { 1642 vnet_res_t *vresp = (vnet_res_t *)vrh; 1643 vnet_t *vnetp = vresp->vnetp; 1644 1645 if (vnetp == NULL) { 1646 return; 1647 } 1648 if ((vresp->type != VIO_NET_RES_LDC_SERVICE) && 1649 (vresp->type != VIO_NET_RES_HYBRID)) { 1650 return; 1651 } 1652 1653 vdds_cleanup_hio(vnetp); 1654 } 1655 1656 /* 1657 * vnet_dispatch_res_task -- A function to dispatch tasks start resources. 1658 */ 1659 static void 1660 vnet_dispatch_res_task(vnet_t *vnetp) 1661 { 1662 int rv; 1663 1664 /* 1665 * Dispatch the task. It could be the case that vnetp->flags does 1666 * not have VNET_STARTED set. This is ok as vnet_rest_start_task() 1667 * can abort the task when the task is started. See related comments 1668 * in vnet_m_stop() and vnet_stop_resources(). 1669 */ 1670 rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task, 1671 vnetp, DDI_NOSLEEP); 1672 if (rv != DDI_SUCCESS) { 1673 cmn_err(CE_WARN, 1674 "vnet%d:Can't dispatch start resource task", 1675 vnetp->instance); 1676 } 1677 } 1678 1679 /* 1680 * vnet_res_start_task -- A taskq callback function that starts a resource. 1681 */ 1682 static void 1683 vnet_res_start_task(void *arg) 1684 { 1685 vnet_t *vnetp = arg; 1686 1687 WRITE_ENTER(&vnetp->vrwlock); 1688 if (vnetp->flags & VNET_STARTED) { 1689 vnet_start_resources(vnetp); 1690 } 1691 RW_EXIT(&vnetp->vrwlock); 1692 } 1693 1694 /* 1695 * vnet_start_resources -- starts all resources associated with 1696 * a vnet. 1697 */ 1698 static void 1699 vnet_start_resources(vnet_t *vnetp) 1700 { 1701 mac_register_t *macp; 1702 mac_callbacks_t *cbp; 1703 vnet_res_t *vresp; 1704 int rv; 1705 1706 DBG1(vnetp, "enter\n"); 1707 1708 ASSERT(RW_WRITE_HELD(&vnetp->vrwlock)); 1709 1710 for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) { 1711 /* skip if it is already started */ 1712 if (vresp->flags & VNET_STARTED) { 1713 continue; 1714 } 1715 macp = &vresp->macreg; 1716 cbp = macp->m_callbacks; 1717 rv = cbp->mc_start(macp->m_driver); 1718 if (rv == 0) { 1719 /* 1720 * Successfully started the resource, so now 1721 * add it to the fdb. 1722 */ 1723 vresp->flags |= VNET_STARTED; 1724 vnet_fdbe_add(vnetp, vresp); 1725 } 1726 } 1727 1728 DBG1(vnetp, "exit\n"); 1729 1730 } 1731 1732 /* 1733 * vnet_stop_resources -- stop all resources associated with a vnet. 1734 */ 1735 static void 1736 vnet_stop_resources(vnet_t *vnetp) 1737 { 1738 vnet_res_t *vresp; 1739 mac_register_t *macp; 1740 mac_callbacks_t *cbp; 1741 1742 DBG1(vnetp, "enter\n"); 1743 1744 ASSERT(RW_WRITE_HELD(&vnetp->vrwlock)); 1745 1746 for (vresp = vnetp->vres_list; vresp != NULL; ) { 1747 if (vresp->flags & VNET_STARTED) { 1748 /* 1749 * Release the lock while invoking mc_stop() of the 1750 * underlying resource. We hold a reference to this 1751 * resource to prevent being removed from the list in 1752 * vio_net_resource_unreg(). Note that new resources 1753 * can be added to the head of the list while the lock 1754 * is released, but they won't be started, as 1755 * VNET_STARTED flag has been cleared for the vnet 1756 * device in vnet_m_stop(). Also, while the lock is 1757 * released a resource could be removed from the list 1758 * in vio_net_resource_unreg(); but that is ok, as we 1759 * re-acquire the lock and only then access the forward 1760 * link (vresp->nextp) to continue with the next 1761 * resource. 1762 */ 1763 vresp->flags &= ~VNET_STARTED; 1764 vresp->flags |= VNET_STOPPING; 1765 macp = &vresp->macreg; 1766 cbp = macp->m_callbacks; 1767 VNET_FDBE_REFHOLD(vresp); 1768 RW_EXIT(&vnetp->vrwlock); 1769 1770 cbp->mc_stop(macp->m_driver); 1771 1772 WRITE_ENTER(&vnetp->vrwlock); 1773 vresp->flags &= ~VNET_STOPPING; 1774 VNET_FDBE_REFRELE(vresp); 1775 } 1776 vresp = vresp->nextp; 1777 } 1778 DBG1(vnetp, "exit\n"); 1779 } 1780 1781 /* 1782 * Setup kstats for the HIO statistics. 1783 * NOTE: the synchronization for the statistics is the 1784 * responsibility of the caller. 1785 */ 1786 kstat_t * 1787 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp) 1788 { 1789 kstat_t *ksp; 1790 vnet_t *vnetp = vresp->vnetp; 1791 vnet_hio_kstats_t *hiokp; 1792 size_t size; 1793 1794 ASSERT(vnetp != NULL); 1795 size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t); 1796 ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net", 1797 KSTAT_TYPE_NAMED, size, 0); 1798 if (ksp == NULL) { 1799 return (NULL); 1800 } 1801 1802 hiokp = (vnet_hio_kstats_t *)ksp->ks_data; 1803 kstat_named_init(&hiokp->ipackets, "ipackets", 1804 KSTAT_DATA_ULONG); 1805 kstat_named_init(&hiokp->ierrors, "ierrors", 1806 KSTAT_DATA_ULONG); 1807 kstat_named_init(&hiokp->opackets, "opackets", 1808 KSTAT_DATA_ULONG); 1809 kstat_named_init(&hiokp->oerrors, "oerrors", 1810 KSTAT_DATA_ULONG); 1811 1812 1813 /* MIB II kstat variables */ 1814 kstat_named_init(&hiokp->rbytes, "rbytes", 1815 KSTAT_DATA_ULONG); 1816 kstat_named_init(&hiokp->obytes, "obytes", 1817 KSTAT_DATA_ULONG); 1818 kstat_named_init(&hiokp->multircv, "multircv", 1819 KSTAT_DATA_ULONG); 1820 kstat_named_init(&hiokp->multixmt, "multixmt", 1821 KSTAT_DATA_ULONG); 1822 kstat_named_init(&hiokp->brdcstrcv, "brdcstrcv", 1823 KSTAT_DATA_ULONG); 1824 kstat_named_init(&hiokp->brdcstxmt, "brdcstxmt", 1825 KSTAT_DATA_ULONG); 1826 kstat_named_init(&hiokp->norcvbuf, "norcvbuf", 1827 KSTAT_DATA_ULONG); 1828 kstat_named_init(&hiokp->noxmtbuf, "noxmtbuf", 1829 KSTAT_DATA_ULONG); 1830 1831 ksp->ks_update = vnet_hio_update_kstats; 1832 ksp->ks_private = (void *)vresp; 1833 kstat_install(ksp); 1834 return (ksp); 1835 } 1836 1837 /* 1838 * Destroy kstats. 1839 */ 1840 static void 1841 vnet_hio_destroy_kstats(kstat_t *ksp) 1842 { 1843 if (ksp != NULL) 1844 kstat_delete(ksp); 1845 } 1846 1847 /* 1848 * Update the kstats. 1849 */ 1850 static int 1851 vnet_hio_update_kstats(kstat_t *ksp, int rw) 1852 { 1853 vnet_t *vnetp; 1854 vnet_res_t *vresp; 1855 vnet_hio_stats_t statsp; 1856 vnet_hio_kstats_t *hiokp; 1857 1858 vresp = (vnet_res_t *)ksp->ks_private; 1859 vnetp = vresp->vnetp; 1860 1861 bzero(&statsp, sizeof (vnet_hio_stats_t)); 1862 1863 READ_ENTER(&vnetp->vsw_fp_rw); 1864 if (vnetp->hio_fp == NULL) { 1865 /* not using hio resources, just return */ 1866 RW_EXIT(&vnetp->vsw_fp_rw); 1867 return (0); 1868 } 1869 VNET_FDBE_REFHOLD(vnetp->hio_fp); 1870 RW_EXIT(&vnetp->vsw_fp_rw); 1871 vnet_hio_get_stats(vnetp->hio_fp, &statsp); 1872 VNET_FDBE_REFRELE(vnetp->hio_fp); 1873 1874 hiokp = (vnet_hio_kstats_t *)ksp->ks_data; 1875 1876 if (rw == KSTAT_READ) { 1877 /* Link Input/Output stats */ 1878 hiokp->ipackets.value.ul = (uint32_t)statsp.ipackets; 1879 hiokp->ipackets64.value.ull = statsp.ipackets; 1880 hiokp->ierrors.value.ul = statsp.ierrors; 1881 hiokp->opackets.value.ul = (uint32_t)statsp.opackets; 1882 hiokp->opackets64.value.ull = statsp.opackets; 1883 hiokp->oerrors.value.ul = statsp.oerrors; 1884 1885 /* MIB II kstat variables */ 1886 hiokp->rbytes.value.ul = (uint32_t)statsp.rbytes; 1887 hiokp->rbytes64.value.ull = statsp.rbytes; 1888 hiokp->obytes.value.ul = (uint32_t)statsp.obytes; 1889 hiokp->obytes64.value.ull = statsp.obytes; 1890 hiokp->multircv.value.ul = statsp.multircv; 1891 hiokp->multixmt.value.ul = statsp.multixmt; 1892 hiokp->brdcstrcv.value.ul = statsp.brdcstrcv; 1893 hiokp->brdcstxmt.value.ul = statsp.brdcstxmt; 1894 hiokp->norcvbuf.value.ul = statsp.norcvbuf; 1895 hiokp->noxmtbuf.value.ul = statsp.noxmtbuf; 1896 } else { 1897 return (EACCES); 1898 } 1899 1900 return (0); 1901 } 1902 1903 static void 1904 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp) 1905 { 1906 mac_register_t *macp; 1907 mac_callbacks_t *cbp; 1908 uint64_t val; 1909 int stat; 1910 1911 /* 1912 * get the specified statistics from the underlying nxge. 1913 */ 1914 macp = &vresp->macreg; 1915 cbp = macp->m_callbacks; 1916 for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) { 1917 if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) { 1918 switch (stat) { 1919 case MAC_STAT_IPACKETS: 1920 statsp->ipackets = val; 1921 break; 1922 1923 case MAC_STAT_IERRORS: 1924 statsp->ierrors = val; 1925 break; 1926 1927 case MAC_STAT_OPACKETS: 1928 statsp->opackets = val; 1929 break; 1930 1931 case MAC_STAT_OERRORS: 1932 statsp->oerrors = val; 1933 break; 1934 1935 case MAC_STAT_RBYTES: 1936 statsp->rbytes = val; 1937 break; 1938 1939 case MAC_STAT_OBYTES: 1940 statsp->obytes = val; 1941 break; 1942 1943 case MAC_STAT_MULTIRCV: 1944 statsp->multircv = val; 1945 break; 1946 1947 case MAC_STAT_MULTIXMT: 1948 statsp->multixmt = val; 1949 break; 1950 1951 case MAC_STAT_BRDCSTRCV: 1952 statsp->brdcstrcv = val; 1953 break; 1954 1955 case MAC_STAT_BRDCSTXMT: 1956 statsp->brdcstxmt = val; 1957 break; 1958 1959 case MAC_STAT_NOXMTBUF: 1960 statsp->noxmtbuf = val; 1961 break; 1962 1963 case MAC_STAT_NORCVBUF: 1964 statsp->norcvbuf = val; 1965 break; 1966 1967 default: 1968 /* 1969 * parameters not interested. 1970 */ 1971 break; 1972 } 1973 } 1974 } 1975 } 1976 1977 static boolean_t 1978 vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data) 1979 { 1980 vnet_t *vnetp = (vnet_t *)arg; 1981 1982 if (vnetp == NULL) { 1983 return (0); 1984 } 1985 1986 switch (cap) { 1987 1988 case MAC_CAPAB_RINGS: { 1989 1990 mac_capab_rings_t *cap_rings = cap_data; 1991 /* 1992 * Rings Capability Notes: 1993 * We advertise rings to make use of the rings framework in 1994 * gldv3 mac layer, to improve the performance. This is 1995 * specifically needed when a Hybrid resource (with multiple 1996 * tx/rx hardware rings) is assigned to a vnet device. We also 1997 * leverage this for the normal case when no Hybrid resource is 1998 * assigned. 1999 * 2000 * Ring Allocation: 2001 * - TX path: 2002 * We expose a pseudo ring group with 2 pseudo tx rings (as 2003 * currently HybridIO exports only 2 rings) In the normal case, 2004 * transmit traffic that comes down to the driver through the 2005 * mri_tx (vnet_tx_ring_send()) entry point goes through the 2006 * distributed switching algorithm in vnet and gets transmitted 2007 * over a port/LDC in the vgen layer to either the vswitch or a 2008 * peer vnet. If and when a Hybrid resource is assigned to the 2009 * vnet, we obtain the tx ring information of the Hybrid device 2010 * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings. 2011 * Traffic being sent over the Hybrid resource by the mac layer 2012 * gets spread across both hw rings, as they are mapped to the 2013 * 2 pseudo tx rings in vnet. 2014 * 2015 * - RX path: 2016 * We expose a pseudo ring group with 3 pseudo rx rings (static 2017 * rings) initially. The first (default) pseudo rx ring is 2018 * reserved for the resource that connects to the vswitch 2019 * service. The next 2 rings are reserved for a Hybrid resource 2020 * that may be assigned to the vnet device. If and when a 2021 * Hybrid resource is assigned to the vnet, we obtain the rx 2022 * ring information of the Hybrid device (nxge) and map these 2023 * pseudo rings 1:1 to the 2 hw rx rings. For each additional 2024 * resource that connects to a peer vnet, we dynamically 2025 * allocate a pseudo rx ring and map it to that resource, when 2026 * the resource gets added; and the pseudo rx ring is 2027 * dynamically registered with the upper mac layer. We do the 2028 * reverse and unregister the ring with the mac layer when 2029 * the resource gets removed. 2030 * 2031 * Synchronization notes: 2032 * We don't need any lock to protect members of ring structure, 2033 * specifically ringp->hw_rh, in either the TX or the RX ring, 2034 * as explained below. 2035 * - TX ring: 2036 * ring->hw_rh is initialized only when a Hybrid resource is 2037 * associated; and gets referenced only in vnet_hio_tx(). The 2038 * Hybrid resource itself is available in fdb only after tx 2039 * hwrings are found and mapped; i.e, in vio_net_resource_reg() 2040 * we call vnet_bind_rings() first and then call 2041 * vnet_start_resources() which adds an entry to fdb. For 2042 * traffic going over LDC resources, we don't reference 2043 * ring->hw_rh at all. 2044 * - RX ring: 2045 * For rings mapped to Hybrid resource ring->hw_rh is 2046 * initialized and only then do we add the rx callback for 2047 * the underlying Hybrid resource; we disable callbacks before 2048 * we unmap ring->hw_rh. For rings mapped to LDC resources, we 2049 * stop the rx callbacks (in vgen) before we remove ring->hw_rh 2050 * (vio_net_resource_unreg()). 2051 */ 2052 2053 if (cap_rings->mr_type == MAC_RING_TYPE_RX) { 2054 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 2055 2056 /* 2057 * The ring_cnt for rx grp is initialized in 2058 * vnet_ring_grp_init(). Later, the ring_cnt gets 2059 * updated dynamically whenever LDC resources are added 2060 * or removed. 2061 */ 2062 cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt; 2063 cap_rings->mr_rget = vnet_get_ring; 2064 2065 cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS; 2066 cap_rings->mr_gget = vnet_get_group; 2067 cap_rings->mr_gaddring = NULL; 2068 cap_rings->mr_gremring = NULL; 2069 } else { 2070 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 2071 2072 /* 2073 * The ring_cnt for tx grp is initialized in 2074 * vnet_ring_grp_init() and remains constant, as we 2075 * do not support dymanic tx rings for now. 2076 */ 2077 cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt; 2078 cap_rings->mr_rget = vnet_get_ring; 2079 2080 /* 2081 * Transmit rings are not grouped; i.e, the number of 2082 * transmit ring groups advertised should be set to 0. 2083 */ 2084 cap_rings->mr_gnum = 0; 2085 2086 cap_rings->mr_gget = vnet_get_group; 2087 cap_rings->mr_gaddring = NULL; 2088 cap_rings->mr_gremring = NULL; 2089 } 2090 return (B_TRUE); 2091 2092 } 2093 2094 default: 2095 break; 2096 2097 } 2098 2099 return (B_FALSE); 2100 } 2101 2102 /* 2103 * Callback funtion for MAC layer to get ring information. 2104 */ 2105 static void 2106 vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, 2107 const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle) 2108 { 2109 vnet_t *vnetp = arg; 2110 2111 switch (rtype) { 2112 2113 case MAC_RING_TYPE_RX: { 2114 2115 vnet_pseudo_rx_group_t *rx_grp; 2116 vnet_pseudo_rx_ring_t *rx_ringp; 2117 mac_intr_t *mintr; 2118 2119 /* We advertised only one RX group */ 2120 ASSERT(g_index == 0); 2121 rx_grp = &vnetp->rx_grp[g_index]; 2122 2123 /* Check the current # of rings in the rx group */ 2124 ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt)); 2125 2126 /* Get the ring based on the index */ 2127 rx_ringp = &rx_grp->rings[r_index]; 2128 2129 rx_ringp->handle = r_handle; 2130 /* 2131 * Note: we don't need to save the incoming r_index in rx_ring, 2132 * as vnet_ring_grp_init() would have initialized the index for 2133 * each ring in the array. 2134 */ 2135 rx_ringp->grp = rx_grp; 2136 rx_ringp->vnetp = vnetp; 2137 2138 mintr = &infop->mri_intr; 2139 mintr->mi_handle = (mac_intr_handle_t)rx_ringp; 2140 mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr; 2141 mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr; 2142 2143 infop->mri_driver = (mac_ring_driver_t)rx_ringp; 2144 infop->mri_start = vnet_rx_ring_start; 2145 infop->mri_stop = vnet_rx_ring_stop; 2146 2147 /* Set the poll function, as this is an rx ring */ 2148 infop->mri_poll = vnet_rx_poll; 2149 2150 break; 2151 } 2152 2153 case MAC_RING_TYPE_TX: { 2154 vnet_pseudo_tx_group_t *tx_grp; 2155 vnet_pseudo_tx_ring_t *tx_ringp; 2156 2157 /* 2158 * No need to check grp index; mac layer passes -1 for it. 2159 */ 2160 tx_grp = &vnetp->tx_grp[0]; 2161 2162 /* Check the # of rings in the tx group */ 2163 ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt)); 2164 2165 /* Get the ring based on the index */ 2166 tx_ringp = &tx_grp->rings[r_index]; 2167 2168 tx_ringp->handle = r_handle; 2169 tx_ringp->index = r_index; 2170 tx_ringp->grp = tx_grp; 2171 tx_ringp->vnetp = vnetp; 2172 2173 infop->mri_driver = (mac_ring_driver_t)tx_ringp; 2174 infop->mri_start = vnet_tx_ring_start; 2175 infop->mri_stop = vnet_tx_ring_stop; 2176 2177 /* Set the transmit function, as this is a tx ring */ 2178 infop->mri_tx = vnet_tx_ring_send; 2179 2180 break; 2181 } 2182 2183 default: 2184 break; 2185 } 2186 } 2187 2188 /* 2189 * Callback funtion for MAC layer to get group information. 2190 */ 2191 static void 2192 vnet_get_group(void *arg, mac_ring_type_t type, const int index, 2193 mac_group_info_t *infop, mac_group_handle_t handle) 2194 { 2195 vnet_t *vnetp = (vnet_t *)arg; 2196 2197 switch (type) { 2198 2199 case MAC_RING_TYPE_RX: 2200 { 2201 vnet_pseudo_rx_group_t *rx_grp; 2202 2203 /* We advertised only one RX group */ 2204 ASSERT(index == 0); 2205 2206 rx_grp = &vnetp->rx_grp[index]; 2207 rx_grp->handle = handle; 2208 rx_grp->index = index; 2209 rx_grp->vnetp = vnetp; 2210 2211 infop->mgi_driver = (mac_group_driver_t)rx_grp; 2212 infop->mgi_start = NULL; 2213 infop->mgi_stop = NULL; 2214 infop->mgi_addmac = vnet_addmac; 2215 infop->mgi_remmac = vnet_remmac; 2216 infop->mgi_count = rx_grp->ring_cnt; 2217 2218 break; 2219 } 2220 2221 case MAC_RING_TYPE_TX: 2222 { 2223 vnet_pseudo_tx_group_t *tx_grp; 2224 2225 /* We advertised only one TX group */ 2226 ASSERT(index == 0); 2227 2228 tx_grp = &vnetp->tx_grp[index]; 2229 tx_grp->handle = handle; 2230 tx_grp->index = index; 2231 tx_grp->vnetp = vnetp; 2232 2233 infop->mgi_driver = (mac_group_driver_t)tx_grp; 2234 infop->mgi_start = NULL; 2235 infop->mgi_stop = NULL; 2236 infop->mgi_addmac = NULL; 2237 infop->mgi_remmac = NULL; 2238 infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS; 2239 2240 break; 2241 } 2242 2243 default: 2244 break; 2245 2246 } 2247 } 2248 2249 static int 2250 vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num) 2251 { 2252 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2253 int err; 2254 2255 /* 2256 * If this ring is mapped to a LDC resource, simply mark the state to 2257 * indicate the ring is started and return. 2258 */ 2259 if ((rx_ringp->state & 2260 (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) { 2261 rx_ringp->gen_num = mr_gen_num; 2262 rx_ringp->state |= VNET_RXRING_STARTED; 2263 return (0); 2264 } 2265 2266 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); 2267 2268 /* 2269 * This must be a ring reserved for a hwring. If the hwring is not 2270 * bound yet, simply mark the state to indicate the ring is started and 2271 * return. If and when a hybrid resource is activated for this vnet 2272 * device, we will bind the hwring and start it then. If a hwring is 2273 * already bound, start it now. 2274 */ 2275 if (rx_ringp->hw_rh == NULL) { 2276 rx_ringp->gen_num = mr_gen_num; 2277 rx_ringp->state |= VNET_RXRING_STARTED; 2278 return (0); 2279 } 2280 2281 err = mac_hwring_start(rx_ringp->hw_rh); 2282 if (err == 0) { 2283 rx_ringp->gen_num = mr_gen_num; 2284 rx_ringp->state |= VNET_RXRING_STARTED; 2285 } else { 2286 err = ENXIO; 2287 } 2288 2289 return (err); 2290 } 2291 2292 static void 2293 vnet_rx_ring_stop(mac_ring_driver_t arg) 2294 { 2295 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2296 2297 /* 2298 * If this ring is mapped to a LDC resource, simply mark the state to 2299 * indicate the ring is now stopped and return. 2300 */ 2301 if ((rx_ringp->state & 2302 (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) { 2303 rx_ringp->state &= ~VNET_RXRING_STARTED; 2304 } 2305 2306 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); 2307 2308 /* 2309 * This must be a ring reserved for a hwring. If the hwring is not 2310 * bound yet, simply mark the state to indicate the ring is stopped and 2311 * return. If a hwring is already bound, stop it now. 2312 */ 2313 if (rx_ringp->hw_rh == NULL) { 2314 rx_ringp->state &= ~VNET_RXRING_STARTED; 2315 return; 2316 } 2317 2318 mac_hwring_stop(rx_ringp->hw_rh); 2319 rx_ringp->state &= ~VNET_RXRING_STARTED; 2320 } 2321 2322 /* ARGSUSED */ 2323 static int 2324 vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num) 2325 { 2326 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg; 2327 2328 tx_ringp->state |= VNET_TXRING_STARTED; 2329 return (0); 2330 } 2331 2332 static void 2333 vnet_tx_ring_stop(mac_ring_driver_t arg) 2334 { 2335 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg; 2336 2337 tx_ringp->state &= ~VNET_TXRING_STARTED; 2338 } 2339 2340 /* 2341 * Disable polling for a ring and enable its interrupt. 2342 */ 2343 static int 2344 vnet_ring_enable_intr(void *arg) 2345 { 2346 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2347 vnet_res_t *vresp; 2348 2349 if (rx_ringp->hw_rh == NULL) { 2350 /* 2351 * Ring enable intr func is being invoked, but the ring is 2352 * not bound to any underlying resource ? This must be a ring 2353 * reserved for Hybrid resource and no such resource has been 2354 * assigned to this vnet device yet. We simply return success. 2355 */ 2356 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); 2357 return (0); 2358 } 2359 2360 /* 2361 * The rx ring has been bound to either a LDC or a Hybrid resource. 2362 * Call the appropriate function to enable interrupts for the ring. 2363 */ 2364 if (rx_ringp->state & VNET_RXRING_HYBRID) { 2365 return (mac_hwring_enable_intr(rx_ringp->hw_rh)); 2366 } else { 2367 vresp = (vnet_res_t *)rx_ringp->hw_rh; 2368 return (vgen_enable_intr(vresp->macreg.m_driver)); 2369 } 2370 } 2371 2372 /* 2373 * Enable polling for a ring and disable its interrupt. 2374 */ 2375 static int 2376 vnet_ring_disable_intr(void *arg) 2377 { 2378 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2379 vnet_res_t *vresp; 2380 2381 if (rx_ringp->hw_rh == NULL) { 2382 /* 2383 * Ring disable intr func is being invoked, but the ring is 2384 * not bound to any underlying resource ? This must be a ring 2385 * reserved for Hybrid resource and no such resource has been 2386 * assigned to this vnet device yet. We simply return success. 2387 */ 2388 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); 2389 return (0); 2390 } 2391 2392 /* 2393 * The rx ring has been bound to either a LDC or a Hybrid resource. 2394 * Call the appropriate function to disable interrupts for the ring. 2395 */ 2396 if (rx_ringp->state & VNET_RXRING_HYBRID) { 2397 return (mac_hwring_disable_intr(rx_ringp->hw_rh)); 2398 } else { 2399 vresp = (vnet_res_t *)rx_ringp->hw_rh; 2400 return (vgen_disable_intr(vresp->macreg.m_driver)); 2401 } 2402 } 2403 2404 /* 2405 * Poll 'bytes_to_pickup' bytes of message from the rx ring. 2406 */ 2407 static mblk_t * 2408 vnet_rx_poll(void *arg, int bytes_to_pickup) 2409 { 2410 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2411 mblk_t *mp = NULL; 2412 vnet_res_t *vresp; 2413 vnet_t *vnetp = rx_ringp->vnetp; 2414 2415 if (rx_ringp->hw_rh == NULL) { 2416 return (NULL); 2417 } 2418 2419 if (rx_ringp->state & VNET_RXRING_HYBRID) { 2420 mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup); 2421 /* 2422 * Packets received over a hybrid resource need additional 2423 * processing to remove the tag, for the pvid case. The 2424 * underlying resource is not aware of the vnet's pvid and thus 2425 * packets are received with the vlan tag in the header; unlike 2426 * packets that are received over a ldc channel in which case 2427 * the peer vnet/vsw would have already removed the tag. 2428 */ 2429 if (vnetp->pvid != vnetp->default_vlan_id) { 2430 vnet_rx_frames_untag(vnetp->pvid, &mp); 2431 } 2432 } else { 2433 vresp = (vnet_res_t *)rx_ringp->hw_rh; 2434 mp = vgen_poll(vresp->macreg.m_driver, bytes_to_pickup); 2435 } 2436 return (mp); 2437 } 2438 2439 /* ARGSUSED */ 2440 void 2441 vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 2442 boolean_t loopback) 2443 { 2444 vnet_t *vnetp = (vnet_t *)arg; 2445 vnet_pseudo_rx_ring_t *ringp = (vnet_pseudo_rx_ring_t *)mrh; 2446 2447 /* 2448 * Packets received over a hybrid resource need additional processing 2449 * to remove the tag, for the pvid case. The underlying resource is 2450 * not aware of the vnet's pvid and thus packets are received with the 2451 * vlan tag in the header; unlike packets that are received over a ldc 2452 * channel in which case the peer vnet/vsw would have already removed 2453 * the tag. 2454 */ 2455 if (vnetp->pvid != vnetp->default_vlan_id) { 2456 vnet_rx_frames_untag(vnetp->pvid, &mp); 2457 if (mp == NULL) { 2458 return; 2459 } 2460 } 2461 mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num); 2462 } 2463 2464 static int 2465 vnet_addmac(void *arg, const uint8_t *mac_addr) 2466 { 2467 vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg; 2468 vnet_t *vnetp; 2469 2470 vnetp = rx_grp->vnetp; 2471 2472 if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) { 2473 return (0); 2474 } 2475 2476 cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n", 2477 vnetp->instance, __func__); 2478 return (EINVAL); 2479 } 2480 2481 static int 2482 vnet_remmac(void *arg, const uint8_t *mac_addr) 2483 { 2484 vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg; 2485 vnet_t *vnetp; 2486 2487 vnetp = rx_grp->vnetp; 2488 2489 if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) { 2490 return (0); 2491 } 2492 2493 cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n", 2494 vnetp->instance, __func__, ether_sprintf((void *)mac_addr)); 2495 return (EINVAL); 2496 } 2497 2498 int 2499 vnet_hio_mac_init(vnet_t *vnetp, char *ifname) 2500 { 2501 mac_handle_t mh; 2502 mac_client_handle_t mch = NULL; 2503 mac_unicast_handle_t muh = NULL; 2504 mac_diag_t diag; 2505 mac_register_t *macp; 2506 char client_name[MAXNAMELEN]; 2507 int rv; 2508 uint16_t mac_flags = MAC_UNICAST_TAG_DISABLE | 2509 MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY; 2510 vio_net_callbacks_t vcb; 2511 ether_addr_t rem_addr = 2512 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 2513 uint32_t retries = 0; 2514 2515 if ((macp = mac_alloc(MAC_VERSION)) == NULL) { 2516 return (EAGAIN); 2517 } 2518 2519 do { 2520 rv = mac_open_by_linkname(ifname, &mh); 2521 if (rv == 0) { 2522 break; 2523 } 2524 if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) { 2525 mac_free(macp); 2526 return (rv); 2527 } 2528 drv_usecwait(vnet_mac_open_delay); 2529 } while (rv == ENOENT); 2530 2531 vnetp->hio_mh = mh; 2532 2533 (void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance, 2534 ifname); 2535 rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE); 2536 if (rv != 0) { 2537 goto fail; 2538 } 2539 vnetp->hio_mch = mch; 2540 2541 rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0, 2542 &diag); 2543 if (rv != 0) { 2544 goto fail; 2545 } 2546 vnetp->hio_muh = muh; 2547 2548 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 2549 macp->m_driver = vnetp; 2550 macp->m_dip = NULL; 2551 macp->m_src_addr = NULL; 2552 macp->m_callbacks = &vnet_hio_res_callbacks; 2553 macp->m_min_sdu = 0; 2554 macp->m_max_sdu = ETHERMTU; 2555 2556 rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID, 2557 vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb); 2558 if (rv != 0) { 2559 goto fail; 2560 } 2561 mac_free(macp); 2562 2563 /* add the recv callback */ 2564 mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp); 2565 2566 /* add the notify callback - only tx updates for now */ 2567 vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb, 2568 vnetp); 2569 2570 return (0); 2571 2572 fail: 2573 mac_free(macp); 2574 vnet_hio_mac_cleanup(vnetp); 2575 return (1); 2576 } 2577 2578 void 2579 vnet_hio_mac_cleanup(vnet_t *vnetp) 2580 { 2581 if (vnetp->hio_mnh != NULL) { 2582 (void) mac_notify_remove(vnetp->hio_mnh, B_TRUE); 2583 vnetp->hio_mnh = NULL; 2584 } 2585 2586 if (vnetp->hio_vhp != NULL) { 2587 vio_net_resource_unreg(vnetp->hio_vhp); 2588 vnetp->hio_vhp = NULL; 2589 } 2590 2591 if (vnetp->hio_muh != NULL) { 2592 mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh); 2593 vnetp->hio_muh = NULL; 2594 } 2595 2596 if (vnetp->hio_mch != NULL) { 2597 mac_client_close(vnetp->hio_mch, 0); 2598 vnetp->hio_mch = NULL; 2599 } 2600 2601 if (vnetp->hio_mh != NULL) { 2602 mac_close(vnetp->hio_mh); 2603 vnetp->hio_mh = NULL; 2604 } 2605 } 2606 2607 /* Bind pseudo rings to hwrings */ 2608 static int 2609 vnet_bind_hwrings(vnet_t *vnetp) 2610 { 2611 mac_ring_handle_t hw_rh[VNET_NUM_HYBRID_RINGS]; 2612 mac_perim_handle_t mph1; 2613 vnet_pseudo_rx_group_t *rx_grp; 2614 vnet_pseudo_rx_ring_t *rx_ringp; 2615 vnet_pseudo_tx_group_t *tx_grp; 2616 vnet_pseudo_tx_ring_t *tx_ringp; 2617 int hw_ring_cnt; 2618 int i; 2619 int rv; 2620 2621 mac_perim_enter_by_mh(vnetp->hio_mh, &mph1); 2622 2623 /* Get the list of the underlying RX rings. */ 2624 hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh, 2625 MAC_RING_TYPE_RX); 2626 2627 /* We expect the the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */ 2628 if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) { 2629 cmn_err(CE_WARN, 2630 "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n", 2631 vnetp->instance, hw_ring_cnt); 2632 goto fail; 2633 } 2634 2635 if (vnetp->rx_hwgh != NULL) { 2636 /* 2637 * Quiesce the HW ring and the mac srs on the ring. Note 2638 * that the HW ring will be restarted when the pseudo ring 2639 * is started. At that time all the packets will be 2640 * directly passed up to the pseudo RX ring and handled 2641 * by mac srs created over the pseudo RX ring. 2642 */ 2643 mac_rx_client_quiesce(vnetp->hio_mch); 2644 mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE); 2645 } 2646 2647 /* 2648 * Bind the pseudo rings to the hwrings and start the hwrings. 2649 * Note we don't need to register these with the upper mac, as we have 2650 * statically exported these pseudo rxrings which are reserved for 2651 * rxrings of Hybrid resource. 2652 */ 2653 rx_grp = &vnetp->rx_grp[0]; 2654 for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { 2655 /* Pick the rxrings reserved for Hybrid resource */ 2656 rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX]; 2657 2658 /* Store the hw ring handle */ 2659 rx_ringp->hw_rh = hw_rh[i]; 2660 2661 /* Bind the pseudo ring to the underlying hwring */ 2662 mac_hwring_setup(rx_ringp->hw_rh, 2663 (mac_resource_handle_t)rx_ringp); 2664 2665 /* Start the hwring if needed */ 2666 if (rx_ringp->state & VNET_RXRING_STARTED) { 2667 rv = mac_hwring_start(rx_ringp->hw_rh); 2668 if (rv != 0) { 2669 mac_hwring_teardown(rx_ringp->hw_rh); 2670 rx_ringp->hw_rh = NULL; 2671 goto fail; 2672 } 2673 } 2674 } 2675 2676 /* Get the list of the underlying TX rings. */ 2677 hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh, 2678 MAC_RING_TYPE_TX); 2679 2680 /* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */ 2681 if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) { 2682 cmn_err(CE_WARN, 2683 "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n", 2684 vnetp->instance, hw_ring_cnt); 2685 goto fail; 2686 } 2687 2688 /* 2689 * Now map the pseudo txrings to the hw txrings. Note we don't need 2690 * to register these with the upper mac, as we have statically exported 2691 * these rings. Note that these rings will continue to be used for LDC 2692 * resources to peer vnets and vswitch (shared ring). 2693 */ 2694 tx_grp = &vnetp->tx_grp[0]; 2695 for (i = 0; i < tx_grp->ring_cnt; i++) { 2696 tx_ringp = &tx_grp->rings[i]; 2697 tx_ringp->hw_rh = hw_rh[i]; 2698 tx_ringp->state |= VNET_TXRING_HYBRID; 2699 } 2700 2701 mac_perim_exit(mph1); 2702 return (0); 2703 2704 fail: 2705 mac_perim_exit(mph1); 2706 vnet_unbind_hwrings(vnetp); 2707 return (1); 2708 } 2709 2710 /* Unbind pseudo rings from hwrings */ 2711 static void 2712 vnet_unbind_hwrings(vnet_t *vnetp) 2713 { 2714 mac_perim_handle_t mph1; 2715 vnet_pseudo_rx_ring_t *rx_ringp; 2716 vnet_pseudo_rx_group_t *rx_grp; 2717 vnet_pseudo_tx_group_t *tx_grp; 2718 vnet_pseudo_tx_ring_t *tx_ringp; 2719 int i; 2720 2721 mac_perim_enter_by_mh(vnetp->hio_mh, &mph1); 2722 2723 tx_grp = &vnetp->tx_grp[0]; 2724 for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { 2725 tx_ringp = &tx_grp->rings[i]; 2726 if (tx_ringp->state & VNET_TXRING_HYBRID) { 2727 tx_ringp->state &= ~VNET_TXRING_HYBRID; 2728 tx_ringp->hw_rh = NULL; 2729 } 2730 } 2731 2732 rx_grp = &vnetp->rx_grp[0]; 2733 for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { 2734 rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX]; 2735 if (rx_ringp->hw_rh != NULL) { 2736 /* Stop the hwring */ 2737 mac_hwring_stop(rx_ringp->hw_rh); 2738 2739 /* Teardown the hwring */ 2740 mac_hwring_teardown(rx_ringp->hw_rh); 2741 rx_ringp->hw_rh = NULL; 2742 } 2743 } 2744 2745 if (vnetp->rx_hwgh != NULL) { 2746 vnetp->rx_hwgh = NULL; 2747 /* 2748 * First clear the permanent-quiesced flag of the RX srs then 2749 * restart the HW ring and the mac srs on the ring. 2750 */ 2751 mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE); 2752 mac_rx_client_restart(vnetp->hio_mch); 2753 } 2754 2755 mac_perim_exit(mph1); 2756 } 2757 2758 /* Bind pseudo ring to a LDC resource */ 2759 static int 2760 vnet_bind_vgenring(vnet_res_t *vresp) 2761 { 2762 vnet_t *vnetp; 2763 vnet_pseudo_rx_group_t *rx_grp; 2764 vnet_pseudo_rx_ring_t *rx_ringp; 2765 mac_perim_handle_t mph1; 2766 int rv; 2767 int type; 2768 2769 vnetp = vresp->vnetp; 2770 type = vresp->type; 2771 rx_grp = &vnetp->rx_grp[0]; 2772 2773 if (type == VIO_NET_RES_LDC_SERVICE) { 2774 /* 2775 * Ring Index 0 is the default ring in the group and is 2776 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring 2777 * is allocated statically and is reported to the mac layer 2778 * in vnet_m_capab(). So, all we need to do here, is save a 2779 * reference to the associated vresp. 2780 */ 2781 rx_ringp = &rx_grp->rings[0]; 2782 rx_ringp->hw_rh = (mac_ring_handle_t)vresp; 2783 vresp->rx_ringp = (void *)rx_ringp; 2784 return (0); 2785 } 2786 ASSERT(type == VIO_NET_RES_LDC_GUEST); 2787 2788 mac_perim_enter_by_mh(vnetp->mh, &mph1); 2789 2790 rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp); 2791 if (rx_ringp == NULL) { 2792 cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring", 2793 vnetp->instance); 2794 goto fail; 2795 } 2796 2797 /* Store the LDC resource itself as the ring handle */ 2798 rx_ringp->hw_rh = (mac_ring_handle_t)vresp; 2799 2800 /* 2801 * Save a reference to the ring in the resource for lookup during 2802 * unbind. Note this is only done for LDC resources. We don't need this 2803 * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its 2804 * rx rings are mapped to reserved pseudo rx rings (index 1 and 2). 2805 */ 2806 vresp->rx_ringp = (void *)rx_ringp; 2807 rx_ringp->state |= VNET_RXRING_LDC_GUEST; 2808 2809 /* Register the pseudo ring with upper-mac */ 2810 rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index); 2811 if (rv != 0) { 2812 rx_ringp->state &= ~VNET_RXRING_LDC_GUEST; 2813 rx_ringp->hw_rh = NULL; 2814 vnet_free_pseudo_rx_ring(vnetp, rx_ringp); 2815 goto fail; 2816 } 2817 2818 mac_perim_exit(mph1); 2819 return (0); 2820 fail: 2821 mac_perim_exit(mph1); 2822 return (1); 2823 } 2824 2825 /* Unbind pseudo ring from a LDC resource */ 2826 static void 2827 vnet_unbind_vgenring(vnet_res_t *vresp) 2828 { 2829 vnet_t *vnetp; 2830 vnet_pseudo_rx_group_t *rx_grp; 2831 vnet_pseudo_rx_ring_t *rx_ringp; 2832 mac_perim_handle_t mph1; 2833 int type; 2834 2835 vnetp = vresp->vnetp; 2836 type = vresp->type; 2837 rx_grp = &vnetp->rx_grp[0]; 2838 2839 if (vresp->rx_ringp == NULL) { 2840 return; 2841 } 2842 2843 if (type == VIO_NET_RES_LDC_SERVICE) { 2844 /* 2845 * Ring Index 0 is the default ring in the group and is 2846 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring 2847 * is allocated statically and is reported to the mac layer 2848 * in vnet_m_capab(). So, all we need to do here, is remove its 2849 * reference to the associated vresp. 2850 */ 2851 rx_ringp = &rx_grp->rings[0]; 2852 rx_ringp->hw_rh = NULL; 2853 vresp->rx_ringp = NULL; 2854 return; 2855 } 2856 ASSERT(type == VIO_NET_RES_LDC_GUEST); 2857 2858 mac_perim_enter_by_mh(vnetp->mh, &mph1); 2859 2860 rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp; 2861 vresp->rx_ringp = NULL; 2862 2863 if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) { 2864 /* Unregister the pseudo ring with upper-mac */ 2865 mac_group_rem_ring(rx_grp->handle, rx_ringp->handle); 2866 2867 rx_ringp->hw_rh = NULL; 2868 rx_ringp->state &= ~VNET_RXRING_LDC_GUEST; 2869 2870 /* Free the pseudo rx ring */ 2871 vnet_free_pseudo_rx_ring(vnetp, rx_ringp); 2872 } 2873 2874 mac_perim_exit(mph1); 2875 } 2876 2877 static void 2878 vnet_unbind_rings(vnet_res_t *vresp) 2879 { 2880 switch (vresp->type) { 2881 2882 case VIO_NET_RES_LDC_SERVICE: 2883 case VIO_NET_RES_LDC_GUEST: 2884 vnet_unbind_vgenring(vresp); 2885 break; 2886 2887 case VIO_NET_RES_HYBRID: 2888 vnet_unbind_hwrings(vresp->vnetp); 2889 break; 2890 2891 default: 2892 break; 2893 2894 } 2895 } 2896 2897 static int 2898 vnet_bind_rings(vnet_res_t *vresp) 2899 { 2900 int rv; 2901 2902 switch (vresp->type) { 2903 2904 case VIO_NET_RES_LDC_SERVICE: 2905 case VIO_NET_RES_LDC_GUEST: 2906 rv = vnet_bind_vgenring(vresp); 2907 break; 2908 2909 case VIO_NET_RES_HYBRID: 2910 rv = vnet_bind_hwrings(vresp->vnetp); 2911 break; 2912 2913 default: 2914 rv = 1; 2915 break; 2916 2917 } 2918 2919 return (rv); 2920 } 2921 2922 /* ARGSUSED */ 2923 int 2924 vnet_hio_stat(void *arg, uint_t stat, uint64_t *val) 2925 { 2926 vnet_t *vnetp = (vnet_t *)arg; 2927 2928 *val = mac_stat_get(vnetp->hio_mh, stat); 2929 return (0); 2930 } 2931 2932 /* 2933 * The start() and stop() routines for the Hybrid resource below, are just 2934 * dummy functions. This is provided to avoid resource type specific code in 2935 * vnet_start_resources() and vnet_stop_resources(). The starting and stopping 2936 * of the Hybrid resource happens in the context of the mac_client interfaces 2937 * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup(). 2938 */ 2939 /* ARGSUSED */ 2940 static int 2941 vnet_hio_start(void *arg) 2942 { 2943 return (0); 2944 } 2945 2946 /* ARGSUSED */ 2947 static void 2948 vnet_hio_stop(void *arg) 2949 { 2950 } 2951 2952 mblk_t * 2953 vnet_hio_tx(void *arg, mblk_t *mp) 2954 { 2955 vnet_pseudo_tx_ring_t *tx_ringp; 2956 mblk_t *nextp; 2957 mblk_t *ret_mp; 2958 2959 tx_ringp = (vnet_pseudo_tx_ring_t *)arg; 2960 for (;;) { 2961 nextp = mp->b_next; 2962 mp->b_next = NULL; 2963 2964 ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp); 2965 if (ret_mp != NULL) { 2966 ret_mp->b_next = nextp; 2967 mp = ret_mp; 2968 break; 2969 } 2970 2971 if ((mp = nextp) == NULL) 2972 break; 2973 } 2974 return (mp); 2975 } 2976 2977 static void 2978 vnet_hio_notify_cb(void *arg, mac_notify_type_t type) 2979 { 2980 vnet_t *vnetp = (vnet_t *)arg; 2981 mac_perim_handle_t mph; 2982 2983 mac_perim_enter_by_mh(vnetp->hio_mh, &mph); 2984 switch (type) { 2985 case MAC_NOTE_TX: 2986 vnet_tx_update(vnetp->hio_vhp); 2987 break; 2988 2989 default: 2990 break; 2991 } 2992 mac_perim_exit(mph); 2993 } 2994 2995 #ifdef VNET_IOC_DEBUG 2996 2997 /* 2998 * The ioctl entry point is used only for debugging for now. The ioctl commands 2999 * can be used to force the link state of the channel connected to vsw. 3000 */ 3001 static void 3002 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 3003 { 3004 struct iocblk *iocp; 3005 vnet_t *vnetp; 3006 3007 iocp = (struct iocblk *)(uintptr_t)mp->b_rptr; 3008 iocp->ioc_error = 0; 3009 vnetp = (vnet_t *)arg; 3010 3011 if (vnetp == NULL) { 3012 miocnak(q, mp, 0, EINVAL); 3013 return; 3014 } 3015 3016 switch (iocp->ioc_cmd) { 3017 3018 case VNET_FORCE_LINK_DOWN: 3019 case VNET_FORCE_LINK_UP: 3020 vnet_force_link_state(vnetp, q, mp); 3021 break; 3022 3023 default: 3024 iocp->ioc_error = EINVAL; 3025 miocnak(q, mp, 0, iocp->ioc_error); 3026 break; 3027 3028 } 3029 } 3030 3031 static void 3032 vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp) 3033 { 3034 mac_register_t *macp; 3035 mac_callbacks_t *cbp; 3036 vnet_res_t *vresp; 3037 3038 READ_ENTER(&vnetp->vsw_fp_rw); 3039 3040 vresp = vnetp->vsw_fp; 3041 if (vresp == NULL) { 3042 RW_EXIT(&vnetp->vsw_fp_rw); 3043 return; 3044 } 3045 3046 macp = &vresp->macreg; 3047 cbp = macp->m_callbacks; 3048 cbp->mc_ioctl(macp->m_driver, q, mp); 3049 3050 RW_EXIT(&vnetp->vsw_fp_rw); 3051 } 3052 3053 #else 3054 3055 static void 3056 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 3057 { 3058 vnet_t *vnetp; 3059 3060 vnetp = (vnet_t *)arg; 3061 3062 if (vnetp == NULL) { 3063 miocnak(q, mp, 0, EINVAL); 3064 return; 3065 } 3066 3067 /* ioctl support only for debugging */ 3068 miocnak(q, mp, 0, ENOTSUP); 3069 } 3070 3071 #endif 3072