/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/stream.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/debug.h>
#include <sys/ethernet.h>
#include <sys/dlpi.h>
#include <net/if.h>
#include <sys/mac_provider.h>
#include <sys/mac_client.h>
#include <sys/mac_client_priv.h>
#include <sys/mac_ether.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/note.h>
#include <sys/atomic.h>
#include <sys/vnet.h>
#include <sys/vlan.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/dds.h>
#include <sys/strsubr.h>
#include <sys/taskq.h>
/*
 * Function prototypes.
 */

/* DDI entrypoints */
static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);

/* MAC entrypoints */
static int vnet_m_stat(void *, uint_t, uint64_t *);
static int vnet_m_start(void *);
static void vnet_m_stop(void *);
static int vnet_m_promisc(void *, boolean_t);
static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
static int vnet_m_unicst(void *, const uint8_t *);
mblk_t *vnet_m_tx(void *, mblk_t *);
static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
#ifdef VNET_IOC_DEBUG
static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
#endif
static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
    const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
    mac_group_info_t *infop, mac_group_handle_t handle);
static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
static int vnet_ring_enable_intr(void *arg);
static int vnet_ring_disable_intr(void *arg);
static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
static int vnet_addmac(void *arg, const uint8_t *mac_addr);
static int vnet_remmac(void *arg, const uint8_t *mac_addr);

/* vnet internal functions */
static int vnet_unattach(vnet_t *vnetp);
static void vnet_ring_grp_init(vnet_t *vnetp);
static void vnet_ring_grp_uninit(vnet_t *vnetp);
static int vnet_mac_register(vnet_t *);
static int vnet_read_mac_address(vnet_t *vnetp);
static int vnet_bind_vgenring(vnet_res_t *vresp);
static void vnet_unbind_vgenring(vnet_res_t *vresp);
static int vnet_bind_hwrings(vnet_t *vnetp);
static void vnet_unbind_hwrings(vnet_t *vnetp);
static int vnet_bind_rings(vnet_res_t *vresp);
static void vnet_unbind_rings(vnet_res_t *vresp);
static int vnet_hio_stat(void *, uint_t, uint64_t *);
static int vnet_hio_start(void *);
static void vnet_hio_stop(void *);
static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type);
mblk_t *vnet_hio_tx(void *, mblk_t *);

/* Forwarding database (FDB) routines */
static void vnet_fdb_create(vnet_t *vnetp);
static void vnet_fdb_destroy(vnet_t *vnetp);
static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);

static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
static void vnet_tx_update(vio_net_handle_t vrh);
static void vnet_res_start_task(void *arg);
static void vnet_start_resources(vnet_t *vnetp);
static void vnet_stop_resources(vnet_t *vnetp);
static void vnet_dispatch_res_task(vnet_t *vnetp);
static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);

/* Exported to vnet_gen */
int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
void vnet_dds_cleanup_hio(vnet_t *vnetp);

static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
    vnet_res_t *vresp);
static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
static void vnet_hio_destroy_kstats(kstat_t *ksp);

/* Exported to vnet_dds */
int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
void vnet_hio_mac_cleanup(vnet_t *vnetp);

/* Externs that are imported from vnet_gen */
extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
    const uint8_t *macaddr, void **vgenhdl);
extern int vgen_init_mdeg(void *arg);
extern void vgen_uninit(void *arg);
extern int vgen_dds_tx(void *arg, void *dmsg);
extern void vgen_mod_init(void);
extern int vgen_mod_cleanup(void);
extern void vgen_mod_fini(void);
extern int vgen_enable_intr(void *arg);
extern int vgen_disable_intr(void *arg);
extern mblk_t *vgen_poll(void *arg, int bytes_to_pickup);

/* Externs that are imported from vnet_dds */
extern void vdds_mod_init(void);
extern void vdds_mod_fini(void);
extern int vdds_init(vnet_t *vnetp);
extern void vdds_cleanup(vnet_t *vnetp);
extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
extern void vdds_cleanup_hybrid_res(void *arg);
extern void vdds_cleanup_hio(vnet_t *vnetp);

/* Externs imported from mac_impl */
extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *);

#define	DRV_NAME	"vnet"

#define	VNET_FDBE_REFHOLD(p)				\
{							\
	atomic_inc_32(&(p)->refcnt);			\
	ASSERT((p)->refcnt != 0);			\
}

#define	VNET_FDBE_REFRELE(p)				\
{							\
	ASSERT((p)->refcnt != 0);			\
	atomic_dec_32(&(p)->refcnt);			\
}

#ifdef VNET_IOC_DEBUG
#define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL | MC_GETCAPAB)
#else
#define	VNET_M_CALLBACK_FLAGS	(MC_GETCAPAB)
#endif

static mac_callbacks_t vnet_m_callbacks = {
	VNET_M_CALLBACK_FLAGS,
	vnet_m_stat,
	vnet_m_start,
	vnet_m_stop,
	vnet_m_promisc,
	vnet_m_multicst,
	NULL,	/* m_unicst entry must be NULL while rx rings are exposed */
	NULL,	/* m_tx entry must be NULL while tx rings are exposed */
	vnet_m_ioctl,
	vnet_m_capab,
	NULL
};

static mac_callbacks_t vnet_hio_res_callbacks = {
	0,
	vnet_hio_stat,
	vnet_hio_start,
	vnet_hio_stop,
	NULL,
	NULL,
	NULL,
	vnet_hio_tx,
	NULL,
	NULL,
	NULL
};
/*
 * Linked list of "vnet_t" structures - one per instance.
 */
static vnet_t *vnet_headp = NULL;
static krwlock_t vnet_rw;

/* Tunables */
uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT; /* tx timeout in msec */
uint32_t vnet_ldc_mtu = VNET_LDC_MTU;	/* ldc mtu */

/* Configure tx serialization in mac layer for the vnet device */
boolean_t vnet_mac_tx_serialize = B_TRUE;

/*
 * Set this to non-zero to enable additional internal receive buffer pools
 * based on the MTU of the device for better performance at the cost of more
 * memory consumption. This is turned off by default, to use allocb(9F) for
 * receive buffer allocations of sizes > 2K.
 */
boolean_t vnet_jumbo_rxpools = B_FALSE;

/* # of chains in fdb hash table */
uint32_t vnet_fdb_nchains = VNET_NFDB_HASH;

/* Internal tunables */
uint32_t vnet_ethermtu = 1500;	/* mtu of the device */

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not
 * be used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in vsw and also in the other vnets
 * connected to the same vsw.
 */
uint16_t vnet_default_vlan_id = 1;

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vnet_fdbe_refcnt_delay = 10;

static struct ether_addr etherbroadcastaddr = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

/* mac_open() retry delay in usec */
uint32_t vnet_mac_open_delay = 100;	/* 0.1 ms */

/* max # of mac_open() retries */
uint32_t vnet_mac_open_retries = 100;

/*
 * Property names
 */
static char macaddr_propname[] = "local-mac-address";

/*
 * This is the string displayed by modinfo(1m).
 */
static char vnet_ident[] = "vnet driver";

extern struct mod_ops mod_driverops;

static struct cb_ops cb_vnetops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	(int)(D_MP)		/* cb_flag */
};

static struct dev_ops vnetops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vnetattach,		/* devo_attach */
	vnetdetach,		/* devo_detach */
	nodev,			/* devo_reset */
	&cb_vnetops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_supported,	/* devo_quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	vnet_ident,		/* ID string */
	&vnetops		/* driver specific ops */
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};
#ifdef DEBUG

/*
 * Print debug messages - set to 0xf to enable all msgs
 */
int vnet_dbglevel = 0x8;

static void
debug_printf(const char *fname, void *arg, const char *fmt, ...)
{
	char buf[512];
	va_list ap;
	vnet_t *vnetp = (vnet_t *)arg;
	char *bufp = buf;

	if (vnetp == NULL) {
		(void) sprintf(bufp, "%s: ", fname);
		bufp += strlen(bufp);
	} else {
		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
		bufp += strlen(bufp);
	}
	va_start(ap, fmt);
	(void) vsprintf(bufp, fmt, ap);
	va_end(ap);
	cmn_err(CE_CONT, "%s\n", buf);
}

#endif

/* _init(9E): initialize the loadable module */
int
_init(void)
{
	int status;

	DBG1(NULL, "enter\n");

	mac_init_ops(&vnetops, "vnet");
	status = mod_install(&modlinkage);
	if (status != 0) {
		mac_fini_ops(&vnetops);
	}
	vdds_mod_init();
	vgen_mod_init();
	DBG1(NULL, "exit(%d)\n", status);
	return (status);
}

/* _fini(9E): prepare the module for unloading. */
int
_fini(void)
{
	int status;

	DBG1(NULL, "enter\n");

	status = vgen_mod_cleanup();
	if (status != 0)
		return (status);

	status = mod_remove(&modlinkage);
	if (status != 0)
		return (status);
	mac_fini_ops(&vnetops);
	vgen_mod_fini();
	vdds_mod_fini();

	DBG1(NULL, "exit(%d)\n", status);
	return (status);
}

/* _info(9E): return information about the loadable module */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
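
/*
 * Note on ordering in the module entry points above: mac_init_ops() is
 * called before mod_install() so the GLDv3 framework knows about this
 * driver before any instance can attach, and mac_fini_ops() is only
 * called once mod_remove() has succeeded. Likewise, _fini() attempts
 * vgen_mod_cleanup() first, so the module is not removed while vgen
 * still holds per-module state.
 */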
/*
 * attach(9E): attach a device to the system.
 * called once for each instance of the device on the system.
 */
static int
vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	vnet_t *vnetp;
	int status;
	int instance;
	uint64_t reg;
	char qname[TASKQ_NAMELEN];
	vnet_attach_progress_t attach_progress;

	attach_progress = AST_init;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
	case DDI_PM_RESUME:
	default:
		/* nothing allocated yet; fail directly */
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	DBG1(NULL, "instance(%d) enter\n", instance);

	/* allocate vnet_t and mac_t structures */
	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
	vnetp->dip = dip;
	vnetp->instance = instance;
	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
	attach_progress |= AST_vnet_alloc;

	vnet_ring_grp_init(vnetp);
	attach_progress |= AST_ring_init;

	status = vdds_init(vnetp);
	if (status != 0) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_vdds_init;

	/* setup links to vnet_t from both devinfo and mac_t */
	ddi_set_driver_private(dip, (caddr_t)vnetp);

	/* read the mac address */
	status = vnet_read_mac_address(vnetp);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_read_macaddr;

	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "reg", -1);
	if (reg == -1) {
		goto vnet_attach_fail;
	}
	vnetp->reg = reg;

	vnet_fdb_create(vnetp);
	attach_progress |= AST_fdbh_alloc;

	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
	    TASKQ_DEFAULTPRI, 0)) == NULL) {
		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
		    instance);
		goto vnet_attach_fail;
	}
	attach_progress |= AST_taskq_create;

	/* add to the list of vnet devices */
	WRITE_ENTER(&vnet_rw);
	vnetp->nextp = vnet_headp;
	vnet_headp = vnetp;
	RW_EXIT(&vnet_rw);

	attach_progress |= AST_vnet_list;

	/*
	 * Initialize the generic vnet plugin which provides communication via
	 * sun4v LDC (logical domain channel) based resources. This involves 2
	 * steps; first, vgen_init() is invoked to read the various properties
	 * of the vnet device from its MD node (including its mtu which is
	 * needed to mac_register()) and obtain a handle to the vgen layer.
	 * After mac_register() is done and we have a mac handle, we then
	 * invoke vgen_init_mdeg() which registers with the MD event
	 * generator (mdeg) framework to allow LDC resource notifications.
	 * Note: this sequence also allows us to report the correct default #
	 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked
	 * in the context of mac_register(); and avoids conflicting with
	 * dynamic pseudo rx rings which get added/removed as a result of mdeg
	 * events in vgen.
	 */
	status = vgen_init(vnetp, reg, vnetp->dip,
	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
	if (status != DDI_SUCCESS) {
		DERR(vnetp, "vgen_init() failed\n");
		goto vnet_attach_fail;
	}
	attach_progress |= AST_vgen_init;

	status = vnet_mac_register(vnetp);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	vnetp->link_state = LINK_STATE_UNKNOWN;
	attach_progress |= AST_macreg;

	status = vgen_init_mdeg(vnetp->vgenhdl);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_init_mdeg;

	vnetp->attach_progress = attach_progress;

	DBG1(NULL, "instance(%d) exit\n", instance);
	return (DDI_SUCCESS);

vnet_attach_fail:
	vnetp->attach_progress = attach_progress;
	status = vnet_unattach(vnetp);
	ASSERT(status == 0);
	return (DDI_FAILURE);
}

/*
 * detach(9E): detach a device from the system.
 */
static int
vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vnet_t *vnetp;
	int instance;

	instance = ddi_get_instance(dip);
	DBG1(NULL, "instance(%d) enter\n", instance);

	vnetp = ddi_get_driver_private(dip);
	if (vnetp == NULL) {
		goto vnet_detach_fail;
	}

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
	case DDI_PM_SUSPEND:
	default:
		goto vnet_detach_fail;
	}

	if (vnet_unattach(vnetp) != 0) {
		goto vnet_detach_fail;
	}

	return (DDI_SUCCESS);

vnet_detach_fail:
	return (DDI_FAILURE);
}
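
/*
 * Note: attach_progress is a bit-mask recording each step of vnetattach()
 * as it completes (AST_vnet_alloc, AST_ring_init, and so on). On failure,
 * vnetattach() saves the mask and calls vnet_unattach() below, which
 * undoes exactly the steps that had completed; vnetdetach() reuses the
 * same teardown path with the fully-set mask.
 */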
/*
 * Common routine to handle vnetattach() failure and vnetdetach(). Note that
 * the only reason this function could fail is if mac_unregister() fails.
 * Otherwise, this function must ensure that all resources are freed and
 * return success.
 */
static int
vnet_unattach(vnet_t *vnetp)
{
	vnet_attach_progress_t attach_progress;

	attach_progress = vnetp->attach_progress;

	/*
	 * Disable the mac device in the gldv3 subsystem. This can fail, in
	 * particular if there are still any open references to this mac
	 * device; in which case we just return failure without continuing to
	 * detach further.
	 * If it succeeds, we then invoke vgen_uninit() which should unregister
	 * any pseudo rings registered with the mac layer. Note we keep the
	 * AST_macreg flag on, so we can unregister with the mac layer at
	 * the end of this routine.
	 */
	if (attach_progress & AST_macreg) {
		if (mac_disable(vnetp->mh) != 0) {
			return (1);
		}
	}

	/*
	 * Now that we have disabled the device, we must finish all other steps
	 * and successfully return from this function; otherwise we will end up
	 * leaving the device in a broken/unusable state.
	 *
	 * First, release any hybrid resources assigned to this vnet device.
	 */
	if (attach_progress & AST_vdds_init) {
		vdds_cleanup(vnetp);
		attach_progress &= ~AST_vdds_init;
	}

	/*
	 * Uninit vgen. This stops further mdeg callbacks to this vnet
	 * device and/or its ports; and detaches any existing ports.
	 */
	if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
		vgen_uninit(vnetp->vgenhdl);
		attach_progress &= ~AST_vgen_init;
		attach_progress &= ~AST_init_mdeg;
	}

	/* Destroy the taskq. */
	if (attach_progress & AST_taskq_create) {
		ddi_taskq_destroy(vnetp->taskqp);
		attach_progress &= ~AST_taskq_create;
	}

	/* Destroy fdb. */
	if (attach_progress & AST_fdbh_alloc) {
		vnet_fdb_destroy(vnetp);
		attach_progress &= ~AST_fdbh_alloc;
	}

	/* Remove from the device list */
	if (attach_progress & AST_vnet_list) {
		vnet_t **vnetpp;
		/* unlink from instance(vnet_t) list */
		WRITE_ENTER(&vnet_rw);
		for (vnetpp = &vnet_headp; *vnetpp;
		    vnetpp = &(*vnetpp)->nextp) {
			if (*vnetpp == vnetp) {
				*vnetpp = vnetp->nextp;
				break;
			}
		}
		RW_EXIT(&vnet_rw);
		attach_progress &= ~AST_vnet_list;
	}

	if (attach_progress & AST_ring_init) {
		vnet_ring_grp_uninit(vnetp);
		attach_progress &= ~AST_ring_init;
	}

	if (attach_progress & AST_macreg) {
		VERIFY(mac_unregister(vnetp->mh) == 0);
		vnetp->mh = NULL;
		attach_progress &= ~AST_macreg;
	}

	if (attach_progress & AST_vnet_alloc) {
		rw_destroy(&vnetp->vrwlock);
		rw_destroy(&vnetp->vsw_fp_rw);
		attach_progress &= ~AST_vnet_alloc;
		KMEM_FREE(vnetp);
	}

	return (0);
}

/* enable the device for transmit/receive */
static int
vnet_m_start(void *arg)
{
	vnet_t *vnetp = arg;

	DBG1(vnetp, "enter\n");

	WRITE_ENTER(&vnetp->vrwlock);
	vnetp->flags |= VNET_STARTED;
	vnet_start_resources(vnetp);
	RW_EXIT(&vnetp->vrwlock);

	DBG1(vnetp, "exit\n");
	return (VNET_SUCCESS);
}

/* stop transmit/receive for the device */
static void
vnet_m_stop(void *arg)
{
	vnet_t *vnetp = arg;

	DBG1(vnetp, "enter\n");

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->flags & VNET_STARTED) {
		/*
		 * Set the flags appropriately; this should prevent starting of
		 * any new resources that are added (see
		 * vnet_res_start_task()), while we release the vrwlock in
		 * vnet_stop_resources() before stopping each resource.
		 */
		vnetp->flags &= ~VNET_STARTED;
		vnetp->flags |= VNET_STOPPING;
		vnet_stop_resources(vnetp);
		vnetp->flags &= ~VNET_STOPPING;
	}
	RW_EXIT(&vnetp->vrwlock);

	DBG1(vnetp, "exit\n");
}
/* set the unicast mac address of the device */
static int
vnet_m_unicst(void *arg, const uint8_t *macaddr)
{
	_NOTE(ARGUNUSED(macaddr))

	vnet_t *vnetp = arg;

	DBG1(vnetp, "enter\n");
	/*
	 * NOTE: setting the mac address dynamically is not supported.
	 */
	DBG1(vnetp, "exit\n");

	return (VNET_FAILURE);
}

/* enable/disable a multicast address */
static int
vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	vnet_t *vnetp = arg;
	vnet_res_t *vresp;
	mac_register_t *macp;
	mac_callbacks_t *cbp;
	int rv = VNET_SUCCESS;

	DBG1(vnetp, "enter\n");

	READ_ENTER(&vnetp->vrwlock);
	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
		if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
			macp = &vresp->macreg;
			cbp = macp->m_callbacks;
			rv = cbp->mc_multicst(macp->m_driver, add, mca);
		}
	}
	RW_EXIT(&vnetp->vrwlock);

	DBG1(vnetp, "exit(%d)\n", rv);
	return (rv);
}

/* set or clear promiscuous mode on the device */
static int
vnet_m_promisc(void *arg, boolean_t on)
{
	_NOTE(ARGUNUSED(on))

	vnet_t *vnetp = arg;

	DBG1(vnetp, "enter\n");
	/*
	 * NOTE: setting promiscuous mode is not supported; just return
	 * success.
	 */
	DBG1(vnetp, "exit\n");
	return (VNET_SUCCESS);
}
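
/*
 * Illustrative summary of the switching decision made by
 * vnet_tx_ring_send() below for each outbound frame:
 *
 *	lookup dest mac in fdb:
 *	    found	-> transmit over that resource (LDC to the peer
 *			   vnet, or to the vswitch port)
 *	    not found	-> unicast and a Hybrid resource is present:
 *			       transmit over the Hybrid (hw) resource
 *			   otherwise (bcast/mcast, or no Hybrid resource):
 *			       transmit to the vswitch
 */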
/*
 * Transmit a chain of packets. This function provides switching functionality
 * based on the destination mac address to reach other guests (within ldoms)
 * or external hosts.
 */
mblk_t *
vnet_tx_ring_send(void *arg, mblk_t *mp)
{
	vnet_pseudo_tx_ring_t *tx_ringp;
	vnet_t *vnetp;
	vnet_res_t *vresp;
	mblk_t *next;
	mblk_t *resid_mp;
	mac_register_t *macp;
	struct ether_header *ehp;
	boolean_t is_unicast;
	boolean_t is_pvid;	/* non-default pvid ? */
	boolean_t hres;		/* Hybrid resource ? */
	void *tx_arg;

	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
	vnetp = (vnet_t *)tx_ringp->vnetp;
	DBG1(vnetp, "enter\n");
	ASSERT(mp != NULL);

	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;

	while (mp != NULL) {

		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Find fdb entry for the destination
		 * and hold a reference to it.
		 */
		ehp = (struct ether_header *)mp->b_rptr;
		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
		if (vresp != NULL) {
			/*
			 * Destination found in FDB.
			 * The destination is a vnet device within ldoms
			 * and directly reachable, invoke the tx function
			 * in the fdb entry.
			 */
			macp = &vresp->macreg;
			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed */
				mp->b_next = next;
				break;
			}
		} else {
			is_unicast = !(IS_BROADCAST(ehp) ||
			    (IS_MULTICAST(ehp)));
			/*
			 * Destination is not in FDB.
			 * If the destination is broadcast or multicast,
			 * then forward the packet to vswitch.
			 * If a Hybrid resource is available, then send the
			 * unicast packet via the hybrid resource, otherwise
			 * forward it to vswitch.
			 */
			READ_ENTER(&vnetp->vsw_fp_rw);

			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
				vresp = vnetp->hio_fp;
				hres = B_TRUE;
			} else {
				vresp = vnetp->vsw_fp;
				hres = B_FALSE;
			}
			if (vresp == NULL) {
				/*
				 * no fdb entry to vsw? drop the packet.
				 */
				RW_EXIT(&vnetp->vsw_fp_rw);
				freemsg(mp);
				mp = next;
				continue;
			}

			/* ref hold the fdb entry to vsw */
			VNET_FDBE_REFHOLD(vresp);

			RW_EXIT(&vnetp->vsw_fp_rw);

			/*
			 * In the case of a hybrid resource we need to insert
			 * the tag for the pvid case here; unlike packets that
			 * are destined to a vnet/vsw in which case the vgen
			 * layer does the tagging before sending it over ldc.
			 */
			if (hres == B_TRUE) {
				/*
				 * Determine if the frame being transmitted
				 * over the hybrid resource is untagged. If so,
				 * insert the tag before transmitting.
				 */
				if (is_pvid == B_TRUE &&
				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {

					mp = vnet_vlan_insert_tag(mp,
					    vnetp->pvid);
					if (mp == NULL) {
						VNET_FDBE_REFRELE(vresp);
						mp = next;
						continue;
					}

				}

				macp = &vresp->macreg;
				tx_arg = tx_ringp;
			} else {
				macp = &vresp->macreg;
				tx_arg = macp->m_driver;
			}
			resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed */
				mp->b_next = next;
				break;
			}
		}

		mp = next;
	}

	DBG1(vnetp, "exit\n");
	return (mp);
}

/* get statistics from the device */
int
vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnet_t *vnetp = arg;
	vnet_res_t *vresp;
	mac_register_t *macp;
	mac_callbacks_t *cbp;
	uint64_t val_total = 0;

	DBG1(vnetp, "enter\n");

	/*
	 * get the specified statistic from each transport and return the
	 * aggregate val. This obviously only works for counters.
	 */
	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
		return (ENOTSUP);
	}

	READ_ENTER(&vnetp->vrwlock);
	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
		macp = &vresp->macreg;
		cbp = macp->m_callbacks;
		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
			val_total += *val;
	}
	RW_EXIT(&vnetp->vrwlock);

	*val = val_total;

	DBG1(vnetp, "exit\n");
	return (0);
}

static void
vnet_ring_grp_init(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t *rx_grp;
	vnet_pseudo_rx_ring_t *rx_ringp;
	vnet_pseudo_tx_group_t *tx_grp;
	vnet_pseudo_tx_ring_t *tx_ringp;
	int i;

	tx_grp = &vnetp->tx_grp[0];
	tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
	    VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
	for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
		tx_ringp[i].state |= VNET_TXRING_SHARED;
	}
	tx_grp->rings = tx_ringp;
	tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;

	rx_grp = &vnetp->rx_grp[0];
	rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
	rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
	rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
	    rx_grp->max_ring_cnt, KM_SLEEP);

	/*
	 * Setup the first 3 Pseudo RX Rings that are reserved;
	 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
	 */
	rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
	rx_ringp[0].index = 0;
	rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
	rx_ringp[1].index = 1;
	rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
	rx_ringp[2].index = 2;

	rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	rx_grp->rings = rx_ringp;

	for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	    i < rx_grp->max_ring_cnt; i++) {
		rx_ringp = &rx_grp->rings[i];
		rx_ringp->state = VNET_RXRING_FREE;
		rx_ringp->index = i;
	}
}
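
/*
 * Pseudo ring layout set up by vnet_ring_grp_init() above, assuming the
 * default counts referenced elsewhere in this file
 * (VNET_NUM_PSEUDO_TXRINGS = 2, VNET_NUM_PSEUDO_RXRINGS_DEFAULT = 3):
 *
 *	tx_grp[0]:	ring 0, ring 1	(shared by all resources)
 *	rx_grp[0]:	ring 0		reserved: LDC service to vswitch
 *			ring 1, ring 2	reserved: Hybrid resource hw rings
 *			ring 3..max-1	free; bound on demand to LDCs that
 *					connect to peer vnets
 */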
static void
vnet_ring_grp_uninit(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t *rx_grp;
	vnet_pseudo_tx_group_t *tx_grp;

	tx_grp = &vnetp->tx_grp[0];
	if (tx_grp->rings != NULL) {
		ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
		kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
		    tx_grp->ring_cnt);
		tx_grp->rings = NULL;
	}

	rx_grp = &vnetp->rx_grp[0];
	if (rx_grp->rings != NULL) {
		ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
		ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
		kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
		    rx_grp->max_ring_cnt);
		rx_grp->rings = NULL;
	}
}

static vnet_pseudo_rx_ring_t *
vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t *rx_grp;
	vnet_pseudo_rx_ring_t *rx_ringp;
	int index;

	rx_grp = &vnetp->rx_grp[0];
	WRITE_ENTER(&rx_grp->lock);

	if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
		/* no rings available */
		RW_EXIT(&rx_grp->lock);
		return (NULL);
	}

	for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	    index < rx_grp->max_ring_cnt; index++) {
		rx_ringp = &rx_grp->rings[index];
		if (rx_ringp->state == VNET_RXRING_FREE) {
			rx_ringp->state |= VNET_RXRING_INUSE;
			rx_grp->ring_cnt++;
			break;
		}
	}

	RW_EXIT(&rx_grp->lock);
	return (rx_ringp);
}

static void
vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
{
	vnet_pseudo_rx_group_t *rx_grp;

	ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
	rx_grp = &vnetp->rx_grp[0];
	WRITE_ENTER(&rx_grp->lock);

	if (ringp->state != VNET_RXRING_FREE) {
		ringp->state = VNET_RXRING_FREE;
		ringp->handle = NULL;
		rx_grp->ring_cnt--;
	}

	RW_EXIT(&rx_grp->lock);
}

/* wrapper function for mac_register() */
static int
vnet_mac_register(vnet_t *vnetp)
{
	mac_register_t *macp;
	int err;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
		return (DDI_FAILURE);
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vnetp;
	macp->m_dip = vnetp->dip;
	macp->m_src_addr = vnetp->curr_macaddr;
	macp->m_callbacks = &vnet_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = vnetp->mtu;
	macp->m_margin = VLAN_TAGSZ;

	/*
	 * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to
	 * workaround tx lock contention issues in nxge.
	 */
	macp->m_v12n = MAC_VIRT_LEVEL1;
	if (vnet_mac_tx_serialize == B_TRUE) {
		macp->m_v12n |= MAC_VIRT_SERIALIZE;
	}

	/*
	 * Finally, we're ready to register ourselves with the MAC layer
	 * interface; if this succeeds, we're all ready to start()
	 */
	err = mac_register(macp, &vnetp->mh);
	mac_free(macp);
	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
}
/* read the mac address of the device */
static int
vnet_read_mac_address(vnet_t *vnetp)
{
	uchar_t *macaddr;
	uint32_t size;
	int rv;

	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
		    macaddr_propname, rv);
		return (DDI_FAILURE);
	}
	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
	ddi_prop_free(macaddr);

	return (DDI_SUCCESS);
}

static void
vnet_fdb_create(vnet_t *vnetp)
{
	char hashname[MAXNAMELEN];

	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
	    vnetp->instance);
	vnetp->fdb_nchains = vnet_fdb_nchains;
	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname,
	    vnetp->fdb_nchains, mod_hash_null_valdtor, sizeof (void *));
}

static void
vnet_fdb_destroy(vnet_t *vnetp)
{
	/* destroy fdb-hash-table */
	if (vnetp->fdb_hashp != NULL) {
		mod_hash_destroy_hash(vnetp->fdb_hashp);
		vnetp->fdb_hashp = NULL;
		vnetp->fdb_nchains = 0;
	}
}

/*
 * Add an entry into the fdb.
 */
void
vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
{
	uint64_t addr = 0;
	int rv;

	KEY_HASH(addr, vresp->rem_macaddr);

	/*
	 * If the entry being added corresponds to LDC_SERVICE resource,
	 * that is, vswitch connection, it is added to the hash and also
	 * the entry is cached, an additional reference count reflects
	 * this. The HYBRID resource is not added to the hash, but only
	 * cached, as it is only used for sending out packets for unknown
	 * unicast destinations.
	 */
	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
	    (vresp->refcnt = 1) : (vresp->refcnt = 0);

	/*
	 * Note: duplicate keys will be rejected by mod_hash.
	 */
	if (vresp->type != VIO_NET_RES_HYBRID) {
		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
		    (mod_hash_val_t)vresp);
		if (rv != 0) {
			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
			return;
		}
	}

	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
		/* Cache the fdb entry to vsw-port */
		WRITE_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->vsw_fp == NULL)
			vnetp->vsw_fp = vresp;
		RW_EXIT(&vnetp->vsw_fp_rw);
	} else if (vresp->type == VIO_NET_RES_HYBRID) {
		/* Cache the fdb entry to hybrid resource */
		WRITE_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->hio_fp == NULL)
			vnetp->hio_fp = vresp;
		RW_EXIT(&vnetp->vsw_fp_rw);
	}
}
/*
 * Remove an entry from fdb.
 */
static void
vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
{
	uint64_t addr = 0;
	int rv;
	uint32_t refcnt;
	vnet_res_t *tmp;

	KEY_HASH(addr, vresp->rem_macaddr);

	/*
	 * Remove the entry from fdb hash table.
	 * This prevents further references to this fdb entry.
	 */
	if (vresp->type != VIO_NET_RES_HYBRID) {
		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
		    (mod_hash_val_t *)&tmp);
		if (rv != 0) {
			/*
			 * As the resources are added to the hash only
			 * after they are started, this can occur if
			 * a resource unregisters before it is ever started.
			 */
			return;
		}
	}

	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
		WRITE_ENTER(&vnetp->vsw_fp_rw);

		ASSERT(tmp == vnetp->vsw_fp);
		vnetp->vsw_fp = NULL;

		RW_EXIT(&vnetp->vsw_fp_rw);
	} else if (vresp->type == VIO_NET_RES_HYBRID) {
		WRITE_ENTER(&vnetp->vsw_fp_rw);

		vnetp->hio_fp = NULL;

		RW_EXIT(&vnetp->vsw_fp_rw);
	}

	/*
	 * If there are threads already ref holding before the entry was
	 * removed from hash table, then wait for ref count to drop to zero.
	 */
	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
	    (refcnt = 1) : (refcnt = 0);
	while (vresp->refcnt > refcnt) {
		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
	}
}

/*
 * Search fdb for a given mac address. If an entry is found, hold
 * a reference to it and return the entry; else returns NULL.
 */
static vnet_res_t *
vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
{
	uint64_t key = 0;
	vnet_res_t *vresp;
	int rv;

	KEY_HASH(key, addrp->ether_addr_octet);

	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);

	if (rv != 0)
		return (NULL);

	return (vresp);
}

/*
 * Callback function provided to mod_hash_find_cb(). After finding the fdb
 * entry corresponding to the key (macaddr), this callback will be invoked by
 * mod_hash_find_cb() to atomically increment the reference count on the fdb
 * entry before returning the found entry.
 */
static void
vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
{
	_NOTE(ARGUNUSED(key))
	VNET_FDBE_REFHOLD((vnet_res_t *)val);
}
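
/*
 * Illustrative use of the fdb entry reference protocol (this is the
 * pattern used by vnet_tx_ring_send() above): vnet_fdbe_find() returns
 * the entry with its refcnt already incremented via vnet_fdbe_find_cb(),
 * so a caller does roughly:
 *
 *	vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
 *	if (vresp != NULL) {
 *		(transmit using vresp->macreg callbacks)
 *		VNET_FDBE_REFRELE(vresp);
 *	}
 *
 * vnet_fdbe_del() relies on this pairing: after removing the entry from
 * the hash it waits, polling every vnet_fdbe_refcnt_delay microseconds,
 * for all such outstanding references to drain before the entry can be
 * freed by its owner.
 */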
/*
 * Frames received that are tagged with the pvid of the vnet device must be
 * untagged before sending up the stack. This function walks the chain of rx
 * frames, untags any such frames and returns the updated chain.
 *
 * Arguments:
 *    pvid:  pvid of the vnet device for which packets are being received
 *    mp:    head of pkt chain to be validated and untagged
 *
 * Returns (via mp):
 *    head of the updated chain of packets
 */
static void
vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
{
	struct ether_vlan_header *evhp;
	mblk_t *bp;
	mblk_t *bpt;
	mblk_t *bph;
	mblk_t *bpn;

	bpn = bph = bpt = NULL;

	for (bp = *mp; bp != NULL; bp = bpn) {

		bpn = bp->b_next;
		bp->b_next = bp->b_prev = NULL;

		evhp = (struct ether_vlan_header *)bp->b_rptr;

		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {

			bp = vnet_vlan_remove_tag(bp);
			if (bp == NULL) {
				continue;
			}

		}

		/* build a chain of processed packets */
		if (bph == NULL) {
			bph = bpt = bp;
		} else {
			bpt->b_next = bp;
			bpt = bp;
		}

	}

	*mp = bph;
}

static void
vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
{
	vnet_res_t *vresp = (vnet_res_t *)vrh;
	vnet_t *vnetp = vresp->vnetp;
	vnet_pseudo_rx_ring_t *ringp;

	if ((vnetp == NULL) || (vnetp->mh == NULL)) {
		freemsgchain(mp);
		return;
	}

	ringp = vresp->rx_ringp;
	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
}

void
vnet_tx_update(vio_net_handle_t vrh)
{
	vnet_res_t *vresp = (vnet_res_t *)vrh;
	vnet_t *vnetp = vresp->vnetp;
	vnet_pseudo_tx_ring_t *tx_ringp;
	vnet_pseudo_tx_group_t *tx_grp;
	int i;

	if (vnetp == NULL || vnetp->mh == NULL) {
		return;
	}

	/*
	 * Currently, the tx hwring API (used to access rings that belong to
	 * a Hybrid IO resource) does not provide us a per ring flow ctrl
	 * update; also the pseudo rings are shared by the ports/ldcs in the
	 * vgen layer. Thus we can't figure out which pseudo ring is being
	 * re-enabled for transmits. To work around this, when we get a tx
	 * restart notification from below, we simply propagate that to all
	 * the tx pseudo rings registered with the mac layer above.
	 *
	 * There are a couple of side effects with this approach, but they are
	 * not harmful, as outlined below:
	 *
	 * A) We might send an invalid ring_update() for a ring that is not
	 * really flow controlled. This will not have any effect in the mac
	 * layer and packets will continue to be transmitted on that ring.
	 *
	 * B) We might end up clearing the flow control in the mac layer for
	 * a ring that is still flow controlled in the underlying resource.
	 * This will result in the mac layer restarting transmit, only to be
	 * flow controlled again on that ring.
	 */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
	}
}
/*
 * Update the new mtu of vnet into the mac layer. First check if the device
 * has been plumbed and if so fail the mtu update. Returns 0 on success.
 */
int
vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
{
	int rv;

	if (vnetp == NULL || vnetp->mh == NULL) {
		return (EINVAL);
	}

	WRITE_ENTER(&vnetp->vrwlock);

	if (vnetp->flags & VNET_STARTED) {
		RW_EXIT(&vnetp->vrwlock);
		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
		    "update as the device is plumbed\n",
		    vnetp->instance);
		return (EBUSY);
	}

	/* update mtu in the mac layer */
	rv = mac_maxsdu_update(vnetp->mh, mtu);
	if (rv != 0) {
		RW_EXIT(&vnetp->vrwlock);
		cmn_err(CE_NOTE,
		    "!vnet%d: Unable to update mtu with mac layer\n",
		    vnetp->instance);
		return (EIO);
	}

	vnetp->mtu = mtu;

	RW_EXIT(&vnetp->vrwlock);

	return (0);
}

/*
 * Update the link state of vnet to the mac layer.
 */
void
vnet_link_update(vnet_t *vnetp, link_state_t link_state)
{
	if (vnetp == NULL || vnetp->mh == NULL) {
		return;
	}

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->link_state == link_state) {
		RW_EXIT(&vnetp->vrwlock);
		return;
	}
	vnetp->link_state = link_state;
	RW_EXIT(&vnetp->vrwlock);

	mac_link_update(vnetp->mh, link_state);
}
/*
 * vio_net_resource_reg -- An interface called to register a resource
 *	with vnet.
 *	macp -- a GLDv3 mac_register that has all the details of
 *		a resource and its callbacks etc.
 *	type -- resource type.
 *	local_macaddr -- resource's MAC address. This is used to
 *		associate a resource with a corresponding vnet.
 *	rem_macaddr -- remote side MAC address. This is ignored for
 *		the Hybrid resources.
 *	vhp -- A handle returned to the caller.
 *	vcb -- A set of callbacks provided to the callers.
 */
int
vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
    ether_addr_t local_macaddr, ether_addr_t rem_macaddr,
    vio_net_handle_t *vhp, vio_net_callbacks_t *vcb)
{
	vnet_t *vnetp;
	vnet_res_t *vresp;

	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
	ether_copy(local_macaddr, vresp->local_macaddr);
	ether_copy(rem_macaddr, vresp->rem_macaddr);
	vresp->type = type;
	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));

	DBG1(NULL, "Resource Registering type=0x%X\n", type);

	READ_ENTER(&vnet_rw);
	vnetp = vnet_headp;
	while (vnetp != NULL) {
		if (VNET_MATCH_RES(vresp, vnetp)) {
			vresp->vnetp = vnetp;

			/* Setup kstats for hio resource */
			if (vresp->type == VIO_NET_RES_HYBRID) {
				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
				    "hio", vresp);
				if (vresp->ksp == NULL) {
					cmn_err(CE_NOTE, "!vnet%d: Cannot "
					    "create kstats for hio resource",
					    vnetp->instance);
				}
			}
			vnet_add_resource(vnetp, vresp);
			break;
		}
		vnetp = vnetp->nextp;
	}
	RW_EXIT(&vnet_rw);
	if (vresp->vnetp == NULL) {
		DWARN(NULL, "No vnet instance");
		kmem_free(vresp, sizeof (vnet_res_t));
		return (ENXIO);
	}

	*vhp = vresp;
	vcb->vio_net_rx_cb = vnet_rx;
	vcb->vio_net_tx_update = vnet_tx_update;
	vcb->vio_net_report_err = vnet_handle_res_err;

	/* Bind the resource to pseudo ring(s) */
	if (vnet_bind_rings(vresp) != 0) {
		(void) vnet_rem_resource(vnetp, vresp);
		vnet_hio_destroy_kstats(vresp->ksp);
		KMEM_FREE(vresp);
		return (1);
	}

	/* Dispatch a task to start resources */
	vnet_dispatch_res_task(vnetp);
	return (0);
}

/*
 * vio_net_resource_unreg -- An interface to unregister a resource.
 */
void
vio_net_resource_unreg(vio_net_handle_t vhp)
{
	vnet_res_t *vresp = (vnet_res_t *)vhp;
	vnet_t *vnetp = vresp->vnetp;

	DBG1(NULL, "Resource Unregistering hdl=0x%p", vhp);

	ASSERT(vnetp != NULL);
	/*
	 * Remove the resource from fdb; this ensures
	 * there are no references to the resource.
	 */
	vnet_fdbe_del(vnetp, vresp);

	vnet_unbind_rings(vresp);

	/* Now remove the resource from the list */
	(void) vnet_rem_resource(vnetp, vresp);

	vnet_hio_destroy_kstats(vresp->ksp);
	KMEM_FREE(vresp);
}

static void
vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
{
	WRITE_ENTER(&vnetp->vrwlock);
	vresp->nextp = vnetp->vres_list;
	vnetp->vres_list = vresp;
	RW_EXIT(&vnetp->vrwlock);
}

static vnet_res_t *
vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
{
	vnet_res_t *vrp;

	WRITE_ENTER(&vnetp->vrwlock);
	if (vresp == vnetp->vres_list) {
		vnetp->vres_list = vresp->nextp;
	} else {
		vrp = vnetp->vres_list;
		while (vrp->nextp != NULL) {
			if (vrp->nextp == vresp) {
				vrp->nextp = vresp->nextp;
				break;
			}
			vrp = vrp->nextp;
		}
	}
	vresp->vnetp = NULL;
	vresp->nextp = NULL;

	RW_EXIT(&vnetp->vrwlock);

	return (vresp);
}
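
/*
 * Illustrative provider-side usage of the registration interface above
 * (not actual vgen/vdds code); assumes macp has been filled in with the
 * resource's mc_start/mc_stop/mc_tx callbacks:
 *
 *	vio_net_handle_t	vhp;
 *	vio_net_callbacks_t	vcb;
 *
 *	if (vio_net_resource_reg(macp, VIO_NET_RES_LDC_SERVICE,
 *	    local_macaddr, rem_macaddr, &vhp, &vcb) != 0)
 *		(fail);
 *
 *	vcb.vio_net_rx_cb(vhp, mp);		deliver received packets
 *	vcb.vio_net_tx_update(vhp);		resume a flow-controlled tx
 *	vcb.vio_net_report_err(vhp, err);	report an LDC down/reset
 *	...
 *	vio_net_resource_unreg(vhp);		on teardown
 */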
/*
 * vnet_dds_rx -- an interface called by vgen to deliver DDS messages.
 */
void
vnet_dds_rx(void *arg, void *dmsg)
{
	vnet_t *vnetp = arg;
	vdds_process_dds_msg(vnetp, dmsg);
}

/*
 * vnet_send_dds_msg -- An interface provided to DDS to send
 *	DDS messages. This simply sends messages via vgen.
 */
int
vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
{
	int rv = EINVAL;	/* fail if there is no vgen handle */

	if (vnetp->vgenhdl != NULL) {
		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
	}
	return (rv);
}

/*
 * vnet_dds_cleanup_hio -- an interface called by vgen to cleanup hio
 * resources.
 */
void
vnet_dds_cleanup_hio(vnet_t *vnetp)
{
	vdds_cleanup_hio(vnetp);
}

/*
 * vnet_handle_res_err -- A callback function called by a resource
 *	to report an error. For example, vgen can call to report
 *	an LDC down/reset event. This will trigger cleanup of associated
 *	Hybrid resource.
 */
/* ARGSUSED */
static void
vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
{
	vnet_res_t *vresp = (vnet_res_t *)vrh;
	vnet_t *vnetp = vresp->vnetp;

	if (vnetp == NULL) {
		return;
	}
	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
	    (vresp->type != VIO_NET_RES_HYBRID)) {
		return;
	}

	vdds_cleanup_hio(vnetp);
}

/*
 * vnet_dispatch_res_task -- A function to dispatch a task to start resources.
 */
static void
vnet_dispatch_res_task(vnet_t *vnetp)
{
	int rv;

	/*
	 * Dispatch the task. It could be the case that vnetp->flags does
	 * not have VNET_STARTED set. This is ok as vnet_res_start_task()
	 * can abort the task when the task is started. See related comments
	 * in vnet_m_stop() and vnet_stop_resources().
	 */
	rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
	    vnetp, DDI_NOSLEEP);
	if (rv != DDI_SUCCESS) {
		cmn_err(CE_WARN,
		    "vnet%d:Can't dispatch start resource task",
		    vnetp->instance);
	}
}

/*
 * vnet_res_start_task -- A taskq callback function that starts a resource.
 */
static void
vnet_res_start_task(void *arg)
{
	vnet_t *vnetp = arg;

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->flags & VNET_STARTED) {
		vnet_start_resources(vnetp);
	}
	RW_EXIT(&vnetp->vrwlock);
}

/*
 * vnet_start_resources -- starts all resources associated with
 *	a vnet.
 */
static void
vnet_start_resources(vnet_t *vnetp)
{
	mac_register_t *macp;
	mac_callbacks_t *cbp;
	vnet_res_t *vresp;
	int rv;

	DBG1(vnetp, "enter\n");

	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));

	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
		/* skip if it is already started */
		if (vresp->flags & VNET_STARTED) {
			continue;
		}
		macp = &vresp->macreg;
		cbp = macp->m_callbacks;
		rv = cbp->mc_start(macp->m_driver);
		if (rv == 0) {
			/*
			 * Successfully started the resource, so now
			 * add it to the fdb.
			 */
			vresp->flags |= VNET_STARTED;
			vnet_fdbe_add(vnetp, vresp);
		}
	}

	DBG1(vnetp, "exit\n");
}
/*
 * vnet_stop_resources -- stop all resources associated with a vnet.
 */
static void
vnet_stop_resources(vnet_t *vnetp)
{
	vnet_res_t *vresp;
	mac_register_t *macp;
	mac_callbacks_t *cbp;

	DBG1(vnetp, "enter\n");

	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));

	for (vresp = vnetp->vres_list; vresp != NULL; ) {
		if (vresp->flags & VNET_STARTED) {
			/*
			 * Release the lock while invoking mc_stop() of the
			 * underlying resource. We hold a reference to this
			 * resource to prevent being removed from the list in
			 * vio_net_resource_unreg(). Note that new resources
			 * can be added to the head of the list while the lock
			 * is released, but they won't be started, as
			 * VNET_STARTED flag has been cleared for the vnet
			 * device in vnet_m_stop(). Also, while the lock is
			 * released a resource could be removed from the list
			 * in vio_net_resource_unreg(); but that is ok, as we
			 * re-acquire the lock and only then access the forward
			 * link (vresp->nextp) to continue with the next
			 * resource.
			 */
			vresp->flags &= ~VNET_STARTED;
			vresp->flags |= VNET_STOPPING;
			macp = &vresp->macreg;
			cbp = macp->m_callbacks;
			VNET_FDBE_REFHOLD(vresp);
			RW_EXIT(&vnetp->vrwlock);

			cbp->mc_stop(macp->m_driver);

			WRITE_ENTER(&vnetp->vrwlock);
			vresp->flags &= ~VNET_STOPPING;
			VNET_FDBE_REFRELE(vresp);
		}
		vresp = vresp->nextp;
	}
	DBG1(vnetp, "exit\n");
}

/*
 * Setup kstats for the HIO statistics.
 * NOTE: the synchronization for the statistics is the
 * responsibility of the caller.
 */
kstat_t *
vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
{
	kstat_t *ksp;
	vnet_t *vnetp = vresp->vnetp;
	vnet_hio_kstats_t *hiokp;
	size_t size;

	ASSERT(vnetp != NULL);
	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
	    KSTAT_TYPE_NAMED, size, 0);
	if (ksp == NULL) {
		return (NULL);
	}

	/*
	 * All members of vnet_hio_kstats_t must be initialized, as the
	 * named kstat count (size) is derived from the structure and
	 * vnet_hio_update_kstats() fills in the 64-bit variants too.
	 */
	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
	kstat_named_init(&hiokp->ipackets, "ipackets",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->ipackets64, "ipackets64",
	    KSTAT_DATA_ULONGLONG);
	kstat_named_init(&hiokp->ierrors, "ierrors",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->opackets, "opackets",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->opackets64, "opackets64",
	    KSTAT_DATA_ULONGLONG);
	kstat_named_init(&hiokp->oerrors, "oerrors",
	    KSTAT_DATA_ULONG);

	/* MIB II kstat variables */
	kstat_named_init(&hiokp->rbytes, "rbytes",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->rbytes64, "rbytes64",
	    KSTAT_DATA_ULONGLONG);
	kstat_named_init(&hiokp->obytes, "obytes",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->obytes64, "obytes64",
	    KSTAT_DATA_ULONGLONG);
	kstat_named_init(&hiokp->multircv, "multircv",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->multixmt, "multixmt",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->brdcstrcv, "brdcstrcv",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->brdcstxmt, "brdcstxmt",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->norcvbuf, "norcvbuf",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->noxmtbuf, "noxmtbuf",
	    KSTAT_DATA_ULONG);

	ksp->ks_update = vnet_hio_update_kstats;
	ksp->ks_private = (void *)vresp;
	kstat_install(ksp);
	return (ksp);
}

/*
 * Destroy kstats.
 */
static void
vnet_hio_destroy_kstats(kstat_t *ksp)
{
	if (ksp != NULL)
		kstat_delete(ksp);
}
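
/*
 * The kstats above are created with module DRV_NAME ("vnet") and name
 * "hio" (see vio_net_resource_reg()), so they should be observable from
 * userland with, e.g., kstat(1M): kstat -m vnet -n hio
 */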
/*
 * Update the kstats.
 */
static int
vnet_hio_update_kstats(kstat_t *ksp, int rw)
{
	vnet_t *vnetp;
	vnet_res_t *vresp;
	vnet_hio_stats_t statsp;
	vnet_hio_kstats_t *hiokp;

	vresp = (vnet_res_t *)ksp->ks_private;
	vnetp = vresp->vnetp;

	bzero(&statsp, sizeof (vnet_hio_stats_t));

	READ_ENTER(&vnetp->vsw_fp_rw);
	if (vnetp->hio_fp == NULL) {
		/* not using hio resources, just return */
		RW_EXIT(&vnetp->vsw_fp_rw);
		return (0);
	}
	VNET_FDBE_REFHOLD(vnetp->hio_fp);
	RW_EXIT(&vnetp->vsw_fp_rw);
	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
	VNET_FDBE_REFRELE(vnetp->hio_fp);

	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;

	if (rw == KSTAT_READ) {
		/* Link Input/Output stats */
		hiokp->ipackets.value.ul = (uint32_t)statsp.ipackets;
		hiokp->ipackets64.value.ull = statsp.ipackets;
		hiokp->ierrors.value.ul = statsp.ierrors;
		hiokp->opackets.value.ul = (uint32_t)statsp.opackets;
		hiokp->opackets64.value.ull = statsp.opackets;
		hiokp->oerrors.value.ul = statsp.oerrors;

		/* MIB II kstat variables */
		hiokp->rbytes.value.ul = (uint32_t)statsp.rbytes;
		hiokp->rbytes64.value.ull = statsp.rbytes;
		hiokp->obytes.value.ul = (uint32_t)statsp.obytes;
		hiokp->obytes64.value.ull = statsp.obytes;
		hiokp->multircv.value.ul = statsp.multircv;
		hiokp->multixmt.value.ul = statsp.multixmt;
		hiokp->brdcstrcv.value.ul = statsp.brdcstrcv;
		hiokp->brdcstxmt.value.ul = statsp.brdcstxmt;
		hiokp->norcvbuf.value.ul = statsp.norcvbuf;
		hiokp->noxmtbuf.value.ul = statsp.noxmtbuf;
	} else {
		return (EACCES);
	}

	return (0);
}

static void
vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
{
	mac_register_t *macp;
	mac_callbacks_t *cbp;
	uint64_t val;
	int stat;

	/*
	 * get the specified statistics from the underlying nxge.
	 */
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
			switch (stat) {
			case MAC_STAT_IPACKETS:
				statsp->ipackets = val;
				break;

			case MAC_STAT_IERRORS:
				statsp->ierrors = val;
				break;

			case MAC_STAT_OPACKETS:
				statsp->opackets = val;
				break;

			case MAC_STAT_OERRORS:
				statsp->oerrors = val;
				break;

			case MAC_STAT_RBYTES:
				statsp->rbytes = val;
				break;

			case MAC_STAT_OBYTES:
				statsp->obytes = val;
				break;

			case MAC_STAT_MULTIRCV:
				statsp->multircv = val;
				break;

			case MAC_STAT_MULTIXMT:
				statsp->multixmt = val;
				break;

			case MAC_STAT_BRDCSTRCV:
				statsp->brdcstrcv = val;
				break;

			case MAC_STAT_BRDCSTXMT:
				statsp->brdcstxmt = val;
				break;

			case MAC_STAT_NOXMTBUF:
				statsp->noxmtbuf = val;
				break;

			case MAC_STAT_NORCVBUF:
				statsp->norcvbuf = val;
				break;

			default:
				/*
				 * stats we are not interested in.
				 */
				break;
			}
		}
	}
}
static boolean_t
vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
{
	vnet_t *vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		return (B_FALSE);
	}

	switch (cap) {

	case MAC_CAPAB_RINGS: {

		mac_capab_rings_t *cap_rings = cap_data;
		/*
		 * Rings Capability Notes:
		 * We advertise rings to make use of the rings framework in
		 * gldv3 mac layer, to improve the performance. This is
		 * specifically needed when a Hybrid resource (with multiple
		 * tx/rx hardware rings) is assigned to a vnet device. We also
		 * leverage this for the normal case when no Hybrid resource
		 * is assigned.
		 *
		 * Ring Allocation:
		 * - TX path:
		 * We expose a pseudo ring group with 2 pseudo tx rings (as
		 * currently HybridIO exports only 2 rings). In the normal
		 * case, transmit traffic that comes down to the driver
		 * through the mri_tx (vnet_tx_ring_send()) entry point goes
		 * through the distributed switching algorithm in vnet and
		 * gets transmitted over a port/LDC in the vgen layer to
		 * either the vswitch or a peer vnet. If and when a Hybrid
		 * resource is assigned to the vnet, we obtain the tx ring
		 * information of the Hybrid device (nxge) and map the pseudo
		 * rings 1:1 to the 2 hw tx rings. Traffic being sent over
		 * the Hybrid resource by the mac layer gets spread across
		 * both hw rings, as they are mapped to the 2 pseudo tx rings
		 * in vnet.
		 *
		 * - RX path:
		 * We expose a pseudo ring group with 3 pseudo rx rings
		 * (static rings) initially. The first (default) pseudo rx
		 * ring is reserved for the resource that connects to the
		 * vswitch service. The next 2 rings are reserved for a Hybrid
		 * resource that may be assigned to the vnet device. If and
		 * when a Hybrid resource is assigned to the vnet, we obtain
		 * the rx ring information of the Hybrid device (nxge) and map
		 * these pseudo rings 1:1 to the 2 hw rx rings. For each
		 * additional resource that connects to a peer vnet, we
		 * dynamically allocate a pseudo rx ring and map it to that
		 * resource, when the resource gets added; and the pseudo rx
		 * ring is dynamically registered with the upper mac layer.
		 * We do the reverse and unregister the ring with the mac
		 * layer when the resource gets removed.
		 *
		 * Synchronization notes:
		 * We don't need any lock to protect members of ring
		 * structure, specifically ringp->hw_rh, in either the TX or
		 * the RX ring, as explained below.
		 * - TX ring:
		 * ring->hw_rh is initialized only when a Hybrid resource is
		 * associated; and gets referenced only in vnet_hio_tx(). The
		 * Hybrid resource itself is available in fdb only after tx
		 * hwrings are found and mapped; i.e, in
		 * vio_net_resource_reg() we call vnet_bind_rings() first and
		 * then call vnet_start_resources() which adds an entry to
		 * fdb. For traffic going over LDC resources, we don't
		 * reference ring->hw_rh at all.
		 * - RX ring:
		 * For rings mapped to Hybrid resource ring->hw_rh is
		 * initialized and only then do we add the rx callback for
		 * the underlying Hybrid resource; we disable callbacks before
		 * we unmap ring->hw_rh. For rings mapped to LDC resources, we
		 * stop the rx callbacks (in vgen) before we remove
		 * ring->hw_rh (vio_net_resource_unreg()).
		 */

		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for rx grp is initialized in
			 * vnet_ring_grp_init(). Later, the ring_cnt gets
			 * updated dynamically whenever LDC resources are
			 * added or removed.
			 */
			cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		} else {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for tx grp is initialized in
			 * vnet_ring_grp_init() and remains constant, as we
			 * do not support dynamic tx rings for now.
			 */
			cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			/*
			 * Transmit rings are not grouped; i.e, the number of
			 * transmit ring groups advertised should be set to 0.
			 */
			cap_rings->mr_gnum = 0;

			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		}
		return (B_TRUE);

	}

	default:
		break;

	}

	return (B_FALSE);
}
		 */

		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for the rx group is initialized in
			 * vnet_ring_grp_init(). Later, the ring_cnt gets
			 * updated dynamically whenever LDC resources are
			 * added or removed.
			 */
			cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		} else {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for the tx group is initialized in
			 * vnet_ring_grp_init() and remains constant, as we
			 * do not support dynamic tx rings for now.
			 */
			cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			/*
			 * Transmit rings are not grouped; i.e., the number
			 * of transmit ring groups advertised should be set
			 * to 0.
			 */
			cap_rings->mr_gnum = 0;

			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		}
		return (B_TRUE);

	}

	default:
		break;

	}

	return (B_FALSE);
}
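/*
 * Illustrative sketch, not part of the driver: how a consumer of the
 * MAC_CAPAB_RINGS capability advertised above might query the current
 * number of rx pseudo rings. vnet_query_rx_ring_cnt() is a hypothetical
 * function added purely for illustration.
 */
static uint_t
vnet_query_rx_ring_cnt(vnet_t *vnetp)
{
	mac_capab_rings_t	cap_rings;

	bzero(&cap_rings, sizeof (cap_rings));
	cap_rings.mr_type = MAC_RING_TYPE_RX;

	/* vnet_m_capab() fills in the ring counts and callbacks */
	if (vnet_m_capab(vnetp, MAC_CAPAB_RINGS, &cap_rings) == B_FALSE) {
		return (0);
	}
	return (cap_rings.mr_rnum);
}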
/*
 * Callback function for the MAC layer to get ring information.
 */
static void
vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
    const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
{
	vnet_t	*vnetp = arg;

	switch (rtype) {

	case MAC_RING_TYPE_RX: {

		vnet_pseudo_rx_group_t	*rx_grp;
		vnet_pseudo_rx_ring_t	*rx_ringp;
		mac_intr_t		*mintr;

		/* We advertised only one RX group */
		ASSERT(g_index == 0);
		rx_grp = &vnetp->rx_grp[g_index];

		/* Check the current # of rings in the rx group */
		ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));

		/* Get the ring based on the index */
		rx_ringp = &rx_grp->rings[r_index];

		rx_ringp->handle = r_handle;
		/*
		 * Note: we don't need to save the incoming r_index in
		 * rx_ring, as vnet_ring_grp_init() would have initialized
		 * the index for each ring in the array.
		 */
		rx_ringp->grp = rx_grp;
		rx_ringp->vnetp = vnetp;

		mintr = &infop->mri_intr;
		mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
		mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
		mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;

		infop->mri_driver = (mac_ring_driver_t)rx_ringp;
		infop->mri_start = vnet_rx_ring_start;
		infop->mri_stop = vnet_rx_ring_stop;

		/* Set the poll function, as this is an rx ring */
		infop->mri_poll = vnet_rx_poll;

		break;
	}

	case MAC_RING_TYPE_TX: {
		vnet_pseudo_tx_group_t	*tx_grp;
		vnet_pseudo_tx_ring_t	*tx_ringp;

		/*
		 * No need to check the grp index; the mac layer passes -1
		 * for it.
		 */
		tx_grp = &vnetp->tx_grp[0];

		/* Check the # of rings in the tx group */
		ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));

		/* Get the ring based on the index */
		tx_ringp = &tx_grp->rings[r_index];

		tx_ringp->handle = r_handle;
		tx_ringp->index = r_index;
		tx_ringp->grp = tx_grp;
		tx_ringp->vnetp = vnetp;

		infop->mri_driver = (mac_ring_driver_t)tx_ringp;
		infop->mri_start = vnet_tx_ring_start;
		infop->mri_stop = vnet_tx_ring_stop;

		/* Set the transmit function, as this is a tx ring */
		infop->mri_tx = vnet_tx_ring_send;

		break;
	}

	default:
		break;
	}
}

/*
 * Callback function for the MAC layer to get group information.
 */
static void
vnet_get_group(void *arg, mac_ring_type_t type, const int index,
    mac_group_info_t *infop, mac_group_handle_t handle)
{
	vnet_t	*vnetp = (vnet_t *)arg;

	switch (type) {

	case MAC_RING_TYPE_RX:
	{
		vnet_pseudo_rx_group_t	*rx_grp;

		/* We advertised only one RX group */
		ASSERT(index == 0);

		rx_grp = &vnetp->rx_grp[index];
		rx_grp->handle = handle;
		rx_grp->index = index;
		rx_grp->vnetp = vnetp;

		infop->mgi_driver = (mac_group_driver_t)rx_grp;
		infop->mgi_start = NULL;
		infop->mgi_stop = NULL;
		infop->mgi_addmac = vnet_addmac;
		infop->mgi_remmac = vnet_remmac;
		infop->mgi_count = rx_grp->ring_cnt;

		break;
	}

	case MAC_RING_TYPE_TX:
	{
		vnet_pseudo_tx_group_t	*tx_grp;

		/* We advertised only one TX group */
		ASSERT(index == 0);

		tx_grp = &vnetp->tx_grp[index];
		tx_grp->handle = handle;
		tx_grp->index = index;
		tx_grp->vnetp = vnetp;

		infop->mgi_driver = (mac_group_driver_t)tx_grp;
		infop->mgi_start = NULL;
		infop->mgi_stop = NULL;
		infop->mgi_addmac = NULL;
		infop->mgi_remmac = NULL;
		infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;

		break;
	}

	default:
		break;

	}
}
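/*
 * Illustrative helper, not part of the driver: the ring start/stop
 * routines below all branch on whether a pseudo rx ring is backed by an
 * LDC resource or is reserved for the Hybrid resource.
 * vnet_rxring_is_ldc() is a hypothetical predicate shown only to make
 * the state-flag convention explicit.
 */
static boolean_t
vnet_rxring_is_ldc(vnet_pseudo_rx_ring_t *rx_ringp)
{
	/* LDC-backed rings carry one of the two LDC state flags */
	return ((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE | VNET_RXRING_LDC_GUEST)) != 0);
}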
static int
vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	int			err;

	/*
	 * If this ring is mapped to an LDC resource, simply mark the state
	 * to indicate the ring is started and return.
	 */
	if ((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
		rx_ringp->gen_num = mr_gen_num;
		rx_ringp->state |= VNET_RXRING_STARTED;
		return (0);
	}

	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);

	/*
	 * This must be a ring reserved for a hwring. If the hwring is not
	 * bound yet, simply mark the state to indicate the ring is started
	 * and return. If and when a Hybrid resource is activated for this
	 * vnet device, we will bind the hwring and start it then. If a
	 * hwring is already bound, start it now.
	 */
	if (rx_ringp->hw_rh == NULL) {
		rx_ringp->gen_num = mr_gen_num;
		rx_ringp->state |= VNET_RXRING_STARTED;
		return (0);
	}

	err = mac_hwring_start(rx_ringp->hw_rh);
	if (err == 0) {
		rx_ringp->gen_num = mr_gen_num;
		rx_ringp->state |= VNET_RXRING_STARTED;
	} else {
		err = ENXIO;
	}

	return (err);
}

static void
vnet_rx_ring_stop(mac_ring_driver_t arg)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;

	/*
	 * If this ring is mapped to an LDC resource, simply mark the state
	 * to indicate the ring is now stopped and return.
	 */
	if ((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
		rx_ringp->state &= ~VNET_RXRING_STARTED;
		return;
	}

	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);

	/*
	 * This must be a ring reserved for a hwring. If the hwring is not
	 * bound yet, simply mark the state to indicate the ring is stopped
	 * and return. If a hwring is already bound, stop it now.
	 */
	if (rx_ringp->hw_rh == NULL) {
		rx_ringp->state &= ~VNET_RXRING_STARTED;
		return;
	}

	mac_hwring_stop(rx_ringp->hw_rh);
	rx_ringp->state &= ~VNET_RXRING_STARTED;
}

/* ARGSUSED */
static int
vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
{
	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;

	tx_ringp->state |= VNET_TXRING_STARTED;
	return (0);
}

static void
vnet_tx_ring_stop(mac_ring_driver_t arg)
{
	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;

	tx_ringp->state &= ~VNET_TXRING_STARTED;
}

/*
 * Disable polling for a ring and enable its interrupt.
 */
static int
vnet_ring_enable_intr(void *arg)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	vnet_res_t		*vresp;

	if (rx_ringp->hw_rh == NULL) {
		/*
		 * The ring-enable-intr entry point has been invoked, but
		 * the ring is not bound to any underlying resource. This
		 * must be a ring reserved for a Hybrid resource, with no
		 * such resource assigned to this vnet device yet. Simply
		 * return success.
		 */
		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
		return (0);
	}

	/*
	 * The rx ring has been bound to either an LDC or a Hybrid resource.
	 * Call the appropriate function to enable interrupts for the ring.
	 */
	if (rx_ringp->state & VNET_RXRING_HYBRID) {
		return (mac_hwring_enable_intr(rx_ringp->hw_rh));
	} else {
		vresp = (vnet_res_t *)rx_ringp->hw_rh;
		return (vgen_enable_intr(vresp->macreg.m_driver));
	}
}
/*
 * Enable polling for a ring and disable its interrupt.
 */
static int
vnet_ring_disable_intr(void *arg)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	vnet_res_t		*vresp;

	if (rx_ringp->hw_rh == NULL) {
		/*
		 * The ring-disable-intr entry point has been invoked, but
		 * the ring is not bound to any underlying resource. This
		 * must be a ring reserved for a Hybrid resource, with no
		 * such resource assigned to this vnet device yet. Simply
		 * return success.
		 */
		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
		return (0);
	}

	/*
	 * The rx ring has been bound to either an LDC or a Hybrid resource.
	 * Call the appropriate function to disable interrupts for the ring.
	 */
	if (rx_ringp->state & VNET_RXRING_HYBRID) {
		return (mac_hwring_disable_intr(rx_ringp->hw_rh));
	} else {
		vresp = (vnet_res_t *)rx_ringp->hw_rh;
		return (vgen_disable_intr(vresp->macreg.m_driver));
	}
}

/*
 * Poll up to 'bytes_to_pickup' bytes of packets from the rx ring.
 */
static mblk_t *
vnet_rx_poll(void *arg, int bytes_to_pickup)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	mblk_t			*mp = NULL;
	vnet_res_t		*vresp;
	vnet_t			*vnetp = rx_ringp->vnetp;

	if (rx_ringp->hw_rh == NULL) {
		return (NULL);
	}

	if (rx_ringp->state & VNET_RXRING_HYBRID) {
		mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
		/*
		 * Packets received over a Hybrid resource need additional
		 * processing to remove the tag, for the pvid case. The
		 * underlying resource is not aware of the vnet's pvid and
		 * thus packets are received with the vlan tag in the
		 * header; unlike packets that are received over an LDC
		 * channel, in which case the peer vnet/vsw would have
		 * already removed the tag.
		 */
		if (vnetp->pvid != vnetp->default_vlan_id) {
			vnet_rx_frames_untag(vnetp->pvid, &mp);
		}
	} else {
		vresp = (vnet_res_t *)rx_ringp->hw_rh;
		mp = vgen_poll(vresp->macreg.m_driver, bytes_to_pickup);
	}
	return (mp);
}
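/*
 * Illustrative sketch, not part of the driver: the polling protocol that
 * the mac layer follows with the entry points above -- switch the ring to
 * poll mode by disabling its interrupt, drain packets through the poll
 * entry point, then switch back to interrupt mode. The real sequencing is
 * driven by the mac soft rings (srs) code; vnet_poll_cycle() is a
 * hypothetical function shown only to make the protocol concrete.
 */
static mblk_t *
vnet_poll_cycle(vnet_pseudo_rx_ring_t *rx_ringp, int bytes)
{
	mblk_t	*mp;

	(void) vnet_ring_disable_intr(rx_ringp);	/* enter poll mode */
	mp = vnet_rx_poll(rx_ringp, bytes);		/* pick up packets */
	(void) vnet_ring_enable_intr(rx_ringp);		/* back to intr mode */

	return (mp);
}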
/* ARGSUSED */
void
vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t loopback)
{
	vnet_t			*vnetp = (vnet_t *)arg;
	vnet_pseudo_rx_ring_t	*ringp = (vnet_pseudo_rx_ring_t *)mrh;

	/*
	 * Packets received over a Hybrid resource need additional
	 * processing to remove the tag, for the pvid case. The underlying
	 * resource is not aware of the vnet's pvid and thus packets are
	 * received with the vlan tag in the header; unlike packets that
	 * are received over an LDC channel, in which case the peer
	 * vnet/vsw would have already removed the tag.
	 */
	if (vnetp->pvid != vnetp->default_vlan_id) {
		vnet_rx_frames_untag(vnetp->pvid, &mp);
		if (mp == NULL) {
			return;
		}
	}
	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
}

static int
vnet_addmac(void *arg, const uint8_t *mac_addr)
{
	vnet_pseudo_rx_group_t	*rx_grp = (vnet_pseudo_rx_group_t *)arg;
	vnet_t			*vnetp;

	vnetp = rx_grp->vnetp;

	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
		return (0);
	}

	cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
	    vnetp->instance, __func__);
	return (EINVAL);
}

static int
vnet_remmac(void *arg, const uint8_t *mac_addr)
{
	vnet_pseudo_rx_group_t	*rx_grp = (vnet_pseudo_rx_group_t *)arg;
	vnet_t			*vnetp;

	vnetp = rx_grp->vnetp;

	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
		return (0);
	}

	cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
	    vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
	return (EINVAL);
}

int
vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
{
	mac_handle_t		mh;
	mac_client_handle_t	mch = NULL;
	mac_unicast_handle_t	muh = NULL;
	mac_diag_t		diag;
	mac_register_t		*macp;
	char			client_name[MAXNAMELEN];
	int			rv;
	uint16_t		mac_flags = MAC_UNICAST_TAG_DISABLE |
	    MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
	vio_net_callbacks_t	vcb;
	ether_addr_t		rem_addr =
		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	uint32_t		retries = 0;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
		return (EAGAIN);
	}

	do {
		rv = mac_open_by_linkname(ifname, &mh);
		if (rv == 0) {
			break;
		}
		if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
			mac_free(macp);
			return (rv);
		}
		drv_usecwait(vnet_mac_open_delay);
	} while (rv == ENOENT);

	vnetp->hio_mh = mh;

	(void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
	    ifname);
	rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_mch = mch;

	rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
	    &diag);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_muh = muh;

	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vnetp;
	macp->m_dip = NULL;
	macp->m_src_addr = NULL;
	macp->m_callbacks = &vnet_hio_res_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;

	rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
	    vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
	if (rv != 0) {
		goto fail;
	}
	mac_free(macp);

	/* add the recv callback */
	mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);

	/* add the notify callback - only tx updates for now */
	vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb,
	    vnetp);

	return (0);

fail:
	mac_free(macp);
	vnet_hio_mac_cleanup(vnetp);
	return (1);
}
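/*
 * Illustrative helper, not part of the driver: the bounded retry pattern
 * that vnet_hio_mac_init() above uses when opening the underlying mac by
 * link name. vnet_mac_open_retry() is a hypothetical refactoring, shown
 * only to isolate the pattern; vnet_mac_open_retries and
 * vnet_mac_open_delay are the same tunables the function consults.
 */
static int
vnet_mac_open_retry(char *ifname, mac_handle_t *mhp)
{
	uint32_t	retries = 0;
	int		rv;

	/* Retry only while the link has not appeared yet (ENOENT) */
	while ((rv = mac_open_by_linkname(ifname, mhp)) == ENOENT) {
		if (retries++ >= vnet_mac_open_retries)
			break;
		drv_usecwait(vnet_mac_open_delay);
	}

	return (rv);
}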
void
vnet_hio_mac_cleanup(vnet_t *vnetp)
{
	if (vnetp->hio_mnh != NULL) {
		(void) mac_notify_remove(vnetp->hio_mnh, B_TRUE);
		vnetp->hio_mnh = NULL;
	}

	if (vnetp->hio_vhp != NULL) {
		vio_net_resource_unreg(vnetp->hio_vhp);
		vnetp->hio_vhp = NULL;
	}

	if (vnetp->hio_muh != NULL) {
		(void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
		vnetp->hio_muh = NULL;
	}

	if (vnetp->hio_mch != NULL) {
		mac_client_close(vnetp->hio_mch, 0);
		vnetp->hio_mch = NULL;
	}

	if (vnetp->hio_mh != NULL) {
		mac_close(vnetp->hio_mh);
		vnetp->hio_mh = NULL;
	}
}

/* Bind pseudo rings to hwrings */
static int
vnet_bind_hwrings(vnet_t *vnetp)
{
	mac_ring_handle_t	hw_rh[VNET_NUM_HYBRID_RINGS];
	mac_perim_handle_t	mph1;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			hw_ring_cnt;
	int			i;
	int			rv;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	/* Get the list of the underlying RX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
	    MAC_RING_TYPE_RX);

	/* We expect the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	if (vnetp->rx_hwgh != NULL) {
		/*
		 * Quiesce the HW ring and the mac srs on the ring. Note
		 * that the HW ring will be restarted when the pseudo ring
		 * is started. At that time all the packets will be
		 * directly passed up to the pseudo RX ring and handled
		 * by the mac srs created over the pseudo RX ring.
		 */
		mac_rx_client_quiesce(vnetp->hio_mch);
		mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
	}

	/*
	 * Bind the pseudo rings to the hwrings and start the hwrings.
	 * Note we don't need to register these with the upper mac, as we
	 * have statically exported these pseudo rxrings, which are reserved
	 * for the rxrings of the Hybrid resource.
	 */
	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		/* Pick the rxrings reserved for the Hybrid resource */
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];

		/* Store the hw ring handle */
		rx_ringp->hw_rh = hw_rh[i];

		/* Bind the pseudo ring to the underlying hwring */
		mac_hwring_setup(rx_ringp->hw_rh,
		    (mac_resource_handle_t)rx_ringp);

		/* Start the hwring if needed */
		if (rx_ringp->state & VNET_RXRING_STARTED) {
			rv = mac_hwring_start(rx_ringp->hw_rh);
			if (rv != 0) {
				mac_hwring_teardown(rx_ringp->hw_rh);
				rx_ringp->hw_rh = NULL;
				goto fail;
			}
		}
	}

	/* Get the list of the underlying TX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
	    MAC_RING_TYPE_TX);

	/* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	/*
	 * Now map the pseudo txrings to the hw txrings. Note we don't need
	 * to register these with the upper mac, as we have statically
	 * exported these rings. Note that these rings will continue to be
	 * used for LDC resources to peer vnets and the vswitch (shared
	 * ring).
	 */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		tx_ringp->hw_rh = hw_rh[i];
		tx_ringp->state |= VNET_TXRING_HYBRID;
	}

	mac_perim_exit(mph1);
	return (0);

fail:
	mac_perim_exit(mph1);
	vnet_unbind_hwrings(vnetp);
	return (1);
}

/* Unbind pseudo rings from hwrings */
static void
vnet_unbind_hwrings(vnet_t *vnetp)
{
	mac_perim_handle_t	mph1;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			i;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		tx_ringp = &tx_grp->rings[i];
		if (tx_ringp->state & VNET_TXRING_HYBRID) {
			tx_ringp->state &= ~VNET_TXRING_HYBRID;
			tx_ringp->hw_rh = NULL;
		}
	}

	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
		if (rx_ringp->hw_rh != NULL) {
			/* Stop the hwring */
			mac_hwring_stop(rx_ringp->hw_rh);

			/* Teardown the hwring */
			mac_hwring_teardown(rx_ringp->hw_rh);
			rx_ringp->hw_rh = NULL;
		}
	}

	if (vnetp->rx_hwgh != NULL) {
		vnetp->rx_hwgh = NULL;
		/*
		 * First clear the permanent-quiesced flag of the RX srs,
		 * then restart the HW ring and the mac srs on the ring.
		 */
		mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
		mac_rx_client_restart(vnetp->hio_mch);
	}

	mac_perim_exit(mph1);
}

/* Bind a pseudo ring to an LDC resource */
static int
vnet_bind_vgenring(vnet_res_t *vresp)
{
	vnet_t			*vnetp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	mac_perim_handle_t	mph1;
	int			rv;
	int			type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This
		 * ring is allocated statically and is reported to the mac
		 * layer in vnet_m_capab(). So all we need to do here is
		 * save a reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
		vresp->rx_ringp = (void *)rx_ringp;
		return (0);
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
	if (rx_ringp == NULL) {
		cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
		    vnetp->instance);
		goto fail;
	}

	/* Store the LDC resource itself as the ring handle */
	rx_ringp->hw_rh = (mac_ring_handle_t)vresp;

	/*
	 * Save a reference to the ring in the resource for lookup during
	 * unbind. Note this is only done for LDC resources. We don't need
	 * this in the case of a Hybrid resource (see vnet_bind_hwrings()),
	 * as its rx rings are mapped to reserved pseudo rx rings (index 1
	 * and 2).
	 */
	vresp->rx_ringp = (void *)rx_ringp;
	rx_ringp->state |= VNET_RXRING_LDC_GUEST;

	/* Register the pseudo ring with the upper mac */
	rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
	if (rv != 0) {
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
		rx_ringp->hw_rh = NULL;
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
		goto fail;
	}

	mac_perim_exit(mph1);
	return (0);
fail:
	mac_perim_exit(mph1);
	return (1);
}

/* Unbind a pseudo ring from an LDC resource */
static void
vnet_unbind_vgenring(vnet_res_t *vresp)
{
	vnet_t			*vnetp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	mac_perim_handle_t	mph1;
	int			type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (vresp->rx_ringp == NULL) {
		return;
	}

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This
		 * ring is allocated statically and is reported to the mac
		 * layer in vnet_m_capab(). So all we need to do here is
		 * remove its reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = NULL;
		vresp->rx_ringp = NULL;
		return;
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
	vresp->rx_ringp = NULL;

	if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
		/* Unregister the pseudo ring with the upper mac */
		mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);

		rx_ringp->hw_rh = NULL;
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;

		/* Free the pseudo rx ring */
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
	}

	mac_perim_exit(mph1);
}

static void
vnet_unbind_rings(vnet_res_t *vresp)
{
	switch (vresp->type) {

	case VIO_NET_RES_LDC_SERVICE:
	case VIO_NET_RES_LDC_GUEST:
		vnet_unbind_vgenring(vresp);
		break;

	case VIO_NET_RES_HYBRID:
		vnet_unbind_hwrings(vresp->vnetp);
		break;

	default:
		break;

	}
}

static int
vnet_bind_rings(vnet_res_t *vresp)
{
	int	rv;

	switch (vresp->type) {

	case VIO_NET_RES_LDC_SERVICE:
	case VIO_NET_RES_LDC_GUEST:
		rv = vnet_bind_vgenring(vresp);
		break;

	case VIO_NET_RES_HYBRID:
		rv = vnet_bind_hwrings(vresp->vnetp);
		break;

	default:
		rv = 1;
		break;

	}

	return (rv);
}

/* ARGSUSED */
int
vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnet_t	*vnetp = (vnet_t *)arg;

	*val = mac_stat_get(vnetp->hio_mh, stat);
	return (0);
}

/*
 * The start() and stop() routines for the Hybrid resource below are just
 * dummy functions. They are provided to avoid resource-type-specific code
 * in vnet_start_resources() and vnet_stop_resources(). The actual starting
 * and stopping of the Hybrid resource happens in the context of the
 * mac_client interfaces that are invoked in vnet_hio_mac_init() and
 * vnet_hio_mac_cleanup().
 */
/* ARGSUSED */
static int
vnet_hio_start(void *arg)
{
	return (0);
}

/* ARGSUSED */
static void
vnet_hio_stop(void *arg)
{
}

mblk_t *
vnet_hio_tx(void *arg, mblk_t *mp)
{
	vnet_pseudo_tx_ring_t	*tx_ringp;
	mblk_t			*nextp;
	mblk_t			*ret_mp;

	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
	for (;;) {
		/* Send the chained packets one at a time to the hwring */
		nextp = mp->b_next;
		mp->b_next = NULL;

		ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
		if (ret_mp != NULL) {
			/*
			 * The hwring could not accept this packet; chain
			 * the unsent remainder back together and return it
			 * to the caller for retransmission.
			 */
			ret_mp->b_next = nextp;
			mp = ret_mp;
			break;
		}

		if ((mp = nextp) == NULL)
			break;
	}
	return (mp);
}

static void
vnet_hio_notify_cb(void *arg, mac_notify_type_t type)
{
	vnet_t			*vnetp = (vnet_t *)arg;
	mac_perim_handle_t	mph;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph);
	switch (type) {
	case MAC_NOTE_TX:
		vnet_tx_update(vnetp->hio_vhp);
		break;

	default:
		break;
	}
	mac_perim_exit(mph);
}

#ifdef VNET_IOC_DEBUG

/*
 * The ioctl entry point is used only for debugging for now. The ioctl
 * commands can be used to force the link state of the channel connected
 * to vsw.
 */
static void
vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	struct iocblk	*iocp;
	vnet_t		*vnetp;

	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
	iocp->ioc_error = 0;
	vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		miocnak(q, mp, 0, EINVAL);
		return;
	}

	switch (iocp->ioc_cmd) {

	case VNET_FORCE_LINK_DOWN:
	case VNET_FORCE_LINK_UP:
		vnet_force_link_state(vnetp, q, mp);
		break;

	default:
		iocp->ioc_error = EINVAL;
		miocnak(q, mp, 0, iocp->ioc_error);
		break;

	}
}

static void
vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
{
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	vnet_res_t	*vresp;

	READ_ENTER(&vnetp->vsw_fp_rw);

	vresp = vnetp->vsw_fp;
	if (vresp == NULL) {
		RW_EXIT(&vnetp->vsw_fp_rw);
		return;
	}

	/* Pass the ioctl down to the resource that connects to the vswitch */
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	cbp->mc_ioctl(macp->m_driver, q, mp);

	RW_EXIT(&vnetp->vsw_fp_rw);
}

#else

static void
vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	vnet_t	*vnetp;

	vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		miocnak(q, mp, 0, EINVAL);
		return;
	}

	/* ioctl support only for debugging */
	miocnak(q, mp, 0, ENOTSUP);
}

#endif
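/*
 * Illustrative sketch, not part of the driver: how a kernel test harness
 * that already holds a write queue and an M_IOCTL message might exercise
 * the debug ioctls above. vnet_dbg_force_link() is hypothetical and is
 * only meaningful when the driver is built with -DVNET_IOC_DEBUG.
 */
#ifdef VNET_IOC_DEBUG
static void
vnet_dbg_force_link(vnet_t *vnetp, queue_t *q, mblk_t *mp, boolean_t up)
{
	struct iocblk	*iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;

	/* Select the forced link state and dispatch through the entry point */
	iocp->ioc_cmd = up ? VNET_FORCE_LINK_UP : VNET_FORCE_LINK_DOWN;
	vnet_m_ioctl(vnetp, q, mp);
}
#endif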