/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/callb.h>
#include <sys/stream.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/debug.h>
#include <sys/ethernet.h>
#include <sys/dlpi.h>
#include <net/if.h>
#include <sys/mac_provider.h>
#include <sys/mac_client.h>
#include <sys/mac_client_priv.h>
#include <sys/mac_ether.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/note.h>
#include <sys/atomic.h>
#include <sys/vnet.h>
#include <sys/vlan.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/dds.h>
#include <sys/strsubr.h>
#include <sys/taskq.h>

/*
 * Function prototypes.
 */

/* DDI entrypoints */
static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);

/* MAC entrypoints */
static int vnet_m_stat(void *, uint_t, uint64_t *);
static int vnet_m_start(void *);
static void vnet_m_stop(void *);
static int vnet_m_promisc(void *, boolean_t);
static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
static int vnet_m_unicst(void *, const uint8_t *);
mblk_t *vnet_m_tx(void *, mblk_t *);
static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
#ifdef	VNET_IOC_DEBUG
static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
#endif
static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
    const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
    mac_group_info_t *infop, mac_group_handle_t handle);
static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
static int vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
    uint64_t *val);
static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
static int vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
    uint64_t *val);
static int vnet_ring_enable_intr(void *arg);
static int vnet_ring_disable_intr(void *arg);
static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
static int vnet_addmac(void *arg, const uint8_t *mac_addr);
static int vnet_remmac(void *arg, const uint8_t *mac_addr);

/* vnet internal functions */
static int vnet_unattach(vnet_t *vnetp);
static void vnet_ring_grp_init(vnet_t *vnetp);
static void vnet_ring_grp_uninit(vnet_t *vnetp);
static int vnet_mac_register(vnet_t *);
static int vnet_read_mac_address(vnet_t *vnetp);
static int vnet_bind_vgenring(vnet_res_t *vresp);
static void vnet_unbind_vgenring(vnet_res_t *vresp);
static int vnet_bind_hwrings(vnet_t *vnetp);
static void vnet_unbind_hwrings(vnet_t *vnetp);
static int vnet_bind_rings(vnet_res_t *vresp);
static void vnet_unbind_rings(vnet_res_t *vresp);
static int vnet_hio_stat(void *, uint_t, uint64_t *);
static int vnet_hio_start(void *);
static void vnet_hio_stop(void *);
mblk_t *vnet_hio_tx(void *, mblk_t *);

/* Forwarding database (FDB) routines */
static void vnet_fdb_create(vnet_t *vnetp);
static void vnet_fdb_destroy(vnet_t *vnetp);
static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);

static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
static void vnet_tx_update(vio_net_handle_t vrh);
static void vnet_res_start_task(void *arg);
static void vnet_start_resources(vnet_t *vnetp);
static void vnet_stop_resources(vnet_t *vnetp);
static void vnet_dispatch_res_task(vnet_t *vnetp);
static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
static void vnet_tx_notify_thread(void *);

/* Exported to vnet_gen */
int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
void vnet_dds_cleanup_hio(vnet_t *vnetp);

static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
    vnet_res_t *vresp);
static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
static void vnet_hio_destroy_kstats(kstat_t *ksp);

/* Exported to vnet_dds */
int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
void vnet_hio_mac_cleanup(vnet_t *vnetp);

/* Externs that are imported from vnet_gen */
extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
    const uint8_t *macaddr, void **vgenhdl);
extern int vgen_init_mdeg(void *arg);
extern void vgen_uninit(void *arg);
extern int vgen_dds_tx(void *arg, void *dmsg);
extern int vgen_enable_intr(void *arg);
extern int vgen_disable_intr(void *arg);
extern mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);

/* Externs that are imported from vnet_dds */
extern void vdds_mod_init(void);
extern void vdds_mod_fini(void);
extern int vdds_init(vnet_t *vnetp);
extern void vdds_cleanup(vnet_t *vnetp);
extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
extern void vdds_cleanup_hybrid_res(void *arg);
extern void vdds_cleanup_hio(vnet_t *vnetp);

extern pri_t minclsyspri;

#define	DRV_NAME	"vnet"

#define	VNET_FDBE_REFHOLD(p)						\
{									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
}

#define	VNET_FDBE_REFRELE(p)						\
{									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
}

#ifdef	VNET_IOC_DEBUG
#define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL | MC_GETCAPAB)
#else
#define	VNET_M_CALLBACK_FLAGS	(MC_GETCAPAB)
#endif

static mac_callbacks_t vnet_m_callbacks = {
	VNET_M_CALLBACK_FLAGS,
	vnet_m_stat,
	vnet_m_start,
	vnet_m_stop,
	vnet_m_promisc,
	vnet_m_multicst,
	NULL,	/* m_unicst entry must be NULL while rx rings are exposed */
	NULL,	/* m_tx entry must be NULL while tx rings are exposed */
	NULL,
	vnet_m_ioctl,
	vnet_m_capab,
	NULL
};

static mac_callbacks_t vnet_hio_res_callbacks = {
	0,
	vnet_hio_stat,
	vnet_hio_start,
	vnet_hio_stop,
	NULL,
	NULL,
	NULL,
	vnet_hio_tx,
	NULL,
	NULL,
	NULL
};

/*
 * Linked list of "vnet_t" structures - one per instance.
 */
static vnet_t	*vnet_headp = NULL;
static krwlock_t vnet_rw;

/* Tunables */
uint32_t vnet_num_descriptors = VNET_NUM_DESCRIPTORS;

/*
 * Configure tx serialization in mac layer for the vnet device. This tunable
 * should be enabled to improve performance only if HybridIO is configured for
 * the vnet device.
 */
boolean_t vnet_mac_tx_serialize = B_FALSE;

/* Configure enqueuing at Rx soft rings in mac layer for the vnet device */
boolean_t vnet_mac_rx_queuing = B_TRUE;

/*
 * Set this to non-zero to enable additional internal receive buffer pools
 * based on the MTU of the device for better performance at the cost of more
 * memory consumption. This is turned off by default, to use allocb(9F) for
 * receive buffer allocations of sizes > 2K.
 */
boolean_t vnet_jumbo_rxpools = B_FALSE;

/* # of chains in fdb hash table */
uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;

/* Internal tunables */
uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in vsw and also other vnets connected to
 * the same vsw.
 */
uint16_t	vnet_default_vlan_id = 1;

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vnet_fdbe_refcnt_delay = 10;

static struct ether_addr etherbroadcastaddr = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

/* mac_open() retry delay in usec */
uint32_t vnet_mac_open_delay = 100;	/* 0.1 ms */

/* max # of mac_open() retries */
uint32_t vnet_mac_open_retries = 100;
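/*
 * Illustrative note (not part of the original source): the tunables above are
 * plain kernel globals, so they are typically adjusted either persistently
 * from /etc/system or at runtime with mdb(1). A minimal sketch, assuming the
 * stock tunable names in this file:
 *
 *	(in /etc/system; takes effect on the next boot)
 *	set vnet:vnet_mac_tx_serialize = 1
 *	set vnet:vnet_ethermtu = 9000
 *
 *	(at runtime, with mdb -kw)
 *	> vnet_ethermtu/W 0t9000
 */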
/*
 * Property names
 */
static char macaddr_propname[] = "local-mac-address";

/*
 * This is the string displayed by modinfo(1m).
 */
static char vnet_ident[] = "vnet driver";

extern struct mod_ops mod_driverops;

static struct cb_ops cb_vnetops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	(int)(D_MP)		/* cb_flag */
};

static struct dev_ops vnetops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vnetattach,		/* devo_attach */
	vnetdetach,		/* devo_detach */
	nodev,			/* devo_reset */
	&cb_vnetops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_supported,	/* devo_quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	vnet_ident,		/* ID string */
	&vnetops		/* driver specific ops */
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};

#ifdef DEBUG

#define	DEBUG_PRINTF	debug_printf

/*
 * Print debug messages - set to 0xf to enable all msgs
 */
int vnet_dbglevel = 0x8;

static void
debug_printf(const char *fname, void *arg, const char *fmt, ...)
{
	char    buf[512];
	va_list ap;
	vnet_t *vnetp = (vnet_t *)arg;
	char   *bufp = buf;

	if (vnetp == NULL) {
		(void) sprintf(bufp, "%s: ", fname);
		bufp += strlen(bufp);
	} else {
		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
		bufp += strlen(bufp);
	}
	va_start(ap, fmt);
	(void) vsprintf(bufp, fmt, ap);
	va_end(ap);
	cmn_err(CE_CONT, "%s\n", buf);
}

#endif

/* _init(9E): initialize the loadable module */
int
_init(void)
{
	int status;

	DBG1(NULL, "enter\n");

	mac_init_ops(&vnetops, "vnet");
	status = mod_install(&modlinkage);
	if (status != 0) {
		mac_fini_ops(&vnetops);
	}
	vdds_mod_init();
	DBG1(NULL, "exit(%d)\n", status);
	return (status);
}

/* _fini(9E): prepare the module for unloading. */
int
_fini(void)
{
	int status;

	DBG1(NULL, "enter\n");

	status = mod_remove(&modlinkage);
	if (status != 0)
		return (status);
	mac_fini_ops(&vnetops);
	vdds_mod_fini();

	DBG1(NULL, "exit(%d)\n", status);
	return (status);
}

/* _info(9E): return information about the loadable module */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * attach(9E): attach a device to the system.
 * called once for each instance of the device on the system.
 */
static int
vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	vnet_t			*vnetp;
	int			status;
	int			instance;
	uint64_t		reg;
	char			qname[TASKQ_NAMELEN];
	vnet_attach_progress_t	attach_progress;

	attach_progress = AST_init;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
	case DDI_PM_RESUME:
	default:
		goto vnet_attach_fail;
	}

	instance = ddi_get_instance(dip);
	DBG1(NULL, "instance(%d) enter\n", instance);

	/* allocate vnet_t and mac_t structures */
	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
	vnetp->dip = dip;
	vnetp->instance = instance;
	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
	attach_progress |= AST_vnet_alloc;

	vnet_ring_grp_init(vnetp);
	attach_progress |= AST_ring_init;

	status = vdds_init(vnetp);
	if (status != 0) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_vdds_init;

	/* setup links to vnet_t from both devinfo and mac_t */
	ddi_set_driver_private(dip, (caddr_t)vnetp);

	/* read the mac address */
	status = vnet_read_mac_address(vnetp);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_read_macaddr;

	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "reg", -1);
	if (reg == -1) {
		goto vnet_attach_fail;
	}
	vnetp->reg = reg;

	vnet_fdb_create(vnetp);
	attach_progress |= AST_fdbh_alloc;

	(void) snprintf(qname, TASKQ_NAMELEN, "vres_taskq%d", instance);
	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
	    TASKQ_DEFAULTPRI, 0)) == NULL) {
		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
		    instance);
		goto vnet_attach_fail;
	}
	attach_progress |= AST_taskq_create;

	/* add to the list of vnet devices */
	WRITE_ENTER(&vnet_rw);
	vnetp->nextp = vnet_headp;
	vnet_headp = vnetp;
	RW_EXIT(&vnet_rw);

	attach_progress |= AST_vnet_list;
	/*
	 * Initialize the generic vnet plugin which provides communication via
	 * sun4v LDC (logical domain channel) based resources. This involves 2
	 * steps; first, vgen_init() is invoked to read the various properties
	 * of the vnet device from its MD node (including its mtu, which is
	 * needed for mac_register()) and obtain a handle to the vgen layer.
	 * After mac_register() is done and we have a mac handle, we then
	 * invoke vgen_init_mdeg() which registers with the MD event
	 * generator (mdeg) framework to allow LDC resource notifications.
	 * Note: this sequence also allows us to report the correct default #
	 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked
	 * in the context of mac_register(); and avoids conflicting with
	 * dynamic pseudo rx rings which get added/removed as a result of mdeg
	 * events in vgen.
	 */
	status = vgen_init(vnetp, reg, vnetp->dip,
	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
	if (status != DDI_SUCCESS) {
		DERR(vnetp, "vgen_init() failed\n");
		goto vnet_attach_fail;
	}
	attach_progress |= AST_vgen_init;

	status = vnet_mac_register(vnetp);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	vnetp->link_state = LINK_STATE_UNKNOWN;
	attach_progress |= AST_macreg;

	status = vgen_init_mdeg(vnetp->vgenhdl);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_init_mdeg;

	vnetp->attach_progress = attach_progress;

	DBG1(NULL, "instance(%d) exit\n", instance);
	return (DDI_SUCCESS);

vnet_attach_fail:
	vnetp->attach_progress = attach_progress;
	status = vnet_unattach(vnetp);
	ASSERT(status == 0);
	return (DDI_FAILURE);
}

/*
 * detach(9E): detach a device from the system.
 */
static int
vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vnet_t		*vnetp;
	int		instance;

	instance = ddi_get_instance(dip);
	DBG1(NULL, "instance(%d) enter\n", instance);

	vnetp = ddi_get_driver_private(dip);
	if (vnetp == NULL) {
		goto vnet_detach_fail;
	}

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
	case DDI_PM_SUSPEND:
	default:
		goto vnet_detach_fail;
	}

	if (vnet_unattach(vnetp) != 0) {
		goto vnet_detach_fail;
	}

	return (DDI_SUCCESS);

vnet_detach_fail:
	return (DDI_FAILURE);
}
/*
 * Common routine to handle vnetattach() failure and vnetdetach(). Note that
 * the only reason this function could fail is if mac_unregister() fails.
 * Otherwise, this function must ensure that all resources are freed and return
 * success.
 */
static int
vnet_unattach(vnet_t *vnetp)
{
	vnet_attach_progress_t	attach_progress;

	attach_progress = vnetp->attach_progress;

	/*
	 * Disable the mac device in the gldv3 subsystem. This can fail, in
	 * particular if there are still any open references to this mac
	 * device; in which case we just return failure without continuing to
	 * detach further.
	 * If it succeeds, we then invoke vgen_uninit() which should unregister
	 * any pseudo rings registered with the mac layer. Note we keep the
	 * AST_macreg flag on, so we can unregister with the mac layer at
	 * the end of this routine.
	 */
	if (attach_progress & AST_macreg) {
		if (mac_disable(vnetp->mh) != 0) {
			return (1);
		}
	}

	/*
	 * Now that we have disabled the device, we must finish all other steps
	 * and successfully return from this function; otherwise we will end up
	 * leaving the device in a broken/unusable state.
	 *
	 * First, release any hybrid resources assigned to this vnet device.
	 */
	if (attach_progress & AST_vdds_init) {
		vdds_cleanup(vnetp);
		attach_progress &= ~AST_vdds_init;
	}

	/*
	 * Uninit vgen. This stops further mdeg callbacks to this vnet
	 * device and/or its ports; and detaches any existing ports.
	 */
	if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
		vgen_uninit(vnetp->vgenhdl);
		attach_progress &= ~AST_vgen_init;
		attach_progress &= ~AST_init_mdeg;
	}

	/* Destroy the taskq. */
	if (attach_progress & AST_taskq_create) {
		ddi_taskq_destroy(vnetp->taskqp);
		attach_progress &= ~AST_taskq_create;
	}

	/* Destroy fdb. */
	if (attach_progress & AST_fdbh_alloc) {
		vnet_fdb_destroy(vnetp);
		attach_progress &= ~AST_fdbh_alloc;
	}

	/* Remove from the device list */
	if (attach_progress & AST_vnet_list) {
		vnet_t		**vnetpp;
		/* unlink from instance(vnet_t) list */
		WRITE_ENTER(&vnet_rw);
		for (vnetpp = &vnet_headp; *vnetpp;
		    vnetpp = &(*vnetpp)->nextp) {
			if (*vnetpp == vnetp) {
				*vnetpp = vnetp->nextp;
				break;
			}
		}
		RW_EXIT(&vnet_rw);
		attach_progress &= ~AST_vnet_list;
	}

	if (attach_progress & AST_ring_init) {
		vnet_ring_grp_uninit(vnetp);
		attach_progress &= ~AST_ring_init;
	}

	if (attach_progress & AST_macreg) {
		VERIFY(mac_unregister(vnetp->mh) == 0);
		vnetp->mh = NULL;
		attach_progress &= ~AST_macreg;
	}

	if (attach_progress & AST_vnet_alloc) {
		rw_destroy(&vnetp->vrwlock);
		rw_destroy(&vnetp->vsw_fp_rw);
		attach_progress &= ~AST_vnet_alloc;
		KMEM_FREE(vnetp);
	}

	return (0);
}

/* enable the device for transmit/receive */
static int
vnet_m_start(void *arg)
{
	vnet_t		*vnetp = arg;

	DBG1(vnetp, "enter\n");

	WRITE_ENTER(&vnetp->vrwlock);
	vnetp->flags |= VNET_STARTED;
	vnet_start_resources(vnetp);
	RW_EXIT(&vnetp->vrwlock);

	DBG1(vnetp, "exit\n");
	return (VNET_SUCCESS);
}

/* stop transmit/receive for the device */
static void
vnet_m_stop(void *arg)
{
	vnet_t		*vnetp = arg;

	DBG1(vnetp, "enter\n");

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->flags & VNET_STARTED) {
		/*
		 * Set the flags appropriately; this should prevent starting of
		 * any new resources that are added (see
		 * vnet_res_start_task()), while we release the vrwlock in
		 * vnet_stop_resources() before stopping each resource.
		 */
		vnetp->flags &= ~VNET_STARTED;
		vnetp->flags |= VNET_STOPPING;
		vnet_stop_resources(vnetp);
		vnetp->flags &= ~VNET_STOPPING;
	}
	RW_EXIT(&vnetp->vrwlock);

	DBG1(vnetp, "exit\n");
}
/* set the unicast mac address of the device */
static int
vnet_m_unicst(void *arg, const uint8_t *macaddr)
{
	_NOTE(ARGUNUSED(macaddr))

	vnet_t *vnetp = arg;

	DBG1(vnetp, "enter\n");
	/*
	 * NOTE: setting mac address dynamically is not supported.
	 */
	DBG1(vnetp, "exit\n");

	return (VNET_FAILURE);
}

/* enable/disable a multicast address */
static int
vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	_NOTE(ARGUNUSED(add, mca))

	vnet_t		*vnetp = arg;
	vnet_res_t	*vresp;
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	int		rv = VNET_SUCCESS;

	DBG1(vnetp, "enter\n");

	READ_ENTER(&vnetp->vsw_fp_rw);
	if (vnetp->vsw_fp == NULL) {
		RW_EXIT(&vnetp->vsw_fp_rw);
		return (EAGAIN);
	}
	VNET_FDBE_REFHOLD(vnetp->vsw_fp);
	RW_EXIT(&vnetp->vsw_fp_rw);

	vresp = vnetp->vsw_fp;
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	rv = cbp->mc_multicst(macp->m_driver, add, mca);

	VNET_FDBE_REFRELE(vnetp->vsw_fp);

	DBG1(vnetp, "exit(%d)\n", rv);
	return (rv);
}

/* set or clear promiscuous mode on the device */
static int
vnet_m_promisc(void *arg, boolean_t on)
{
	_NOTE(ARGUNUSED(on))

	vnet_t *vnetp = arg;

	DBG1(vnetp, "enter\n");
	/*
	 * NOTE: setting promiscuous mode is not supported; just return
	 * success.
	 */
	DBG1(vnetp, "exit\n");
	return (VNET_SUCCESS);
}
/*
 * Transmit a chain of packets. This function provides switching functionality
 * based on the destination mac address to reach other guests (within ldoms) or
 * external hosts.
 */
mblk_t *
vnet_tx_ring_send(void *arg, mblk_t *mp)
{
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_tx_ring_stats_t	*statsp;
	vnet_t			*vnetp;
	vnet_res_t		*vresp;
	mblk_t			*next;
	mblk_t			*resid_mp;
	mac_register_t		*macp;
	struct ether_header	*ehp;
	boolean_t		is_unicast;
	boolean_t		is_pvid;	/* non-default pvid ? */
	boolean_t		hres;		/* Hybrid resource ? */
	void			*tx_arg;
	size_t			size;

	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
	statsp = &tx_ringp->tx_ring_stats;
	vnetp = (vnet_t *)tx_ringp->vnetp;
	DBG1(vnetp, "enter\n");
	ASSERT(mp != NULL);

	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;

	while (mp != NULL) {

		next = mp->b_next;
		mp->b_next = NULL;

		/* update stats */
		size = msgsize(mp);

		/*
		 * Find fdb entry for the destination
		 * and hold a reference to it.
		 */
		ehp = (struct ether_header *)mp->b_rptr;
		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
		if (vresp != NULL) {

			/*
			 * Destination found in FDB.
			 * The destination is a vnet device within ldoms
			 * and directly reachable, invoke the tx function
			 * in the fdb entry.
			 */
			macp = &vresp->macreg;
			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed */
				mp->b_next = next;
				break;
			}
		} else {
			is_unicast = !(IS_BROADCAST(ehp) ||
			    (IS_MULTICAST(ehp)));
			/*
			 * Destination is not in FDB.
			 * If the destination is broadcast or multicast,
			 * then forward the packet to vswitch.
			 * If a Hybrid resource is available, then send the
			 * unicast packet via the hybrid resource, otherwise
			 * forward it to vswitch.
			 */
			READ_ENTER(&vnetp->vsw_fp_rw);

			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
				vresp = vnetp->hio_fp;
				hres = B_TRUE;
			} else {
				vresp = vnetp->vsw_fp;
				hres = B_FALSE;
			}
			if (vresp == NULL) {
				/*
				 * no fdb entry to vsw? drop the packet.
				 */
				RW_EXIT(&vnetp->vsw_fp_rw);
				freemsg(mp);
				mp = next;
				continue;
			}

			/* ref hold the fdb entry to vsw */
			VNET_FDBE_REFHOLD(vresp);

			RW_EXIT(&vnetp->vsw_fp_rw);

			/*
			 * In the case of a hybrid resource we need to insert
			 * the tag for the pvid case here; unlike packets that
			 * are destined to a vnet/vsw, in which case the vgen
			 * layer does the tagging before sending it over ldc.
			 */
			if (hres == B_TRUE) {
				/*
				 * Determine if the frame being transmitted
				 * over the hybrid resource is untagged. If so,
				 * insert the tag before transmitting.
				 */
				if (is_pvid == B_TRUE &&
				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {

					mp = vnet_vlan_insert_tag(mp,
					    vnetp->pvid);
					if (mp == NULL) {
						VNET_FDBE_REFRELE(vresp);
						mp = next;
						continue;
					}

				}

				macp = &vresp->macreg;
				tx_arg = tx_ringp;
			} else {
				macp = &vresp->macreg;
				tx_arg = macp->m_driver;
			}
			resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed */
				mp->b_next = next;
				break;
			}
		}

		statsp->obytes += size;
		statsp->opackets++;
		mp = next;
	}

	DBG1(vnetp, "exit\n");
	return (mp);
}

/* get statistics from the device */
int
vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnet_t		*vnetp = arg;
	vnet_res_t	*vresp;
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	uint64_t	val_total = 0;

	DBG1(vnetp, "enter\n");

	/*
	 * get the specified statistic from each transport and return the
	 * aggregate val. This obviously only works for counters.
	 */
	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
		return (ENOTSUP);
	}

	READ_ENTER(&vnetp->vrwlock);
	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
		macp = &vresp->macreg;
		cbp = macp->m_callbacks;
		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
			val_total += *val;
	}
	RW_EXIT(&vnetp->vrwlock);

	*val = val_total;

	DBG1(vnetp, "exit\n");
	return (0);
}
static void
vnet_ring_grp_init(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			i;

	tx_grp = &vnetp->tx_grp[0];
	tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
	    VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
	for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
		tx_ringp[i].state |= VNET_TXRING_SHARED;
	}
	tx_grp->rings = tx_ringp;
	tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
	mutex_init(&tx_grp->flowctl_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&tx_grp->flowctl_cv, NULL, CV_DRIVER, NULL);
	tx_grp->flowctl_thread = thread_create(NULL, 0,
	    vnet_tx_notify_thread, tx_grp, 0, &p0, TS_RUN, minclsyspri);

	rx_grp = &vnetp->rx_grp[0];
	rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
	rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
	rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
	    rx_grp->max_ring_cnt, KM_SLEEP);

	/*
	 * Setup the first 3 Pseudo RX Rings that are reserved;
	 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
	 */
	rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
	rx_ringp[0].index = 0;
	rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
	rx_ringp[1].index = 1;
	rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
	rx_ringp[2].index = 2;

	rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	rx_grp->rings = rx_ringp;

	for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	    i < rx_grp->max_ring_cnt; i++) {
		rx_ringp = &rx_grp->rings[i];
		rx_ringp->state = VNET_RXRING_FREE;
		rx_ringp->index = i;
	}
}

static void
vnet_ring_grp_uninit(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_tx_group_t	*tx_grp;
	kt_did_t		tid = 0;

	tx_grp = &vnetp->tx_grp[0];

	/* Inform tx_notify_thread to exit */
	mutex_enter(&tx_grp->flowctl_lock);
	if (tx_grp->flowctl_thread != NULL) {
		tid = tx_grp->flowctl_thread->t_did;
		tx_grp->flowctl_done = B_TRUE;
		cv_signal(&tx_grp->flowctl_cv);
	}
	mutex_exit(&tx_grp->flowctl_lock);
	if (tid != 0)
		thread_join(tid);

	if (tx_grp->rings != NULL) {
		ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
		kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
		    tx_grp->ring_cnt);
		tx_grp->rings = NULL;
	}

	rx_grp = &vnetp->rx_grp[0];
	if (rx_grp->rings != NULL) {
		ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
		ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
		kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
		    rx_grp->max_ring_cnt);
		rx_grp->rings = NULL;
	}
}

static vnet_pseudo_rx_ring_t *
vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	int			index;

	rx_grp = &vnetp->rx_grp[0];
	WRITE_ENTER(&rx_grp->lock);

	if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
		/* no rings available */
		RW_EXIT(&rx_grp->lock);
		return (NULL);
	}

	for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	    index < rx_grp->max_ring_cnt; index++) {
		rx_ringp = &rx_grp->rings[index];
		if (rx_ringp->state == VNET_RXRING_FREE) {
			rx_ringp->state |= VNET_RXRING_INUSE;
			rx_grp->ring_cnt++;
			break;
		}
	}

	RW_EXIT(&rx_grp->lock);
	return (rx_ringp);
}

static void
vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
{
	vnet_pseudo_rx_group_t	*rx_grp;

	ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
	rx_grp = &vnetp->rx_grp[0];
	WRITE_ENTER(&rx_grp->lock);

	if (ringp->state != VNET_RXRING_FREE) {
		ringp->state = VNET_RXRING_FREE;
		ringp->handle = NULL;
		rx_grp->ring_cnt--;
	}

	RW_EXIT(&rx_grp->lock);
}
/* wrapper function for mac_register() */
static int
vnet_mac_register(vnet_t *vnetp)
{
	mac_register_t	*macp;
	int		err;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
		return (DDI_FAILURE);
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vnetp;
	macp->m_dip = vnetp->dip;
	macp->m_src_addr = vnetp->curr_macaddr;
	macp->m_callbacks = &vnet_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = vnetp->mtu;
	macp->m_margin = VLAN_TAGSZ;

	macp->m_v12n = MAC_VIRT_LEVEL1;

	/*
	 * Finally, we're ready to register ourselves with the MAC layer
	 * interface; if this succeeds, we're all ready to start()
	 */
	err = mac_register(macp, &vnetp->mh);
	mac_free(macp);
	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
}

/* read the mac address of the device */
static int
vnet_read_mac_address(vnet_t *vnetp)
{
	uchar_t		*macaddr;
	uint32_t	size;
	int		rv;

	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
		    macaddr_propname, rv);
		return (DDI_FAILURE);
	}
	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
	ddi_prop_free(macaddr);

	return (DDI_SUCCESS);
}

static void
vnet_fdb_create(vnet_t *vnetp)
{
	char	hashname[MAXNAMELEN];

	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
	    vnetp->instance);
	vnetp->fdb_nchains = vnet_fdb_nchains;
	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname,
	    vnetp->fdb_nchains, mod_hash_null_valdtor, sizeof (void *));
}

static void
vnet_fdb_destroy(vnet_t *vnetp)
{
	/* destroy fdb-hash-table */
	if (vnetp->fdb_hashp != NULL) {
		mod_hash_destroy_hash(vnetp->fdb_hashp);
		vnetp->fdb_hashp = NULL;
		vnetp->fdb_nchains = 0;
	}
}

/*
 * Add an entry into the fdb.
 */
void
vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
{
	uint64_t	addr = 0;
	int		rv;

	KEY_HASH(addr, vresp->rem_macaddr);

	/*
	 * If the entry being added corresponds to LDC_SERVICE resource,
	 * that is, vswitch connection, it is added to the hash and also
	 * the entry is cached, an additional reference count reflects
	 * this. The HYBRID resource is not added to the hash, but only
	 * cached, as it is only used for sending out packets for unknown
	 * unicast destinations.
	 */
	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
	    (vresp->refcnt = 1) : (vresp->refcnt = 0);

	/*
	 * Note: duplicate keys will be rejected by mod_hash.
	 */
	if (vresp->type != VIO_NET_RES_HYBRID) {
		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
		    (mod_hash_val_t)vresp);
		if (rv != 0) {
			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
			return;
		}
	}

	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
		/* Cache the fdb entry to vsw-port */
		WRITE_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->vsw_fp == NULL)
			vnetp->vsw_fp = vresp;
		RW_EXIT(&vnetp->vsw_fp_rw);
	} else if (vresp->type == VIO_NET_RES_HYBRID) {
		/* Cache the fdb entry to hybrid resource */
		WRITE_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->hio_fp == NULL)
			vnetp->hio_fp = vresp;
		RW_EXIT(&vnetp->vsw_fp_rw);
	}
}
/*
 * Remove an entry from fdb.
 */
static void
vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
{
	uint64_t	addr = 0;
	int		rv;
	uint32_t	refcnt;
	vnet_res_t	*tmp;

	KEY_HASH(addr, vresp->rem_macaddr);

	/*
	 * Remove the entry from fdb hash table.
	 * This prevents further references to this fdb entry.
	 */
	if (vresp->type != VIO_NET_RES_HYBRID) {
		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
		    (mod_hash_val_t *)&tmp);
		if (rv != 0) {
			/*
			 * As the resources are added to the hash only
			 * after they are started, this can occur if
			 * a resource unregisters before it is ever started.
			 */
			return;
		}
	}

	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
		WRITE_ENTER(&vnetp->vsw_fp_rw);

		ASSERT(tmp == vnetp->vsw_fp);
		vnetp->vsw_fp = NULL;

		RW_EXIT(&vnetp->vsw_fp_rw);
	} else if (vresp->type == VIO_NET_RES_HYBRID) {
		WRITE_ENTER(&vnetp->vsw_fp_rw);

		vnetp->hio_fp = NULL;

		RW_EXIT(&vnetp->vsw_fp_rw);
	}

	/*
	 * If there are threads already ref holding before the entry was
	 * removed from hash table, then wait for ref count to drop to zero.
	 */
	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
	    (refcnt = 1) : (refcnt = 0);
	while (vresp->refcnt > refcnt) {
		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
	}
}

/*
 * Search fdb for a given mac address. If an entry is found, hold
 * a reference to it and return the entry; else returns NULL.
 */
static vnet_res_t *
vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
{
	uint64_t	key = 0;
	vnet_res_t	*vresp;
	int		rv;

	KEY_HASH(key, addrp->ether_addr_octet);

	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);

	if (rv != 0)
		return (NULL);

	return (vresp);
}

/*
 * Callback function provided to mod_hash_find_cb(). After finding the fdb
 * entry corresponding to the key (macaddr), this callback will be invoked by
 * mod_hash_find_cb() to atomically increment the reference count on the fdb
 * entry before returning the found entry.
 */
static void
vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
{
	_NOTE(ARGUNUSED(key))
	VNET_FDBE_REFHOLD((vnet_res_t *)val);
}
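/*
 * Illustrative sketch (not part of the original source): the lookup/refcnt
 * protocol implemented above. vnet_fdbe_find() returns with a reference held
 * (taken atomically by vnet_fdbe_find_cb() while the hash bucket is locked),
 * and the caller must drop it with VNET_FDBE_REFRELE() when done; this is
 * the reference that vnet_fdbe_del() waits on before the entry can go away.
 * The helper name below is hypothetical.
 */
#ifdef	VNET_EXAMPLE_CODE	/* never defined; sketch only */
static void
vnet_fdbe_example_send(vnet_t *vnetp, struct ether_addr *dstp, mblk_t *mp)
{
	vnet_res_t	*vresp;

	vresp = vnet_fdbe_find(vnetp, dstp);	/* returns with ref held */
	if (vresp == NULL) {
		freemsg(mp);		/* no entry for this destination */
		return;
	}

	/* safe to use the resource; vnet_fdbe_del() waits for our ref */
	(void) vresp->macreg.m_callbacks->mc_tx(vresp->macreg.m_driver, mp);

	VNET_FDBE_REFRELE(vresp);	/* drop the ref when done */
}
#endif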
/*
 * Frames received that are tagged with the pvid of the vnet device must be
 * untagged before sending up the stack. This function walks the chain of rx
 * frames, untags any such frames and returns the updated chain.
 *
 * Arguments:
 *    pvid:  pvid of the vnet device for which packets are being received
 *    mp:    head of pkt chain to be validated and untagged
 *
 * Returns:
 *    mp:    head of updated chain of packets (returned via the mp argument)
 */
static void
vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
{
	struct ether_vlan_header	*evhp;
	mblk_t				*bp;
	mblk_t				*bpt;
	mblk_t				*bph;
	mblk_t				*bpn;

	bpn = bph = bpt = NULL;

	for (bp = *mp; bp != NULL; bp = bpn) {

		bpn = bp->b_next;
		bp->b_next = bp->b_prev = NULL;

		evhp = (struct ether_vlan_header *)bp->b_rptr;

		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {

			bp = vnet_vlan_remove_tag(bp);
			if (bp == NULL) {
				continue;
			}

		}

		/* build a chain of processed packets */
		if (bph == NULL) {
			bph = bpt = bp;
		} else {
			bpt->b_next = bp;
			bpt = bp;
		}

	}

	*mp = bph;
}

static void
vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
{
	vnet_res_t		*vresp = (vnet_res_t *)vrh;
	vnet_t			*vnetp = vresp->vnetp;
	vnet_pseudo_rx_ring_t	*ringp;

	if ((vnetp == NULL) || (vnetp->mh == 0)) {
		freemsgchain(mp);
		return;
	}

	ringp = vresp->rx_ringp;
	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
}

void
vnet_tx_update(vio_net_handle_t vrh)
{
	vnet_res_t		*vresp = (vnet_res_t *)vrh;
	vnet_t			*vnetp = vresp->vnetp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	int			i;

	if (vnetp == NULL || vnetp->mh == NULL) {
		return;
	}

	/*
	 * Currently, the tx hwring API (used to access rings that belong to
	 * a Hybrid IO resource) does not provide us a per ring flow ctrl
	 * update; also the pseudo rings are shared by the ports/ldcs in the
	 * vgen layer. Thus we can't figure out which pseudo ring is being
	 * re-enabled for transmits. To work around this, when we get a tx
	 * restart notification from below, we simply propagate that to all
	 * the tx pseudo rings registered with the mac layer above.
	 *
	 * There are a couple of side effects with this approach, but they are
	 * not harmful, as outlined below:
	 *
	 * A) We might send an invalid ring_update() for a ring that is not
	 * really flow controlled. This will not have any effect in the mac
	 * layer and packets will continue to be transmitted on that ring.
	 *
	 * B) We might end up clearing the flow control in the mac layer for
	 * a ring that is still flow controlled in the underlying resource.
	 * This will result in the mac layer restarting transmit, only to be
	 * flow controlled again on that ring.
	 */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
	}
}
/*
 * vnet_tx_notify_thread:
 *
 * vnet_tx_ring_update() callback function wakes up this thread when
 * it gets called. This thread will call mac_tx_ring_update() to
 * notify upper mac of flow control getting relieved. Note that
 * vnet_tx_ring_update() cannot call mac_tx_ring_update() directly
 * because vnet_tx_ring_update() is called from lower mac with
 * mi_rw_lock held and mac_tx_ring_update() would also try to grab
 * the same lock.
 */
static void
vnet_tx_notify_thread(void *arg)
{
	callb_cpr_t		cprinfo;
	vnet_pseudo_tx_group_t	*tx_grp = (vnet_pseudo_tx_group_t *)arg;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_t			*vnetp;
	int			i;

	CALLB_CPR_INIT(&cprinfo, &tx_grp->flowctl_lock, callb_generic_cpr,
	    "vnet_tx_notify_thread");

	mutex_enter(&tx_grp->flowctl_lock);
	while (!tx_grp->flowctl_done) {
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&tx_grp->flowctl_cv, &tx_grp->flowctl_lock);
		CALLB_CPR_SAFE_END(&cprinfo, &tx_grp->flowctl_lock);

		for (i = 0; i < tx_grp->ring_cnt; i++) {
			tx_ringp = &tx_grp->rings[i];
			if (tx_ringp->woken_up) {
				tx_ringp->woken_up = B_FALSE;
				vnetp = tx_ringp->vnetp;
				mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
			}
		}
	}
	/*
	 * The tx_grp is being destroyed, exit the thread.
	 */
	tx_grp->flowctl_thread = NULL;
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}

void
vnet_tx_ring_update(void *arg1, uintptr_t arg2)
{
	vnet_t			*vnetp = (vnet_t *)arg1;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			i;

	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		if (tx_ringp->hw_rh == (mac_ring_handle_t)arg2) {
			mutex_enter(&tx_grp->flowctl_lock);
			tx_ringp->woken_up = B_TRUE;
			cv_signal(&tx_grp->flowctl_cv);
			mutex_exit(&tx_grp->flowctl_lock);
			break;
		}
	}
}

/*
 * Update the new mtu of vnet into the mac layer. First check if the device has
 * been plumbed and if so fail the mtu update. Returns 0 on success.
 */
int
vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
{
	int	rv;

	if (vnetp == NULL || vnetp->mh == NULL) {
		return (EINVAL);
	}

	WRITE_ENTER(&vnetp->vrwlock);

	if (vnetp->flags & VNET_STARTED) {
		RW_EXIT(&vnetp->vrwlock);
		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
		    "update as the device is plumbed\n",
		    vnetp->instance);
		return (EBUSY);
	}

	/* update mtu in the mac layer */
	rv = mac_maxsdu_update(vnetp->mh, mtu);
	if (rv != 0) {
		RW_EXIT(&vnetp->vrwlock);
		cmn_err(CE_NOTE,
		    "!vnet%d: Unable to update mtu with mac layer\n",
		    vnetp->instance);
		return (EIO);
	}

	vnetp->mtu = mtu;

	RW_EXIT(&vnetp->vrwlock);

	return (0);
}

/*
 * Update the link state of vnet to the mac layer.
 */
void
vnet_link_update(vnet_t *vnetp, link_state_t link_state)
{
	if (vnetp == NULL || vnetp->mh == NULL) {
		return;
	}

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->link_state == link_state) {
		RW_EXIT(&vnetp->vrwlock);
		return;
	}
	vnetp->link_state = link_state;
	RW_EXIT(&vnetp->vrwlock);

	mac_link_update(vnetp->mh, link_state);
}
/*
 * vio_net_resource_reg -- An interface called to register a resource
 *	with vnet.
 *	macp -- a GLDv3 mac_register that has all the details of
 *		a resource and its callbacks etc.
 *	type -- resource type.
 *	local_macaddr -- resource's MAC address. This is used to
 *			 associate a resource with a corresponding vnet.
 *	remote_macaddr -- remote side MAC address. This is ignored for
 *			  the Hybrid resources.
 *	vhp -- A handle returned to the caller.
 *	vcb -- A set of callbacks provided to the callers.
 */
int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
    ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
    vio_net_callbacks_t *vcb)
{
	vnet_t		*vnetp;
	vnet_res_t	*vresp;

	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
	ether_copy(local_macaddr, vresp->local_macaddr);
	ether_copy(rem_macaddr, vresp->rem_macaddr);
	vresp->type = type;
	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));

	DBG1(NULL, "Resource Registering type=0x%X\n", type);

	READ_ENTER(&vnet_rw);
	vnetp = vnet_headp;
	while (vnetp != NULL) {
		if (VNET_MATCH_RES(vresp, vnetp)) {
			vresp->vnetp = vnetp;

			/* Setup kstats for hio resource */
			if (vresp->type == VIO_NET_RES_HYBRID) {
				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
				    "hio", vresp);
				if (vresp->ksp == NULL) {
					cmn_err(CE_NOTE, "!vnet%d: Cannot "
					    "create kstats for hio resource",
					    vnetp->instance);
				}
			}
			vnet_add_resource(vnetp, vresp);
			break;
		}
		vnetp = vnetp->nextp;
	}
	RW_EXIT(&vnet_rw);
	if (vresp->vnetp == NULL) {
		DWARN(NULL, "No vnet instance");
		kmem_free(vresp, sizeof (vnet_res_t));
		return (ENXIO);
	}

	*vhp = vresp;
	vcb->vio_net_rx_cb = vnet_rx;
	vcb->vio_net_tx_update = vnet_tx_update;
	vcb->vio_net_report_err = vnet_handle_res_err;

	/* Bind the resource to pseudo ring(s) */
	if (vnet_bind_rings(vresp) != 0) {
		(void) vnet_rem_resource(vnetp, vresp);
		vnet_hio_destroy_kstats(vresp->ksp);
		KMEM_FREE(vresp);
		return (1);
	}

	/* Dispatch a task to start resources */
	vnet_dispatch_res_task(vnetp);
	return (0);
}

/*
 * vio_net_resource_unreg -- An interface to unregister a resource.
 */
void
vio_net_resource_unreg(vio_net_handle_t vhp)
{
	vnet_res_t	*vresp = (vnet_res_t *)vhp;
	vnet_t		*vnetp = vresp->vnetp;

	DBG1(NULL, "Resource Unregistering hdl=0x%p", vhp);

	ASSERT(vnetp != NULL);
	/*
	 * Remove the resource from fdb; this ensures
	 * there are no references to the resource.
	 */
	vnet_fdbe_del(vnetp, vresp);

	vnet_unbind_rings(vresp);

	/* Now remove the resource from the list */
	(void) vnet_rem_resource(vnetp, vresp);

	vnet_hio_destroy_kstats(vresp->ksp);
	KMEM_FREE(vresp);
}

static void
vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
{
	WRITE_ENTER(&vnetp->vrwlock);
	vresp->nextp = vnetp->vres_list;
	vnetp->vres_list = vresp;
	RW_EXIT(&vnetp->vrwlock);
}

static vnet_res_t *
vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
{
	vnet_res_t	*vrp;

	WRITE_ENTER(&vnetp->vrwlock);
	if (vresp == vnetp->vres_list) {
		vnetp->vres_list = vresp->nextp;
	} else {
		vrp = vnetp->vres_list;
		while (vrp->nextp != NULL) {
			if (vrp->nextp == vresp) {
				vrp->nextp = vresp->nextp;
				break;
			}
			vrp = vrp->nextp;
		}
	}
	vresp->vnetp = NULL;
	vresp->nextp = NULL;

	RW_EXIT(&vnetp->vrwlock);

	return (vresp);
}
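/*
 * Illustrative sketch (not part of the original source): how a resource
 * provider such as vgen might use the registration interface above. The
 * helper name and the mac_register_t setup are hypothetical; the vio_net_*
 * types, the resource type constant, and the calls are the ones used in
 * this file.
 */
#ifdef	VNET_EXAMPLE_CODE	/* never defined; sketch only */
static int
example_reg_ldc_resource(mac_register_t *macp, ether_addr_t local_macaddr,
    ether_addr_t rem_macaddr)
{
	vio_net_handle_t	vhp;
	vio_net_callbacks_t	vcb;
	int			rv;

	/* macp carries the resource's mc_start/mc_stop/mc_tx callbacks */
	rv = vio_net_resource_reg(macp, VIO_NET_RES_LDC_SERVICE,
	    local_macaddr, rem_macaddr, &vhp, &vcb);
	if (rv != 0)
		return (rv);	/* e.g. ENXIO if no matching vnet instance */

	/*
	 * On success, the provider delivers packets and events through the
	 * returned callbacks, e.g.:
	 *	vcb.vio_net_rx_cb(vhp, mp);	-- pass up received packets
	 *	vcb.vio_net_tx_update(vhp);	-- tx flow control relieved
	 * and eventually tears down with vio_net_resource_unreg(vhp).
	 */
	return (0);
}
#endif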
/*
 * vnet_dds_rx -- an interface called by vgen to process DDS messages.
 */
void
vnet_dds_rx(void *arg, void *dmsg)
{
	vnet_t *vnetp = arg;

	vdds_process_dds_msg(vnetp, dmsg);
}

/*
 * vnet_send_dds_msg -- An interface provided to DDS to send
 *	DDS messages. This simply sends messages via vgen.
 */
int
vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
{
	int rv = EINVAL;	/* fail cleanly if vgen is not initialized */

	if (vnetp->vgenhdl != NULL) {
		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
	}
	return (rv);
}

/*
 * vnet_dds_cleanup_hio -- an interface called by vgen to cleanup hio
 *	resources.
 */
void
vnet_dds_cleanup_hio(vnet_t *vnetp)
{
	vdds_cleanup_hio(vnetp);
}

/*
 * vnet_handle_res_err -- A callback function called by a resource
 *	to report an error. For example, vgen can call to report
 *	an LDC down/reset event. This will trigger cleanup of associated
 *	Hybrid resource.
 */
/* ARGSUSED */
static void
vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
{
	vnet_res_t *vresp = (vnet_res_t *)vrh;
	vnet_t *vnetp = vresp->vnetp;

	if (vnetp == NULL) {
		return;
	}
	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
	    (vresp->type != VIO_NET_RES_HYBRID)) {
		return;
	}

	vdds_cleanup_hio(vnetp);
}

/*
 * vnet_dispatch_res_task -- A function to dispatch a task to start resources.
 */
static void
vnet_dispatch_res_task(vnet_t *vnetp)
{
	int rv;

	/*
	 * Dispatch the task. It could be the case that vnetp->flags does
	 * not have VNET_STARTED set. This is ok as vnet_res_start_task()
	 * can abort the task when the task is started. See related comments
	 * in vnet_m_stop() and vnet_stop_resources().
	 */
	rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
	    vnetp, DDI_NOSLEEP);
	if (rv != DDI_SUCCESS) {
		cmn_err(CE_WARN,
		    "vnet%d:Can't dispatch start resource task",
		    vnetp->instance);
	}
}

/*
 * vnet_res_start_task -- A taskq callback function that starts a resource.
 */
static void
vnet_res_start_task(void *arg)
{
	vnet_t *vnetp = arg;

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->flags & VNET_STARTED) {
		vnet_start_resources(vnetp);
	}
	RW_EXIT(&vnetp->vrwlock);
}

/*
 * vnet_start_resources -- starts all resources associated with
 *	a vnet.
 */
static void
vnet_start_resources(vnet_t *vnetp)
{
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	vnet_res_t	*vresp;
	int		rv;

	DBG1(vnetp, "enter\n");

	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));

	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
		/* skip if it is already started */
		if (vresp->flags & VNET_STARTED) {
			continue;
		}
		macp = &vresp->macreg;
		cbp = macp->m_callbacks;
		rv = cbp->mc_start(macp->m_driver);
		if (rv == 0) {
			/*
			 * Successfully started the resource, so now
			 * add it to the fdb.
			 */
			vresp->flags |= VNET_STARTED;
			vnet_fdbe_add(vnetp, vresp);
		}
	}

	DBG1(vnetp, "exit\n");
}

/*
 * vnet_stop_resources -- stop all resources associated with a vnet.
 */
static void
vnet_stop_resources(vnet_t *vnetp)
{
	vnet_res_t	*vresp;
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;

	DBG1(vnetp, "enter\n");

	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));

	for (vresp = vnetp->vres_list; vresp != NULL; ) {
		if (vresp->flags & VNET_STARTED) {
			/*
			 * Release the lock while invoking mc_stop() of the
			 * underlying resource. We hold a reference to this
			 * resource to prevent being removed from the list in
			 * vio_net_resource_unreg(). Note that new resources
			 * can be added to the head of the list while the lock
			 * is released, but they won't be started, as
			 * VNET_STARTED flag has been cleared for the vnet
			 * device in vnet_m_stop(). Also, while the lock is
			 * released a resource could be removed from the list
			 * in vio_net_resource_unreg(); but that is ok, as we
			 * re-acquire the lock and only then access the forward
			 * link (vresp->nextp) to continue with the next
			 * resource.
			 */
			vresp->flags &= ~VNET_STARTED;
			vresp->flags |= VNET_STOPPING;
			macp = &vresp->macreg;
			cbp = macp->m_callbacks;
			VNET_FDBE_REFHOLD(vresp);
			RW_EXIT(&vnetp->vrwlock);

			cbp->mc_stop(macp->m_driver);

			WRITE_ENTER(&vnetp->vrwlock);
			vresp->flags &= ~VNET_STOPPING;
			VNET_FDBE_REFRELE(vresp);
		}
		vresp = vresp->nextp;
	}
	DBG1(vnetp, "exit\n");
}

/*
 * Setup kstats for the HIO statistics.
 * NOTE: the synchronization for the statistics is the
 * responsibility of the caller.
 */
kstat_t *
vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
{
	kstat_t			*ksp;
	vnet_t			*vnetp = vresp->vnetp;
	vnet_hio_kstats_t	*hiokp;
	size_t			size;

	ASSERT(vnetp != NULL);
	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
	    KSTAT_TYPE_NAMED, size, 0);
	if (ksp == NULL) {
		return (NULL);
	}

	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
	kstat_named_init(&hiokp->ipackets,	"ipackets",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->ierrors,	"ierrors",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->opackets,	"opackets",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->oerrors,	"oerrors",
	    KSTAT_DATA_ULONG);

	/* MIB II kstat variables */
	kstat_named_init(&hiokp->rbytes,	"rbytes",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->obytes,	"obytes",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->multircv,	"multircv",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->multixmt,	"multixmt",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->brdcstrcv,	"brdcstrcv",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->brdcstxmt,	"brdcstxmt",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->norcvbuf,	"norcvbuf",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->noxmtbuf,	"noxmtbuf",
	    KSTAT_DATA_ULONG);

	ksp->ks_update = vnet_hio_update_kstats;
	ksp->ks_private = (void *)vresp;
	kstat_install(ksp);
	return (ksp);
}

/*
 * Destroy kstats.
 */
static void
vnet_hio_destroy_kstats(kstat_t *ksp)
{
	if (ksp != NULL)
		kstat_delete(ksp);
}
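/*
 * Illustrative note (not part of the original source): since the kstat above
 * is created with the module name DRV_NAME ("vnet") and the name passed in
 * by vio_net_resource_reg() ("hio"), the counters can be inspected from
 * userland with, e.g.:
 *
 *	# kstat -m vnet -n hio
 */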
/*
 * Update the kstats.
 */
static int
vnet_hio_update_kstats(kstat_t *ksp, int rw)
{
	vnet_t			*vnetp;
	vnet_res_t		*vresp;
	vnet_hio_stats_t	statsp;
	vnet_hio_kstats_t	*hiokp;

	vresp = (vnet_res_t *)ksp->ks_private;
	vnetp = vresp->vnetp;

	bzero(&statsp, sizeof (vnet_hio_stats_t));

	READ_ENTER(&vnetp->vsw_fp_rw);
	if (vnetp->hio_fp == NULL) {
		/* not using hio resources, just return */
		RW_EXIT(&vnetp->vsw_fp_rw);
		return (0);
	}
	VNET_FDBE_REFHOLD(vnetp->hio_fp);
	RW_EXIT(&vnetp->vsw_fp_rw);
	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
	VNET_FDBE_REFRELE(vnetp->hio_fp);

	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;

	if (rw == KSTAT_READ) {
		/* Link Input/Output stats */
		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
		hiokp->ipackets64.value.ull	= statsp.ipackets;
		hiokp->ierrors.value.ul		= statsp.ierrors;
		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
		hiokp->opackets64.value.ull	= statsp.opackets;
		hiokp->oerrors.value.ul		= statsp.oerrors;

		/* MIB II kstat variables */
		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
		hiokp->rbytes64.value.ull	= statsp.rbytes;
		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
		hiokp->obytes64.value.ull	= statsp.obytes;
		hiokp->multircv.value.ul	= statsp.multircv;
		hiokp->multixmt.value.ul	= statsp.multixmt;
		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
	} else {
		return (EACCES);
	}

	return (0);
}

static void
vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
{
	mac_register_t		*macp;
	mac_callbacks_t		*cbp;
	uint64_t		val;
	int			stat;

	/*
	 * get the specified statistics from the underlying nxge.
	 */
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
			switch (stat) {
			case MAC_STAT_IPACKETS:
				statsp->ipackets = val;
				break;

			case MAC_STAT_IERRORS:
				statsp->ierrors = val;
				break;

			case MAC_STAT_OPACKETS:
				statsp->opackets = val;
				break;

			case MAC_STAT_OERRORS:
				statsp->oerrors = val;
				break;

			case MAC_STAT_RBYTES:
				statsp->rbytes = val;
				break;

			case MAC_STAT_OBYTES:
				statsp->obytes = val;
				break;

			case MAC_STAT_MULTIRCV:
				statsp->multircv = val;
				break;

			case MAC_STAT_MULTIXMT:
				statsp->multixmt = val;
				break;

			case MAC_STAT_BRDCSTRCV:
				statsp->brdcstrcv = val;
				break;

			case MAC_STAT_BRDCSTXMT:
				statsp->brdcstxmt = val;
				break;

			case MAC_STAT_NOXMTBUF:
				statsp->noxmtbuf = val;
				break;

			case MAC_STAT_NORCVBUF:
				statsp->norcvbuf = val;
				break;
static void
vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
{
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	uint64_t	val;
	int		stat;

	/*
	 * Get the specified statistics from the underlying nxge.
	 */
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
			switch (stat) {
			case MAC_STAT_IPACKETS:
				statsp->ipackets = val;
				break;

			case MAC_STAT_IERRORS:
				statsp->ierrors = val;
				break;

			case MAC_STAT_OPACKETS:
				statsp->opackets = val;
				break;

			case MAC_STAT_OERRORS:
				statsp->oerrors = val;
				break;

			case MAC_STAT_RBYTES:
				statsp->rbytes = val;
				break;

			case MAC_STAT_OBYTES:
				statsp->obytes = val;
				break;

			case MAC_STAT_MULTIRCV:
				statsp->multircv = val;
				break;

			case MAC_STAT_MULTIXMT:
				statsp->multixmt = val;
				break;

			case MAC_STAT_BRDCSTRCV:
				statsp->brdcstrcv = val;
				break;

			case MAC_STAT_BRDCSTXMT:
				statsp->brdcstxmt = val;
				break;

			case MAC_STAT_NOXMTBUF:
				statsp->noxmtbuf = val;
				break;

			case MAC_STAT_NORCVBUF:
				statsp->norcvbuf = val;
				break;

			default:
				/*
				 * stats we are not interested in
				 */
				break;
			}
		}
	}
}
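/*
 * MAC capability entry point (mc_getcapab). Only MAC_CAPAB_RINGS is
 * advertised for now; see the detailed notes within on ring allocation
 * and synchronization.
 */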
static boolean_t
vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
{
	vnet_t	*vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		return (B_FALSE);
	}

	switch (cap) {

	case MAC_CAPAB_RINGS: {

		mac_capab_rings_t *cap_rings = cap_data;
		/*
		 * Rings Capability Notes:
		 * We advertise rings to make use of the rings framework in
		 * the gldv3 mac layer, to improve performance. This is
		 * specifically needed when a Hybrid resource (with multiple
		 * tx/rx hardware rings) is assigned to a vnet device. We
		 * also leverage this for the normal case when no Hybrid
		 * resource is assigned.
		 *
		 * Ring Allocation:
		 * - TX path:
		 * We expose a pseudo ring group with 2 pseudo tx rings (as
		 * currently HybridIO exports only 2 rings). In the normal
		 * case, transmit traffic that comes down to the driver
		 * through the mri_tx (vnet_tx_ring_send()) entry point goes
		 * through the distributed switching algorithm in vnet and
		 * gets transmitted over a port/LDC in the vgen layer to
		 * either the vswitch or a peer vnet. If and when a Hybrid
		 * resource is assigned to the vnet, we obtain the tx ring
		 * information of the Hybrid device (nxge) and map the pseudo
		 * rings 1:1 to the 2 hw tx rings. Traffic being sent over
		 * the Hybrid resource by the mac layer gets spread across
		 * both hw rings, as they are mapped to the 2 pseudo tx rings
		 * in vnet.
		 *
		 * - RX path:
		 * We expose a pseudo ring group with 3 pseudo rx rings
		 * (static rings) initially. The first (default) pseudo rx
		 * ring is reserved for the resource that connects to the
		 * vswitch service. The next 2 rings are reserved for a
		 * Hybrid resource that may be assigned to the vnet device.
		 * If and when a Hybrid resource is assigned to the vnet, we
		 * obtain the rx ring information of the Hybrid device (nxge)
		 * and map these pseudo rings 1:1 to the 2 hw rx rings. For
		 * each additional resource that connects to a peer vnet, we
		 * dynamically allocate a pseudo rx ring and map it to that
		 * resource, when the resource gets added; and the pseudo rx
		 * ring is dynamically registered with the upper mac layer.
		 * We do the reverse and unregister the ring with the mac
		 * layer when the resource gets removed.
		 *
		 * Synchronization notes:
		 * We don't need any lock to protect members of the ring
		 * structure, specifically ringp->hw_rh, in either the TX or
		 * the RX ring, as explained below.
		 * - TX ring:
		 * ring->hw_rh is initialized only when a Hybrid resource is
		 * associated; and gets referenced only in vnet_hio_tx(). The
		 * Hybrid resource itself is available in fdb only after tx
		 * hwrings are found and mapped; i.e., in
		 * vio_net_resource_reg() we call vnet_bind_rings() first and
		 * then call vnet_start_resources() which adds an entry to
		 * fdb. For traffic going over LDC resources, we don't
		 * reference ring->hw_rh at all.
		 * - RX ring:
		 * For rings mapped to a Hybrid resource, ring->hw_rh is
		 * initialized and only then do we add the rx callback for
		 * the underlying Hybrid resource; we disable callbacks
		 * before we unmap ring->hw_rh. For rings mapped to LDC
		 * resources, we stop the rx callbacks (in vgen) before we
		 * remove ring->hw_rh (vio_net_resource_unreg()).
		 * Also, we access ring->hw_rh in vnet_rx_ring_stat().
		 * Note that for rings mapped to a Hybrid resource, though
		 * the rings are statically registered with the mac layer,
		 * their hardware ring mapping (ringp->hw_rh) can be torn
		 * down in vnet_unbind_hwrings() while the kstat operation is
		 * in progress. To protect against this, we hold a reference
		 * to the resource in FDB; this ensures that the thread in
		 * vio_net_resource_unreg() waits for the reference to be
		 * dropped before unbinding the ring.
		 *
		 * We don't need to do this for rings mapped to LDC
		 * resources. These rings are registered/unregistered
		 * dynamically with the mac layer and so any attempt to
		 * unregister the ring while a kstat operation is in progress
		 * will block in mac_group_rem_ring(). This implicitly
		 * protects the resource (ringp->hw_rh) from disappearing.
		 */

		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for the rx grp is initialized in
			 * vnet_ring_grp_init(). Later, the ring_cnt gets
			 * updated dynamically whenever LDC resources are
			 * added or removed.
			 */
			cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		} else {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for the tx grp is initialized in
			 * vnet_ring_grp_init() and remains constant, as we
			 * do not support dynamic tx rings for now.
			 */
			cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			/*
			 * Transmit rings are not grouped; i.e., the number
			 * of transmit ring groups advertised should be set
			 * to 0.
			 */
			cap_rings->mr_gnum = 0;

			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		}
		return (B_TRUE);

	}

	default:
		break;

	}

	return (B_FALSE);
}
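/*
 * For reference, the resulting pseudo rx ring layout in rx group 0, as
 * described above:
 *
 *	ring 0		reserved for the LDC resource to the vswitch
 *	rings 1-2	reserved for the Hybrid (nxge) hw rx rings
 *	rings 3..n	allocated dynamically, one per LDC to a peer vnet
 *
 * The single tx group holds VNET_NUM_PSEUDO_TXRINGS rings, which are
 * shared between LDC resources and, when bound, the Hybrid hw tx rings.
 */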
/*
 * Callback function for the MAC layer to get ring information.
 */
static void
vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
    const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
{
	vnet_t	*vnetp = arg;

	switch (rtype) {

	case MAC_RING_TYPE_RX: {

		vnet_pseudo_rx_group_t	*rx_grp;
		vnet_pseudo_rx_ring_t	*rx_ringp;
		mac_intr_t		*mintr;

		/* We advertised only one RX group */
		ASSERT(g_index == 0);
		rx_grp = &vnetp->rx_grp[g_index];

		/* Check the current # of rings in the rx group */
		ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));

		/* Get the ring based on the index */
		rx_ringp = &rx_grp->rings[r_index];

		rx_ringp->handle = r_handle;
		/*
		 * Note: we don't need to save the incoming r_index in
		 * rx_ring, as vnet_ring_grp_init() would have initialized
		 * the index for each ring in the array.
		 */
		rx_ringp->grp = rx_grp;
		rx_ringp->vnetp = vnetp;

		mintr = &infop->mri_intr;
		mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
		mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
		mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;

		infop->mri_driver = (mac_ring_driver_t)rx_ringp;
		infop->mri_start = vnet_rx_ring_start;
		infop->mri_stop = vnet_rx_ring_stop;
		infop->mri_stat = vnet_rx_ring_stat;

		/* Set the poll function, as this is an rx ring */
		infop->mri_poll = vnet_rx_poll;
		/*
		 * The MAC_RING_RX_ENQUEUE bit needed to be set for nxge,
		 * which was not sending packet chains in interrupt context.
		 * For such drivers, packets are queued in Rx soft rings so
		 * that we get a chance to switch into a polling mode under
		 * backlog. This bug (not sending packet chains) has now been
		 * fixed. Once the performance impact is measured, this
		 * change will be removed.
		 */
		infop->mri_flags = (vnet_mac_rx_queuing ?
		    MAC_RING_RX_ENQUEUE : 0);
		break;
	}

	case MAC_RING_TYPE_TX: {
		vnet_pseudo_tx_group_t	*tx_grp;
		vnet_pseudo_tx_ring_t	*tx_ringp;

		/*
		 * No need to check the grp index; the mac layer passes -1
		 * for it.
		 */
		tx_grp = &vnetp->tx_grp[0];

		/* Check the # of rings in the tx group */
		ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));

		/* Get the ring based on the index */
		tx_ringp = &tx_grp->rings[r_index];

		tx_ringp->handle = r_handle;
		tx_ringp->index = r_index;
		tx_ringp->grp = tx_grp;
		tx_ringp->vnetp = vnetp;

		infop->mri_driver = (mac_ring_driver_t)tx_ringp;
		infop->mri_start = vnet_tx_ring_start;
		infop->mri_stop = vnet_tx_ring_stop;
		infop->mri_stat = vnet_tx_ring_stat;

		/* Set the transmit function, as this is a tx ring */
		infop->mri_tx = vnet_tx_ring_send;
		/*
		 * The MAC_RING_TX_SERIALIZE bit needs to be set while
		 * hybridIO is enabled to work around tx lock contention
		 * issues in nxge.
		 */
		infop->mri_flags = (vnet_mac_tx_serialize ?
		    MAC_RING_TX_SERIALIZE : 0);
		break;
	}

	default:
		break;
	}
}
/*
 * Callback function for the MAC layer to get group information.
 */
static void
vnet_get_group(void *arg, mac_ring_type_t type, const int index,
    mac_group_info_t *infop, mac_group_handle_t handle)
{
	vnet_t	*vnetp = (vnet_t *)arg;

	switch (type) {

	case MAC_RING_TYPE_RX:
	{
		vnet_pseudo_rx_group_t	*rx_grp;

		/* We advertised only one RX group */
		ASSERT(index == 0);

		rx_grp = &vnetp->rx_grp[index];
		rx_grp->handle = handle;
		rx_grp->index = index;
		rx_grp->vnetp = vnetp;

		infop->mgi_driver = (mac_group_driver_t)rx_grp;
		infop->mgi_start = NULL;
		infop->mgi_stop = NULL;
		infop->mgi_addmac = vnet_addmac;
		infop->mgi_remmac = vnet_remmac;
		infop->mgi_count = rx_grp->ring_cnt;

		break;
	}

	case MAC_RING_TYPE_TX:
	{
		vnet_pseudo_tx_group_t	*tx_grp;

		/* We advertised only one TX group */
		ASSERT(index == 0);

		tx_grp = &vnetp->tx_grp[index];
		tx_grp->handle = handle;
		tx_grp->index = index;
		tx_grp->vnetp = vnetp;

		infop->mgi_driver = (mac_group_driver_t)tx_grp;
		infop->mgi_start = NULL;
		infop->mgi_stop = NULL;
		infop->mgi_addmac = NULL;
		infop->mgi_remmac = NULL;
		infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;

		break;
	}

	default:
		break;

	}
}
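/*
 * mri_start entry point for a pseudo rx ring. For rings backed by LDC
 * resources, only the state flags are updated; for the rings reserved
 * for the Hybrid resource, the underlying hw ring is also started if it
 * has already been bound.
 */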
static int
vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	int			err;

	/*
	 * If this ring is mapped to a LDC resource, simply mark the state to
	 * indicate the ring is started and return.
	 */
	if ((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
		rx_ringp->gen_num = mr_gen_num;
		rx_ringp->state |= VNET_RXRING_STARTED;
		return (0);
	}

	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);

	/*
	 * This must be a ring reserved for a hwring. If the hwring is not
	 * bound yet, simply mark the state to indicate the ring is started
	 * and return. If and when a hybrid resource is activated for this
	 * vnet device, we will bind the hwring and start it then. If a
	 * hwring is already bound, start it now.
	 */
	if (rx_ringp->hw_rh == NULL) {
		rx_ringp->gen_num = mr_gen_num;
		rx_ringp->state |= VNET_RXRING_STARTED;
		return (0);
	}

	err = mac_hwring_start(rx_ringp->hw_rh);
	if (err == 0) {
		rx_ringp->gen_num = mr_gen_num;
		rx_ringp->state |= VNET_RXRING_STARTED;
	} else {
		err = ENXIO;
	}

	return (err);
}

static void
vnet_rx_ring_stop(mac_ring_driver_t arg)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;

	/*
	 * If this ring is mapped to a LDC resource, simply mark the state to
	 * indicate the ring is now stopped and return.
	 */
	if ((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
		rx_ringp->state &= ~VNET_RXRING_STARTED;
		return;
	}

	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);

	/*
	 * This must be a ring reserved for a hwring. If the hwring is not
	 * bound yet, simply mark the state to indicate the ring is stopped
	 * and return. If a hwring is already bound, stop it now.
	 */
	if (rx_ringp->hw_rh == NULL) {
		rx_ringp->state &= ~VNET_RXRING_STARTED;
		return;
	}

	mac_hwring_stop(rx_ringp->hw_rh);
	rx_ringp->state &= ~VNET_RXRING_STARTED;
}

static int
vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)rdriver;
	vnet_t			*vnetp = (vnet_t *)rx_ringp->vnetp;
	vnet_res_t		*vresp;
	mac_register_t		*macp;
	mac_callbacks_t		*cbp;

	/*
	 * Refer to vnet_m_capab() function for detailed comments on ring
	 * synchronization.
	 */
	if ((rx_ringp->state & VNET_RXRING_HYBRID) != 0) {
		READ_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->hio_fp == NULL) {
			RW_EXIT(&vnetp->vsw_fp_rw);
			return (0);
		}

		VNET_FDBE_REFHOLD(vnetp->hio_fp);
		RW_EXIT(&vnetp->vsw_fp_rw);
		(void) mac_hwring_getstat(rx_ringp->hw_rh, stat, val);
		VNET_FDBE_REFRELE(vnetp->hio_fp);
		return (0);
	}

	ASSERT((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0);
	vresp = (vnet_res_t *)rx_ringp->hw_rh;
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;

	cbp->mc_getstat(macp->m_driver, stat, val);

	return (0);
}
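/*
 * The mri_start/mri_stop entry points for pseudo tx rings below only
 * toggle VNET_TXRING_STARTED; there is no underlying hw ring to start or
 * stop here, as the Hybrid hw tx rings are bound and unbound separately
 * in vnet_bind_hwrings() and vnet_unbind_hwrings().
 */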
/* ARGSUSED */
static int
vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
{
	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;

	tx_ringp->state |= VNET_TXRING_STARTED;
	return (0);
}

static void
vnet_tx_ring_stop(mac_ring_driver_t arg)
{
	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;

	tx_ringp->state &= ~VNET_TXRING_STARTED;
}

static int
vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
{
	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)rdriver;
	vnet_tx_ring_stats_t	*statsp;

	statsp = &tx_ringp->tx_ring_stats;

	switch (stat) {
	case MAC_STAT_OPACKETS:
		*val = statsp->opackets;
		break;

	case MAC_STAT_OBYTES:
		*val = statsp->obytes;
		break;

	default:
		*val = 0;
		return (ENOTSUP);
	}

	return (0);
}

/*
 * Disable polling for a ring and enable its interrupt.
 */
static int
vnet_ring_enable_intr(void *arg)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	vnet_res_t		*vresp;

	if (rx_ringp->hw_rh == NULL) {
		/*
		 * The ring enable intr func is being invoked, but the ring
		 * is not bound to any underlying resource. This must be a
		 * ring reserved for a Hybrid resource and no such resource
		 * has been assigned to this vnet device yet. We simply
		 * return success.
		 */
		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
		return (0);
	}

	/*
	 * The rx ring has been bound to either a LDC or a Hybrid resource.
	 * Call the appropriate function to enable interrupts for the ring.
	 */
	if (rx_ringp->state & VNET_RXRING_HYBRID) {
		return (mac_hwring_enable_intr(rx_ringp->hw_rh));
	} else {
		vresp = (vnet_res_t *)rx_ringp->hw_rh;
		return (vgen_enable_intr(vresp->macreg.m_driver));
	}
}

/*
 * Enable polling for a ring and disable its interrupt.
 */
static int
vnet_ring_disable_intr(void *arg)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	vnet_res_t		*vresp;

	if (rx_ringp->hw_rh == NULL) {
		/*
		 * The ring disable intr func is being invoked, but the ring
		 * is not bound to any underlying resource. This must be a
		 * ring reserved for a Hybrid resource and no such resource
		 * has been assigned to this vnet device yet. We simply
		 * return success.
		 */
		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
		return (0);
	}

	/*
	 * The rx ring has been bound to either a LDC or a Hybrid resource.
	 * Call the appropriate function to disable interrupts for the ring.
	 */
	if (rx_ringp->state & VNET_RXRING_HYBRID) {
		return (mac_hwring_disable_intr(rx_ringp->hw_rh));
	} else {
		vresp = (vnet_res_t *)rx_ringp->hw_rh;
		return (vgen_disable_intr(vresp->macreg.m_driver));
	}
}

/*
 * Poll for up to 'bytes_to_pickup' bytes of packets from the rx ring.
 */
static mblk_t *
vnet_rx_poll(void *arg, int bytes_to_pickup)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	mblk_t			*mp = NULL;
	vnet_res_t		*vresp;
	vnet_t			*vnetp = rx_ringp->vnetp;

	if (rx_ringp->hw_rh == NULL) {
		return (NULL);
	}

	if (rx_ringp->state & VNET_RXRING_HYBRID) {
		mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
		/*
		 * Packets received over a hybrid resource need additional
		 * processing to remove the tag, for the pvid case. The
		 * underlying resource is not aware of the vnet's pvid and
		 * thus packets are received with the vlan tag in the header;
		 * unlike packets that are received over a ldc channel in
		 * which case the peer vnet/vsw would have already removed
		 * the tag.
		 */
		if (vnetp->pvid != vnetp->default_vlan_id) {
			vnet_rx_frames_untag(vnetp->pvid, &mp);
		}
	} else {
		vresp = (vnet_res_t *)rx_ringp->hw_rh;
		mp = vgen_rx_poll(vresp->macreg.m_driver, bytes_to_pickup);
	}
	return (mp);
}
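/*
 * Receive callback for the Hybrid resource, registered with the hw mac
 * client via mac_rx_set() in vnet_hio_mac_init(). The mrh argument
 * carries the pseudo rx ring that was bound to the hw ring in
 * mac_hwring_setup(), and packets are passed up to the upper mac layer
 * against that pseudo ring.
 */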
/* ARGSUSED */
void
vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t loopback)
{
	vnet_t			*vnetp = (vnet_t *)arg;
	vnet_pseudo_rx_ring_t	*ringp = (vnet_pseudo_rx_ring_t *)mrh;

	/*
	 * Packets received over a hybrid resource need additional processing
	 * to remove the tag, for the pvid case. The underlying resource is
	 * not aware of the vnet's pvid and thus packets are received with
	 * the vlan tag in the header; unlike packets that are received over
	 * a ldc channel in which case the peer vnet/vsw would have already
	 * removed the tag.
	 */
	if (vnetp->pvid != vnetp->default_vlan_id) {
		vnet_rx_frames_untag(vnetp->pvid, &mp);
		if (mp == NULL) {
			return;
		}
	}
	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
}

static int
vnet_addmac(void *arg, const uint8_t *mac_addr)
{
	vnet_pseudo_rx_group_t	*rx_grp = (vnet_pseudo_rx_group_t *)arg;
	vnet_t			*vnetp;

	vnetp = rx_grp->vnetp;

	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
		return (0);
	}

	cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
	    vnetp->instance, __func__);
	return (EINVAL);
}

static int
vnet_remmac(void *arg, const uint8_t *mac_addr)
{
	vnet_pseudo_rx_group_t	*rx_grp = (vnet_pseudo_rx_group_t *)arg;
	vnet_t			*vnetp;

	vnetp = rx_grp->vnetp;

	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
		return (0);
	}

	cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
	    vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
	return (EINVAL);
}

int
vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
{
	mac_handle_t		mh;
	mac_client_handle_t	mch = NULL;
	mac_unicast_handle_t	muh = NULL;
	mac_diag_t		diag;
	mac_register_t		*macp;
	char			client_name[MAXNAMELEN];
	int			rv;
	uint16_t		mac_flags = MAC_UNICAST_TAG_DISABLE |
	    MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
	vio_net_callbacks_t	vcb;
	ether_addr_t		rem_addr =
	    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	uint32_t		retries = 0;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
		return (EAGAIN);
	}

	do {
		rv = mac_open_by_linkname(ifname, &mh);
		if (rv == 0) {
			break;
		}
		if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
			mac_free(macp);
			return (rv);
		}
		drv_usecwait(vnet_mac_open_delay);
	} while (rv == ENOENT);

	vnetp->hio_mh = mh;

	(void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s",
	    vnetp->instance, ifname);
	rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_mch = mch;

	rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
	    &diag);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_muh = muh;

	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vnetp;
	macp->m_dip = NULL;
	macp->m_src_addr = NULL;
	macp->m_callbacks = &vnet_hio_res_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;

	rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
	    vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
	if (rv != 0) {
		goto fail;
	}
	mac_free(macp);

	/* add the recv callback */
	mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);

	return (0);

fail:
	mac_free(macp);
	vnet_hio_mac_cleanup(vnetp);
	return (1);
}
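/*
 * Undo, in reverse order, the state set up by vnet_hio_mac_init(): the
 * registered resource, the unicast address, the mac client and finally
 * the mac handle. Each step is guarded by a NULL check, so this is also
 * safe to call from the vnet_hio_mac_init() failure path with only
 * partial state set up.
 */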
void
vnet_hio_mac_cleanup(vnet_t *vnetp)
{
	if (vnetp->hio_vhp != NULL) {
		vio_net_resource_unreg(vnetp->hio_vhp);
		vnetp->hio_vhp = NULL;
	}

	if (vnetp->hio_muh != NULL) {
		(void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
		vnetp->hio_muh = NULL;
	}

	if (vnetp->hio_mch != NULL) {
		mac_client_close(vnetp->hio_mch, 0);
		vnetp->hio_mch = NULL;
	}

	if (vnetp->hio_mh != NULL) {
		mac_close(vnetp->hio_mh);
		vnetp->hio_mh = NULL;
	}
}

/* Bind pseudo rings to hwrings */
static int
vnet_bind_hwrings(vnet_t *vnetp)
{
	mac_ring_handle_t	hw_rh[VNET_NUM_HYBRID_RINGS];
	mac_perim_handle_t	mph1;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			hw_ring_cnt;
	int			i;
	int			rv;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	/* Get the list of the underlying RX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
	    MAC_RING_TYPE_RX);

	/* We expect the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	if (vnetp->rx_hwgh != NULL) {
		/*
		 * Quiesce the HW ring and the mac srs on the ring. Note
		 * that the HW ring will be restarted when the pseudo ring
		 * is started. At that time all the packets will be
		 * directly passed up to the pseudo RX ring and handled
		 * by mac srs created over the pseudo RX ring.
		 */
		mac_rx_client_quiesce(vnetp->hio_mch);
		mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
	}

	/*
	 * Bind the pseudo rings to the hwrings and start the hwrings.
	 * Note we don't need to register these with the upper mac, as we
	 * have statically exported these pseudo rxrings which are reserved
	 * for rxrings of the Hybrid resource.
	 */
	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		/* Pick the rxrings reserved for the Hybrid resource */
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];

		/* Store the hw ring handle */
		rx_ringp->hw_rh = hw_rh[i];

		/* Bind the pseudo ring to the underlying hwring */
		mac_hwring_setup(rx_ringp->hw_rh,
		    (mac_resource_handle_t)rx_ringp, NULL);

		/* Start the hwring if needed */
		if (rx_ringp->state & VNET_RXRING_STARTED) {
			rv = mac_hwring_start(rx_ringp->hw_rh);
			if (rv != 0) {
				mac_hwring_teardown(rx_ringp->hw_rh);
				rx_ringp->hw_rh = NULL;
				goto fail;
			}
		}
	}

	/* Get the list of the underlying TX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
	    MAC_RING_TYPE_TX);

	/* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	/*
	 * Now map the pseudo txrings to the hw txrings. Note we don't need
	 * to register these with the upper mac, as we have statically
	 * exported these rings. Note that these rings will continue to be
	 * used for LDC resources to peer vnets and the vswitch (shared
	 * ring).
	 */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		tx_ringp->hw_rh = hw_rh[i];
		tx_ringp->state |= VNET_TXRING_HYBRID;
	}
	tx_grp->tx_notify_handle =
	    mac_client_tx_notify(vnetp->hio_mch, vnet_tx_ring_update, vnetp);

	mac_perim_exit(mph1);
	return (0);

fail:
	mac_perim_exit(mph1);
	vnet_unbind_hwrings(vnetp);
	return (1);
}
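/*
 * Note that vnet_unbind_hwrings() below is also invoked from the failure
 * path of vnet_bind_hwrings() above; the NULL hw_rh checks it performs
 * make it safe to call with a partially completed bind.
 */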
/* Unbind pseudo rings from hwrings */
static void
vnet_unbind_hwrings(vnet_t *vnetp)
{
	mac_perim_handle_t	mph1;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			i;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		tx_ringp = &tx_grp->rings[i];
		if (tx_ringp->state & VNET_TXRING_HYBRID) {
			tx_ringp->state &= ~VNET_TXRING_HYBRID;
			tx_ringp->hw_rh = NULL;
		}
	}
	(void) mac_client_tx_notify(vnetp->hio_mch, NULL,
	    tx_grp->tx_notify_handle);

	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
		if (rx_ringp->hw_rh != NULL) {
			/* Stop the hwring */
			mac_hwring_stop(rx_ringp->hw_rh);

			/* Teardown the hwring */
			mac_hwring_teardown(rx_ringp->hw_rh);
			rx_ringp->hw_rh = NULL;
		}
	}

	if (vnetp->rx_hwgh != NULL) {
		vnetp->rx_hwgh = NULL;
		/*
		 * First clear the permanent-quiesced flag of the RX srs then
		 * restart the HW ring and the mac srs on the ring.
		 */
		mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
		mac_rx_client_restart(vnetp->hio_mch);
	}

	mac_perim_exit(mph1);
}
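/*
 * LDC resources are bound to pseudo rx rings below. Resources register
 * through vio_net_resource_reg(), which calls vnet_bind_rings() before
 * vnet_start_resources(), so a pseudo ring is always in place before the
 * resource is added to the FDB and can carry traffic.
 */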
/* Bind pseudo ring to a LDC resource */
static int
vnet_bind_vgenring(vnet_res_t *vresp)
{
	vnet_t			*vnetp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	mac_perim_handle_t	mph1;
	int			rv;
	int			type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This
		 * ring is allocated statically and is reported to the mac
		 * layer in vnet_m_capab(). So all we need to do here is
		 * save a reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
		vresp->rx_ringp = (void *)rx_ringp;
		return (0);
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
	if (rx_ringp == NULL) {
		cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
		    vnetp->instance);
		goto fail;
	}

	/* Store the LDC resource itself as the ring handle */
	rx_ringp->hw_rh = (mac_ring_handle_t)vresp;

	/*
	 * Save a reference to the ring in the resource for lookup during
	 * unbind. Note this is only done for LDC resources. We don't need
	 * this in the case of a Hybrid resource (see vnet_bind_hwrings()),
	 * as its rx rings are mapped to reserved pseudo rx rings (index 1
	 * and 2).
	 */
	vresp->rx_ringp = (void *)rx_ringp;
	rx_ringp->state |= VNET_RXRING_LDC_GUEST;

	/* Register the pseudo ring with the upper mac */
	rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
	if (rv != 0) {
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
		rx_ringp->hw_rh = NULL;
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
		goto fail;
	}

	mac_perim_exit(mph1);
	return (0);
fail:
	mac_perim_exit(mph1);
	return (1);
}

/* Unbind pseudo ring from a LDC resource */
static void
vnet_unbind_vgenring(vnet_res_t *vresp)
{
	vnet_t			*vnetp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	mac_perim_handle_t	mph1;
	int			type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (vresp->rx_ringp == NULL) {
		return;
	}

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This
		 * ring is allocated statically and is reported to the mac
		 * layer in vnet_m_capab(). So all we need to do here is
		 * remove its reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = NULL;
		vresp->rx_ringp = NULL;
		return;
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
	vresp->rx_ringp = NULL;

	if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
		/* Unregister the pseudo ring with the upper mac */
		mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);

		rx_ringp->hw_rh = NULL;
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;

		/* Free the pseudo rx ring */
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
	}

	mac_perim_exit(mph1);
}

static void
vnet_unbind_rings(vnet_res_t *vresp)
{
	switch (vresp->type) {

	case VIO_NET_RES_LDC_SERVICE:
	case VIO_NET_RES_LDC_GUEST:
		vnet_unbind_vgenring(vresp);
		break;

	case VIO_NET_RES_HYBRID:
		vnet_unbind_hwrings(vresp->vnetp);
		break;

	default:
		break;

	}
}

static int
vnet_bind_rings(vnet_res_t *vresp)
{
	int	rv;

	switch (vresp->type) {

	case VIO_NET_RES_LDC_SERVICE:
	case VIO_NET_RES_LDC_GUEST:
		rv = vnet_bind_vgenring(vresp);
		break;

	case VIO_NET_RES_HYBRID:
		rv = vnet_bind_hwrings(vresp->vnetp);
		break;

	default:
		rv = 1;
		break;

	}

	return (rv);
}

/* ARGSUSED */
int
vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnet_t	*vnetp = (vnet_t *)arg;

	*val = mac_stat_get(vnetp->hio_mh, stat);
	return (0);
}

/*
 * The start() and stop() routines for the Hybrid resource below are just
 * dummy functions. This is provided to avoid resource type specific code
 * in vnet_start_resources() and vnet_stop_resources(). The starting and
 * stopping of the Hybrid resource happens in the context of the mac_client
 * interfaces that are invoked in vnet_hio_mac_init() and
 * vnet_hio_mac_cleanup().
 */
/* ARGSUSED */
static int
vnet_hio_start(void *arg)
{
	return (0);
}

/* ARGSUSED */
static void
vnet_hio_stop(void *arg)
{
}
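/*
 * Transmit a chain of packets over the Hybrid resource's hw tx ring.
 * Each mblk is unlinked and handed to the hw ring; if the hw ring cannot
 * accept a packet, the unsent remainder of the chain is relinked and
 * returned to the caller, per the usual mc_tx contract, so the mac layer
 * can retry later.
 */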
mblk_t *
vnet_hio_tx(void *arg, mblk_t *mp)
{
	vnet_pseudo_tx_ring_t	*tx_ringp;
	mblk_t			*nextp;
	mblk_t			*ret_mp;

	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
	for (;;) {
		nextp = mp->b_next;
		mp->b_next = NULL;

		ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
		if (ret_mp != NULL) {
			ret_mp->b_next = nextp;
			mp = ret_mp;
			break;
		}

		if ((mp = nextp) == NULL)
			break;
	}
	return (mp);
}

#ifdef VNET_IOC_DEBUG

/*
 * The ioctl entry point is used only for debugging for now. The ioctl
 * commands can be used to force the link state of the channel connected
 * to vsw.
 */
static void
vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	struct iocblk	*iocp;
	vnet_t		*vnetp;

	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
	iocp->ioc_error = 0;
	vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		miocnak(q, mp, 0, EINVAL);
		return;
	}

	switch (iocp->ioc_cmd) {

	case VNET_FORCE_LINK_DOWN:
	case VNET_FORCE_LINK_UP:
		vnet_force_link_state(vnetp, q, mp);
		break;

	default:
		iocp->ioc_error = EINVAL;
		miocnak(q, mp, 0, iocp->ioc_error);
		break;

	}
}

static void
vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
{
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	vnet_res_t	*vresp;

	READ_ENTER(&vnetp->vsw_fp_rw);

	vresp = vnetp->vsw_fp;
	if (vresp == NULL) {
		RW_EXIT(&vnetp->vsw_fp_rw);
		return;
	}

	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	cbp->mc_ioctl(macp->m_driver, q, mp);

	RW_EXIT(&vnetp->vsw_fp_rw);
}

#else

static void
vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	vnet_t	*vnetp;

	vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		miocnak(q, mp, 0, EINVAL);
		return;
	}

	/* ioctl support only for debugging */
	miocnak(q, mp, 0, ENOTSUP);
}

#endif