1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright 2018 Joyent, Inc. 26 */ 27 28 #include <sys/types.h> 29 #include <sys/errno.h> 30 #include <sys/param.h> 31 #include <sys/callb.h> 32 #include <sys/stream.h> 33 #include <sys/kmem.h> 34 #include <sys/conf.h> 35 #include <sys/devops.h> 36 #include <sys/ksynch.h> 37 #include <sys/stat.h> 38 #include <sys/modctl.h> 39 #include <sys/modhash.h> 40 #include <sys/debug.h> 41 #include <sys/ethernet.h> 42 #include <sys/dlpi.h> 43 #include <net/if.h> 44 #include <sys/mac_provider.h> 45 #include <sys/mac_client.h> 46 #include <sys/mac_client_priv.h> 47 #include <sys/mac_ether.h> 48 #include <sys/ddi.h> 49 #include <sys/sunddi.h> 50 #include <sys/strsun.h> 51 #include <sys/note.h> 52 #include <sys/atomic.h> 53 #include <sys/vnet.h> 54 #include <sys/vlan.h> 55 #include <sys/vnet_mailbox.h> 56 #include <sys/vnet_common.h> 57 #include <sys/dds.h> 58 #include <sys/strsubr.h> 59 #include <sys/taskq.h> 60 61 /* 62 * Function prototypes. 63 */ 64 65 /* DDI entrypoints */ 66 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 67 static int vnetattach(dev_info_t *, ddi_attach_cmd_t); 68 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t); 69 70 /* MAC entrypoints */ 71 static int vnet_m_stat(void *, uint_t, uint64_t *); 72 static int vnet_m_start(void *); 73 static void vnet_m_stop(void *); 74 static int vnet_m_promisc(void *, boolean_t); 75 static int vnet_m_multicst(void *, boolean_t, const uint8_t *); 76 static int vnet_m_unicst(void *, const uint8_t *); 77 mblk_t *vnet_m_tx(void *, mblk_t *); 78 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp); 79 #ifdef VNET_IOC_DEBUG 80 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp); 81 #endif 82 static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data); 83 static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, 84 const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle); 85 static void vnet_get_group(void *arg, mac_ring_type_t type, const int index, 86 mac_group_info_t *infop, mac_group_handle_t handle); 87 static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); 88 static void vnet_rx_ring_stop(mac_ring_driver_t rdriver); 89 static int vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, 90 uint64_t *val); 91 static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); 92 static void vnet_tx_ring_stop(mac_ring_driver_t rdriver); 93 static int vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, 94 uint64_t *val); 95 static int vnet_ring_enable_intr(void *arg); 96 static int vnet_ring_disable_intr(void *arg); 97 static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup); 98 static int vnet_addmac(void *arg, const uint8_t *mac_addr); 99 static int vnet_remmac(void *arg, const uint8_t *mac_addr); 100 101 /* vnet internal functions */ 102 static int vnet_unattach(vnet_t *vnetp); 103 static void vnet_ring_grp_init(vnet_t *vnetp); 104 static void vnet_ring_grp_uninit(vnet_t *vnetp); 105 static int vnet_mac_register(vnet_t *); 106 static int vnet_read_mac_address(vnet_t *vnetp); 107 static int vnet_bind_vgenring(vnet_res_t *vresp); 108 static void vnet_unbind_vgenring(vnet_res_t *vresp); 109 static int vnet_bind_hwrings(vnet_t *vnetp); 110 static void vnet_unbind_hwrings(vnet_t *vnetp); 111 static int vnet_bind_rings(vnet_res_t *vresp); 112 static void vnet_unbind_rings(vnet_res_t *vresp); 113 static int vnet_hio_stat(void *, uint_t, uint64_t *); 114 static int vnet_hio_start(void *); 115 static void vnet_hio_stop(void *); 116 mblk_t *vnet_hio_tx(void *, mblk_t *); 117 118 /* Forwarding database (FDB) routines */ 119 static void vnet_fdb_create(vnet_t *vnetp); 120 static void vnet_fdb_destroy(vnet_t *vnetp); 121 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp); 122 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val); 123 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp); 124 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp); 125 126 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp); 127 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp); 128 static void vnet_tx_update(vio_net_handle_t vrh); 129 static void vnet_res_start_task(void *arg); 130 static void vnet_start_resources(vnet_t *vnetp); 131 static void vnet_stop_resources(vnet_t *vnetp); 132 static void vnet_dispatch_res_task(vnet_t *vnetp); 133 static void vnet_res_start_task(void *arg); 134 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err); 135 static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp); 136 static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp); 137 static void vnet_tx_notify_thread(void *); 138 139 /* Exported to vnet_gen */ 140 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu); 141 void vnet_link_update(vnet_t *vnetp, link_state_t link_state); 142 void vnet_dds_cleanup_hio(vnet_t *vnetp); 143 144 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name, 145 vnet_res_t *vresp); 146 static int vnet_hio_update_kstats(kstat_t *ksp, int rw); 147 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp); 148 static void vnet_hio_destroy_kstats(kstat_t *ksp); 149 150 /* Exported to to vnet_dds */ 151 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg); 152 int vnet_hio_mac_init(vnet_t *vnetp, char *ifname); 153 void vnet_hio_mac_cleanup(vnet_t *vnetp); 154 155 /* Externs that are imported from vnet_gen */ 156 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip, 157 const uint8_t *macaddr, void **vgenhdl); 158 extern int vgen_init_mdeg(void *arg); 159 extern void vgen_uninit(void *arg); 160 extern int vgen_dds_tx(void *arg, void *dmsg); 161 extern int vgen_enable_intr(void *arg); 162 extern int vgen_disable_intr(void *arg); 163 extern mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup); 164 165 /* Externs that are imported from vnet_dds */ 166 extern void vdds_mod_init(void); 167 extern void vdds_mod_fini(void); 168 extern int vdds_init(vnet_t *vnetp); 169 extern void vdds_cleanup(vnet_t *vnetp); 170 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg); 171 extern void vdds_cleanup_hybrid_res(void *arg); 172 extern void vdds_cleanup_hio(vnet_t *vnetp); 173 174 extern pri_t minclsyspri; 175 176 #define DRV_NAME "vnet" 177 #define VNET_FDBE_REFHOLD(p) \ 178 { \ 179 atomic_inc_32(&(p)->refcnt); \ 180 ASSERT((p)->refcnt != 0); \ 181 } 182 183 #define VNET_FDBE_REFRELE(p) \ 184 { \ 185 ASSERT((p)->refcnt != 0); \ 186 atomic_dec_32(&(p)->refcnt); \ 187 } 188 189 #ifdef VNET_IOC_DEBUG 190 #define VNET_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB) 191 #else 192 #define VNET_M_CALLBACK_FLAGS (MC_GETCAPAB) 193 #endif 194 195 static mac_callbacks_t vnet_m_callbacks = { 196 VNET_M_CALLBACK_FLAGS, 197 vnet_m_stat, 198 vnet_m_start, 199 vnet_m_stop, 200 vnet_m_promisc, 201 vnet_m_multicst, 202 NULL, /* m_unicst entry must be NULL while rx rings are exposed */ 203 NULL, /* m_tx entry must be NULL while tx rings are exposed */ 204 NULL, 205 vnet_m_ioctl, 206 vnet_m_capab, 207 NULL 208 }; 209 210 static mac_callbacks_t vnet_hio_res_callbacks = { 211 0, 212 vnet_hio_stat, 213 vnet_hio_start, 214 vnet_hio_stop, 215 NULL, 216 NULL, 217 NULL, 218 vnet_hio_tx, 219 NULL, 220 NULL, 221 NULL 222 }; 223 224 /* 225 * Linked list of "vnet_t" structures - one per instance. 226 */ 227 static vnet_t *vnet_headp = NULL; 228 static krwlock_t vnet_rw; 229 230 /* Tunables */ 231 uint32_t vnet_num_descriptors = VNET_NUM_DESCRIPTORS; 232 233 /* 234 * Configure tx serialization in mac layer for the vnet device. This tunable 235 * should be enabled to improve performance only if HybridIO is configured for 236 * the vnet device. 237 */ 238 boolean_t vnet_mac_tx_serialize = B_FALSE; 239 240 /* Configure enqueing at Rx soft rings in mac layer for the vnet device */ 241 boolean_t vnet_mac_rx_queuing = B_TRUE; 242 243 /* 244 * Set this to non-zero to enable additional internal receive buffer pools 245 * based on the MTU of the device for better performance at the cost of more 246 * memory consumption. This is turned off by default, to use allocb(9F) for 247 * receive buffer allocations of sizes > 2K. 248 */ 249 boolean_t vnet_jumbo_rxpools = B_FALSE; 250 251 /* # of chains in fdb hash table */ 252 uint32_t vnet_fdb_nchains = VNET_NFDB_HASH; 253 254 /* Internal tunables */ 255 uint32_t vnet_ethermtu = 1500; /* mtu of the device */ 256 257 /* 258 * Default vlan id. This is only used internally when the "default-vlan-id" 259 * property is not present in the MD device node. Therefore, this should not be 260 * used as a tunable; if this value is changed, the corresponding variable 261 * should be updated to the same value in vsw and also other vnets connected to 262 * the same vsw. 263 */ 264 uint16_t vnet_default_vlan_id = 1; 265 266 /* delay in usec to wait for all references on a fdb entry to be dropped */ 267 uint32_t vnet_fdbe_refcnt_delay = 10; 268 269 static struct ether_addr etherbroadcastaddr = { 270 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 271 }; 272 273 /* mac_open() retry delay in usec */ 274 uint32_t vnet_mac_open_delay = 100; /* 0.1 ms */ 275 276 /* max # of mac_open() retries */ 277 uint32_t vnet_mac_open_retries = 100; 278 279 /* 280 * Property names 281 */ 282 static char macaddr_propname[] = "local-mac-address"; 283 284 /* 285 * This is the string displayed by modinfo(1m). 286 */ 287 static char vnet_ident[] = "vnet driver"; 288 extern struct mod_ops mod_driverops; 289 static struct cb_ops cb_vnetops = { 290 nulldev, /* cb_open */ 291 nulldev, /* cb_close */ 292 nodev, /* cb_strategy */ 293 nodev, /* cb_print */ 294 nodev, /* cb_dump */ 295 nodev, /* cb_read */ 296 nodev, /* cb_write */ 297 nodev, /* cb_ioctl */ 298 nodev, /* cb_devmap */ 299 nodev, /* cb_mmap */ 300 nodev, /* cb_segmap */ 301 nochpoll, /* cb_chpoll */ 302 ddi_prop_op, /* cb_prop_op */ 303 NULL, /* cb_stream */ 304 (int)(D_MP) /* cb_flag */ 305 }; 306 307 static struct dev_ops vnetops = { 308 DEVO_REV, /* devo_rev */ 309 0, /* devo_refcnt */ 310 NULL, /* devo_getinfo */ 311 nulldev, /* devo_identify */ 312 nulldev, /* devo_probe */ 313 vnetattach, /* devo_attach */ 314 vnetdetach, /* devo_detach */ 315 nodev, /* devo_reset */ 316 &cb_vnetops, /* devo_cb_ops */ 317 (struct bus_ops *)NULL, /* devo_bus_ops */ 318 NULL, /* devo_power */ 319 ddi_quiesce_not_supported, /* devo_quiesce */ 320 }; 321 322 static struct modldrv modldrv = { 323 &mod_driverops, /* Type of module. This one is a driver */ 324 vnet_ident, /* ID string */ 325 &vnetops /* driver specific ops */ 326 }; 327 328 static struct modlinkage modlinkage = { 329 MODREV_1, (void *)&modldrv, NULL 330 }; 331 332 #ifdef DEBUG 333 334 #define DEBUG_PRINTF debug_printf 335 336 /* 337 * Print debug messages - set to 0xf to enable all msgs 338 */ 339 int vnet_dbglevel = 0x8; 340 341 static void 342 debug_printf(const char *fname, void *arg, const char *fmt, ...) 343 { 344 char buf[512]; 345 va_list ap; 346 vnet_t *vnetp = (vnet_t *)arg; 347 char *bufp = buf; 348 349 if (vnetp == NULL) { 350 (void) sprintf(bufp, "%s: ", fname); 351 bufp += strlen(bufp); 352 } else { 353 (void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname); 354 bufp += strlen(bufp); 355 } 356 va_start(ap, fmt); 357 (void) vsprintf(bufp, fmt, ap); 358 va_end(ap); 359 cmn_err(CE_CONT, "%s\n", buf); 360 } 361 362 #endif 363 364 /* _init(9E): initialize the loadable module */ 365 int 366 _init(void) 367 { 368 int status; 369 370 DBG1(NULL, "enter\n"); 371 372 mac_init_ops(&vnetops, "vnet"); 373 status = mod_install(&modlinkage); 374 if (status != 0) { 375 mac_fini_ops(&vnetops); 376 } 377 vdds_mod_init(); 378 DBG1(NULL, "exit(%d)\n", status); 379 return (status); 380 } 381 382 /* _fini(9E): prepare the module for unloading. */ 383 int 384 _fini(void) 385 { 386 int status; 387 388 DBG1(NULL, "enter\n"); 389 390 status = mod_remove(&modlinkage); 391 if (status != 0) 392 return (status); 393 mac_fini_ops(&vnetops); 394 vdds_mod_fini(); 395 396 DBG1(NULL, "exit(%d)\n", status); 397 return (status); 398 } 399 400 /* _info(9E): return information about the loadable module */ 401 int 402 _info(struct modinfo *modinfop) 403 { 404 return (mod_info(&modlinkage, modinfop)); 405 } 406 407 /* 408 * attach(9E): attach a device to the system. 409 * called once for each instance of the device on the system. 410 */ 411 static int 412 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd) 413 { 414 vnet_t *vnetp; 415 int status; 416 int instance; 417 uint64_t reg; 418 char qname[TASKQ_NAMELEN]; 419 vnet_attach_progress_t attach_progress; 420 421 attach_progress = AST_init; 422 423 switch (cmd) { 424 case DDI_ATTACH: 425 break; 426 case DDI_RESUME: 427 case DDI_PM_RESUME: 428 default: 429 goto vnet_attach_fail; 430 } 431 432 instance = ddi_get_instance(dip); 433 DBG1(NULL, "instance(%d) enter\n", instance); 434 435 /* allocate vnet_t and mac_t structures */ 436 vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP); 437 vnetp->dip = dip; 438 vnetp->instance = instance; 439 rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL); 440 rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL); 441 attach_progress |= AST_vnet_alloc; 442 443 vnet_ring_grp_init(vnetp); 444 attach_progress |= AST_ring_init; 445 446 status = vdds_init(vnetp); 447 if (status != 0) { 448 goto vnet_attach_fail; 449 } 450 attach_progress |= AST_vdds_init; 451 452 /* setup links to vnet_t from both devinfo and mac_t */ 453 ddi_set_driver_private(dip, (caddr_t)vnetp); 454 455 /* read the mac address */ 456 status = vnet_read_mac_address(vnetp); 457 if (status != DDI_SUCCESS) { 458 goto vnet_attach_fail; 459 } 460 attach_progress |= AST_read_macaddr; 461 462 reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 463 DDI_PROP_DONTPASS, "reg", -1); 464 if (reg == -1) { 465 goto vnet_attach_fail; 466 } 467 vnetp->reg = reg; 468 469 vnet_fdb_create(vnetp); 470 attach_progress |= AST_fdbh_alloc; 471 472 (void) snprintf(qname, TASKQ_NAMELEN, "vres_taskq%d", instance); 473 if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1, 474 TASKQ_DEFAULTPRI, 0)) == NULL) { 475 cmn_err(CE_WARN, "!vnet%d: Unable to create task queue", 476 instance); 477 goto vnet_attach_fail; 478 } 479 attach_progress |= AST_taskq_create; 480 481 /* add to the list of vnet devices */ 482 WRITE_ENTER(&vnet_rw); 483 vnetp->nextp = vnet_headp; 484 vnet_headp = vnetp; 485 RW_EXIT(&vnet_rw); 486 487 attach_progress |= AST_vnet_list; 488 489 /* 490 * Initialize the generic vnet plugin which provides communication via 491 * sun4v LDC (logical domain channel) based resources. This involves 2 492 * steps; first, vgen_init() is invoked to read the various properties 493 * of the vnet device from its MD node (including its mtu which is 494 * needed to mac_register()) and obtain a handle to the vgen layer. 495 * After mac_register() is done and we have a mac handle, we then 496 * invoke vgen_init_mdeg() which registers with the the MD event 497 * generator (mdeg) framework to allow LDC resource notifications. 498 * Note: this sequence also allows us to report the correct default # 499 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked 500 * in the context of mac_register(); and avoids conflicting with 501 * dynamic pseudo rx rings which get added/removed as a result of mdeg 502 * events in vgen. 503 */ 504 status = vgen_init(vnetp, reg, vnetp->dip, 505 (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl); 506 if (status != DDI_SUCCESS) { 507 DERR(vnetp, "vgen_init() failed\n"); 508 goto vnet_attach_fail; 509 } 510 attach_progress |= AST_vgen_init; 511 512 status = vnet_mac_register(vnetp); 513 if (status != DDI_SUCCESS) { 514 goto vnet_attach_fail; 515 } 516 vnetp->link_state = LINK_STATE_UNKNOWN; 517 attach_progress |= AST_macreg; 518 519 status = vgen_init_mdeg(vnetp->vgenhdl); 520 if (status != DDI_SUCCESS) { 521 goto vnet_attach_fail; 522 } 523 attach_progress |= AST_init_mdeg; 524 525 vnetp->attach_progress = attach_progress; 526 527 DBG1(NULL, "instance(%d) exit\n", instance); 528 return (DDI_SUCCESS); 529 530 vnet_attach_fail: 531 vnetp->attach_progress = attach_progress; 532 status = vnet_unattach(vnetp); 533 ASSERT(status == 0); 534 return (DDI_FAILURE); 535 } 536 537 /* 538 * detach(9E): detach a device from the system. 539 */ 540 static int 541 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd) 542 { 543 vnet_t *vnetp; 544 int instance; 545 546 instance = ddi_get_instance(dip); 547 DBG1(NULL, "instance(%d) enter\n", instance); 548 549 vnetp = ddi_get_driver_private(dip); 550 if (vnetp == NULL) { 551 goto vnet_detach_fail; 552 } 553 554 switch (cmd) { 555 case DDI_DETACH: 556 break; 557 case DDI_SUSPEND: 558 case DDI_PM_SUSPEND: 559 default: 560 goto vnet_detach_fail; 561 } 562 563 if (vnet_unattach(vnetp) != 0) { 564 goto vnet_detach_fail; 565 } 566 567 return (DDI_SUCCESS); 568 569 vnet_detach_fail: 570 return (DDI_FAILURE); 571 } 572 573 /* 574 * Common routine to handle vnetattach() failure and vnetdetach(). Note that 575 * the only reason this function could fail is if mac_unregister() fails. 576 * Otherwise, this function must ensure that all resources are freed and return 577 * success. 578 */ 579 static int 580 vnet_unattach(vnet_t *vnetp) 581 { 582 vnet_attach_progress_t attach_progress; 583 584 attach_progress = vnetp->attach_progress; 585 586 /* 587 * Disable the mac device in the gldv3 subsystem. This can fail, in 588 * particular if there are still any open references to this mac 589 * device; in which case we just return failure without continuing to 590 * detach further. 591 * If it succeeds, we then invoke vgen_uninit() which should unregister 592 * any pseudo rings registered with the mac layer. Note we keep the 593 * AST_macreg flag on, so we can unregister with the mac layer at 594 * the end of this routine. 595 */ 596 if (attach_progress & AST_macreg) { 597 if (mac_disable(vnetp->mh) != 0) { 598 return (1); 599 } 600 } 601 602 /* 603 * Now that we have disabled the device, we must finish all other steps 604 * and successfully return from this function; otherwise we will end up 605 * leaving the device in a broken/unusable state. 606 * 607 * First, release any hybrid resources assigned to this vnet device. 608 */ 609 if (attach_progress & AST_vdds_init) { 610 vdds_cleanup(vnetp); 611 attach_progress &= ~AST_vdds_init; 612 } 613 614 /* 615 * Uninit vgen. This stops further mdeg callbacks to this vnet 616 * device and/or its ports; and detaches any existing ports. 617 */ 618 if (attach_progress & (AST_vgen_init|AST_init_mdeg)) { 619 vgen_uninit(vnetp->vgenhdl); 620 attach_progress &= ~AST_vgen_init; 621 attach_progress &= ~AST_init_mdeg; 622 } 623 624 /* Destroy the taskq. */ 625 if (attach_progress & AST_taskq_create) { 626 ddi_taskq_destroy(vnetp->taskqp); 627 attach_progress &= ~AST_taskq_create; 628 } 629 630 /* Destroy fdb. */ 631 if (attach_progress & AST_fdbh_alloc) { 632 vnet_fdb_destroy(vnetp); 633 attach_progress &= ~AST_fdbh_alloc; 634 } 635 636 /* Remove from the device list */ 637 if (attach_progress & AST_vnet_list) { 638 vnet_t **vnetpp; 639 /* unlink from instance(vnet_t) list */ 640 WRITE_ENTER(&vnet_rw); 641 for (vnetpp = &vnet_headp; *vnetpp; 642 vnetpp = &(*vnetpp)->nextp) { 643 if (*vnetpp == vnetp) { 644 *vnetpp = vnetp->nextp; 645 break; 646 } 647 } 648 RW_EXIT(&vnet_rw); 649 attach_progress &= ~AST_vnet_list; 650 } 651 652 if (attach_progress & AST_ring_init) { 653 vnet_ring_grp_uninit(vnetp); 654 attach_progress &= ~AST_ring_init; 655 } 656 657 if (attach_progress & AST_macreg) { 658 VERIFY(mac_unregister(vnetp->mh) == 0); 659 vnetp->mh = NULL; 660 attach_progress &= ~AST_macreg; 661 } 662 663 if (attach_progress & AST_vnet_alloc) { 664 rw_destroy(&vnetp->vrwlock); 665 rw_destroy(&vnetp->vsw_fp_rw); 666 attach_progress &= ~AST_vnet_list; 667 KMEM_FREE(vnetp); 668 } 669 670 return (0); 671 } 672 673 /* enable the device for transmit/receive */ 674 static int 675 vnet_m_start(void *arg) 676 { 677 vnet_t *vnetp = arg; 678 679 DBG1(vnetp, "enter\n"); 680 681 WRITE_ENTER(&vnetp->vrwlock); 682 vnetp->flags |= VNET_STARTED; 683 vnet_start_resources(vnetp); 684 RW_EXIT(&vnetp->vrwlock); 685 686 DBG1(vnetp, "exit\n"); 687 return (VNET_SUCCESS); 688 689 } 690 691 /* stop transmit/receive for the device */ 692 static void 693 vnet_m_stop(void *arg) 694 { 695 vnet_t *vnetp = arg; 696 697 DBG1(vnetp, "enter\n"); 698 699 WRITE_ENTER(&vnetp->vrwlock); 700 if (vnetp->flags & VNET_STARTED) { 701 /* 702 * Set the flags appropriately; this should prevent starting of 703 * any new resources that are added(see vnet_res_start_task()), 704 * while we release the vrwlock in vnet_stop_resources() before 705 * stopping each resource. 706 */ 707 vnetp->flags &= ~VNET_STARTED; 708 vnetp->flags |= VNET_STOPPING; 709 vnet_stop_resources(vnetp); 710 vnetp->flags &= ~VNET_STOPPING; 711 } 712 RW_EXIT(&vnetp->vrwlock); 713 714 DBG1(vnetp, "exit\n"); 715 } 716 717 /* set the unicast mac address of the device */ 718 static int 719 vnet_m_unicst(void *arg, const uint8_t *macaddr) 720 { 721 _NOTE(ARGUNUSED(macaddr)) 722 723 vnet_t *vnetp = arg; 724 725 DBG1(vnetp, "enter\n"); 726 /* 727 * NOTE: setting mac address dynamically is not supported. 728 */ 729 DBG1(vnetp, "exit\n"); 730 731 return (VNET_FAILURE); 732 } 733 734 /* enable/disable a multicast address */ 735 static int 736 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 737 { 738 _NOTE(ARGUNUSED(add, mca)) 739 740 vnet_t *vnetp = arg; 741 vnet_res_t *vresp; 742 mac_register_t *macp; 743 mac_callbacks_t *cbp; 744 int rv = VNET_SUCCESS; 745 746 DBG1(vnetp, "enter\n"); 747 748 READ_ENTER(&vnetp->vsw_fp_rw); 749 if (vnetp->vsw_fp == NULL) { 750 RW_EXIT(&vnetp->vsw_fp_rw); 751 return (EAGAIN); 752 } 753 VNET_FDBE_REFHOLD(vnetp->vsw_fp); 754 RW_EXIT(&vnetp->vsw_fp_rw); 755 756 vresp = vnetp->vsw_fp; 757 macp = &vresp->macreg; 758 cbp = macp->m_callbacks; 759 rv = cbp->mc_multicst(macp->m_driver, add, mca); 760 761 VNET_FDBE_REFRELE(vnetp->vsw_fp); 762 763 DBG1(vnetp, "exit(%d)\n", rv); 764 return (rv); 765 } 766 767 /* set or clear promiscuous mode on the device */ 768 static int 769 vnet_m_promisc(void *arg, boolean_t on) 770 { 771 _NOTE(ARGUNUSED(on)) 772 773 vnet_t *vnetp = arg; 774 DBG1(vnetp, "enter\n"); 775 /* 776 * NOTE: setting promiscuous mode is not supported, just return success. 777 */ 778 DBG1(vnetp, "exit\n"); 779 return (VNET_SUCCESS); 780 } 781 782 /* 783 * Transmit a chain of packets. This function provides switching functionality 784 * based on the destination mac address to reach other guests (within ldoms) or 785 * external hosts. 786 */ 787 mblk_t * 788 vnet_tx_ring_send(void *arg, mblk_t *mp) 789 { 790 vnet_pseudo_tx_ring_t *tx_ringp; 791 vnet_tx_ring_stats_t *statsp; 792 vnet_t *vnetp; 793 vnet_res_t *vresp; 794 mblk_t *next; 795 mblk_t *resid_mp; 796 mac_register_t *macp; 797 struct ether_header *ehp; 798 boolean_t is_unicast; 799 boolean_t is_pvid; /* non-default pvid ? */ 800 boolean_t hres; /* Hybrid resource ? */ 801 void *tx_arg; 802 size_t size; 803 804 tx_ringp = (vnet_pseudo_tx_ring_t *)arg; 805 statsp = &tx_ringp->tx_ring_stats; 806 vnetp = (vnet_t *)tx_ringp->vnetp; 807 DBG1(vnetp, "enter\n"); 808 ASSERT(mp != NULL); 809 810 is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE; 811 812 while (mp != NULL) { 813 814 next = mp->b_next; 815 mp->b_next = NULL; 816 817 /* update stats */ 818 size = msgsize(mp); 819 820 /* 821 * Find fdb entry for the destination 822 * and hold a reference to it. 823 */ 824 ehp = (struct ether_header *)mp->b_rptr; 825 vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost); 826 if (vresp != NULL) { 827 828 /* 829 * Destination found in FDB. 830 * The destination is a vnet device within ldoms 831 * and directly reachable, invoke the tx function 832 * in the fdb entry. 833 */ 834 macp = &vresp->macreg; 835 resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp); 836 837 /* tx done; now release ref on fdb entry */ 838 VNET_FDBE_REFRELE(vresp); 839 840 if (resid_mp != NULL) { 841 /* m_tx failed */ 842 mp->b_next = next; 843 break; 844 } 845 } else { 846 is_unicast = !(IS_BROADCAST(ehp) || 847 (IS_MULTICAST(ehp))); 848 /* 849 * Destination is not in FDB. 850 * If the destination is broadcast or multicast, 851 * then forward the packet to vswitch. 852 * If a Hybrid resource avilable, then send the 853 * unicast packet via hybrid resource, otherwise 854 * forward it to vswitch. 855 */ 856 READ_ENTER(&vnetp->vsw_fp_rw); 857 858 if ((is_unicast) && (vnetp->hio_fp != NULL)) { 859 vresp = vnetp->hio_fp; 860 hres = B_TRUE; 861 } else { 862 vresp = vnetp->vsw_fp; 863 hres = B_FALSE; 864 } 865 if (vresp == NULL) { 866 /* 867 * no fdb entry to vsw? drop the packet. 868 */ 869 RW_EXIT(&vnetp->vsw_fp_rw); 870 freemsg(mp); 871 mp = next; 872 continue; 873 } 874 875 /* ref hold the fdb entry to vsw */ 876 VNET_FDBE_REFHOLD(vresp); 877 878 RW_EXIT(&vnetp->vsw_fp_rw); 879 880 /* 881 * In the case of a hybrid resource we need to insert 882 * the tag for the pvid case here; unlike packets that 883 * are destined to a vnet/vsw in which case the vgen 884 * layer does the tagging before sending it over ldc. 885 */ 886 if (hres == B_TRUE) { 887 /* 888 * Determine if the frame being transmitted 889 * over the hybrid resource is untagged. If so, 890 * insert the tag before transmitting. 891 */ 892 if (is_pvid == B_TRUE && 893 ehp->ether_type != htons(ETHERTYPE_VLAN)) { 894 895 mp = vnet_vlan_insert_tag(mp, 896 vnetp->pvid); 897 if (mp == NULL) { 898 VNET_FDBE_REFRELE(vresp); 899 mp = next; 900 continue; 901 } 902 903 } 904 905 macp = &vresp->macreg; 906 tx_arg = tx_ringp; 907 } else { 908 macp = &vresp->macreg; 909 tx_arg = macp->m_driver; 910 } 911 resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp); 912 913 /* tx done; now release ref on fdb entry */ 914 VNET_FDBE_REFRELE(vresp); 915 916 if (resid_mp != NULL) { 917 /* m_tx failed */ 918 mp->b_next = next; 919 break; 920 } 921 } 922 923 statsp->obytes += size; 924 statsp->opackets++; 925 mp = next; 926 } 927 928 DBG1(vnetp, "exit\n"); 929 return (mp); 930 } 931 932 /* get statistics from the device */ 933 int 934 vnet_m_stat(void *arg, uint_t stat, uint64_t *val) 935 { 936 vnet_t *vnetp = arg; 937 vnet_res_t *vresp; 938 mac_register_t *macp; 939 mac_callbacks_t *cbp; 940 uint64_t val_total = 0; 941 942 DBG1(vnetp, "enter\n"); 943 944 /* 945 * get the specified statistic from each transport and return the 946 * aggregate val. This obviously only works for counters. 947 */ 948 if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) || 949 (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) { 950 return (ENOTSUP); 951 } 952 953 READ_ENTER(&vnetp->vrwlock); 954 for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) { 955 macp = &vresp->macreg; 956 cbp = macp->m_callbacks; 957 if (cbp->mc_getstat(macp->m_driver, stat, val) == 0) 958 val_total += *val; 959 } 960 RW_EXIT(&vnetp->vrwlock); 961 962 *val = val_total; 963 964 DBG1(vnetp, "exit\n"); 965 return (0); 966 } 967 968 static void 969 vnet_ring_grp_init(vnet_t *vnetp) 970 { 971 vnet_pseudo_rx_group_t *rx_grp; 972 vnet_pseudo_rx_ring_t *rx_ringp; 973 vnet_pseudo_tx_group_t *tx_grp; 974 vnet_pseudo_tx_ring_t *tx_ringp; 975 int i; 976 977 tx_grp = &vnetp->tx_grp[0]; 978 tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) * 979 VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP); 980 for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) { 981 tx_ringp[i].state |= VNET_TXRING_SHARED; 982 } 983 tx_grp->rings = tx_ringp; 984 tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS; 985 mutex_init(&tx_grp->flowctl_lock, NULL, MUTEX_DRIVER, NULL); 986 cv_init(&tx_grp->flowctl_cv, NULL, CV_DRIVER, NULL); 987 tx_grp->flowctl_thread = thread_create(NULL, 0, 988 vnet_tx_notify_thread, tx_grp, 0, &p0, TS_RUN, minclsyspri); 989 990 rx_grp = &vnetp->rx_grp[0]; 991 rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP; 992 rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL); 993 rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) * 994 rx_grp->max_ring_cnt, KM_SLEEP); 995 996 /* 997 * Setup the first 3 Pseudo RX Rings that are reserved; 998 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource. 999 */ 1000 rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE; 1001 rx_ringp[0].index = 0; 1002 rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID; 1003 rx_ringp[1].index = 1; 1004 rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID; 1005 rx_ringp[2].index = 2; 1006 1007 rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; 1008 rx_grp->rings = rx_ringp; 1009 1010 for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; 1011 i < rx_grp->max_ring_cnt; i++) { 1012 rx_ringp = &rx_grp->rings[i]; 1013 rx_ringp->state = VNET_RXRING_FREE; 1014 rx_ringp->index = i; 1015 } 1016 } 1017 1018 static void 1019 vnet_ring_grp_uninit(vnet_t *vnetp) 1020 { 1021 vnet_pseudo_rx_group_t *rx_grp; 1022 vnet_pseudo_tx_group_t *tx_grp; 1023 kt_did_t tid = 0; 1024 1025 tx_grp = &vnetp->tx_grp[0]; 1026 1027 /* Inform tx_notify_thread to exit */ 1028 mutex_enter(&tx_grp->flowctl_lock); 1029 if (tx_grp->flowctl_thread != NULL) { 1030 tid = tx_grp->flowctl_thread->t_did; 1031 tx_grp->flowctl_done = B_TRUE; 1032 cv_signal(&tx_grp->flowctl_cv); 1033 } 1034 mutex_exit(&tx_grp->flowctl_lock); 1035 if (tid != 0) 1036 thread_join(tid); 1037 1038 if (tx_grp->rings != NULL) { 1039 ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS); 1040 kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) * 1041 tx_grp->ring_cnt); 1042 tx_grp->rings = NULL; 1043 } 1044 1045 rx_grp = &vnetp->rx_grp[0]; 1046 if (rx_grp->rings != NULL) { 1047 ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP); 1048 ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT); 1049 kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) * 1050 rx_grp->max_ring_cnt); 1051 rx_grp->rings = NULL; 1052 } 1053 } 1054 1055 static vnet_pseudo_rx_ring_t * 1056 vnet_alloc_pseudo_rx_ring(vnet_t *vnetp) 1057 { 1058 vnet_pseudo_rx_group_t *rx_grp; 1059 vnet_pseudo_rx_ring_t *rx_ringp; 1060 int index; 1061 1062 rx_grp = &vnetp->rx_grp[0]; 1063 WRITE_ENTER(&rx_grp->lock); 1064 1065 if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) { 1066 /* no rings available */ 1067 RW_EXIT(&rx_grp->lock); 1068 return (NULL); 1069 } 1070 1071 for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; 1072 index < rx_grp->max_ring_cnt; index++) { 1073 rx_ringp = &rx_grp->rings[index]; 1074 if (rx_ringp->state == VNET_RXRING_FREE) { 1075 rx_ringp->state |= VNET_RXRING_INUSE; 1076 rx_grp->ring_cnt++; 1077 break; 1078 } 1079 } 1080 1081 RW_EXIT(&rx_grp->lock); 1082 return (rx_ringp); 1083 } 1084 1085 static void 1086 vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp) 1087 { 1088 vnet_pseudo_rx_group_t *rx_grp; 1089 1090 ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT); 1091 rx_grp = &vnetp->rx_grp[0]; 1092 WRITE_ENTER(&rx_grp->lock); 1093 1094 if (ringp->state != VNET_RXRING_FREE) { 1095 ringp->state = VNET_RXRING_FREE; 1096 ringp->handle = NULL; 1097 rx_grp->ring_cnt--; 1098 } 1099 1100 RW_EXIT(&rx_grp->lock); 1101 } 1102 1103 /* wrapper function for mac_register() */ 1104 static int 1105 vnet_mac_register(vnet_t *vnetp) 1106 { 1107 mac_register_t *macp; 1108 int err; 1109 1110 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1111 return (DDI_FAILURE); 1112 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1113 macp->m_driver = vnetp; 1114 macp->m_dip = vnetp->dip; 1115 macp->m_src_addr = vnetp->curr_macaddr; 1116 macp->m_callbacks = &vnet_m_callbacks; 1117 macp->m_min_sdu = 0; 1118 macp->m_max_sdu = vnetp->mtu; 1119 macp->m_margin = VLAN_TAGSZ; 1120 1121 macp->m_v12n = MAC_VIRT_LEVEL1; 1122 1123 /* 1124 * Finally, we're ready to register ourselves with the MAC layer 1125 * interface; if this succeeds, we're all ready to start() 1126 */ 1127 err = mac_register(macp, &vnetp->mh); 1128 mac_free(macp); 1129 return (err == 0 ? DDI_SUCCESS : DDI_FAILURE); 1130 } 1131 1132 /* read the mac address of the device */ 1133 static int 1134 vnet_read_mac_address(vnet_t *vnetp) 1135 { 1136 uchar_t *macaddr; 1137 uint32_t size; 1138 int rv; 1139 1140 rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip, 1141 DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size); 1142 if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) { 1143 DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n", 1144 macaddr_propname, rv); 1145 return (DDI_FAILURE); 1146 } 1147 bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL); 1148 bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL); 1149 ddi_prop_free(macaddr); 1150 1151 return (DDI_SUCCESS); 1152 } 1153 1154 static void 1155 vnet_fdb_create(vnet_t *vnetp) 1156 { 1157 char hashname[MAXNAMELEN]; 1158 1159 (void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash", 1160 vnetp->instance); 1161 vnetp->fdb_nchains = vnet_fdb_nchains; 1162 vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains, 1163 mod_hash_null_valdtor, sizeof (void *)); 1164 } 1165 1166 static void 1167 vnet_fdb_destroy(vnet_t *vnetp) 1168 { 1169 /* destroy fdb-hash-table */ 1170 if (vnetp->fdb_hashp != NULL) { 1171 mod_hash_destroy_hash(vnetp->fdb_hashp); 1172 vnetp->fdb_hashp = NULL; 1173 vnetp->fdb_nchains = 0; 1174 } 1175 } 1176 1177 /* 1178 * Add an entry into the fdb. 1179 */ 1180 void 1181 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp) 1182 { 1183 uint64_t addr = 0; 1184 int rv; 1185 1186 KEY_HASH(addr, vresp->rem_macaddr); 1187 1188 /* 1189 * If the entry being added corresponds to LDC_SERVICE resource, 1190 * that is, vswitch connection, it is added to the hash and also 1191 * the entry is cached, an additional reference count reflects 1192 * this. The HYBRID resource is not added to the hash, but only 1193 * cached, as it is only used for sending out packets for unknown 1194 * unicast destinations. 1195 */ 1196 (vresp->type == VIO_NET_RES_LDC_SERVICE) ? 1197 (vresp->refcnt = 1) : (vresp->refcnt = 0); 1198 1199 /* 1200 * Note: duplicate keys will be rejected by mod_hash. 1201 */ 1202 if (vresp->type != VIO_NET_RES_HYBRID) { 1203 rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr, 1204 (mod_hash_val_t)vresp); 1205 if (rv != 0) { 1206 DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr); 1207 return; 1208 } 1209 } 1210 1211 if (vresp->type == VIO_NET_RES_LDC_SERVICE) { 1212 /* Cache the fdb entry to vsw-port */ 1213 WRITE_ENTER(&vnetp->vsw_fp_rw); 1214 if (vnetp->vsw_fp == NULL) 1215 vnetp->vsw_fp = vresp; 1216 RW_EXIT(&vnetp->vsw_fp_rw); 1217 } else if (vresp->type == VIO_NET_RES_HYBRID) { 1218 /* Cache the fdb entry to hybrid resource */ 1219 WRITE_ENTER(&vnetp->vsw_fp_rw); 1220 if (vnetp->hio_fp == NULL) 1221 vnetp->hio_fp = vresp; 1222 RW_EXIT(&vnetp->vsw_fp_rw); 1223 } 1224 } 1225 1226 /* 1227 * Remove an entry from fdb. 1228 */ 1229 static void 1230 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp) 1231 { 1232 uint64_t addr = 0; 1233 int rv; 1234 uint32_t refcnt; 1235 vnet_res_t *tmp; 1236 1237 KEY_HASH(addr, vresp->rem_macaddr); 1238 1239 /* 1240 * Remove the entry from fdb hash table. 1241 * This prevents further references to this fdb entry. 1242 */ 1243 if (vresp->type != VIO_NET_RES_HYBRID) { 1244 rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr, 1245 (mod_hash_val_t *)&tmp); 1246 if (rv != 0) { 1247 /* 1248 * As the resources are added to the hash only 1249 * after they are started, this can occur if 1250 * a resource unregisters before it is ever started. 1251 */ 1252 return; 1253 } 1254 } 1255 1256 if (vresp->type == VIO_NET_RES_LDC_SERVICE) { 1257 WRITE_ENTER(&vnetp->vsw_fp_rw); 1258 1259 ASSERT(tmp == vnetp->vsw_fp); 1260 vnetp->vsw_fp = NULL; 1261 1262 RW_EXIT(&vnetp->vsw_fp_rw); 1263 } else if (vresp->type == VIO_NET_RES_HYBRID) { 1264 WRITE_ENTER(&vnetp->vsw_fp_rw); 1265 1266 vnetp->hio_fp = NULL; 1267 1268 RW_EXIT(&vnetp->vsw_fp_rw); 1269 } 1270 1271 /* 1272 * If there are threads already ref holding before the entry was 1273 * removed from hash table, then wait for ref count to drop to zero. 1274 */ 1275 (vresp->type == VIO_NET_RES_LDC_SERVICE) ? 1276 (refcnt = 1) : (refcnt = 0); 1277 while (vresp->refcnt > refcnt) { 1278 delay(drv_usectohz(vnet_fdbe_refcnt_delay)); 1279 } 1280 } 1281 1282 /* 1283 * Search fdb for a given mac address. If an entry is found, hold 1284 * a reference to it and return the entry; else returns NULL. 1285 */ 1286 static vnet_res_t * 1287 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp) 1288 { 1289 uint64_t key = 0; 1290 vnet_res_t *vresp; 1291 int rv; 1292 1293 KEY_HASH(key, addrp->ether_addr_octet); 1294 1295 rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key, 1296 (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb); 1297 1298 if (rv != 0) 1299 return (NULL); 1300 1301 return (vresp); 1302 } 1303 1304 /* 1305 * Callback function provided to mod_hash_find_cb(). After finding the fdb 1306 * entry corresponding to the key (macaddr), this callback will be invoked by 1307 * mod_hash_find_cb() to atomically increment the reference count on the fdb 1308 * entry before returning the found entry. 1309 */ 1310 static void 1311 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val) 1312 { 1313 _NOTE(ARGUNUSED(key)) 1314 VNET_FDBE_REFHOLD((vnet_res_t *)val); 1315 } 1316 1317 /* 1318 * Frames received that are tagged with the pvid of the vnet device must be 1319 * untagged before sending up the stack. This function walks the chain of rx 1320 * frames, untags any such frames and returns the updated chain. 1321 * 1322 * Arguments: 1323 * pvid: pvid of the vnet device for which packets are being received 1324 * mp: head of pkt chain to be validated and untagged 1325 * 1326 * Returns: 1327 * mp: head of updated chain of packets 1328 */ 1329 static void 1330 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp) 1331 { 1332 struct ether_vlan_header *evhp; 1333 mblk_t *bp; 1334 mblk_t *bpt; 1335 mblk_t *bph; 1336 mblk_t *bpn; 1337 1338 bpn = bph = bpt = NULL; 1339 1340 for (bp = *mp; bp != NULL; bp = bpn) { 1341 1342 bpn = bp->b_next; 1343 bp->b_next = bp->b_prev = NULL; 1344 1345 evhp = (struct ether_vlan_header *)bp->b_rptr; 1346 1347 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN && 1348 VLAN_ID(ntohs(evhp->ether_tci)) == pvid) { 1349 1350 bp = vnet_vlan_remove_tag(bp); 1351 if (bp == NULL) { 1352 continue; 1353 } 1354 1355 } 1356 1357 /* build a chain of processed packets */ 1358 if (bph == NULL) { 1359 bph = bpt = bp; 1360 } else { 1361 bpt->b_next = bp; 1362 bpt = bp; 1363 } 1364 1365 } 1366 1367 *mp = bph; 1368 } 1369 1370 static void 1371 vnet_rx(vio_net_handle_t vrh, mblk_t *mp) 1372 { 1373 vnet_res_t *vresp = (vnet_res_t *)vrh; 1374 vnet_t *vnetp = vresp->vnetp; 1375 vnet_pseudo_rx_ring_t *ringp; 1376 1377 if ((vnetp == NULL) || (vnetp->mh == 0)) { 1378 freemsgchain(mp); 1379 return; 1380 } 1381 1382 ringp = vresp->rx_ringp; 1383 mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num); 1384 } 1385 1386 void 1387 vnet_tx_update(vio_net_handle_t vrh) 1388 { 1389 vnet_res_t *vresp = (vnet_res_t *)vrh; 1390 vnet_t *vnetp = vresp->vnetp; 1391 vnet_pseudo_tx_ring_t *tx_ringp; 1392 vnet_pseudo_tx_group_t *tx_grp; 1393 int i; 1394 1395 if (vnetp == NULL || vnetp->mh == NULL) { 1396 return; 1397 } 1398 1399 /* 1400 * Currently, the tx hwring API (used to access rings that belong to 1401 * a Hybrid IO resource) does not provide us a per ring flow ctrl 1402 * update; also the pseudo rings are shared by the ports/ldcs in the 1403 * vgen layer. Thus we can't figure out which pseudo ring is being 1404 * re-enabled for transmits. To work around this, when we get a tx 1405 * restart notification from below, we simply propagate that to all 1406 * the tx pseudo rings registered with the mac layer above. 1407 * 1408 * There are a couple of side effects with this approach, but they are 1409 * not harmful, as outlined below: 1410 * 1411 * A) We might send an invalid ring_update() for a ring that is not 1412 * really flow controlled. This will not have any effect in the mac 1413 * layer and packets will continue to be transmitted on that ring. 1414 * 1415 * B) We might end up clearing the flow control in the mac layer for 1416 * a ring that is still flow controlled in the underlying resource. 1417 * This will result in the mac layer restarting transmit, only to be 1418 * flow controlled again on that ring. 1419 */ 1420 tx_grp = &vnetp->tx_grp[0]; 1421 for (i = 0; i < tx_grp->ring_cnt; i++) { 1422 tx_ringp = &tx_grp->rings[i]; 1423 mac_tx_ring_update(vnetp->mh, tx_ringp->handle); 1424 } 1425 } 1426 1427 /* 1428 * vnet_tx_notify_thread: 1429 * 1430 * vnet_tx_ring_update() callback function wakes up this thread when 1431 * it gets called. This thread will call mac_tx_ring_update() to 1432 * notify upper mac of flow control getting relieved. Note that 1433 * vnet_tx_ring_update() cannot call mac_tx_ring_update() directly 1434 * because vnet_tx_ring_update() is called from lower mac with 1435 * mi_rw_lock held and mac_tx_ring_update() would also try to grab 1436 * the same lock. 1437 */ 1438 static void 1439 vnet_tx_notify_thread(void *arg) 1440 { 1441 callb_cpr_t cprinfo; 1442 vnet_pseudo_tx_group_t *tx_grp = (vnet_pseudo_tx_group_t *)arg; 1443 vnet_pseudo_tx_ring_t *tx_ringp; 1444 vnet_t *vnetp; 1445 int i; 1446 1447 CALLB_CPR_INIT(&cprinfo, &tx_grp->flowctl_lock, callb_generic_cpr, 1448 "vnet_tx_notify_thread"); 1449 1450 mutex_enter(&tx_grp->flowctl_lock); 1451 while (!tx_grp->flowctl_done) { 1452 CALLB_CPR_SAFE_BEGIN(&cprinfo); 1453 cv_wait(&tx_grp->flowctl_cv, &tx_grp->flowctl_lock); 1454 CALLB_CPR_SAFE_END(&cprinfo, &tx_grp->flowctl_lock); 1455 1456 for (i = 0; i < tx_grp->ring_cnt; i++) { 1457 tx_ringp = &tx_grp->rings[i]; 1458 if (tx_ringp->woken_up) { 1459 tx_ringp->woken_up = B_FALSE; 1460 vnetp = tx_ringp->vnetp; 1461 mac_tx_ring_update(vnetp->mh, tx_ringp->handle); 1462 } 1463 } 1464 } 1465 /* 1466 * The tx_grp is being destroyed, exit the thread. 1467 */ 1468 tx_grp->flowctl_thread = NULL; 1469 CALLB_CPR_EXIT(&cprinfo); 1470 thread_exit(); 1471 } 1472 1473 void 1474 vnet_tx_ring_update(void *arg1, uintptr_t arg2) 1475 { 1476 vnet_t *vnetp = (vnet_t *)arg1; 1477 vnet_pseudo_tx_group_t *tx_grp; 1478 vnet_pseudo_tx_ring_t *tx_ringp; 1479 int i; 1480 1481 tx_grp = &vnetp->tx_grp[0]; 1482 for (i = 0; i < tx_grp->ring_cnt; i++) { 1483 tx_ringp = &tx_grp->rings[i]; 1484 if (tx_ringp->hw_rh == (mac_ring_handle_t)arg2) { 1485 mutex_enter(&tx_grp->flowctl_lock); 1486 tx_ringp->woken_up = B_TRUE; 1487 cv_signal(&tx_grp->flowctl_cv); 1488 mutex_exit(&tx_grp->flowctl_lock); 1489 break; 1490 } 1491 } 1492 } 1493 1494 /* 1495 * Update the new mtu of vnet into the mac layer. First check if the device has 1496 * been plumbed and if so fail the mtu update. Returns 0 on success. 1497 */ 1498 int 1499 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu) 1500 { 1501 int rv; 1502 1503 if (vnetp == NULL || vnetp->mh == NULL) { 1504 return (EINVAL); 1505 } 1506 1507 WRITE_ENTER(&vnetp->vrwlock); 1508 1509 if (vnetp->flags & VNET_STARTED) { 1510 RW_EXIT(&vnetp->vrwlock); 1511 cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu " 1512 "update as the device is plumbed\n", 1513 vnetp->instance); 1514 return (EBUSY); 1515 } 1516 1517 /* update mtu in the mac layer */ 1518 rv = mac_maxsdu_update(vnetp->mh, mtu); 1519 if (rv != 0) { 1520 RW_EXIT(&vnetp->vrwlock); 1521 cmn_err(CE_NOTE, 1522 "!vnet%d: Unable to update mtu with mac layer\n", 1523 vnetp->instance); 1524 return (EIO); 1525 } 1526 1527 vnetp->mtu = mtu; 1528 1529 RW_EXIT(&vnetp->vrwlock); 1530 1531 return (0); 1532 } 1533 1534 /* 1535 * Update the link state of vnet to the mac layer. 1536 */ 1537 void 1538 vnet_link_update(vnet_t *vnetp, link_state_t link_state) 1539 { 1540 if (vnetp == NULL || vnetp->mh == NULL) { 1541 return; 1542 } 1543 1544 WRITE_ENTER(&vnetp->vrwlock); 1545 if (vnetp->link_state == link_state) { 1546 RW_EXIT(&vnetp->vrwlock); 1547 return; 1548 } 1549 vnetp->link_state = link_state; 1550 RW_EXIT(&vnetp->vrwlock); 1551 1552 mac_link_update(vnetp->mh, link_state); 1553 } 1554 1555 /* 1556 * vio_net_resource_reg -- An interface called to register a resource 1557 * with vnet. 1558 * macp -- a GLDv3 mac_register that has all the details of 1559 * a resource and its callbacks etc. 1560 * type -- resource type. 1561 * local_macaddr -- resource's MAC address. This is used to 1562 * associate a resource with a corresponding vnet. 1563 * remote_macaddr -- remote side MAC address. This is ignored for 1564 * the Hybrid resources. 1565 * vhp -- A handle returned to the caller. 1566 * vcb -- A set of callbacks provided to the callers. 1567 */ 1568 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type, 1569 ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp, 1570 vio_net_callbacks_t *vcb) 1571 { 1572 vnet_t *vnetp; 1573 vnet_res_t *vresp; 1574 1575 vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP); 1576 ether_copy(local_macaddr, vresp->local_macaddr); 1577 ether_copy(rem_macaddr, vresp->rem_macaddr); 1578 vresp->type = type; 1579 bcopy(macp, &vresp->macreg, sizeof (mac_register_t)); 1580 1581 DBG1(NULL, "Resource Registerig type=0%X\n", type); 1582 1583 READ_ENTER(&vnet_rw); 1584 vnetp = vnet_headp; 1585 while (vnetp != NULL) { 1586 if (VNET_MATCH_RES(vresp, vnetp)) { 1587 vresp->vnetp = vnetp; 1588 1589 /* Setup kstats for hio resource */ 1590 if (vresp->type == VIO_NET_RES_HYBRID) { 1591 vresp->ksp = vnet_hio_setup_kstats(DRV_NAME, 1592 "hio", vresp); 1593 if (vresp->ksp == NULL) { 1594 cmn_err(CE_NOTE, "!vnet%d: Cannot " 1595 "create kstats for hio resource", 1596 vnetp->instance); 1597 } 1598 } 1599 vnet_add_resource(vnetp, vresp); 1600 break; 1601 } 1602 vnetp = vnetp->nextp; 1603 } 1604 RW_EXIT(&vnet_rw); 1605 if (vresp->vnetp == NULL) { 1606 DWARN(NULL, "No vnet instance"); 1607 kmem_free(vresp, sizeof (vnet_res_t)); 1608 return (ENXIO); 1609 } 1610 1611 *vhp = vresp; 1612 vcb->vio_net_rx_cb = vnet_rx; 1613 vcb->vio_net_tx_update = vnet_tx_update; 1614 vcb->vio_net_report_err = vnet_handle_res_err; 1615 1616 /* Bind the resource to pseudo ring(s) */ 1617 if (vnet_bind_rings(vresp) != 0) { 1618 (void) vnet_rem_resource(vnetp, vresp); 1619 vnet_hio_destroy_kstats(vresp->ksp); 1620 KMEM_FREE(vresp); 1621 return (1); 1622 } 1623 1624 /* Dispatch a task to start resources */ 1625 vnet_dispatch_res_task(vnetp); 1626 return (0); 1627 } 1628 1629 /* 1630 * vio_net_resource_unreg -- An interface to unregister a resource. 1631 */ 1632 void 1633 vio_net_resource_unreg(vio_net_handle_t vhp) 1634 { 1635 vnet_res_t *vresp = (vnet_res_t *)vhp; 1636 vnet_t *vnetp = vresp->vnetp; 1637 1638 DBG1(NULL, "Resource Registerig hdl=0x%p", vhp); 1639 1640 ASSERT(vnetp != NULL); 1641 /* 1642 * Remove the resource from fdb; this ensures 1643 * there are no references to the resource. 1644 */ 1645 vnet_fdbe_del(vnetp, vresp); 1646 1647 vnet_unbind_rings(vresp); 1648 1649 /* Now remove the resource from the list */ 1650 (void) vnet_rem_resource(vnetp, vresp); 1651 1652 vnet_hio_destroy_kstats(vresp->ksp); 1653 KMEM_FREE(vresp); 1654 } 1655 1656 static void 1657 vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp) 1658 { 1659 WRITE_ENTER(&vnetp->vrwlock); 1660 vresp->nextp = vnetp->vres_list; 1661 vnetp->vres_list = vresp; 1662 RW_EXIT(&vnetp->vrwlock); 1663 } 1664 1665 static vnet_res_t * 1666 vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp) 1667 { 1668 vnet_res_t *vrp; 1669 1670 WRITE_ENTER(&vnetp->vrwlock); 1671 if (vresp == vnetp->vres_list) { 1672 vnetp->vres_list = vresp->nextp; 1673 } else { 1674 vrp = vnetp->vres_list; 1675 while (vrp->nextp != NULL) { 1676 if (vrp->nextp == vresp) { 1677 vrp->nextp = vresp->nextp; 1678 break; 1679 } 1680 vrp = vrp->nextp; 1681 } 1682 } 1683 vresp->vnetp = NULL; 1684 vresp->nextp = NULL; 1685 1686 RW_EXIT(&vnetp->vrwlock); 1687 1688 return (vresp); 1689 } 1690 1691 /* 1692 * vnet_dds_rx -- an interface called by vgen to DDS messages. 1693 */ 1694 void 1695 vnet_dds_rx(void *arg, void *dmsg) 1696 { 1697 vnet_t *vnetp = arg; 1698 vdds_process_dds_msg(vnetp, dmsg); 1699 } 1700 1701 /* 1702 * vnet_send_dds_msg -- An interface provided to DDS to send 1703 * DDS messages. This simply sends meessages via vgen. 1704 */ 1705 int 1706 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg) 1707 { 1708 int rv; 1709 1710 if (vnetp->vgenhdl != NULL) { 1711 rv = vgen_dds_tx(vnetp->vgenhdl, dmsg); 1712 } 1713 return (rv); 1714 } 1715 1716 /* 1717 * vnet_cleanup_hio -- an interface called by vgen to cleanup hio resources. 1718 */ 1719 void 1720 vnet_dds_cleanup_hio(vnet_t *vnetp) 1721 { 1722 vdds_cleanup_hio(vnetp); 1723 } 1724 1725 /* 1726 * vnet_handle_res_err -- A callback function called by a resource 1727 * to report an error. For example, vgen can call to report 1728 * an LDC down/reset event. This will trigger cleanup of associated 1729 * Hybrid resource. 1730 */ 1731 /* ARGSUSED */ 1732 static void 1733 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err) 1734 { 1735 vnet_res_t *vresp = (vnet_res_t *)vrh; 1736 vnet_t *vnetp = vresp->vnetp; 1737 1738 if (vnetp == NULL) { 1739 return; 1740 } 1741 if ((vresp->type != VIO_NET_RES_LDC_SERVICE) && 1742 (vresp->type != VIO_NET_RES_HYBRID)) { 1743 return; 1744 } 1745 1746 vdds_cleanup_hio(vnetp); 1747 } 1748 1749 /* 1750 * vnet_dispatch_res_task -- A function to dispatch tasks start resources. 1751 */ 1752 static void 1753 vnet_dispatch_res_task(vnet_t *vnetp) 1754 { 1755 int rv; 1756 1757 /* 1758 * Dispatch the task. It could be the case that vnetp->flags does 1759 * not have VNET_STARTED set. This is ok as vnet_rest_start_task() 1760 * can abort the task when the task is started. See related comments 1761 * in vnet_m_stop() and vnet_stop_resources(). 1762 */ 1763 rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task, 1764 vnetp, DDI_NOSLEEP); 1765 if (rv != DDI_SUCCESS) { 1766 cmn_err(CE_WARN, 1767 "vnet%d:Can't dispatch start resource task", 1768 vnetp->instance); 1769 } 1770 } 1771 1772 /* 1773 * vnet_res_start_task -- A taskq callback function that starts a resource. 1774 */ 1775 static void 1776 vnet_res_start_task(void *arg) 1777 { 1778 vnet_t *vnetp = arg; 1779 1780 WRITE_ENTER(&vnetp->vrwlock); 1781 if (vnetp->flags & VNET_STARTED) { 1782 vnet_start_resources(vnetp); 1783 } 1784 RW_EXIT(&vnetp->vrwlock); 1785 } 1786 1787 /* 1788 * vnet_start_resources -- starts all resources associated with 1789 * a vnet. 1790 */ 1791 static void 1792 vnet_start_resources(vnet_t *vnetp) 1793 { 1794 mac_register_t *macp; 1795 mac_callbacks_t *cbp; 1796 vnet_res_t *vresp; 1797 int rv; 1798 1799 DBG1(vnetp, "enter\n"); 1800 1801 ASSERT(RW_WRITE_HELD(&vnetp->vrwlock)); 1802 1803 for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) { 1804 /* skip if it is already started */ 1805 if (vresp->flags & VNET_STARTED) { 1806 continue; 1807 } 1808 macp = &vresp->macreg; 1809 cbp = macp->m_callbacks; 1810 rv = cbp->mc_start(macp->m_driver); 1811 if (rv == 0) { 1812 /* 1813 * Successfully started the resource, so now 1814 * add it to the fdb. 1815 */ 1816 vresp->flags |= VNET_STARTED; 1817 vnet_fdbe_add(vnetp, vresp); 1818 } 1819 } 1820 1821 DBG1(vnetp, "exit\n"); 1822 1823 } 1824 1825 /* 1826 * vnet_stop_resources -- stop all resources associated with a vnet. 1827 */ 1828 static void 1829 vnet_stop_resources(vnet_t *vnetp) 1830 { 1831 vnet_res_t *vresp; 1832 mac_register_t *macp; 1833 mac_callbacks_t *cbp; 1834 1835 DBG1(vnetp, "enter\n"); 1836 1837 ASSERT(RW_WRITE_HELD(&vnetp->vrwlock)); 1838 1839 for (vresp = vnetp->vres_list; vresp != NULL; ) { 1840 if (vresp->flags & VNET_STARTED) { 1841 /* 1842 * Release the lock while invoking mc_stop() of the 1843 * underlying resource. We hold a reference to this 1844 * resource to prevent being removed from the list in 1845 * vio_net_resource_unreg(). Note that new resources 1846 * can be added to the head of the list while the lock 1847 * is released, but they won't be started, as 1848 * VNET_STARTED flag has been cleared for the vnet 1849 * device in vnet_m_stop(). Also, while the lock is 1850 * released a resource could be removed from the list 1851 * in vio_net_resource_unreg(); but that is ok, as we 1852 * re-acquire the lock and only then access the forward 1853 * link (vresp->nextp) to continue with the next 1854 * resource. 1855 */ 1856 vresp->flags &= ~VNET_STARTED; 1857 vresp->flags |= VNET_STOPPING; 1858 macp = &vresp->macreg; 1859 cbp = macp->m_callbacks; 1860 VNET_FDBE_REFHOLD(vresp); 1861 RW_EXIT(&vnetp->vrwlock); 1862 1863 cbp->mc_stop(macp->m_driver); 1864 1865 WRITE_ENTER(&vnetp->vrwlock); 1866 vresp->flags &= ~VNET_STOPPING; 1867 VNET_FDBE_REFRELE(vresp); 1868 } 1869 vresp = vresp->nextp; 1870 } 1871 DBG1(vnetp, "exit\n"); 1872 } 1873 1874 /* 1875 * Setup kstats for the HIO statistics. 1876 * NOTE: the synchronization for the statistics is the 1877 * responsibility of the caller. 1878 */ 1879 kstat_t * 1880 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp) 1881 { 1882 kstat_t *ksp; 1883 vnet_t *vnetp = vresp->vnetp; 1884 vnet_hio_kstats_t *hiokp; 1885 size_t size; 1886 1887 ASSERT(vnetp != NULL); 1888 size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t); 1889 ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net", 1890 KSTAT_TYPE_NAMED, size, 0); 1891 if (ksp == NULL) { 1892 return (NULL); 1893 } 1894 1895 hiokp = (vnet_hio_kstats_t *)ksp->ks_data; 1896 kstat_named_init(&hiokp->ipackets, "ipackets", 1897 KSTAT_DATA_ULONG); 1898 kstat_named_init(&hiokp->ierrors, "ierrors", 1899 KSTAT_DATA_ULONG); 1900 kstat_named_init(&hiokp->opackets, "opackets", 1901 KSTAT_DATA_ULONG); 1902 kstat_named_init(&hiokp->oerrors, "oerrors", 1903 KSTAT_DATA_ULONG); 1904 1905 1906 /* MIB II kstat variables */ 1907 kstat_named_init(&hiokp->rbytes, "rbytes", 1908 KSTAT_DATA_ULONG); 1909 kstat_named_init(&hiokp->obytes, "obytes", 1910 KSTAT_DATA_ULONG); 1911 kstat_named_init(&hiokp->multircv, "multircv", 1912 KSTAT_DATA_ULONG); 1913 kstat_named_init(&hiokp->multixmt, "multixmt", 1914 KSTAT_DATA_ULONG); 1915 kstat_named_init(&hiokp->brdcstrcv, "brdcstrcv", 1916 KSTAT_DATA_ULONG); 1917 kstat_named_init(&hiokp->brdcstxmt, "brdcstxmt", 1918 KSTAT_DATA_ULONG); 1919 kstat_named_init(&hiokp->norcvbuf, "norcvbuf", 1920 KSTAT_DATA_ULONG); 1921 kstat_named_init(&hiokp->noxmtbuf, "noxmtbuf", 1922 KSTAT_DATA_ULONG); 1923 1924 ksp->ks_update = vnet_hio_update_kstats; 1925 ksp->ks_private = (void *)vresp; 1926 kstat_install(ksp); 1927 return (ksp); 1928 } 1929 1930 /* 1931 * Destroy kstats. 1932 */ 1933 static void 1934 vnet_hio_destroy_kstats(kstat_t *ksp) 1935 { 1936 if (ksp != NULL) 1937 kstat_delete(ksp); 1938 } 1939 1940 /* 1941 * Update the kstats. 1942 */ 1943 static int 1944 vnet_hio_update_kstats(kstat_t *ksp, int rw) 1945 { 1946 vnet_t *vnetp; 1947 vnet_res_t *vresp; 1948 vnet_hio_stats_t statsp; 1949 vnet_hio_kstats_t *hiokp; 1950 1951 vresp = (vnet_res_t *)ksp->ks_private; 1952 vnetp = vresp->vnetp; 1953 1954 bzero(&statsp, sizeof (vnet_hio_stats_t)); 1955 1956 READ_ENTER(&vnetp->vsw_fp_rw); 1957 if (vnetp->hio_fp == NULL) { 1958 /* not using hio resources, just return */ 1959 RW_EXIT(&vnetp->vsw_fp_rw); 1960 return (0); 1961 } 1962 VNET_FDBE_REFHOLD(vnetp->hio_fp); 1963 RW_EXIT(&vnetp->vsw_fp_rw); 1964 vnet_hio_get_stats(vnetp->hio_fp, &statsp); 1965 VNET_FDBE_REFRELE(vnetp->hio_fp); 1966 1967 hiokp = (vnet_hio_kstats_t *)ksp->ks_data; 1968 1969 if (rw == KSTAT_READ) { 1970 /* Link Input/Output stats */ 1971 hiokp->ipackets.value.ul = (uint32_t)statsp.ipackets; 1972 hiokp->ipackets64.value.ull = statsp.ipackets; 1973 hiokp->ierrors.value.ul = statsp.ierrors; 1974 hiokp->opackets.value.ul = (uint32_t)statsp.opackets; 1975 hiokp->opackets64.value.ull = statsp.opackets; 1976 hiokp->oerrors.value.ul = statsp.oerrors; 1977 1978 /* MIB II kstat variables */ 1979 hiokp->rbytes.value.ul = (uint32_t)statsp.rbytes; 1980 hiokp->rbytes64.value.ull = statsp.rbytes; 1981 hiokp->obytes.value.ul = (uint32_t)statsp.obytes; 1982 hiokp->obytes64.value.ull = statsp.obytes; 1983 hiokp->multircv.value.ul = statsp.multircv; 1984 hiokp->multixmt.value.ul = statsp.multixmt; 1985 hiokp->brdcstrcv.value.ul = statsp.brdcstrcv; 1986 hiokp->brdcstxmt.value.ul = statsp.brdcstxmt; 1987 hiokp->norcvbuf.value.ul = statsp.norcvbuf; 1988 hiokp->noxmtbuf.value.ul = statsp.noxmtbuf; 1989 } else { 1990 return (EACCES); 1991 } 1992 1993 return (0); 1994 } 1995 1996 static void 1997 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp) 1998 { 1999 mac_register_t *macp; 2000 mac_callbacks_t *cbp; 2001 uint64_t val; 2002 int stat; 2003 2004 /* 2005 * get the specified statistics from the underlying nxge. 2006 */ 2007 macp = &vresp->macreg; 2008 cbp = macp->m_callbacks; 2009 for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) { 2010 if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) { 2011 switch (stat) { 2012 case MAC_STAT_IPACKETS: 2013 statsp->ipackets = val; 2014 break; 2015 2016 case MAC_STAT_IERRORS: 2017 statsp->ierrors = val; 2018 break; 2019 2020 case MAC_STAT_OPACKETS: 2021 statsp->opackets = val; 2022 break; 2023 2024 case MAC_STAT_OERRORS: 2025 statsp->oerrors = val; 2026 break; 2027 2028 case MAC_STAT_RBYTES: 2029 statsp->rbytes = val; 2030 break; 2031 2032 case MAC_STAT_OBYTES: 2033 statsp->obytes = val; 2034 break; 2035 2036 case MAC_STAT_MULTIRCV: 2037 statsp->multircv = val; 2038 break; 2039 2040 case MAC_STAT_MULTIXMT: 2041 statsp->multixmt = val; 2042 break; 2043 2044 case MAC_STAT_BRDCSTRCV: 2045 statsp->brdcstrcv = val; 2046 break; 2047 2048 case MAC_STAT_BRDCSTXMT: 2049 statsp->brdcstxmt = val; 2050 break; 2051 2052 case MAC_STAT_NOXMTBUF: 2053 statsp->noxmtbuf = val; 2054 break; 2055 2056 case MAC_STAT_NORCVBUF: 2057 statsp->norcvbuf = val; 2058 break; 2059 2060 default: 2061 /* 2062 * parameters not interested. 2063 */ 2064 break; 2065 } 2066 } 2067 } 2068 } 2069 2070 static boolean_t 2071 vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data) 2072 { 2073 vnet_t *vnetp = (vnet_t *)arg; 2074 2075 if (vnetp == NULL) { 2076 return (0); 2077 } 2078 2079 switch (cap) { 2080 2081 case MAC_CAPAB_RINGS: { 2082 2083 mac_capab_rings_t *cap_rings = cap_data; 2084 /* 2085 * Rings Capability Notes: 2086 * We advertise rings to make use of the rings framework in 2087 * gldv3 mac layer, to improve the performance. This is 2088 * specifically needed when a Hybrid resource (with multiple 2089 * tx/rx hardware rings) is assigned to a vnet device. We also 2090 * leverage this for the normal case when no Hybrid resource is 2091 * assigned. 2092 * 2093 * Ring Allocation: 2094 * - TX path: 2095 * We expose a pseudo ring group with 2 pseudo tx rings (as 2096 * currently HybridIO exports only 2 rings) In the normal case, 2097 * transmit traffic that comes down to the driver through the 2098 * mri_tx (vnet_tx_ring_send()) entry point goes through the 2099 * distributed switching algorithm in vnet and gets transmitted 2100 * over a port/LDC in the vgen layer to either the vswitch or a 2101 * peer vnet. If and when a Hybrid resource is assigned to the 2102 * vnet, we obtain the tx ring information of the Hybrid device 2103 * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings. 2104 * Traffic being sent over the Hybrid resource by the mac layer 2105 * gets spread across both hw rings, as they are mapped to the 2106 * 2 pseudo tx rings in vnet. 2107 * 2108 * - RX path: 2109 * We expose a pseudo ring group with 3 pseudo rx rings (static 2110 * rings) initially. The first (default) pseudo rx ring is 2111 * reserved for the resource that connects to the vswitch 2112 * service. The next 2 rings are reserved for a Hybrid resource 2113 * that may be assigned to the vnet device. If and when a 2114 * Hybrid resource is assigned to the vnet, we obtain the rx 2115 * ring information of the Hybrid device (nxge) and map these 2116 * pseudo rings 1:1 to the 2 hw rx rings. For each additional 2117 * resource that connects to a peer vnet, we dynamically 2118 * allocate a pseudo rx ring and map it to that resource, when 2119 * the resource gets added; and the pseudo rx ring is 2120 * dynamically registered with the upper mac layer. We do the 2121 * reverse and unregister the ring with the mac layer when 2122 * the resource gets removed. 2123 * 2124 * Synchronization notes: 2125 * We don't need any lock to protect members of ring structure, 2126 * specifically ringp->hw_rh, in either the TX or the RX ring, 2127 * as explained below. 2128 * - TX ring: 2129 * ring->hw_rh is initialized only when a Hybrid resource is 2130 * associated; and gets referenced only in vnet_hio_tx(). The 2131 * Hybrid resource itself is available in fdb only after tx 2132 * hwrings are found and mapped; i.e, in vio_net_resource_reg() 2133 * we call vnet_bind_rings() first and then call 2134 * vnet_start_resources() which adds an entry to fdb. For 2135 * traffic going over LDC resources, we don't reference 2136 * ring->hw_rh at all. 2137 * - RX ring: 2138 * For rings mapped to Hybrid resource ring->hw_rh is 2139 * initialized and only then do we add the rx callback for 2140 * the underlying Hybrid resource; we disable callbacks before 2141 * we unmap ring->hw_rh. For rings mapped to LDC resources, we 2142 * stop the rx callbacks (in vgen) before we remove ring->hw_rh 2143 * (vio_net_resource_unreg()). 2144 * Also, we access ring->hw_rh in vnet_rx_ring_stat(). 2145 * Note that for rings mapped to Hybrid resource, though the 2146 * rings are statically registered with the mac layer, its 2147 * hardware ring mapping (ringp->hw_rh) can be torn down in 2148 * vnet_unbind_hwrings() while the kstat operation is in 2149 * progress. To protect against this, we hold a reference to 2150 * the resource in FDB; this ensures that the thread in 2151 * vio_net_resource_unreg() waits for the reference to be 2152 * dropped before unbinding the ring. 2153 * 2154 * We don't need to do this for rings mapped to LDC resources. 2155 * These rings are registered/unregistered dynamically with 2156 * the mac layer and so any attempt to unregister the ring 2157 * while kstat operation is in progress will block in 2158 * mac_group_rem_ring(). Thus implicitly protects the 2159 * resource (ringp->hw_rh) from disappearing. 2160 */ 2161 2162 if (cap_rings->mr_type == MAC_RING_TYPE_RX) { 2163 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 2164 2165 /* 2166 * The ring_cnt for rx grp is initialized in 2167 * vnet_ring_grp_init(). Later, the ring_cnt gets 2168 * updated dynamically whenever LDC resources are added 2169 * or removed. 2170 */ 2171 cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt; 2172 cap_rings->mr_rget = vnet_get_ring; 2173 2174 cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS; 2175 cap_rings->mr_gget = vnet_get_group; 2176 cap_rings->mr_gaddring = NULL; 2177 cap_rings->mr_gremring = NULL; 2178 } else { 2179 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 2180 2181 /* 2182 * The ring_cnt for tx grp is initialized in 2183 * vnet_ring_grp_init() and remains constant, as we 2184 * do not support dymanic tx rings for now. 2185 */ 2186 cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt; 2187 cap_rings->mr_rget = vnet_get_ring; 2188 2189 /* 2190 * Transmit rings are not grouped; i.e, the number of 2191 * transmit ring groups advertised should be set to 0. 2192 */ 2193 cap_rings->mr_gnum = 0; 2194 2195 cap_rings->mr_gget = vnet_get_group; 2196 cap_rings->mr_gaddring = NULL; 2197 cap_rings->mr_gremring = NULL; 2198 } 2199 return (B_TRUE); 2200 2201 } 2202 2203 default: 2204 break; 2205 2206 } 2207 2208 return (B_FALSE); 2209 } 2210 2211 /* 2212 * Callback funtion for MAC layer to get ring information. 2213 */ 2214 static void 2215 vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, 2216 const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle) 2217 { 2218 vnet_t *vnetp = arg; 2219 2220 switch (rtype) { 2221 2222 case MAC_RING_TYPE_RX: { 2223 2224 vnet_pseudo_rx_group_t *rx_grp; 2225 vnet_pseudo_rx_ring_t *rx_ringp; 2226 mac_intr_t *mintr; 2227 2228 /* We advertised only one RX group */ 2229 ASSERT(g_index == 0); 2230 rx_grp = &vnetp->rx_grp[g_index]; 2231 2232 /* Check the current # of rings in the rx group */ 2233 ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt)); 2234 2235 /* Get the ring based on the index */ 2236 rx_ringp = &rx_grp->rings[r_index]; 2237 2238 rx_ringp->handle = r_handle; 2239 /* 2240 * Note: we don't need to save the incoming r_index in rx_ring, 2241 * as vnet_ring_grp_init() would have initialized the index for 2242 * each ring in the array. 2243 */ 2244 rx_ringp->grp = rx_grp; 2245 rx_ringp->vnetp = vnetp; 2246 2247 mintr = &infop->mri_intr; 2248 mintr->mi_handle = (mac_intr_handle_t)rx_ringp; 2249 mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr; 2250 mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr; 2251 2252 infop->mri_driver = (mac_ring_driver_t)rx_ringp; 2253 infop->mri_start = vnet_rx_ring_start; 2254 infop->mri_stop = vnet_rx_ring_stop; 2255 infop->mri_stat = vnet_rx_ring_stat; 2256 2257 /* Set the poll function, as this is an rx ring */ 2258 infop->mri_poll = vnet_rx_poll; 2259 /* 2260 * MAC_RING_RX_ENQUEUE bit needed to be set for nxge 2261 * which was not sending packet chains in interrupt 2262 * context. For such drivers, packets are queued in 2263 * Rx soft rings so that we get a chance to switch 2264 * into a polling mode under backlog. This bug (not 2265 * sending packet chains) has now been fixed. Once 2266 * the performance impact is measured, this change 2267 * will be removed. 2268 */ 2269 infop->mri_flags = (vnet_mac_rx_queuing ? 2270 MAC_RING_RX_ENQUEUE : 0); 2271 break; 2272 } 2273 2274 case MAC_RING_TYPE_TX: { 2275 vnet_pseudo_tx_group_t *tx_grp; 2276 vnet_pseudo_tx_ring_t *tx_ringp; 2277 2278 /* 2279 * No need to check grp index; mac layer passes -1 for it. 2280 */ 2281 tx_grp = &vnetp->tx_grp[0]; 2282 2283 /* Check the # of rings in the tx group */ 2284 ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt)); 2285 2286 /* Get the ring based on the index */ 2287 tx_ringp = &tx_grp->rings[r_index]; 2288 2289 tx_ringp->handle = r_handle; 2290 tx_ringp->index = r_index; 2291 tx_ringp->grp = tx_grp; 2292 tx_ringp->vnetp = vnetp; 2293 2294 infop->mri_driver = (mac_ring_driver_t)tx_ringp; 2295 infop->mri_start = vnet_tx_ring_start; 2296 infop->mri_stop = vnet_tx_ring_stop; 2297 infop->mri_stat = vnet_tx_ring_stat; 2298 2299 /* Set the transmit function, as this is a tx ring */ 2300 infop->mri_tx = vnet_tx_ring_send; 2301 /* 2302 * MAC_RING_TX_SERIALIZE bit needs to be set while 2303 * hybridIO is enabled to workaround tx lock 2304 * contention issues in nxge. 2305 */ 2306 infop->mri_flags = (vnet_mac_tx_serialize ? 2307 MAC_RING_TX_SERIALIZE : 0); 2308 break; 2309 } 2310 2311 default: 2312 break; 2313 } 2314 } 2315 2316 /* 2317 * Callback funtion for MAC layer to get group information. 2318 */ 2319 static void 2320 vnet_get_group(void *arg, mac_ring_type_t type, const int index, 2321 mac_group_info_t *infop, mac_group_handle_t handle) 2322 { 2323 vnet_t *vnetp = (vnet_t *)arg; 2324 2325 switch (type) { 2326 2327 case MAC_RING_TYPE_RX: 2328 { 2329 vnet_pseudo_rx_group_t *rx_grp; 2330 2331 /* We advertised only one RX group */ 2332 ASSERT(index == 0); 2333 2334 rx_grp = &vnetp->rx_grp[index]; 2335 rx_grp->handle = handle; 2336 rx_grp->index = index; 2337 rx_grp->vnetp = vnetp; 2338 2339 infop->mgi_driver = (mac_group_driver_t)rx_grp; 2340 infop->mgi_start = NULL; 2341 infop->mgi_stop = NULL; 2342 infop->mgi_addmac = vnet_addmac; 2343 infop->mgi_remmac = vnet_remmac; 2344 infop->mgi_count = rx_grp->ring_cnt; 2345 2346 break; 2347 } 2348 2349 case MAC_RING_TYPE_TX: 2350 { 2351 vnet_pseudo_tx_group_t *tx_grp; 2352 2353 /* We advertised only one TX group */ 2354 ASSERT(index == 0); 2355 2356 tx_grp = &vnetp->tx_grp[index]; 2357 tx_grp->handle = handle; 2358 tx_grp->index = index; 2359 tx_grp->vnetp = vnetp; 2360 2361 infop->mgi_driver = (mac_group_driver_t)tx_grp; 2362 infop->mgi_start = NULL; 2363 infop->mgi_stop = NULL; 2364 infop->mgi_addmac = NULL; 2365 infop->mgi_remmac = NULL; 2366 infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS; 2367 2368 break; 2369 } 2370 2371 default: 2372 break; 2373 2374 } 2375 } 2376 2377 static int 2378 vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num) 2379 { 2380 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2381 int err; 2382 2383 /* 2384 * If this ring is mapped to a LDC resource, simply mark the state to 2385 * indicate the ring is started and return. 2386 */ 2387 if ((rx_ringp->state & 2388 (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) { 2389 rx_ringp->gen_num = mr_gen_num; 2390 rx_ringp->state |= VNET_RXRING_STARTED; 2391 return (0); 2392 } 2393 2394 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); 2395 2396 /* 2397 * This must be a ring reserved for a hwring. If the hwring is not 2398 * bound yet, simply mark the state to indicate the ring is started and 2399 * return. If and when a hybrid resource is activated for this vnet 2400 * device, we will bind the hwring and start it then. If a hwring is 2401 * already bound, start it now. 2402 */ 2403 if (rx_ringp->hw_rh == NULL) { 2404 rx_ringp->gen_num = mr_gen_num; 2405 rx_ringp->state |= VNET_RXRING_STARTED; 2406 return (0); 2407 } 2408 2409 err = mac_hwring_activate(rx_ringp->hw_rh); 2410 if (err == 0) { 2411 rx_ringp->gen_num = mr_gen_num; 2412 rx_ringp->state |= VNET_RXRING_STARTED; 2413 } else { 2414 err = ENXIO; 2415 } 2416 2417 return (err); 2418 } 2419 2420 static void 2421 vnet_rx_ring_stop(mac_ring_driver_t arg) 2422 { 2423 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2424 2425 /* 2426 * If this ring is mapped to a LDC resource, simply mark the state to 2427 * indicate the ring is now stopped and return. 2428 */ 2429 if ((rx_ringp->state & 2430 (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) { 2431 rx_ringp->state &= ~VNET_RXRING_STARTED; 2432 return; 2433 } 2434 2435 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); 2436 2437 /* 2438 * This must be a ring reserved for a hwring. If the hwring is not 2439 * bound yet, simply mark the state to indicate the ring is stopped and 2440 * return. If a hwring is already bound, stop it now. 2441 */ 2442 if (rx_ringp->hw_rh == NULL) { 2443 rx_ringp->state &= ~VNET_RXRING_STARTED; 2444 return; 2445 } 2446 2447 mac_hwring_quiesce(rx_ringp->hw_rh); 2448 rx_ringp->state &= ~VNET_RXRING_STARTED; 2449 } 2450 2451 static int 2452 vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) 2453 { 2454 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)rdriver; 2455 vnet_t *vnetp = (vnet_t *)rx_ringp->vnetp; 2456 vnet_res_t *vresp; 2457 mac_register_t *macp; 2458 mac_callbacks_t *cbp; 2459 2460 /* 2461 * Refer to vnet_m_capab() function for detailed comments on ring 2462 * synchronization. 2463 */ 2464 if ((rx_ringp->state & VNET_RXRING_HYBRID) != 0) { 2465 READ_ENTER(&vnetp->vsw_fp_rw); 2466 if (vnetp->hio_fp == NULL) { 2467 RW_EXIT(&vnetp->vsw_fp_rw); 2468 return (0); 2469 } 2470 2471 VNET_FDBE_REFHOLD(vnetp->hio_fp); 2472 RW_EXIT(&vnetp->vsw_fp_rw); 2473 (void) mac_hwring_getstat(rx_ringp->hw_rh, stat, val); 2474 VNET_FDBE_REFRELE(vnetp->hio_fp); 2475 return (0); 2476 } 2477 2478 ASSERT((rx_ringp->state & 2479 (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0); 2480 vresp = (vnet_res_t *)rx_ringp->hw_rh; 2481 macp = &vresp->macreg; 2482 cbp = macp->m_callbacks; 2483 2484 cbp->mc_getstat(macp->m_driver, stat, val); 2485 2486 return (0); 2487 } 2488 2489 /* ARGSUSED */ 2490 static int 2491 vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num) 2492 { 2493 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg; 2494 2495 tx_ringp->state |= VNET_TXRING_STARTED; 2496 return (0); 2497 } 2498 2499 static void 2500 vnet_tx_ring_stop(mac_ring_driver_t arg) 2501 { 2502 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg; 2503 2504 tx_ringp->state &= ~VNET_TXRING_STARTED; 2505 } 2506 2507 static int 2508 vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) 2509 { 2510 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)rdriver; 2511 vnet_tx_ring_stats_t *statsp; 2512 2513 statsp = &tx_ringp->tx_ring_stats; 2514 2515 switch (stat) { 2516 case MAC_STAT_OPACKETS: 2517 *val = statsp->opackets; 2518 break; 2519 2520 case MAC_STAT_OBYTES: 2521 *val = statsp->obytes; 2522 break; 2523 2524 default: 2525 *val = 0; 2526 return (ENOTSUP); 2527 } 2528 2529 return (0); 2530 } 2531 2532 /* 2533 * Disable polling for a ring and enable its interrupt. 2534 */ 2535 static int 2536 vnet_ring_enable_intr(void *arg) 2537 { 2538 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2539 vnet_res_t *vresp; 2540 2541 if (rx_ringp->hw_rh == NULL) { 2542 /* 2543 * Ring enable intr func is being invoked, but the ring is 2544 * not bound to any underlying resource ? This must be a ring 2545 * reserved for Hybrid resource and no such resource has been 2546 * assigned to this vnet device yet. We simply return success. 2547 */ 2548 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); 2549 return (0); 2550 } 2551 2552 /* 2553 * The rx ring has been bound to either a LDC or a Hybrid resource. 2554 * Call the appropriate function to enable interrupts for the ring. 2555 */ 2556 if (rx_ringp->state & VNET_RXRING_HYBRID) { 2557 return (mac_hwring_enable_intr(rx_ringp->hw_rh)); 2558 } else { 2559 vresp = (vnet_res_t *)rx_ringp->hw_rh; 2560 return (vgen_enable_intr(vresp->macreg.m_driver)); 2561 } 2562 } 2563 2564 /* 2565 * Enable polling for a ring and disable its interrupt. 2566 */ 2567 static int 2568 vnet_ring_disable_intr(void *arg) 2569 { 2570 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2571 vnet_res_t *vresp; 2572 2573 if (rx_ringp->hw_rh == NULL) { 2574 /* 2575 * Ring disable intr func is being invoked, but the ring is 2576 * not bound to any underlying resource ? This must be a ring 2577 * reserved for Hybrid resource and no such resource has been 2578 * assigned to this vnet device yet. We simply return success. 2579 */ 2580 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); 2581 return (0); 2582 } 2583 2584 /* 2585 * The rx ring has been bound to either a LDC or a Hybrid resource. 2586 * Call the appropriate function to disable interrupts for the ring. 2587 */ 2588 if (rx_ringp->state & VNET_RXRING_HYBRID) { 2589 return (mac_hwring_disable_intr(rx_ringp->hw_rh)); 2590 } else { 2591 vresp = (vnet_res_t *)rx_ringp->hw_rh; 2592 return (vgen_disable_intr(vresp->macreg.m_driver)); 2593 } 2594 } 2595 2596 /* 2597 * Poll 'bytes_to_pickup' bytes of message from the rx ring. 2598 */ 2599 static mblk_t * 2600 vnet_rx_poll(void *arg, int bytes_to_pickup) 2601 { 2602 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2603 mblk_t *mp = NULL; 2604 vnet_res_t *vresp; 2605 vnet_t *vnetp = rx_ringp->vnetp; 2606 2607 if (rx_ringp->hw_rh == NULL) { 2608 return (NULL); 2609 } 2610 2611 if (rx_ringp->state & VNET_RXRING_HYBRID) { 2612 mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup); 2613 /* 2614 * Packets received over a hybrid resource need additional 2615 * processing to remove the tag, for the pvid case. The 2616 * underlying resource is not aware of the vnet's pvid and thus 2617 * packets are received with the vlan tag in the header; unlike 2618 * packets that are received over a ldc channel in which case 2619 * the peer vnet/vsw would have already removed the tag. 2620 */ 2621 if (vnetp->pvid != vnetp->default_vlan_id) { 2622 vnet_rx_frames_untag(vnetp->pvid, &mp); 2623 } 2624 } else { 2625 vresp = (vnet_res_t *)rx_ringp->hw_rh; 2626 mp = vgen_rx_poll(vresp->macreg.m_driver, bytes_to_pickup); 2627 } 2628 return (mp); 2629 } 2630 2631 /* ARGSUSED */ 2632 void 2633 vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 2634 boolean_t loopback) 2635 { 2636 vnet_t *vnetp = (vnet_t *)arg; 2637 vnet_pseudo_rx_ring_t *ringp = (vnet_pseudo_rx_ring_t *)mrh; 2638 2639 /* 2640 * Packets received over a hybrid resource need additional processing 2641 * to remove the tag, for the pvid case. The underlying resource is 2642 * not aware of the vnet's pvid and thus packets are received with the 2643 * vlan tag in the header; unlike packets that are received over a ldc 2644 * channel in which case the peer vnet/vsw would have already removed 2645 * the tag. 2646 */ 2647 if (vnetp->pvid != vnetp->default_vlan_id) { 2648 vnet_rx_frames_untag(vnetp->pvid, &mp); 2649 if (mp == NULL) { 2650 return; 2651 } 2652 } 2653 mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num); 2654 } 2655 2656 static int 2657 vnet_addmac(void *arg, const uint8_t *mac_addr) 2658 { 2659 vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg; 2660 vnet_t *vnetp; 2661 2662 vnetp = rx_grp->vnetp; 2663 2664 if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) { 2665 return (0); 2666 } 2667 2668 cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n", 2669 vnetp->instance, __func__); 2670 return (EINVAL); 2671 } 2672 2673 static int 2674 vnet_remmac(void *arg, const uint8_t *mac_addr) 2675 { 2676 vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg; 2677 vnet_t *vnetp; 2678 2679 vnetp = rx_grp->vnetp; 2680 2681 if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) { 2682 return (0); 2683 } 2684 2685 cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n", 2686 vnetp->instance, __func__, ether_sprintf((void *)mac_addr)); 2687 return (EINVAL); 2688 } 2689 2690 int 2691 vnet_hio_mac_init(vnet_t *vnetp, char *ifname) 2692 { 2693 mac_handle_t mh; 2694 mac_client_handle_t mch = NULL; 2695 mac_unicast_handle_t muh = NULL; 2696 mac_diag_t diag; 2697 mac_register_t *macp; 2698 char client_name[MAXNAMELEN]; 2699 int rv; 2700 uint16_t mac_flags = MAC_UNICAST_TAG_DISABLE | 2701 MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY; 2702 vio_net_callbacks_t vcb; 2703 ether_addr_t rem_addr = 2704 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 2705 uint32_t retries = 0; 2706 2707 if ((macp = mac_alloc(MAC_VERSION)) == NULL) { 2708 return (EAGAIN); 2709 } 2710 2711 do { 2712 rv = mac_open_by_linkname(ifname, &mh); 2713 if (rv == 0) { 2714 break; 2715 } 2716 if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) { 2717 mac_free(macp); 2718 return (rv); 2719 } 2720 drv_usecwait(vnet_mac_open_delay); 2721 } while (rv == ENOENT); 2722 2723 vnetp->hio_mh = mh; 2724 2725 (void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance, 2726 ifname); 2727 rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE); 2728 if (rv != 0) { 2729 goto fail; 2730 } 2731 vnetp->hio_mch = mch; 2732 2733 rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0, 2734 &diag); 2735 if (rv != 0) { 2736 goto fail; 2737 } 2738 vnetp->hio_muh = muh; 2739 2740 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 2741 macp->m_driver = vnetp; 2742 macp->m_dip = NULL; 2743 macp->m_src_addr = NULL; 2744 macp->m_callbacks = &vnet_hio_res_callbacks; 2745 macp->m_min_sdu = 0; 2746 macp->m_max_sdu = ETHERMTU; 2747 2748 rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID, 2749 vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb); 2750 if (rv != 0) { 2751 goto fail; 2752 } 2753 mac_free(macp); 2754 2755 /* add the recv callback */ 2756 mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp); 2757 2758 return (0); 2759 2760 fail: 2761 mac_free(macp); 2762 vnet_hio_mac_cleanup(vnetp); 2763 return (1); 2764 } 2765 2766 void 2767 vnet_hio_mac_cleanup(vnet_t *vnetp) 2768 { 2769 if (vnetp->hio_vhp != NULL) { 2770 vio_net_resource_unreg(vnetp->hio_vhp); 2771 vnetp->hio_vhp = NULL; 2772 } 2773 2774 if (vnetp->hio_muh != NULL) { 2775 (void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh); 2776 vnetp->hio_muh = NULL; 2777 } 2778 2779 if (vnetp->hio_mch != NULL) { 2780 mac_client_close(vnetp->hio_mch, 0); 2781 vnetp->hio_mch = NULL; 2782 } 2783 2784 if (vnetp->hio_mh != NULL) { 2785 mac_close(vnetp->hio_mh); 2786 vnetp->hio_mh = NULL; 2787 } 2788 } 2789 2790 /* Bind pseudo rings to hwrings */ 2791 static int 2792 vnet_bind_hwrings(vnet_t *vnetp) 2793 { 2794 mac_ring_handle_t hw_rh[VNET_NUM_HYBRID_RINGS]; 2795 mac_perim_handle_t mph1; 2796 vnet_pseudo_rx_group_t *rx_grp; 2797 vnet_pseudo_rx_ring_t *rx_ringp; 2798 vnet_pseudo_tx_group_t *tx_grp; 2799 vnet_pseudo_tx_ring_t *tx_ringp; 2800 int hw_ring_cnt; 2801 int i; 2802 int rv; 2803 2804 mac_perim_enter_by_mh(vnetp->hio_mh, &mph1); 2805 2806 /* Get the list of the underlying RX rings. */ 2807 hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh, 2808 MAC_RING_TYPE_RX); 2809 2810 /* We expect the the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */ 2811 if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) { 2812 cmn_err(CE_WARN, 2813 "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n", 2814 vnetp->instance, hw_ring_cnt); 2815 goto fail; 2816 } 2817 2818 if (vnetp->rx_hwgh != NULL) { 2819 /* 2820 * Quiesce the HW ring and the mac srs on the ring. Note 2821 * that the HW ring will be restarted when the pseudo ring 2822 * is started. At that time all the packets will be 2823 * directly passed up to the pseudo RX ring and handled 2824 * by mac srs created over the pseudo RX ring. 2825 */ 2826 mac_rx_client_quiesce(vnetp->hio_mch); 2827 mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE); 2828 } 2829 2830 /* 2831 * Bind the pseudo rings to the hwrings and start the hwrings. 2832 * Note we don't need to register these with the upper mac, as we have 2833 * statically exported these pseudo rxrings which are reserved for 2834 * rxrings of Hybrid resource. 2835 */ 2836 rx_grp = &vnetp->rx_grp[0]; 2837 for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { 2838 /* Pick the rxrings reserved for Hybrid resource */ 2839 rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX]; 2840 2841 /* Store the hw ring handle */ 2842 rx_ringp->hw_rh = hw_rh[i]; 2843 2844 /* Bind the pseudo ring to the underlying hwring */ 2845 mac_hwring_setup(rx_ringp->hw_rh, 2846 (mac_resource_handle_t)rx_ringp, NULL); 2847 2848 /* Start the hwring if needed */ 2849 if (rx_ringp->state & VNET_RXRING_STARTED) { 2850 rv = mac_hwring_activate(rx_ringp->hw_rh); 2851 if (rv != 0) { 2852 mac_hwring_teardown(rx_ringp->hw_rh); 2853 rx_ringp->hw_rh = NULL; 2854 goto fail; 2855 } 2856 } 2857 } 2858 2859 /* Get the list of the underlying TX rings. */ 2860 hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh, 2861 MAC_RING_TYPE_TX); 2862 2863 /* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */ 2864 if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) { 2865 cmn_err(CE_WARN, 2866 "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n", 2867 vnetp->instance, hw_ring_cnt); 2868 goto fail; 2869 } 2870 2871 /* 2872 * Now map the pseudo txrings to the hw txrings. Note we don't need 2873 * to register these with the upper mac, as we have statically exported 2874 * these rings. Note that these rings will continue to be used for LDC 2875 * resources to peer vnets and vswitch (shared ring). 2876 */ 2877 tx_grp = &vnetp->tx_grp[0]; 2878 for (i = 0; i < tx_grp->ring_cnt; i++) { 2879 tx_ringp = &tx_grp->rings[i]; 2880 tx_ringp->hw_rh = hw_rh[i]; 2881 tx_ringp->state |= VNET_TXRING_HYBRID; 2882 } 2883 tx_grp->tx_notify_handle = 2884 mac_client_tx_notify(vnetp->hio_mch, vnet_tx_ring_update, vnetp); 2885 2886 mac_perim_exit(mph1); 2887 return (0); 2888 2889 fail: 2890 mac_perim_exit(mph1); 2891 vnet_unbind_hwrings(vnetp); 2892 return (1); 2893 } 2894 2895 /* Unbind pseudo rings from hwrings */ 2896 static void 2897 vnet_unbind_hwrings(vnet_t *vnetp) 2898 { 2899 mac_perim_handle_t mph1; 2900 vnet_pseudo_rx_ring_t *rx_ringp; 2901 vnet_pseudo_rx_group_t *rx_grp; 2902 vnet_pseudo_tx_group_t *tx_grp; 2903 vnet_pseudo_tx_ring_t *tx_ringp; 2904 int i; 2905 2906 mac_perim_enter_by_mh(vnetp->hio_mh, &mph1); 2907 2908 tx_grp = &vnetp->tx_grp[0]; 2909 for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { 2910 tx_ringp = &tx_grp->rings[i]; 2911 if (tx_ringp->state & VNET_TXRING_HYBRID) { 2912 tx_ringp->state &= ~VNET_TXRING_HYBRID; 2913 tx_ringp->hw_rh = NULL; 2914 } 2915 } 2916 (void) mac_client_tx_notify(vnetp->hio_mch, NULL, 2917 tx_grp->tx_notify_handle); 2918 2919 rx_grp = &vnetp->rx_grp[0]; 2920 for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { 2921 rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX]; 2922 if (rx_ringp->hw_rh != NULL) { 2923 /* Stop the hwring */ 2924 mac_hwring_quiesce(rx_ringp->hw_rh); 2925 2926 /* Teardown the hwring */ 2927 mac_hwring_teardown(rx_ringp->hw_rh); 2928 rx_ringp->hw_rh = NULL; 2929 } 2930 } 2931 2932 if (vnetp->rx_hwgh != NULL) { 2933 vnetp->rx_hwgh = NULL; 2934 /* 2935 * First clear the permanent-quiesced flag of the RX srs then 2936 * restart the HW ring and the mac srs on the ring. 2937 */ 2938 mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE); 2939 mac_rx_client_restart(vnetp->hio_mch); 2940 } 2941 2942 mac_perim_exit(mph1); 2943 } 2944 2945 /* Bind pseudo ring to a LDC resource */ 2946 static int 2947 vnet_bind_vgenring(vnet_res_t *vresp) 2948 { 2949 vnet_t *vnetp; 2950 vnet_pseudo_rx_group_t *rx_grp; 2951 vnet_pseudo_rx_ring_t *rx_ringp; 2952 mac_perim_handle_t mph1; 2953 int rv; 2954 int type; 2955 2956 vnetp = vresp->vnetp; 2957 type = vresp->type; 2958 rx_grp = &vnetp->rx_grp[0]; 2959 2960 if (type == VIO_NET_RES_LDC_SERVICE) { 2961 /* 2962 * Ring Index 0 is the default ring in the group and is 2963 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring 2964 * is allocated statically and is reported to the mac layer 2965 * in vnet_m_capab(). So, all we need to do here, is save a 2966 * reference to the associated vresp. 2967 */ 2968 rx_ringp = &rx_grp->rings[0]; 2969 rx_ringp->hw_rh = (mac_ring_handle_t)vresp; 2970 vresp->rx_ringp = (void *)rx_ringp; 2971 return (0); 2972 } 2973 ASSERT(type == VIO_NET_RES_LDC_GUEST); 2974 2975 mac_perim_enter_by_mh(vnetp->mh, &mph1); 2976 2977 rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp); 2978 if (rx_ringp == NULL) { 2979 cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring", 2980 vnetp->instance); 2981 goto fail; 2982 } 2983 2984 /* Store the LDC resource itself as the ring handle */ 2985 rx_ringp->hw_rh = (mac_ring_handle_t)vresp; 2986 2987 /* 2988 * Save a reference to the ring in the resource for lookup during 2989 * unbind. Note this is only done for LDC resources. We don't need this 2990 * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its 2991 * rx rings are mapped to reserved pseudo rx rings (index 1 and 2). 2992 */ 2993 vresp->rx_ringp = (void *)rx_ringp; 2994 rx_ringp->state |= VNET_RXRING_LDC_GUEST; 2995 2996 /* Register the pseudo ring with upper-mac */ 2997 rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index); 2998 if (rv != 0) { 2999 rx_ringp->state &= ~VNET_RXRING_LDC_GUEST; 3000 rx_ringp->hw_rh = NULL; 3001 vnet_free_pseudo_rx_ring(vnetp, rx_ringp); 3002 goto fail; 3003 } 3004 3005 mac_perim_exit(mph1); 3006 return (0); 3007 fail: 3008 mac_perim_exit(mph1); 3009 return (1); 3010 } 3011 3012 /* Unbind pseudo ring from a LDC resource */ 3013 static void 3014 vnet_unbind_vgenring(vnet_res_t *vresp) 3015 { 3016 vnet_t *vnetp; 3017 vnet_pseudo_rx_group_t *rx_grp; 3018 vnet_pseudo_rx_ring_t *rx_ringp; 3019 mac_perim_handle_t mph1; 3020 int type; 3021 3022 vnetp = vresp->vnetp; 3023 type = vresp->type; 3024 rx_grp = &vnetp->rx_grp[0]; 3025 3026 if (vresp->rx_ringp == NULL) { 3027 return; 3028 } 3029 3030 if (type == VIO_NET_RES_LDC_SERVICE) { 3031 /* 3032 * Ring Index 0 is the default ring in the group and is 3033 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring 3034 * is allocated statically and is reported to the mac layer 3035 * in vnet_m_capab(). So, all we need to do here, is remove its 3036 * reference to the associated vresp. 3037 */ 3038 rx_ringp = &rx_grp->rings[0]; 3039 rx_ringp->hw_rh = NULL; 3040 vresp->rx_ringp = NULL; 3041 return; 3042 } 3043 ASSERT(type == VIO_NET_RES_LDC_GUEST); 3044 3045 mac_perim_enter_by_mh(vnetp->mh, &mph1); 3046 3047 rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp; 3048 vresp->rx_ringp = NULL; 3049 3050 if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) { 3051 /* Unregister the pseudo ring with upper-mac */ 3052 mac_group_rem_ring(rx_grp->handle, rx_ringp->handle); 3053 3054 rx_ringp->hw_rh = NULL; 3055 rx_ringp->state &= ~VNET_RXRING_LDC_GUEST; 3056 3057 /* Free the pseudo rx ring */ 3058 vnet_free_pseudo_rx_ring(vnetp, rx_ringp); 3059 } 3060 3061 mac_perim_exit(mph1); 3062 } 3063 3064 static void 3065 vnet_unbind_rings(vnet_res_t *vresp) 3066 { 3067 switch (vresp->type) { 3068 3069 case VIO_NET_RES_LDC_SERVICE: 3070 case VIO_NET_RES_LDC_GUEST: 3071 vnet_unbind_vgenring(vresp); 3072 break; 3073 3074 case VIO_NET_RES_HYBRID: 3075 vnet_unbind_hwrings(vresp->vnetp); 3076 break; 3077 3078 default: 3079 break; 3080 3081 } 3082 } 3083 3084 static int 3085 vnet_bind_rings(vnet_res_t *vresp) 3086 { 3087 int rv; 3088 3089 switch (vresp->type) { 3090 3091 case VIO_NET_RES_LDC_SERVICE: 3092 case VIO_NET_RES_LDC_GUEST: 3093 rv = vnet_bind_vgenring(vresp); 3094 break; 3095 3096 case VIO_NET_RES_HYBRID: 3097 rv = vnet_bind_hwrings(vresp->vnetp); 3098 break; 3099 3100 default: 3101 rv = 1; 3102 break; 3103 3104 } 3105 3106 return (rv); 3107 } 3108 3109 /* ARGSUSED */ 3110 int 3111 vnet_hio_stat(void *arg, uint_t stat, uint64_t *val) 3112 { 3113 vnet_t *vnetp = (vnet_t *)arg; 3114 3115 *val = mac_stat_get(vnetp->hio_mh, stat); 3116 return (0); 3117 } 3118 3119 /* 3120 * The start() and stop() routines for the Hybrid resource below, are just 3121 * dummy functions. This is provided to avoid resource type specific code in 3122 * vnet_start_resources() and vnet_stop_resources(). The starting and stopping 3123 * of the Hybrid resource happens in the context of the mac_client interfaces 3124 * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup(). 3125 */ 3126 /* ARGSUSED */ 3127 static int 3128 vnet_hio_start(void *arg) 3129 { 3130 return (0); 3131 } 3132 3133 /* ARGSUSED */ 3134 static void 3135 vnet_hio_stop(void *arg) 3136 { 3137 } 3138 3139 mblk_t * 3140 vnet_hio_tx(void *arg, mblk_t *mp) 3141 { 3142 vnet_pseudo_tx_ring_t *tx_ringp; 3143 mblk_t *nextp; 3144 mblk_t *ret_mp; 3145 3146 tx_ringp = (vnet_pseudo_tx_ring_t *)arg; 3147 for (;;) { 3148 nextp = mp->b_next; 3149 mp->b_next = NULL; 3150 3151 ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp); 3152 if (ret_mp != NULL) { 3153 ret_mp->b_next = nextp; 3154 mp = ret_mp; 3155 break; 3156 } 3157 3158 if ((mp = nextp) == NULL) 3159 break; 3160 } 3161 return (mp); 3162 } 3163 3164 #ifdef VNET_IOC_DEBUG 3165 3166 /* 3167 * The ioctl entry point is used only for debugging for now. The ioctl commands 3168 * can be used to force the link state of the channel connected to vsw. 3169 */ 3170 static void 3171 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 3172 { 3173 struct iocblk *iocp; 3174 vnet_t *vnetp; 3175 3176 iocp = (struct iocblk *)(uintptr_t)mp->b_rptr; 3177 iocp->ioc_error = 0; 3178 vnetp = (vnet_t *)arg; 3179 3180 if (vnetp == NULL) { 3181 miocnak(q, mp, 0, EINVAL); 3182 return; 3183 } 3184 3185 switch (iocp->ioc_cmd) { 3186 3187 case VNET_FORCE_LINK_DOWN: 3188 case VNET_FORCE_LINK_UP: 3189 vnet_force_link_state(vnetp, q, mp); 3190 break; 3191 3192 default: 3193 iocp->ioc_error = EINVAL; 3194 miocnak(q, mp, 0, iocp->ioc_error); 3195 break; 3196 3197 } 3198 } 3199 3200 static void 3201 vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp) 3202 { 3203 mac_register_t *macp; 3204 mac_callbacks_t *cbp; 3205 vnet_res_t *vresp; 3206 3207 READ_ENTER(&vnetp->vsw_fp_rw); 3208 3209 vresp = vnetp->vsw_fp; 3210 if (vresp == NULL) { 3211 RW_EXIT(&vnetp->vsw_fp_rw); 3212 return; 3213 } 3214 3215 macp = &vresp->macreg; 3216 cbp = macp->m_callbacks; 3217 cbp->mc_ioctl(macp->m_driver, q, mp); 3218 3219 RW_EXIT(&vnetp->vsw_fp_rw); 3220 } 3221 3222 #else 3223 3224 static void 3225 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 3226 { 3227 vnet_t *vnetp; 3228 3229 vnetp = (vnet_t *)arg; 3230 3231 if (vnetp == NULL) { 3232 miocnak(q, mp, 0, EINVAL); 3233 return; 3234 } 3235 3236 /* ioctl support only for debugging */ 3237 miocnak(q, mp, 0, ENOTSUP); 3238 } 3239 3240 #endif 3241