1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/param.h> 30 #include <sys/callb.h> 31 #include <sys/stream.h> 32 #include <sys/kmem.h> 33 #include <sys/conf.h> 34 #include <sys/devops.h> 35 #include <sys/ksynch.h> 36 #include <sys/stat.h> 37 #include <sys/modctl.h> 38 #include <sys/modhash.h> 39 #include <sys/debug.h> 40 #include <sys/ethernet.h> 41 #include <sys/dlpi.h> 42 #include <net/if.h> 43 #include <sys/mac_provider.h> 44 #include <sys/mac_client.h> 45 #include <sys/mac_client_priv.h> 46 #include <sys/mac_ether.h> 47 #include <sys/ddi.h> 48 #include <sys/sunddi.h> 49 #include <sys/strsun.h> 50 #include <sys/note.h> 51 #include <sys/atomic.h> 52 #include <sys/vnet.h> 53 #include <sys/vlan.h> 54 #include <sys/vnet_mailbox.h> 55 #include <sys/vnet_common.h> 56 #include <sys/dds.h> 57 #include <sys/strsubr.h> 58 #include <sys/taskq.h> 59 60 /* 61 * Function prototypes. 62 */ 63 64 /* DDI entrypoints */ 65 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 66 static int vnetattach(dev_info_t *, ddi_attach_cmd_t); 67 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t); 68 69 /* MAC entrypoints */ 70 static int vnet_m_stat(void *, uint_t, uint64_t *); 71 static int vnet_m_start(void *); 72 static void vnet_m_stop(void *); 73 static int vnet_m_promisc(void *, boolean_t); 74 static int vnet_m_multicst(void *, boolean_t, const uint8_t *); 75 static int vnet_m_unicst(void *, const uint8_t *); 76 mblk_t *vnet_m_tx(void *, mblk_t *); 77 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp); 78 #ifdef VNET_IOC_DEBUG 79 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp); 80 #endif 81 static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data); 82 static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, 83 const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle); 84 static void vnet_get_group(void *arg, mac_ring_type_t type, const int index, 85 mac_group_info_t *infop, mac_group_handle_t handle); 86 static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); 87 static void vnet_rx_ring_stop(mac_ring_driver_t rdriver); 88 static int vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, 89 uint64_t *val); 90 static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); 91 static void vnet_tx_ring_stop(mac_ring_driver_t rdriver); 92 static int vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, 93 uint64_t *val); 94 static int vnet_ring_enable_intr(void *arg); 95 static int vnet_ring_disable_intr(void *arg); 96 static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup); 97 static int vnet_addmac(void *arg, const uint8_t *mac_addr); 98 static int vnet_remmac(void *arg, const uint8_t *mac_addr); 99 100 /* vnet internal functions */ 101 static int vnet_unattach(vnet_t *vnetp); 102 static void vnet_ring_grp_init(vnet_t *vnetp); 103 static void vnet_ring_grp_uninit(vnet_t *vnetp); 104 static int vnet_mac_register(vnet_t *); 105 static int vnet_read_mac_address(vnet_t *vnetp); 106 static int vnet_bind_vgenring(vnet_res_t *vresp); 107 static void vnet_unbind_vgenring(vnet_res_t *vresp); 108 static int vnet_bind_hwrings(vnet_t *vnetp); 109 static void vnet_unbind_hwrings(vnet_t *vnetp); 110 static int vnet_bind_rings(vnet_res_t *vresp); 111 static void vnet_unbind_rings(vnet_res_t *vresp); 112 static int vnet_hio_stat(void *, uint_t, uint64_t *); 113 static int vnet_hio_start(void *); 114 static void vnet_hio_stop(void *); 115 mblk_t *vnet_hio_tx(void *, mblk_t *); 116 117 /* Forwarding database (FDB) routines */ 118 static void vnet_fdb_create(vnet_t *vnetp); 119 static void vnet_fdb_destroy(vnet_t *vnetp); 120 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp); 121 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val); 122 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp); 123 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp); 124 125 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp); 126 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp); 127 static void vnet_tx_update(vio_net_handle_t vrh); 128 static void vnet_res_start_task(void *arg); 129 static void vnet_start_resources(vnet_t *vnetp); 130 static void vnet_stop_resources(vnet_t *vnetp); 131 static void vnet_dispatch_res_task(vnet_t *vnetp); 132 static void vnet_res_start_task(void *arg); 133 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err); 134 static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp); 135 static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp); 136 static void vnet_tx_notify_thread(void *); 137 138 /* Exported to vnet_gen */ 139 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu); 140 void vnet_link_update(vnet_t *vnetp, link_state_t link_state); 141 void vnet_dds_cleanup_hio(vnet_t *vnetp); 142 143 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name, 144 vnet_res_t *vresp); 145 static int vnet_hio_update_kstats(kstat_t *ksp, int rw); 146 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp); 147 static void vnet_hio_destroy_kstats(kstat_t *ksp); 148 149 /* Exported to to vnet_dds */ 150 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg); 151 int vnet_hio_mac_init(vnet_t *vnetp, char *ifname); 152 void vnet_hio_mac_cleanup(vnet_t *vnetp); 153 154 /* Externs that are imported from vnet_gen */ 155 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip, 156 const uint8_t *macaddr, void **vgenhdl); 157 extern int vgen_init_mdeg(void *arg); 158 extern void vgen_uninit(void *arg); 159 extern int vgen_dds_tx(void *arg, void *dmsg); 160 extern void vgen_mod_init(void); 161 extern int vgen_mod_cleanup(void); 162 extern void vgen_mod_fini(void); 163 extern int vgen_enable_intr(void *arg); 164 extern int vgen_disable_intr(void *arg); 165 extern mblk_t *vgen_poll(void *arg, int bytes_to_pickup); 166 167 /* Externs that are imported from vnet_dds */ 168 extern void vdds_mod_init(void); 169 extern void vdds_mod_fini(void); 170 extern int vdds_init(vnet_t *vnetp); 171 extern void vdds_cleanup(vnet_t *vnetp); 172 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg); 173 extern void vdds_cleanup_hybrid_res(void *arg); 174 extern void vdds_cleanup_hio(vnet_t *vnetp); 175 176 extern pri_t minclsyspri; 177 178 #define DRV_NAME "vnet" 179 #define VNET_FDBE_REFHOLD(p) \ 180 { \ 181 atomic_inc_32(&(p)->refcnt); \ 182 ASSERT((p)->refcnt != 0); \ 183 } 184 185 #define VNET_FDBE_REFRELE(p) \ 186 { \ 187 ASSERT((p)->refcnt != 0); \ 188 atomic_dec_32(&(p)->refcnt); \ 189 } 190 191 #ifdef VNET_IOC_DEBUG 192 #define VNET_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB) 193 #else 194 #define VNET_M_CALLBACK_FLAGS (MC_GETCAPAB) 195 #endif 196 197 static mac_callbacks_t vnet_m_callbacks = { 198 VNET_M_CALLBACK_FLAGS, 199 vnet_m_stat, 200 vnet_m_start, 201 vnet_m_stop, 202 vnet_m_promisc, 203 vnet_m_multicst, 204 NULL, /* m_unicst entry must be NULL while rx rings are exposed */ 205 NULL, /* m_tx entry must be NULL while tx rings are exposed */ 206 NULL, 207 vnet_m_ioctl, 208 vnet_m_capab, 209 NULL 210 }; 211 212 static mac_callbacks_t vnet_hio_res_callbacks = { 213 0, 214 vnet_hio_stat, 215 vnet_hio_start, 216 vnet_hio_stop, 217 NULL, 218 NULL, 219 NULL, 220 vnet_hio_tx, 221 NULL, 222 NULL, 223 NULL 224 }; 225 226 /* 227 * Linked list of "vnet_t" structures - one per instance. 228 */ 229 static vnet_t *vnet_headp = NULL; 230 static krwlock_t vnet_rw; 231 232 /* Tunables */ 233 uint32_t vnet_ntxds = VNET_NTXDS; /* power of 2 transmit descriptors */ 234 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */ 235 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT; /* tx timeout in msec */ 236 uint32_t vnet_ldc_mtu = VNET_LDC_MTU; /* ldc mtu */ 237 238 /* Configure tx serialization in mac layer for the vnet device */ 239 boolean_t vnet_mac_tx_serialize = B_TRUE; 240 /* Configure enqueing at Rx soft rings in mac layer for the vnet device */ 241 boolean_t vnet_mac_rx_queuing = B_TRUE; 242 243 /* 244 * Set this to non-zero to enable additional internal receive buffer pools 245 * based on the MTU of the device for better performance at the cost of more 246 * memory consumption. This is turned off by default, to use allocb(9F) for 247 * receive buffer allocations of sizes > 2K. 248 */ 249 boolean_t vnet_jumbo_rxpools = B_FALSE; 250 251 /* # of chains in fdb hash table */ 252 uint32_t vnet_fdb_nchains = VNET_NFDB_HASH; 253 254 /* Internal tunables */ 255 uint32_t vnet_ethermtu = 1500; /* mtu of the device */ 256 257 /* 258 * Default vlan id. This is only used internally when the "default-vlan-id" 259 * property is not present in the MD device node. Therefore, this should not be 260 * used as a tunable; if this value is changed, the corresponding variable 261 * should be updated to the same value in vsw and also other vnets connected to 262 * the same vsw. 263 */ 264 uint16_t vnet_default_vlan_id = 1; 265 266 /* delay in usec to wait for all references on a fdb entry to be dropped */ 267 uint32_t vnet_fdbe_refcnt_delay = 10; 268 269 static struct ether_addr etherbroadcastaddr = { 270 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 271 }; 272 273 /* mac_open() retry delay in usec */ 274 uint32_t vnet_mac_open_delay = 100; /* 0.1 ms */ 275 276 /* max # of mac_open() retries */ 277 uint32_t vnet_mac_open_retries = 100; 278 279 /* 280 * Property names 281 */ 282 static char macaddr_propname[] = "local-mac-address"; 283 284 /* 285 * This is the string displayed by modinfo(1m). 286 */ 287 static char vnet_ident[] = "vnet driver"; 288 extern struct mod_ops mod_driverops; 289 static struct cb_ops cb_vnetops = { 290 nulldev, /* cb_open */ 291 nulldev, /* cb_close */ 292 nodev, /* cb_strategy */ 293 nodev, /* cb_print */ 294 nodev, /* cb_dump */ 295 nodev, /* cb_read */ 296 nodev, /* cb_write */ 297 nodev, /* cb_ioctl */ 298 nodev, /* cb_devmap */ 299 nodev, /* cb_mmap */ 300 nodev, /* cb_segmap */ 301 nochpoll, /* cb_chpoll */ 302 ddi_prop_op, /* cb_prop_op */ 303 NULL, /* cb_stream */ 304 (int)(D_MP) /* cb_flag */ 305 }; 306 307 static struct dev_ops vnetops = { 308 DEVO_REV, /* devo_rev */ 309 0, /* devo_refcnt */ 310 NULL, /* devo_getinfo */ 311 nulldev, /* devo_identify */ 312 nulldev, /* devo_probe */ 313 vnetattach, /* devo_attach */ 314 vnetdetach, /* devo_detach */ 315 nodev, /* devo_reset */ 316 &cb_vnetops, /* devo_cb_ops */ 317 (struct bus_ops *)NULL, /* devo_bus_ops */ 318 NULL, /* devo_power */ 319 ddi_quiesce_not_supported, /* devo_quiesce */ 320 }; 321 322 static struct modldrv modldrv = { 323 &mod_driverops, /* Type of module. This one is a driver */ 324 vnet_ident, /* ID string */ 325 &vnetops /* driver specific ops */ 326 }; 327 328 static struct modlinkage modlinkage = { 329 MODREV_1, (void *)&modldrv, NULL 330 }; 331 332 #ifdef DEBUG 333 334 /* 335 * Print debug messages - set to 0xf to enable all msgs 336 */ 337 int vnet_dbglevel = 0x8; 338 339 static void 340 debug_printf(const char *fname, void *arg, const char *fmt, ...) 341 { 342 char buf[512]; 343 va_list ap; 344 vnet_t *vnetp = (vnet_t *)arg; 345 char *bufp = buf; 346 347 if (vnetp == NULL) { 348 (void) sprintf(bufp, "%s: ", fname); 349 bufp += strlen(bufp); 350 } else { 351 (void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname); 352 bufp += strlen(bufp); 353 } 354 va_start(ap, fmt); 355 (void) vsprintf(bufp, fmt, ap); 356 va_end(ap); 357 cmn_err(CE_CONT, "%s\n", buf); 358 } 359 360 #endif 361 362 /* _init(9E): initialize the loadable module */ 363 int 364 _init(void) 365 { 366 int status; 367 368 DBG1(NULL, "enter\n"); 369 370 mac_init_ops(&vnetops, "vnet"); 371 status = mod_install(&modlinkage); 372 if (status != 0) { 373 mac_fini_ops(&vnetops); 374 } 375 vdds_mod_init(); 376 vgen_mod_init(); 377 DBG1(NULL, "exit(%d)\n", status); 378 return (status); 379 } 380 381 /* _fini(9E): prepare the module for unloading. */ 382 int 383 _fini(void) 384 { 385 int status; 386 387 DBG1(NULL, "enter\n"); 388 389 status = vgen_mod_cleanup(); 390 if (status != 0) 391 return (status); 392 393 status = mod_remove(&modlinkage); 394 if (status != 0) 395 return (status); 396 mac_fini_ops(&vnetops); 397 vgen_mod_fini(); 398 vdds_mod_fini(); 399 400 DBG1(NULL, "exit(%d)\n", status); 401 return (status); 402 } 403 404 /* _info(9E): return information about the loadable module */ 405 int 406 _info(struct modinfo *modinfop) 407 { 408 return (mod_info(&modlinkage, modinfop)); 409 } 410 411 /* 412 * attach(9E): attach a device to the system. 413 * called once for each instance of the device on the system. 414 */ 415 static int 416 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd) 417 { 418 vnet_t *vnetp; 419 int status; 420 int instance; 421 uint64_t reg; 422 char qname[TASKQ_NAMELEN]; 423 vnet_attach_progress_t attach_progress; 424 425 attach_progress = AST_init; 426 427 switch (cmd) { 428 case DDI_ATTACH: 429 break; 430 case DDI_RESUME: 431 case DDI_PM_RESUME: 432 default: 433 goto vnet_attach_fail; 434 } 435 436 instance = ddi_get_instance(dip); 437 DBG1(NULL, "instance(%d) enter\n", instance); 438 439 /* allocate vnet_t and mac_t structures */ 440 vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP); 441 vnetp->dip = dip; 442 vnetp->instance = instance; 443 rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL); 444 rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL); 445 attach_progress |= AST_vnet_alloc; 446 447 vnet_ring_grp_init(vnetp); 448 attach_progress |= AST_ring_init; 449 450 status = vdds_init(vnetp); 451 if (status != 0) { 452 goto vnet_attach_fail; 453 } 454 attach_progress |= AST_vdds_init; 455 456 /* setup links to vnet_t from both devinfo and mac_t */ 457 ddi_set_driver_private(dip, (caddr_t)vnetp); 458 459 /* read the mac address */ 460 status = vnet_read_mac_address(vnetp); 461 if (status != DDI_SUCCESS) { 462 goto vnet_attach_fail; 463 } 464 attach_progress |= AST_read_macaddr; 465 466 reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 467 DDI_PROP_DONTPASS, "reg", -1); 468 if (reg == -1) { 469 goto vnet_attach_fail; 470 } 471 vnetp->reg = reg; 472 473 vnet_fdb_create(vnetp); 474 attach_progress |= AST_fdbh_alloc; 475 476 (void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance); 477 if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1, 478 TASKQ_DEFAULTPRI, 0)) == NULL) { 479 cmn_err(CE_WARN, "!vnet%d: Unable to create task queue", 480 instance); 481 goto vnet_attach_fail; 482 } 483 attach_progress |= AST_taskq_create; 484 485 /* add to the list of vnet devices */ 486 WRITE_ENTER(&vnet_rw); 487 vnetp->nextp = vnet_headp; 488 vnet_headp = vnetp; 489 RW_EXIT(&vnet_rw); 490 491 attach_progress |= AST_vnet_list; 492 493 /* 494 * Initialize the generic vnet plugin which provides communication via 495 * sun4v LDC (logical domain channel) based resources. This involves 2 496 * steps; first, vgen_init() is invoked to read the various properties 497 * of the vnet device from its MD node (including its mtu which is 498 * needed to mac_register()) and obtain a handle to the vgen layer. 499 * After mac_register() is done and we have a mac handle, we then 500 * invoke vgen_init_mdeg() which registers with the the MD event 501 * generator (mdeg) framework to allow LDC resource notifications. 502 * Note: this sequence also allows us to report the correct default # 503 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked 504 * in the context of mac_register(); and avoids conflicting with 505 * dynamic pseudo rx rings which get added/removed as a result of mdeg 506 * events in vgen. 507 */ 508 status = vgen_init(vnetp, reg, vnetp->dip, 509 (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl); 510 if (status != DDI_SUCCESS) { 511 DERR(vnetp, "vgen_init() failed\n"); 512 goto vnet_attach_fail; 513 } 514 attach_progress |= AST_vgen_init; 515 516 status = vnet_mac_register(vnetp); 517 if (status != DDI_SUCCESS) { 518 goto vnet_attach_fail; 519 } 520 vnetp->link_state = LINK_STATE_UNKNOWN; 521 attach_progress |= AST_macreg; 522 523 status = vgen_init_mdeg(vnetp->vgenhdl); 524 if (status != DDI_SUCCESS) { 525 goto vnet_attach_fail; 526 } 527 attach_progress |= AST_init_mdeg; 528 529 vnetp->attach_progress = attach_progress; 530 531 DBG1(NULL, "instance(%d) exit\n", instance); 532 return (DDI_SUCCESS); 533 534 vnet_attach_fail: 535 vnetp->attach_progress = attach_progress; 536 status = vnet_unattach(vnetp); 537 ASSERT(status == 0); 538 return (DDI_FAILURE); 539 } 540 541 /* 542 * detach(9E): detach a device from the system. 543 */ 544 static int 545 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd) 546 { 547 vnet_t *vnetp; 548 int instance; 549 550 instance = ddi_get_instance(dip); 551 DBG1(NULL, "instance(%d) enter\n", instance); 552 553 vnetp = ddi_get_driver_private(dip); 554 if (vnetp == NULL) { 555 goto vnet_detach_fail; 556 } 557 558 switch (cmd) { 559 case DDI_DETACH: 560 break; 561 case DDI_SUSPEND: 562 case DDI_PM_SUSPEND: 563 default: 564 goto vnet_detach_fail; 565 } 566 567 if (vnet_unattach(vnetp) != 0) { 568 goto vnet_detach_fail; 569 } 570 571 return (DDI_SUCCESS); 572 573 vnet_detach_fail: 574 return (DDI_FAILURE); 575 } 576 577 /* 578 * Common routine to handle vnetattach() failure and vnetdetach(). Note that 579 * the only reason this function could fail is if mac_unregister() fails. 580 * Otherwise, this function must ensure that all resources are freed and return 581 * success. 582 */ 583 static int 584 vnet_unattach(vnet_t *vnetp) 585 { 586 vnet_attach_progress_t attach_progress; 587 588 attach_progress = vnetp->attach_progress; 589 590 /* 591 * Disable the mac device in the gldv3 subsystem. This can fail, in 592 * particular if there are still any open references to this mac 593 * device; in which case we just return failure without continuing to 594 * detach further. 595 * If it succeeds, we then invoke vgen_uninit() which should unregister 596 * any pseudo rings registered with the mac layer. Note we keep the 597 * AST_macreg flag on, so we can unregister with the mac layer at 598 * the end of this routine. 599 */ 600 if (attach_progress & AST_macreg) { 601 if (mac_disable(vnetp->mh) != 0) { 602 return (1); 603 } 604 } 605 606 /* 607 * Now that we have disabled the device, we must finish all other steps 608 * and successfully return from this function; otherwise we will end up 609 * leaving the device in a broken/unusable state. 610 * 611 * First, release any hybrid resources assigned to this vnet device. 612 */ 613 if (attach_progress & AST_vdds_init) { 614 vdds_cleanup(vnetp); 615 attach_progress &= ~AST_vdds_init; 616 } 617 618 /* 619 * Uninit vgen. This stops further mdeg callbacks to this vnet 620 * device and/or its ports; and detaches any existing ports. 621 */ 622 if (attach_progress & (AST_vgen_init|AST_init_mdeg)) { 623 vgen_uninit(vnetp->vgenhdl); 624 attach_progress &= ~AST_vgen_init; 625 attach_progress &= ~AST_init_mdeg; 626 } 627 628 /* Destroy the taskq. */ 629 if (attach_progress & AST_taskq_create) { 630 ddi_taskq_destroy(vnetp->taskqp); 631 attach_progress &= ~AST_taskq_create; 632 } 633 634 /* Destroy fdb. */ 635 if (attach_progress & AST_fdbh_alloc) { 636 vnet_fdb_destroy(vnetp); 637 attach_progress &= ~AST_fdbh_alloc; 638 } 639 640 /* Remove from the device list */ 641 if (attach_progress & AST_vnet_list) { 642 vnet_t **vnetpp; 643 /* unlink from instance(vnet_t) list */ 644 WRITE_ENTER(&vnet_rw); 645 for (vnetpp = &vnet_headp; *vnetpp; 646 vnetpp = &(*vnetpp)->nextp) { 647 if (*vnetpp == vnetp) { 648 *vnetpp = vnetp->nextp; 649 break; 650 } 651 } 652 RW_EXIT(&vnet_rw); 653 attach_progress &= ~AST_vnet_list; 654 } 655 656 if (attach_progress & AST_ring_init) { 657 vnet_ring_grp_uninit(vnetp); 658 attach_progress &= ~AST_ring_init; 659 } 660 661 if (attach_progress & AST_macreg) { 662 VERIFY(mac_unregister(vnetp->mh) == 0); 663 vnetp->mh = NULL; 664 attach_progress &= ~AST_macreg; 665 } 666 667 if (attach_progress & AST_vnet_alloc) { 668 rw_destroy(&vnetp->vrwlock); 669 rw_destroy(&vnetp->vsw_fp_rw); 670 attach_progress &= ~AST_vnet_list; 671 KMEM_FREE(vnetp); 672 } 673 674 return (0); 675 } 676 677 /* enable the device for transmit/receive */ 678 static int 679 vnet_m_start(void *arg) 680 { 681 vnet_t *vnetp = arg; 682 683 DBG1(vnetp, "enter\n"); 684 685 WRITE_ENTER(&vnetp->vrwlock); 686 vnetp->flags |= VNET_STARTED; 687 vnet_start_resources(vnetp); 688 RW_EXIT(&vnetp->vrwlock); 689 690 DBG1(vnetp, "exit\n"); 691 return (VNET_SUCCESS); 692 693 } 694 695 /* stop transmit/receive for the device */ 696 static void 697 vnet_m_stop(void *arg) 698 { 699 vnet_t *vnetp = arg; 700 701 DBG1(vnetp, "enter\n"); 702 703 WRITE_ENTER(&vnetp->vrwlock); 704 if (vnetp->flags & VNET_STARTED) { 705 /* 706 * Set the flags appropriately; this should prevent starting of 707 * any new resources that are added(see vnet_res_start_task()), 708 * while we release the vrwlock in vnet_stop_resources() before 709 * stopping each resource. 710 */ 711 vnetp->flags &= ~VNET_STARTED; 712 vnetp->flags |= VNET_STOPPING; 713 vnet_stop_resources(vnetp); 714 vnetp->flags &= ~VNET_STOPPING; 715 } 716 RW_EXIT(&vnetp->vrwlock); 717 718 DBG1(vnetp, "exit\n"); 719 } 720 721 /* set the unicast mac address of the device */ 722 static int 723 vnet_m_unicst(void *arg, const uint8_t *macaddr) 724 { 725 _NOTE(ARGUNUSED(macaddr)) 726 727 vnet_t *vnetp = arg; 728 729 DBG1(vnetp, "enter\n"); 730 /* 731 * NOTE: setting mac address dynamically is not supported. 732 */ 733 DBG1(vnetp, "exit\n"); 734 735 return (VNET_FAILURE); 736 } 737 738 /* enable/disable a multicast address */ 739 static int 740 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 741 { 742 _NOTE(ARGUNUSED(add, mca)) 743 744 vnet_t *vnetp = arg; 745 vnet_res_t *vresp; 746 mac_register_t *macp; 747 mac_callbacks_t *cbp; 748 int rv = VNET_SUCCESS; 749 750 DBG1(vnetp, "enter\n"); 751 752 READ_ENTER(&vnetp->vsw_fp_rw); 753 if (vnetp->vsw_fp == NULL) { 754 RW_EXIT(&vnetp->vsw_fp_rw); 755 return (EAGAIN); 756 } 757 VNET_FDBE_REFHOLD(vnetp->vsw_fp); 758 RW_EXIT(&vnetp->vsw_fp_rw); 759 760 vresp = vnetp->vsw_fp; 761 macp = &vresp->macreg; 762 cbp = macp->m_callbacks; 763 rv = cbp->mc_multicst(macp->m_driver, add, mca); 764 765 VNET_FDBE_REFRELE(vnetp->vsw_fp); 766 767 DBG1(vnetp, "exit(%d)\n", rv); 768 return (rv); 769 } 770 771 /* set or clear promiscuous mode on the device */ 772 static int 773 vnet_m_promisc(void *arg, boolean_t on) 774 { 775 _NOTE(ARGUNUSED(on)) 776 777 vnet_t *vnetp = arg; 778 DBG1(vnetp, "enter\n"); 779 /* 780 * NOTE: setting promiscuous mode is not supported, just return success. 781 */ 782 DBG1(vnetp, "exit\n"); 783 return (VNET_SUCCESS); 784 } 785 786 /* 787 * Transmit a chain of packets. This function provides switching functionality 788 * based on the destination mac address to reach other guests (within ldoms) or 789 * external hosts. 790 */ 791 mblk_t * 792 vnet_tx_ring_send(void *arg, mblk_t *mp) 793 { 794 vnet_pseudo_tx_ring_t *tx_ringp; 795 vnet_tx_ring_stats_t *statsp; 796 vnet_t *vnetp; 797 vnet_res_t *vresp; 798 mblk_t *next; 799 mblk_t *resid_mp; 800 mac_register_t *macp; 801 struct ether_header *ehp; 802 boolean_t is_unicast; 803 boolean_t is_pvid; /* non-default pvid ? */ 804 boolean_t hres; /* Hybrid resource ? */ 805 void *tx_arg; 806 size_t size; 807 808 tx_ringp = (vnet_pseudo_tx_ring_t *)arg; 809 statsp = &tx_ringp->tx_ring_stats; 810 vnetp = (vnet_t *)tx_ringp->vnetp; 811 DBG1(vnetp, "enter\n"); 812 ASSERT(mp != NULL); 813 814 is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE; 815 816 while (mp != NULL) { 817 818 next = mp->b_next; 819 mp->b_next = NULL; 820 821 /* update stats */ 822 size = msgsize(mp); 823 824 /* 825 * Find fdb entry for the destination 826 * and hold a reference to it. 827 */ 828 ehp = (struct ether_header *)mp->b_rptr; 829 vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost); 830 if (vresp != NULL) { 831 832 /* 833 * Destination found in FDB. 834 * The destination is a vnet device within ldoms 835 * and directly reachable, invoke the tx function 836 * in the fdb entry. 837 */ 838 macp = &vresp->macreg; 839 resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp); 840 841 /* tx done; now release ref on fdb entry */ 842 VNET_FDBE_REFRELE(vresp); 843 844 if (resid_mp != NULL) { 845 /* m_tx failed */ 846 mp->b_next = next; 847 break; 848 } 849 } else { 850 is_unicast = !(IS_BROADCAST(ehp) || 851 (IS_MULTICAST(ehp))); 852 /* 853 * Destination is not in FDB. 854 * If the destination is broadcast or multicast, 855 * then forward the packet to vswitch. 856 * If a Hybrid resource avilable, then send the 857 * unicast packet via hybrid resource, otherwise 858 * forward it to vswitch. 859 */ 860 READ_ENTER(&vnetp->vsw_fp_rw); 861 862 if ((is_unicast) && (vnetp->hio_fp != NULL)) { 863 vresp = vnetp->hio_fp; 864 hres = B_TRUE; 865 } else { 866 vresp = vnetp->vsw_fp; 867 hres = B_FALSE; 868 } 869 if (vresp == NULL) { 870 /* 871 * no fdb entry to vsw? drop the packet. 872 */ 873 RW_EXIT(&vnetp->vsw_fp_rw); 874 freemsg(mp); 875 mp = next; 876 continue; 877 } 878 879 /* ref hold the fdb entry to vsw */ 880 VNET_FDBE_REFHOLD(vresp); 881 882 RW_EXIT(&vnetp->vsw_fp_rw); 883 884 /* 885 * In the case of a hybrid resource we need to insert 886 * the tag for the pvid case here; unlike packets that 887 * are destined to a vnet/vsw in which case the vgen 888 * layer does the tagging before sending it over ldc. 889 */ 890 if (hres == B_TRUE) { 891 /* 892 * Determine if the frame being transmitted 893 * over the hybrid resource is untagged. If so, 894 * insert the tag before transmitting. 895 */ 896 if (is_pvid == B_TRUE && 897 ehp->ether_type != htons(ETHERTYPE_VLAN)) { 898 899 mp = vnet_vlan_insert_tag(mp, 900 vnetp->pvid); 901 if (mp == NULL) { 902 VNET_FDBE_REFRELE(vresp); 903 mp = next; 904 continue; 905 } 906 907 } 908 909 macp = &vresp->macreg; 910 tx_arg = tx_ringp; 911 } else { 912 macp = &vresp->macreg; 913 tx_arg = macp->m_driver; 914 } 915 resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp); 916 917 /* tx done; now release ref on fdb entry */ 918 VNET_FDBE_REFRELE(vresp); 919 920 if (resid_mp != NULL) { 921 /* m_tx failed */ 922 mp->b_next = next; 923 break; 924 } 925 } 926 927 statsp->obytes += size; 928 statsp->opackets++; 929 mp = next; 930 } 931 932 DBG1(vnetp, "exit\n"); 933 return (mp); 934 } 935 936 /* get statistics from the device */ 937 int 938 vnet_m_stat(void *arg, uint_t stat, uint64_t *val) 939 { 940 vnet_t *vnetp = arg; 941 vnet_res_t *vresp; 942 mac_register_t *macp; 943 mac_callbacks_t *cbp; 944 uint64_t val_total = 0; 945 946 DBG1(vnetp, "enter\n"); 947 948 /* 949 * get the specified statistic from each transport and return the 950 * aggregate val. This obviously only works for counters. 951 */ 952 if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) || 953 (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) { 954 return (ENOTSUP); 955 } 956 957 READ_ENTER(&vnetp->vrwlock); 958 for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) { 959 macp = &vresp->macreg; 960 cbp = macp->m_callbacks; 961 if (cbp->mc_getstat(macp->m_driver, stat, val) == 0) 962 val_total += *val; 963 } 964 RW_EXIT(&vnetp->vrwlock); 965 966 *val = val_total; 967 968 DBG1(vnetp, "exit\n"); 969 return (0); 970 } 971 972 static void 973 vnet_ring_grp_init(vnet_t *vnetp) 974 { 975 vnet_pseudo_rx_group_t *rx_grp; 976 vnet_pseudo_rx_ring_t *rx_ringp; 977 vnet_pseudo_tx_group_t *tx_grp; 978 vnet_pseudo_tx_ring_t *tx_ringp; 979 int i; 980 981 tx_grp = &vnetp->tx_grp[0]; 982 tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) * 983 VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP); 984 for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) { 985 tx_ringp[i].state |= VNET_TXRING_SHARED; 986 } 987 tx_grp->rings = tx_ringp; 988 tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS; 989 mutex_init(&tx_grp->flowctl_lock, NULL, MUTEX_DRIVER, NULL); 990 cv_init(&tx_grp->flowctl_cv, NULL, CV_DRIVER, NULL); 991 tx_grp->flowctl_thread = thread_create(NULL, 0, 992 vnet_tx_notify_thread, tx_grp, 0, &p0, TS_RUN, minclsyspri); 993 994 rx_grp = &vnetp->rx_grp[0]; 995 rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP; 996 rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL); 997 rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) * 998 rx_grp->max_ring_cnt, KM_SLEEP); 999 1000 /* 1001 * Setup the first 3 Pseudo RX Rings that are reserved; 1002 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource. 1003 */ 1004 rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE; 1005 rx_ringp[0].index = 0; 1006 rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID; 1007 rx_ringp[1].index = 1; 1008 rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID; 1009 rx_ringp[2].index = 2; 1010 1011 rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; 1012 rx_grp->rings = rx_ringp; 1013 1014 for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; 1015 i < rx_grp->max_ring_cnt; i++) { 1016 rx_ringp = &rx_grp->rings[i]; 1017 rx_ringp->state = VNET_RXRING_FREE; 1018 rx_ringp->index = i; 1019 } 1020 } 1021 1022 static void 1023 vnet_ring_grp_uninit(vnet_t *vnetp) 1024 { 1025 vnet_pseudo_rx_group_t *rx_grp; 1026 vnet_pseudo_tx_group_t *tx_grp; 1027 kt_did_t tid = 0; 1028 1029 tx_grp = &vnetp->tx_grp[0]; 1030 1031 /* Inform tx_notify_thread to exit */ 1032 mutex_enter(&tx_grp->flowctl_lock); 1033 if (tx_grp->flowctl_thread != NULL) { 1034 tid = tx_grp->flowctl_thread->t_did; 1035 tx_grp->flowctl_done = B_TRUE; 1036 cv_signal(&tx_grp->flowctl_cv); 1037 } 1038 mutex_exit(&tx_grp->flowctl_lock); 1039 if (tid != 0) 1040 thread_join(tid); 1041 1042 if (tx_grp->rings != NULL) { 1043 ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS); 1044 kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) * 1045 tx_grp->ring_cnt); 1046 tx_grp->rings = NULL; 1047 } 1048 1049 rx_grp = &vnetp->rx_grp[0]; 1050 if (rx_grp->rings != NULL) { 1051 ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP); 1052 ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT); 1053 kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) * 1054 rx_grp->max_ring_cnt); 1055 rx_grp->rings = NULL; 1056 } 1057 } 1058 1059 static vnet_pseudo_rx_ring_t * 1060 vnet_alloc_pseudo_rx_ring(vnet_t *vnetp) 1061 { 1062 vnet_pseudo_rx_group_t *rx_grp; 1063 vnet_pseudo_rx_ring_t *rx_ringp; 1064 int index; 1065 1066 rx_grp = &vnetp->rx_grp[0]; 1067 WRITE_ENTER(&rx_grp->lock); 1068 1069 if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) { 1070 /* no rings available */ 1071 RW_EXIT(&rx_grp->lock); 1072 return (NULL); 1073 } 1074 1075 for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; 1076 index < rx_grp->max_ring_cnt; index++) { 1077 rx_ringp = &rx_grp->rings[index]; 1078 if (rx_ringp->state == VNET_RXRING_FREE) { 1079 rx_ringp->state |= VNET_RXRING_INUSE; 1080 rx_grp->ring_cnt++; 1081 break; 1082 } 1083 } 1084 1085 RW_EXIT(&rx_grp->lock); 1086 return (rx_ringp); 1087 } 1088 1089 static void 1090 vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp) 1091 { 1092 vnet_pseudo_rx_group_t *rx_grp; 1093 1094 ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT); 1095 rx_grp = &vnetp->rx_grp[0]; 1096 WRITE_ENTER(&rx_grp->lock); 1097 1098 if (ringp->state != VNET_RXRING_FREE) { 1099 ringp->state = VNET_RXRING_FREE; 1100 ringp->handle = NULL; 1101 rx_grp->ring_cnt--; 1102 } 1103 1104 RW_EXIT(&rx_grp->lock); 1105 } 1106 1107 /* wrapper function for mac_register() */ 1108 static int 1109 vnet_mac_register(vnet_t *vnetp) 1110 { 1111 mac_register_t *macp; 1112 int err; 1113 1114 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1115 return (DDI_FAILURE); 1116 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1117 macp->m_driver = vnetp; 1118 macp->m_dip = vnetp->dip; 1119 macp->m_src_addr = vnetp->curr_macaddr; 1120 macp->m_callbacks = &vnet_m_callbacks; 1121 macp->m_min_sdu = 0; 1122 macp->m_max_sdu = vnetp->mtu; 1123 macp->m_margin = VLAN_TAGSZ; 1124 1125 macp->m_v12n = MAC_VIRT_LEVEL1; 1126 1127 /* 1128 * Finally, we're ready to register ourselves with the MAC layer 1129 * interface; if this succeeds, we're all ready to start() 1130 */ 1131 err = mac_register(macp, &vnetp->mh); 1132 mac_free(macp); 1133 return (err == 0 ? DDI_SUCCESS : DDI_FAILURE); 1134 } 1135 1136 /* read the mac address of the device */ 1137 static int 1138 vnet_read_mac_address(vnet_t *vnetp) 1139 { 1140 uchar_t *macaddr; 1141 uint32_t size; 1142 int rv; 1143 1144 rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip, 1145 DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size); 1146 if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) { 1147 DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n", 1148 macaddr_propname, rv); 1149 return (DDI_FAILURE); 1150 } 1151 bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL); 1152 bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL); 1153 ddi_prop_free(macaddr); 1154 1155 return (DDI_SUCCESS); 1156 } 1157 1158 static void 1159 vnet_fdb_create(vnet_t *vnetp) 1160 { 1161 char hashname[MAXNAMELEN]; 1162 1163 (void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash", 1164 vnetp->instance); 1165 vnetp->fdb_nchains = vnet_fdb_nchains; 1166 vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains, 1167 mod_hash_null_valdtor, sizeof (void *)); 1168 } 1169 1170 static void 1171 vnet_fdb_destroy(vnet_t *vnetp) 1172 { 1173 /* destroy fdb-hash-table */ 1174 if (vnetp->fdb_hashp != NULL) { 1175 mod_hash_destroy_hash(vnetp->fdb_hashp); 1176 vnetp->fdb_hashp = NULL; 1177 vnetp->fdb_nchains = 0; 1178 } 1179 } 1180 1181 /* 1182 * Add an entry into the fdb. 1183 */ 1184 void 1185 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp) 1186 { 1187 uint64_t addr = 0; 1188 int rv; 1189 1190 KEY_HASH(addr, vresp->rem_macaddr); 1191 1192 /* 1193 * If the entry being added corresponds to LDC_SERVICE resource, 1194 * that is, vswitch connection, it is added to the hash and also 1195 * the entry is cached, an additional reference count reflects 1196 * this. The HYBRID resource is not added to the hash, but only 1197 * cached, as it is only used for sending out packets for unknown 1198 * unicast destinations. 1199 */ 1200 (vresp->type == VIO_NET_RES_LDC_SERVICE) ? 1201 (vresp->refcnt = 1) : (vresp->refcnt = 0); 1202 1203 /* 1204 * Note: duplicate keys will be rejected by mod_hash. 1205 */ 1206 if (vresp->type != VIO_NET_RES_HYBRID) { 1207 rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr, 1208 (mod_hash_val_t)vresp); 1209 if (rv != 0) { 1210 DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr); 1211 return; 1212 } 1213 } 1214 1215 if (vresp->type == VIO_NET_RES_LDC_SERVICE) { 1216 /* Cache the fdb entry to vsw-port */ 1217 WRITE_ENTER(&vnetp->vsw_fp_rw); 1218 if (vnetp->vsw_fp == NULL) 1219 vnetp->vsw_fp = vresp; 1220 RW_EXIT(&vnetp->vsw_fp_rw); 1221 } else if (vresp->type == VIO_NET_RES_HYBRID) { 1222 /* Cache the fdb entry to hybrid resource */ 1223 WRITE_ENTER(&vnetp->vsw_fp_rw); 1224 if (vnetp->hio_fp == NULL) 1225 vnetp->hio_fp = vresp; 1226 RW_EXIT(&vnetp->vsw_fp_rw); 1227 } 1228 } 1229 1230 /* 1231 * Remove an entry from fdb. 1232 */ 1233 static void 1234 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp) 1235 { 1236 uint64_t addr = 0; 1237 int rv; 1238 uint32_t refcnt; 1239 vnet_res_t *tmp; 1240 1241 KEY_HASH(addr, vresp->rem_macaddr); 1242 1243 /* 1244 * Remove the entry from fdb hash table. 1245 * This prevents further references to this fdb entry. 1246 */ 1247 if (vresp->type != VIO_NET_RES_HYBRID) { 1248 rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr, 1249 (mod_hash_val_t *)&tmp); 1250 if (rv != 0) { 1251 /* 1252 * As the resources are added to the hash only 1253 * after they are started, this can occur if 1254 * a resource unregisters before it is ever started. 1255 */ 1256 return; 1257 } 1258 } 1259 1260 if (vresp->type == VIO_NET_RES_LDC_SERVICE) { 1261 WRITE_ENTER(&vnetp->vsw_fp_rw); 1262 1263 ASSERT(tmp == vnetp->vsw_fp); 1264 vnetp->vsw_fp = NULL; 1265 1266 RW_EXIT(&vnetp->vsw_fp_rw); 1267 } else if (vresp->type == VIO_NET_RES_HYBRID) { 1268 WRITE_ENTER(&vnetp->vsw_fp_rw); 1269 1270 vnetp->hio_fp = NULL; 1271 1272 RW_EXIT(&vnetp->vsw_fp_rw); 1273 } 1274 1275 /* 1276 * If there are threads already ref holding before the entry was 1277 * removed from hash table, then wait for ref count to drop to zero. 1278 */ 1279 (vresp->type == VIO_NET_RES_LDC_SERVICE) ? 1280 (refcnt = 1) : (refcnt = 0); 1281 while (vresp->refcnt > refcnt) { 1282 delay(drv_usectohz(vnet_fdbe_refcnt_delay)); 1283 } 1284 } 1285 1286 /* 1287 * Search fdb for a given mac address. If an entry is found, hold 1288 * a reference to it and return the entry; else returns NULL. 1289 */ 1290 static vnet_res_t * 1291 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp) 1292 { 1293 uint64_t key = 0; 1294 vnet_res_t *vresp; 1295 int rv; 1296 1297 KEY_HASH(key, addrp->ether_addr_octet); 1298 1299 rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key, 1300 (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb); 1301 1302 if (rv != 0) 1303 return (NULL); 1304 1305 return (vresp); 1306 } 1307 1308 /* 1309 * Callback function provided to mod_hash_find_cb(). After finding the fdb 1310 * entry corresponding to the key (macaddr), this callback will be invoked by 1311 * mod_hash_find_cb() to atomically increment the reference count on the fdb 1312 * entry before returning the found entry. 1313 */ 1314 static void 1315 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val) 1316 { 1317 _NOTE(ARGUNUSED(key)) 1318 VNET_FDBE_REFHOLD((vnet_res_t *)val); 1319 } 1320 1321 /* 1322 * Frames received that are tagged with the pvid of the vnet device must be 1323 * untagged before sending up the stack. This function walks the chain of rx 1324 * frames, untags any such frames and returns the updated chain. 1325 * 1326 * Arguments: 1327 * pvid: pvid of the vnet device for which packets are being received 1328 * mp: head of pkt chain to be validated and untagged 1329 * 1330 * Returns: 1331 * mp: head of updated chain of packets 1332 */ 1333 static void 1334 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp) 1335 { 1336 struct ether_vlan_header *evhp; 1337 mblk_t *bp; 1338 mblk_t *bpt; 1339 mblk_t *bph; 1340 mblk_t *bpn; 1341 1342 bpn = bph = bpt = NULL; 1343 1344 for (bp = *mp; bp != NULL; bp = bpn) { 1345 1346 bpn = bp->b_next; 1347 bp->b_next = bp->b_prev = NULL; 1348 1349 evhp = (struct ether_vlan_header *)bp->b_rptr; 1350 1351 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN && 1352 VLAN_ID(ntohs(evhp->ether_tci)) == pvid) { 1353 1354 bp = vnet_vlan_remove_tag(bp); 1355 if (bp == NULL) { 1356 continue; 1357 } 1358 1359 } 1360 1361 /* build a chain of processed packets */ 1362 if (bph == NULL) { 1363 bph = bpt = bp; 1364 } else { 1365 bpt->b_next = bp; 1366 bpt = bp; 1367 } 1368 1369 } 1370 1371 *mp = bph; 1372 } 1373 1374 static void 1375 vnet_rx(vio_net_handle_t vrh, mblk_t *mp) 1376 { 1377 vnet_res_t *vresp = (vnet_res_t *)vrh; 1378 vnet_t *vnetp = vresp->vnetp; 1379 vnet_pseudo_rx_ring_t *ringp; 1380 1381 if ((vnetp == NULL) || (vnetp->mh == 0)) { 1382 freemsgchain(mp); 1383 return; 1384 } 1385 1386 ringp = vresp->rx_ringp; 1387 mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num); 1388 } 1389 1390 void 1391 vnet_tx_update(vio_net_handle_t vrh) 1392 { 1393 vnet_res_t *vresp = (vnet_res_t *)vrh; 1394 vnet_t *vnetp = vresp->vnetp; 1395 vnet_pseudo_tx_ring_t *tx_ringp; 1396 vnet_pseudo_tx_group_t *tx_grp; 1397 int i; 1398 1399 if (vnetp == NULL || vnetp->mh == NULL) { 1400 return; 1401 } 1402 1403 /* 1404 * Currently, the tx hwring API (used to access rings that belong to 1405 * a Hybrid IO resource) does not provide us a per ring flow ctrl 1406 * update; also the pseudo rings are shared by the ports/ldcs in the 1407 * vgen layer. Thus we can't figure out which pseudo ring is being 1408 * re-enabled for transmits. To work around this, when we get a tx 1409 * restart notification from below, we simply propagate that to all 1410 * the tx pseudo rings registered with the mac layer above. 1411 * 1412 * There are a couple of side effects with this approach, but they are 1413 * not harmful, as outlined below: 1414 * 1415 * A) We might send an invalid ring_update() for a ring that is not 1416 * really flow controlled. This will not have any effect in the mac 1417 * layer and packets will continue to be transmitted on that ring. 1418 * 1419 * B) We might end up clearing the flow control in the mac layer for 1420 * a ring that is still flow controlled in the underlying resource. 1421 * This will result in the mac layer restarting transmit, only to be 1422 * flow controlled again on that ring. 1423 */ 1424 tx_grp = &vnetp->tx_grp[0]; 1425 for (i = 0; i < tx_grp->ring_cnt; i++) { 1426 tx_ringp = &tx_grp->rings[i]; 1427 mac_tx_ring_update(vnetp->mh, tx_ringp->handle); 1428 } 1429 } 1430 1431 /* 1432 * vnet_tx_notify_thread: 1433 * 1434 * vnet_tx_ring_update() callback function wakes up this thread when 1435 * it gets called. This thread will call mac_tx_ring_update() to 1436 * notify upper mac of flow control getting relieved. Note that 1437 * vnet_tx_ring_update() cannot call mac_tx_ring_update() directly 1438 * because vnet_tx_ring_update() is called from lower mac with 1439 * mi_rw_lock held and mac_tx_ring_update() would also try to grab 1440 * the same lock. 1441 */ 1442 static void 1443 vnet_tx_notify_thread(void *arg) 1444 { 1445 callb_cpr_t cprinfo; 1446 vnet_pseudo_tx_group_t *tx_grp = (vnet_pseudo_tx_group_t *)arg; 1447 vnet_pseudo_tx_ring_t *tx_ringp; 1448 vnet_t *vnetp; 1449 int i; 1450 1451 CALLB_CPR_INIT(&cprinfo, &tx_grp->flowctl_lock, callb_generic_cpr, 1452 "vnet_tx_notify_thread"); 1453 1454 mutex_enter(&tx_grp->flowctl_lock); 1455 while (!tx_grp->flowctl_done) { 1456 CALLB_CPR_SAFE_BEGIN(&cprinfo); 1457 cv_wait(&tx_grp->flowctl_cv, &tx_grp->flowctl_lock); 1458 CALLB_CPR_SAFE_END(&cprinfo, &tx_grp->flowctl_lock); 1459 1460 for (i = 0; i < tx_grp->ring_cnt; i++) { 1461 tx_ringp = &tx_grp->rings[i]; 1462 if (tx_ringp->woken_up) { 1463 tx_ringp->woken_up = B_FALSE; 1464 vnetp = tx_ringp->vnetp; 1465 mac_tx_ring_update(vnetp->mh, tx_ringp->handle); 1466 } 1467 } 1468 } 1469 /* 1470 * The tx_grp is being destroyed, exit the thread. 1471 */ 1472 tx_grp->flowctl_thread = NULL; 1473 CALLB_CPR_EXIT(&cprinfo); 1474 thread_exit(); 1475 } 1476 1477 void 1478 vnet_tx_ring_update(void *arg1, uintptr_t arg2) 1479 { 1480 vnet_t *vnetp = (vnet_t *)arg1; 1481 vnet_pseudo_tx_group_t *tx_grp; 1482 vnet_pseudo_tx_ring_t *tx_ringp; 1483 int i; 1484 1485 tx_grp = &vnetp->tx_grp[0]; 1486 for (i = 0; i < tx_grp->ring_cnt; i++) { 1487 tx_ringp = &tx_grp->rings[i]; 1488 if (tx_ringp->hw_rh == (mac_ring_handle_t)arg2) { 1489 mutex_enter(&tx_grp->flowctl_lock); 1490 tx_ringp->woken_up = B_TRUE; 1491 cv_signal(&tx_grp->flowctl_cv); 1492 mutex_exit(&tx_grp->flowctl_lock); 1493 break; 1494 } 1495 } 1496 } 1497 1498 /* 1499 * Update the new mtu of vnet into the mac layer. First check if the device has 1500 * been plumbed and if so fail the mtu update. Returns 0 on success. 1501 */ 1502 int 1503 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu) 1504 { 1505 int rv; 1506 1507 if (vnetp == NULL || vnetp->mh == NULL) { 1508 return (EINVAL); 1509 } 1510 1511 WRITE_ENTER(&vnetp->vrwlock); 1512 1513 if (vnetp->flags & VNET_STARTED) { 1514 RW_EXIT(&vnetp->vrwlock); 1515 cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu " 1516 "update as the device is plumbed\n", 1517 vnetp->instance); 1518 return (EBUSY); 1519 } 1520 1521 /* update mtu in the mac layer */ 1522 rv = mac_maxsdu_update(vnetp->mh, mtu); 1523 if (rv != 0) { 1524 RW_EXIT(&vnetp->vrwlock); 1525 cmn_err(CE_NOTE, 1526 "!vnet%d: Unable to update mtu with mac layer\n", 1527 vnetp->instance); 1528 return (EIO); 1529 } 1530 1531 vnetp->mtu = mtu; 1532 1533 RW_EXIT(&vnetp->vrwlock); 1534 1535 return (0); 1536 } 1537 1538 /* 1539 * Update the link state of vnet to the mac layer. 1540 */ 1541 void 1542 vnet_link_update(vnet_t *vnetp, link_state_t link_state) 1543 { 1544 if (vnetp == NULL || vnetp->mh == NULL) { 1545 return; 1546 } 1547 1548 WRITE_ENTER(&vnetp->vrwlock); 1549 if (vnetp->link_state == link_state) { 1550 RW_EXIT(&vnetp->vrwlock); 1551 return; 1552 } 1553 vnetp->link_state = link_state; 1554 RW_EXIT(&vnetp->vrwlock); 1555 1556 mac_link_update(vnetp->mh, link_state); 1557 } 1558 1559 /* 1560 * vio_net_resource_reg -- An interface called to register a resource 1561 * with vnet. 1562 * macp -- a GLDv3 mac_register that has all the details of 1563 * a resource and its callbacks etc. 1564 * type -- resource type. 1565 * local_macaddr -- resource's MAC address. This is used to 1566 * associate a resource with a corresponding vnet. 1567 * remote_macaddr -- remote side MAC address. This is ignored for 1568 * the Hybrid resources. 1569 * vhp -- A handle returned to the caller. 1570 * vcb -- A set of callbacks provided to the callers. 1571 */ 1572 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type, 1573 ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp, 1574 vio_net_callbacks_t *vcb) 1575 { 1576 vnet_t *vnetp; 1577 vnet_res_t *vresp; 1578 1579 vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP); 1580 ether_copy(local_macaddr, vresp->local_macaddr); 1581 ether_copy(rem_macaddr, vresp->rem_macaddr); 1582 vresp->type = type; 1583 bcopy(macp, &vresp->macreg, sizeof (mac_register_t)); 1584 1585 DBG1(NULL, "Resource Registerig type=0%X\n", type); 1586 1587 READ_ENTER(&vnet_rw); 1588 vnetp = vnet_headp; 1589 while (vnetp != NULL) { 1590 if (VNET_MATCH_RES(vresp, vnetp)) { 1591 vresp->vnetp = vnetp; 1592 1593 /* Setup kstats for hio resource */ 1594 if (vresp->type == VIO_NET_RES_HYBRID) { 1595 vresp->ksp = vnet_hio_setup_kstats(DRV_NAME, 1596 "hio", vresp); 1597 if (vresp->ksp == NULL) { 1598 cmn_err(CE_NOTE, "!vnet%d: Cannot " 1599 "create kstats for hio resource", 1600 vnetp->instance); 1601 } 1602 } 1603 vnet_add_resource(vnetp, vresp); 1604 break; 1605 } 1606 vnetp = vnetp->nextp; 1607 } 1608 RW_EXIT(&vnet_rw); 1609 if (vresp->vnetp == NULL) { 1610 DWARN(NULL, "No vnet instance"); 1611 kmem_free(vresp, sizeof (vnet_res_t)); 1612 return (ENXIO); 1613 } 1614 1615 *vhp = vresp; 1616 vcb->vio_net_rx_cb = vnet_rx; 1617 vcb->vio_net_tx_update = vnet_tx_update; 1618 vcb->vio_net_report_err = vnet_handle_res_err; 1619 1620 /* Bind the resource to pseudo ring(s) */ 1621 if (vnet_bind_rings(vresp) != 0) { 1622 (void) vnet_rem_resource(vnetp, vresp); 1623 vnet_hio_destroy_kstats(vresp->ksp); 1624 KMEM_FREE(vresp); 1625 return (1); 1626 } 1627 1628 /* Dispatch a task to start resources */ 1629 vnet_dispatch_res_task(vnetp); 1630 return (0); 1631 } 1632 1633 /* 1634 * vio_net_resource_unreg -- An interface to unregister a resource. 1635 */ 1636 void 1637 vio_net_resource_unreg(vio_net_handle_t vhp) 1638 { 1639 vnet_res_t *vresp = (vnet_res_t *)vhp; 1640 vnet_t *vnetp = vresp->vnetp; 1641 1642 DBG1(NULL, "Resource Registerig hdl=0x%p", vhp); 1643 1644 ASSERT(vnetp != NULL); 1645 /* 1646 * Remove the resource from fdb; this ensures 1647 * there are no references to the resource. 1648 */ 1649 vnet_fdbe_del(vnetp, vresp); 1650 1651 vnet_unbind_rings(vresp); 1652 1653 /* Now remove the resource from the list */ 1654 (void) vnet_rem_resource(vnetp, vresp); 1655 1656 vnet_hio_destroy_kstats(vresp->ksp); 1657 KMEM_FREE(vresp); 1658 } 1659 1660 static void 1661 vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp) 1662 { 1663 WRITE_ENTER(&vnetp->vrwlock); 1664 vresp->nextp = vnetp->vres_list; 1665 vnetp->vres_list = vresp; 1666 RW_EXIT(&vnetp->vrwlock); 1667 } 1668 1669 static vnet_res_t * 1670 vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp) 1671 { 1672 vnet_res_t *vrp; 1673 1674 WRITE_ENTER(&vnetp->vrwlock); 1675 if (vresp == vnetp->vres_list) { 1676 vnetp->vres_list = vresp->nextp; 1677 } else { 1678 vrp = vnetp->vres_list; 1679 while (vrp->nextp != NULL) { 1680 if (vrp->nextp == vresp) { 1681 vrp->nextp = vresp->nextp; 1682 break; 1683 } 1684 vrp = vrp->nextp; 1685 } 1686 } 1687 vresp->vnetp = NULL; 1688 vresp->nextp = NULL; 1689 1690 RW_EXIT(&vnetp->vrwlock); 1691 1692 return (vresp); 1693 } 1694 1695 /* 1696 * vnet_dds_rx -- an interface called by vgen to DDS messages. 1697 */ 1698 void 1699 vnet_dds_rx(void *arg, void *dmsg) 1700 { 1701 vnet_t *vnetp = arg; 1702 vdds_process_dds_msg(vnetp, dmsg); 1703 } 1704 1705 /* 1706 * vnet_send_dds_msg -- An interface provided to DDS to send 1707 * DDS messages. This simply sends meessages via vgen. 1708 */ 1709 int 1710 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg) 1711 { 1712 int rv; 1713 1714 if (vnetp->vgenhdl != NULL) { 1715 rv = vgen_dds_tx(vnetp->vgenhdl, dmsg); 1716 } 1717 return (rv); 1718 } 1719 1720 /* 1721 * vnet_cleanup_hio -- an interface called by vgen to cleanup hio resources. 1722 */ 1723 void 1724 vnet_dds_cleanup_hio(vnet_t *vnetp) 1725 { 1726 vdds_cleanup_hio(vnetp); 1727 } 1728 1729 /* 1730 * vnet_handle_res_err -- A callback function called by a resource 1731 * to report an error. For example, vgen can call to report 1732 * an LDC down/reset event. This will trigger cleanup of associated 1733 * Hybrid resource. 1734 */ 1735 /* ARGSUSED */ 1736 static void 1737 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err) 1738 { 1739 vnet_res_t *vresp = (vnet_res_t *)vrh; 1740 vnet_t *vnetp = vresp->vnetp; 1741 1742 if (vnetp == NULL) { 1743 return; 1744 } 1745 if ((vresp->type != VIO_NET_RES_LDC_SERVICE) && 1746 (vresp->type != VIO_NET_RES_HYBRID)) { 1747 return; 1748 } 1749 1750 vdds_cleanup_hio(vnetp); 1751 } 1752 1753 /* 1754 * vnet_dispatch_res_task -- A function to dispatch tasks start resources. 1755 */ 1756 static void 1757 vnet_dispatch_res_task(vnet_t *vnetp) 1758 { 1759 int rv; 1760 1761 /* 1762 * Dispatch the task. It could be the case that vnetp->flags does 1763 * not have VNET_STARTED set. This is ok as vnet_rest_start_task() 1764 * can abort the task when the task is started. See related comments 1765 * in vnet_m_stop() and vnet_stop_resources(). 1766 */ 1767 rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task, 1768 vnetp, DDI_NOSLEEP); 1769 if (rv != DDI_SUCCESS) { 1770 cmn_err(CE_WARN, 1771 "vnet%d:Can't dispatch start resource task", 1772 vnetp->instance); 1773 } 1774 } 1775 1776 /* 1777 * vnet_res_start_task -- A taskq callback function that starts a resource. 1778 */ 1779 static void 1780 vnet_res_start_task(void *arg) 1781 { 1782 vnet_t *vnetp = arg; 1783 1784 WRITE_ENTER(&vnetp->vrwlock); 1785 if (vnetp->flags & VNET_STARTED) { 1786 vnet_start_resources(vnetp); 1787 } 1788 RW_EXIT(&vnetp->vrwlock); 1789 } 1790 1791 /* 1792 * vnet_start_resources -- starts all resources associated with 1793 * a vnet. 1794 */ 1795 static void 1796 vnet_start_resources(vnet_t *vnetp) 1797 { 1798 mac_register_t *macp; 1799 mac_callbacks_t *cbp; 1800 vnet_res_t *vresp; 1801 int rv; 1802 1803 DBG1(vnetp, "enter\n"); 1804 1805 ASSERT(RW_WRITE_HELD(&vnetp->vrwlock)); 1806 1807 for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) { 1808 /* skip if it is already started */ 1809 if (vresp->flags & VNET_STARTED) { 1810 continue; 1811 } 1812 macp = &vresp->macreg; 1813 cbp = macp->m_callbacks; 1814 rv = cbp->mc_start(macp->m_driver); 1815 if (rv == 0) { 1816 /* 1817 * Successfully started the resource, so now 1818 * add it to the fdb. 1819 */ 1820 vresp->flags |= VNET_STARTED; 1821 vnet_fdbe_add(vnetp, vresp); 1822 } 1823 } 1824 1825 DBG1(vnetp, "exit\n"); 1826 1827 } 1828 1829 /* 1830 * vnet_stop_resources -- stop all resources associated with a vnet. 1831 */ 1832 static void 1833 vnet_stop_resources(vnet_t *vnetp) 1834 { 1835 vnet_res_t *vresp; 1836 mac_register_t *macp; 1837 mac_callbacks_t *cbp; 1838 1839 DBG1(vnetp, "enter\n"); 1840 1841 ASSERT(RW_WRITE_HELD(&vnetp->vrwlock)); 1842 1843 for (vresp = vnetp->vres_list; vresp != NULL; ) { 1844 if (vresp->flags & VNET_STARTED) { 1845 /* 1846 * Release the lock while invoking mc_stop() of the 1847 * underlying resource. We hold a reference to this 1848 * resource to prevent being removed from the list in 1849 * vio_net_resource_unreg(). Note that new resources 1850 * can be added to the head of the list while the lock 1851 * is released, but they won't be started, as 1852 * VNET_STARTED flag has been cleared for the vnet 1853 * device in vnet_m_stop(). Also, while the lock is 1854 * released a resource could be removed from the list 1855 * in vio_net_resource_unreg(); but that is ok, as we 1856 * re-acquire the lock and only then access the forward 1857 * link (vresp->nextp) to continue with the next 1858 * resource. 1859 */ 1860 vresp->flags &= ~VNET_STARTED; 1861 vresp->flags |= VNET_STOPPING; 1862 macp = &vresp->macreg; 1863 cbp = macp->m_callbacks; 1864 VNET_FDBE_REFHOLD(vresp); 1865 RW_EXIT(&vnetp->vrwlock); 1866 1867 cbp->mc_stop(macp->m_driver); 1868 1869 WRITE_ENTER(&vnetp->vrwlock); 1870 vresp->flags &= ~VNET_STOPPING; 1871 VNET_FDBE_REFRELE(vresp); 1872 } 1873 vresp = vresp->nextp; 1874 } 1875 DBG1(vnetp, "exit\n"); 1876 } 1877 1878 /* 1879 * Setup kstats for the HIO statistics. 1880 * NOTE: the synchronization for the statistics is the 1881 * responsibility of the caller. 1882 */ 1883 kstat_t * 1884 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp) 1885 { 1886 kstat_t *ksp; 1887 vnet_t *vnetp = vresp->vnetp; 1888 vnet_hio_kstats_t *hiokp; 1889 size_t size; 1890 1891 ASSERT(vnetp != NULL); 1892 size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t); 1893 ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net", 1894 KSTAT_TYPE_NAMED, size, 0); 1895 if (ksp == NULL) { 1896 return (NULL); 1897 } 1898 1899 hiokp = (vnet_hio_kstats_t *)ksp->ks_data; 1900 kstat_named_init(&hiokp->ipackets, "ipackets", 1901 KSTAT_DATA_ULONG); 1902 kstat_named_init(&hiokp->ierrors, "ierrors", 1903 KSTAT_DATA_ULONG); 1904 kstat_named_init(&hiokp->opackets, "opackets", 1905 KSTAT_DATA_ULONG); 1906 kstat_named_init(&hiokp->oerrors, "oerrors", 1907 KSTAT_DATA_ULONG); 1908 1909 1910 /* MIB II kstat variables */ 1911 kstat_named_init(&hiokp->rbytes, "rbytes", 1912 KSTAT_DATA_ULONG); 1913 kstat_named_init(&hiokp->obytes, "obytes", 1914 KSTAT_DATA_ULONG); 1915 kstat_named_init(&hiokp->multircv, "multircv", 1916 KSTAT_DATA_ULONG); 1917 kstat_named_init(&hiokp->multixmt, "multixmt", 1918 KSTAT_DATA_ULONG); 1919 kstat_named_init(&hiokp->brdcstrcv, "brdcstrcv", 1920 KSTAT_DATA_ULONG); 1921 kstat_named_init(&hiokp->brdcstxmt, "brdcstxmt", 1922 KSTAT_DATA_ULONG); 1923 kstat_named_init(&hiokp->norcvbuf, "norcvbuf", 1924 KSTAT_DATA_ULONG); 1925 kstat_named_init(&hiokp->noxmtbuf, "noxmtbuf", 1926 KSTAT_DATA_ULONG); 1927 1928 ksp->ks_update = vnet_hio_update_kstats; 1929 ksp->ks_private = (void *)vresp; 1930 kstat_install(ksp); 1931 return (ksp); 1932 } 1933 1934 /* 1935 * Destroy kstats. 1936 */ 1937 static void 1938 vnet_hio_destroy_kstats(kstat_t *ksp) 1939 { 1940 if (ksp != NULL) 1941 kstat_delete(ksp); 1942 } 1943 1944 /* 1945 * Update the kstats. 1946 */ 1947 static int 1948 vnet_hio_update_kstats(kstat_t *ksp, int rw) 1949 { 1950 vnet_t *vnetp; 1951 vnet_res_t *vresp; 1952 vnet_hio_stats_t statsp; 1953 vnet_hio_kstats_t *hiokp; 1954 1955 vresp = (vnet_res_t *)ksp->ks_private; 1956 vnetp = vresp->vnetp; 1957 1958 bzero(&statsp, sizeof (vnet_hio_stats_t)); 1959 1960 READ_ENTER(&vnetp->vsw_fp_rw); 1961 if (vnetp->hio_fp == NULL) { 1962 /* not using hio resources, just return */ 1963 RW_EXIT(&vnetp->vsw_fp_rw); 1964 return (0); 1965 } 1966 VNET_FDBE_REFHOLD(vnetp->hio_fp); 1967 RW_EXIT(&vnetp->vsw_fp_rw); 1968 vnet_hio_get_stats(vnetp->hio_fp, &statsp); 1969 VNET_FDBE_REFRELE(vnetp->hio_fp); 1970 1971 hiokp = (vnet_hio_kstats_t *)ksp->ks_data; 1972 1973 if (rw == KSTAT_READ) { 1974 /* Link Input/Output stats */ 1975 hiokp->ipackets.value.ul = (uint32_t)statsp.ipackets; 1976 hiokp->ipackets64.value.ull = statsp.ipackets; 1977 hiokp->ierrors.value.ul = statsp.ierrors; 1978 hiokp->opackets.value.ul = (uint32_t)statsp.opackets; 1979 hiokp->opackets64.value.ull = statsp.opackets; 1980 hiokp->oerrors.value.ul = statsp.oerrors; 1981 1982 /* MIB II kstat variables */ 1983 hiokp->rbytes.value.ul = (uint32_t)statsp.rbytes; 1984 hiokp->rbytes64.value.ull = statsp.rbytes; 1985 hiokp->obytes.value.ul = (uint32_t)statsp.obytes; 1986 hiokp->obytes64.value.ull = statsp.obytes; 1987 hiokp->multircv.value.ul = statsp.multircv; 1988 hiokp->multixmt.value.ul = statsp.multixmt; 1989 hiokp->brdcstrcv.value.ul = statsp.brdcstrcv; 1990 hiokp->brdcstxmt.value.ul = statsp.brdcstxmt; 1991 hiokp->norcvbuf.value.ul = statsp.norcvbuf; 1992 hiokp->noxmtbuf.value.ul = statsp.noxmtbuf; 1993 } else { 1994 return (EACCES); 1995 } 1996 1997 return (0); 1998 } 1999 2000 static void 2001 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp) 2002 { 2003 mac_register_t *macp; 2004 mac_callbacks_t *cbp; 2005 uint64_t val; 2006 int stat; 2007 2008 /* 2009 * get the specified statistics from the underlying nxge. 2010 */ 2011 macp = &vresp->macreg; 2012 cbp = macp->m_callbacks; 2013 for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) { 2014 if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) { 2015 switch (stat) { 2016 case MAC_STAT_IPACKETS: 2017 statsp->ipackets = val; 2018 break; 2019 2020 case MAC_STAT_IERRORS: 2021 statsp->ierrors = val; 2022 break; 2023 2024 case MAC_STAT_OPACKETS: 2025 statsp->opackets = val; 2026 break; 2027 2028 case MAC_STAT_OERRORS: 2029 statsp->oerrors = val; 2030 break; 2031 2032 case MAC_STAT_RBYTES: 2033 statsp->rbytes = val; 2034 break; 2035 2036 case MAC_STAT_OBYTES: 2037 statsp->obytes = val; 2038 break; 2039 2040 case MAC_STAT_MULTIRCV: 2041 statsp->multircv = val; 2042 break; 2043 2044 case MAC_STAT_MULTIXMT: 2045 statsp->multixmt = val; 2046 break; 2047 2048 case MAC_STAT_BRDCSTRCV: 2049 statsp->brdcstrcv = val; 2050 break; 2051 2052 case MAC_STAT_BRDCSTXMT: 2053 statsp->brdcstxmt = val; 2054 break; 2055 2056 case MAC_STAT_NOXMTBUF: 2057 statsp->noxmtbuf = val; 2058 break; 2059 2060 case MAC_STAT_NORCVBUF: 2061 statsp->norcvbuf = val; 2062 break; 2063 2064 default: 2065 /* 2066 * parameters not interested. 2067 */ 2068 break; 2069 } 2070 } 2071 } 2072 } 2073 2074 static boolean_t 2075 vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data) 2076 { 2077 vnet_t *vnetp = (vnet_t *)arg; 2078 2079 if (vnetp == NULL) { 2080 return (0); 2081 } 2082 2083 switch (cap) { 2084 2085 case MAC_CAPAB_RINGS: { 2086 2087 mac_capab_rings_t *cap_rings = cap_data; 2088 /* 2089 * Rings Capability Notes: 2090 * We advertise rings to make use of the rings framework in 2091 * gldv3 mac layer, to improve the performance. This is 2092 * specifically needed when a Hybrid resource (with multiple 2093 * tx/rx hardware rings) is assigned to a vnet device. We also 2094 * leverage this for the normal case when no Hybrid resource is 2095 * assigned. 2096 * 2097 * Ring Allocation: 2098 * - TX path: 2099 * We expose a pseudo ring group with 2 pseudo tx rings (as 2100 * currently HybridIO exports only 2 rings) In the normal case, 2101 * transmit traffic that comes down to the driver through the 2102 * mri_tx (vnet_tx_ring_send()) entry point goes through the 2103 * distributed switching algorithm in vnet and gets transmitted 2104 * over a port/LDC in the vgen layer to either the vswitch or a 2105 * peer vnet. If and when a Hybrid resource is assigned to the 2106 * vnet, we obtain the tx ring information of the Hybrid device 2107 * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings. 2108 * Traffic being sent over the Hybrid resource by the mac layer 2109 * gets spread across both hw rings, as they are mapped to the 2110 * 2 pseudo tx rings in vnet. 2111 * 2112 * - RX path: 2113 * We expose a pseudo ring group with 3 pseudo rx rings (static 2114 * rings) initially. The first (default) pseudo rx ring is 2115 * reserved for the resource that connects to the vswitch 2116 * service. The next 2 rings are reserved for a Hybrid resource 2117 * that may be assigned to the vnet device. If and when a 2118 * Hybrid resource is assigned to the vnet, we obtain the rx 2119 * ring information of the Hybrid device (nxge) and map these 2120 * pseudo rings 1:1 to the 2 hw rx rings. For each additional 2121 * resource that connects to a peer vnet, we dynamically 2122 * allocate a pseudo rx ring and map it to that resource, when 2123 * the resource gets added; and the pseudo rx ring is 2124 * dynamically registered with the upper mac layer. We do the 2125 * reverse and unregister the ring with the mac layer when 2126 * the resource gets removed. 2127 * 2128 * Synchronization notes: 2129 * We don't need any lock to protect members of ring structure, 2130 * specifically ringp->hw_rh, in either the TX or the RX ring, 2131 * as explained below. 2132 * - TX ring: 2133 * ring->hw_rh is initialized only when a Hybrid resource is 2134 * associated; and gets referenced only in vnet_hio_tx(). The 2135 * Hybrid resource itself is available in fdb only after tx 2136 * hwrings are found and mapped; i.e, in vio_net_resource_reg() 2137 * we call vnet_bind_rings() first and then call 2138 * vnet_start_resources() which adds an entry to fdb. For 2139 * traffic going over LDC resources, we don't reference 2140 * ring->hw_rh at all. 2141 * - RX ring: 2142 * For rings mapped to Hybrid resource ring->hw_rh is 2143 * initialized and only then do we add the rx callback for 2144 * the underlying Hybrid resource; we disable callbacks before 2145 * we unmap ring->hw_rh. For rings mapped to LDC resources, we 2146 * stop the rx callbacks (in vgen) before we remove ring->hw_rh 2147 * (vio_net_resource_unreg()). 2148 * Also, we access ring->hw_rh in vnet_rx_ring_stat(). 2149 * Note that for rings mapped to Hybrid resource, though the 2150 * rings are statically registered with the mac layer, its 2151 * hardware ring mapping (ringp->hw_rh) can be torn down in 2152 * vnet_unbind_hwrings() while the kstat operation is in 2153 * progress. To protect against this, we hold a reference to 2154 * the resource in FDB; this ensures that the thread in 2155 * vio_net_resource_unreg() waits for the reference to be 2156 * dropped before unbinding the ring. 2157 * 2158 * We don't need to do this for rings mapped to LDC resources. 2159 * These rings are registered/unregistered dynamically with 2160 * the mac layer and so any attempt to unregister the ring 2161 * while kstat operation is in progress will block in 2162 * mac_group_rem_ring(). Thus implicitly protects the 2163 * resource (ringp->hw_rh) from disappearing. 2164 */ 2165 2166 if (cap_rings->mr_type == MAC_RING_TYPE_RX) { 2167 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 2168 2169 /* 2170 * The ring_cnt for rx grp is initialized in 2171 * vnet_ring_grp_init(). Later, the ring_cnt gets 2172 * updated dynamically whenever LDC resources are added 2173 * or removed. 2174 */ 2175 cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt; 2176 cap_rings->mr_rget = vnet_get_ring; 2177 2178 cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS; 2179 cap_rings->mr_gget = vnet_get_group; 2180 cap_rings->mr_gaddring = NULL; 2181 cap_rings->mr_gremring = NULL; 2182 } else { 2183 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 2184 2185 /* 2186 * The ring_cnt for tx grp is initialized in 2187 * vnet_ring_grp_init() and remains constant, as we 2188 * do not support dymanic tx rings for now. 2189 */ 2190 cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt; 2191 cap_rings->mr_rget = vnet_get_ring; 2192 2193 /* 2194 * Transmit rings are not grouped; i.e, the number of 2195 * transmit ring groups advertised should be set to 0. 2196 */ 2197 cap_rings->mr_gnum = 0; 2198 2199 cap_rings->mr_gget = vnet_get_group; 2200 cap_rings->mr_gaddring = NULL; 2201 cap_rings->mr_gremring = NULL; 2202 } 2203 return (B_TRUE); 2204 2205 } 2206 2207 default: 2208 break; 2209 2210 } 2211 2212 return (B_FALSE); 2213 } 2214 2215 /* 2216 * Callback funtion for MAC layer to get ring information. 2217 */ 2218 static void 2219 vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, 2220 const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle) 2221 { 2222 vnet_t *vnetp = arg; 2223 2224 switch (rtype) { 2225 2226 case MAC_RING_TYPE_RX: { 2227 2228 vnet_pseudo_rx_group_t *rx_grp; 2229 vnet_pseudo_rx_ring_t *rx_ringp; 2230 mac_intr_t *mintr; 2231 2232 /* We advertised only one RX group */ 2233 ASSERT(g_index == 0); 2234 rx_grp = &vnetp->rx_grp[g_index]; 2235 2236 /* Check the current # of rings in the rx group */ 2237 ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt)); 2238 2239 /* Get the ring based on the index */ 2240 rx_ringp = &rx_grp->rings[r_index]; 2241 2242 rx_ringp->handle = r_handle; 2243 /* 2244 * Note: we don't need to save the incoming r_index in rx_ring, 2245 * as vnet_ring_grp_init() would have initialized the index for 2246 * each ring in the array. 2247 */ 2248 rx_ringp->grp = rx_grp; 2249 rx_ringp->vnetp = vnetp; 2250 2251 mintr = &infop->mri_intr; 2252 mintr->mi_handle = (mac_intr_handle_t)rx_ringp; 2253 mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr; 2254 mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr; 2255 2256 infop->mri_driver = (mac_ring_driver_t)rx_ringp; 2257 infop->mri_start = vnet_rx_ring_start; 2258 infop->mri_stop = vnet_rx_ring_stop; 2259 infop->mri_stat = vnet_rx_ring_stat; 2260 2261 /* Set the poll function, as this is an rx ring */ 2262 infop->mri_poll = vnet_rx_poll; 2263 /* 2264 * MAC_RING_RX_ENQUEUE bit needed to be set for nxge 2265 * which was not sending packet chains in interrupt 2266 * context. For such drivers, packets are queued in 2267 * Rx soft rings so that we get a chance to switch 2268 * into a polling mode under backlog. This bug (not 2269 * sending packet chains) has now been fixed. Once 2270 * the performance impact is measured, this change 2271 * will be removed. 2272 */ 2273 infop->mri_flags = (vnet_mac_rx_queuing ? 2274 MAC_RING_RX_ENQUEUE : 0); 2275 break; 2276 } 2277 2278 case MAC_RING_TYPE_TX: { 2279 vnet_pseudo_tx_group_t *tx_grp; 2280 vnet_pseudo_tx_ring_t *tx_ringp; 2281 2282 /* 2283 * No need to check grp index; mac layer passes -1 for it. 2284 */ 2285 tx_grp = &vnetp->tx_grp[0]; 2286 2287 /* Check the # of rings in the tx group */ 2288 ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt)); 2289 2290 /* Get the ring based on the index */ 2291 tx_ringp = &tx_grp->rings[r_index]; 2292 2293 tx_ringp->handle = r_handle; 2294 tx_ringp->index = r_index; 2295 tx_ringp->grp = tx_grp; 2296 tx_ringp->vnetp = vnetp; 2297 2298 infop->mri_driver = (mac_ring_driver_t)tx_ringp; 2299 infop->mri_start = vnet_tx_ring_start; 2300 infop->mri_stop = vnet_tx_ring_stop; 2301 infop->mri_stat = vnet_tx_ring_stat; 2302 2303 /* Set the transmit function, as this is a tx ring */ 2304 infop->mri_tx = vnet_tx_ring_send; 2305 /* 2306 * MAC_RING_TX_SERIALIZE bit needs to be set while 2307 * hybridIO is enabled to workaround tx lock 2308 * contention issues in nxge. 2309 */ 2310 infop->mri_flags = (vnet_mac_tx_serialize ? 2311 MAC_RING_TX_SERIALIZE : 0); 2312 break; 2313 } 2314 2315 default: 2316 break; 2317 } 2318 } 2319 2320 /* 2321 * Callback funtion for MAC layer to get group information. 2322 */ 2323 static void 2324 vnet_get_group(void *arg, mac_ring_type_t type, const int index, 2325 mac_group_info_t *infop, mac_group_handle_t handle) 2326 { 2327 vnet_t *vnetp = (vnet_t *)arg; 2328 2329 switch (type) { 2330 2331 case MAC_RING_TYPE_RX: 2332 { 2333 vnet_pseudo_rx_group_t *rx_grp; 2334 2335 /* We advertised only one RX group */ 2336 ASSERT(index == 0); 2337 2338 rx_grp = &vnetp->rx_grp[index]; 2339 rx_grp->handle = handle; 2340 rx_grp->index = index; 2341 rx_grp->vnetp = vnetp; 2342 2343 infop->mgi_driver = (mac_group_driver_t)rx_grp; 2344 infop->mgi_start = NULL; 2345 infop->mgi_stop = NULL; 2346 infop->mgi_addmac = vnet_addmac; 2347 infop->mgi_remmac = vnet_remmac; 2348 infop->mgi_count = rx_grp->ring_cnt; 2349 2350 break; 2351 } 2352 2353 case MAC_RING_TYPE_TX: 2354 { 2355 vnet_pseudo_tx_group_t *tx_grp; 2356 2357 /* We advertised only one TX group */ 2358 ASSERT(index == 0); 2359 2360 tx_grp = &vnetp->tx_grp[index]; 2361 tx_grp->handle = handle; 2362 tx_grp->index = index; 2363 tx_grp->vnetp = vnetp; 2364 2365 infop->mgi_driver = (mac_group_driver_t)tx_grp; 2366 infop->mgi_start = NULL; 2367 infop->mgi_stop = NULL; 2368 infop->mgi_addmac = NULL; 2369 infop->mgi_remmac = NULL; 2370 infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS; 2371 2372 break; 2373 } 2374 2375 default: 2376 break; 2377 2378 } 2379 } 2380 2381 static int 2382 vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num) 2383 { 2384 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2385 int err; 2386 2387 /* 2388 * If this ring is mapped to a LDC resource, simply mark the state to 2389 * indicate the ring is started and return. 2390 */ 2391 if ((rx_ringp->state & 2392 (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) { 2393 rx_ringp->gen_num = mr_gen_num; 2394 rx_ringp->state |= VNET_RXRING_STARTED; 2395 return (0); 2396 } 2397 2398 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); 2399 2400 /* 2401 * This must be a ring reserved for a hwring. If the hwring is not 2402 * bound yet, simply mark the state to indicate the ring is started and 2403 * return. If and when a hybrid resource is activated for this vnet 2404 * device, we will bind the hwring and start it then. If a hwring is 2405 * already bound, start it now. 2406 */ 2407 if (rx_ringp->hw_rh == NULL) { 2408 rx_ringp->gen_num = mr_gen_num; 2409 rx_ringp->state |= VNET_RXRING_STARTED; 2410 return (0); 2411 } 2412 2413 err = mac_hwring_start(rx_ringp->hw_rh); 2414 if (err == 0) { 2415 rx_ringp->gen_num = mr_gen_num; 2416 rx_ringp->state |= VNET_RXRING_STARTED; 2417 } else { 2418 err = ENXIO; 2419 } 2420 2421 return (err); 2422 } 2423 2424 static void 2425 vnet_rx_ring_stop(mac_ring_driver_t arg) 2426 { 2427 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2428 2429 /* 2430 * If this ring is mapped to a LDC resource, simply mark the state to 2431 * indicate the ring is now stopped and return. 2432 */ 2433 if ((rx_ringp->state & 2434 (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) { 2435 rx_ringp->state &= ~VNET_RXRING_STARTED; 2436 return; 2437 } 2438 2439 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); 2440 2441 /* 2442 * This must be a ring reserved for a hwring. If the hwring is not 2443 * bound yet, simply mark the state to indicate the ring is stopped and 2444 * return. If a hwring is already bound, stop it now. 2445 */ 2446 if (rx_ringp->hw_rh == NULL) { 2447 rx_ringp->state &= ~VNET_RXRING_STARTED; 2448 return; 2449 } 2450 2451 mac_hwring_stop(rx_ringp->hw_rh); 2452 rx_ringp->state &= ~VNET_RXRING_STARTED; 2453 } 2454 2455 static int 2456 vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) 2457 { 2458 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)rdriver; 2459 vnet_t *vnetp = (vnet_t *)rx_ringp->vnetp; 2460 vnet_res_t *vresp; 2461 mac_register_t *macp; 2462 mac_callbacks_t *cbp; 2463 2464 /* 2465 * Refer to vnet_m_capab() function for detailed comments on ring 2466 * synchronization. 2467 */ 2468 if ((rx_ringp->state & VNET_RXRING_HYBRID) != 0) { 2469 READ_ENTER(&vnetp->vsw_fp_rw); 2470 if (vnetp->hio_fp == NULL) { 2471 RW_EXIT(&vnetp->vsw_fp_rw); 2472 return (0); 2473 } 2474 2475 VNET_FDBE_REFHOLD(vnetp->hio_fp); 2476 RW_EXIT(&vnetp->vsw_fp_rw); 2477 mac_hwring_getstat(rx_ringp->hw_rh, stat, val); 2478 VNET_FDBE_REFRELE(vnetp->hio_fp); 2479 return (0); 2480 } 2481 2482 ASSERT((rx_ringp->state & 2483 (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0); 2484 vresp = (vnet_res_t *)rx_ringp->hw_rh; 2485 macp = &vresp->macreg; 2486 cbp = macp->m_callbacks; 2487 2488 cbp->mc_getstat(macp->m_driver, stat, val); 2489 2490 return (0); 2491 } 2492 2493 /* ARGSUSED */ 2494 static int 2495 vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num) 2496 { 2497 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg; 2498 2499 tx_ringp->state |= VNET_TXRING_STARTED; 2500 return (0); 2501 } 2502 2503 static void 2504 vnet_tx_ring_stop(mac_ring_driver_t arg) 2505 { 2506 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg; 2507 2508 tx_ringp->state &= ~VNET_TXRING_STARTED; 2509 } 2510 2511 static int 2512 vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) 2513 { 2514 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)rdriver; 2515 vnet_tx_ring_stats_t *statsp; 2516 2517 statsp = &tx_ringp->tx_ring_stats; 2518 2519 switch (stat) { 2520 case MAC_STAT_OPACKETS: 2521 *val = statsp->opackets; 2522 break; 2523 2524 case MAC_STAT_OBYTES: 2525 *val = statsp->obytes; 2526 break; 2527 2528 default: 2529 *val = 0; 2530 return (ENOTSUP); 2531 } 2532 2533 return (0); 2534 } 2535 2536 /* 2537 * Disable polling for a ring and enable its interrupt. 2538 */ 2539 static int 2540 vnet_ring_enable_intr(void *arg) 2541 { 2542 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2543 vnet_res_t *vresp; 2544 2545 if (rx_ringp->hw_rh == NULL) { 2546 /* 2547 * Ring enable intr func is being invoked, but the ring is 2548 * not bound to any underlying resource ? This must be a ring 2549 * reserved for Hybrid resource and no such resource has been 2550 * assigned to this vnet device yet. We simply return success. 2551 */ 2552 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); 2553 return (0); 2554 } 2555 2556 /* 2557 * The rx ring has been bound to either a LDC or a Hybrid resource. 2558 * Call the appropriate function to enable interrupts for the ring. 2559 */ 2560 if (rx_ringp->state & VNET_RXRING_HYBRID) { 2561 return (mac_hwring_enable_intr(rx_ringp->hw_rh)); 2562 } else { 2563 vresp = (vnet_res_t *)rx_ringp->hw_rh; 2564 return (vgen_enable_intr(vresp->macreg.m_driver)); 2565 } 2566 } 2567 2568 /* 2569 * Enable polling for a ring and disable its interrupt. 2570 */ 2571 static int 2572 vnet_ring_disable_intr(void *arg) 2573 { 2574 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2575 vnet_res_t *vresp; 2576 2577 if (rx_ringp->hw_rh == NULL) { 2578 /* 2579 * Ring disable intr func is being invoked, but the ring is 2580 * not bound to any underlying resource ? This must be a ring 2581 * reserved for Hybrid resource and no such resource has been 2582 * assigned to this vnet device yet. We simply return success. 2583 */ 2584 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); 2585 return (0); 2586 } 2587 2588 /* 2589 * The rx ring has been bound to either a LDC or a Hybrid resource. 2590 * Call the appropriate function to disable interrupts for the ring. 2591 */ 2592 if (rx_ringp->state & VNET_RXRING_HYBRID) { 2593 return (mac_hwring_disable_intr(rx_ringp->hw_rh)); 2594 } else { 2595 vresp = (vnet_res_t *)rx_ringp->hw_rh; 2596 return (vgen_disable_intr(vresp->macreg.m_driver)); 2597 } 2598 } 2599 2600 /* 2601 * Poll 'bytes_to_pickup' bytes of message from the rx ring. 2602 */ 2603 static mblk_t * 2604 vnet_rx_poll(void *arg, int bytes_to_pickup) 2605 { 2606 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; 2607 mblk_t *mp = NULL; 2608 vnet_res_t *vresp; 2609 vnet_t *vnetp = rx_ringp->vnetp; 2610 2611 if (rx_ringp->hw_rh == NULL) { 2612 return (NULL); 2613 } 2614 2615 if (rx_ringp->state & VNET_RXRING_HYBRID) { 2616 mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup); 2617 /* 2618 * Packets received over a hybrid resource need additional 2619 * processing to remove the tag, for the pvid case. The 2620 * underlying resource is not aware of the vnet's pvid and thus 2621 * packets are received with the vlan tag in the header; unlike 2622 * packets that are received over a ldc channel in which case 2623 * the peer vnet/vsw would have already removed the tag. 2624 */ 2625 if (vnetp->pvid != vnetp->default_vlan_id) { 2626 vnet_rx_frames_untag(vnetp->pvid, &mp); 2627 } 2628 } else { 2629 vresp = (vnet_res_t *)rx_ringp->hw_rh; 2630 mp = vgen_poll(vresp->macreg.m_driver, bytes_to_pickup); 2631 } 2632 return (mp); 2633 } 2634 2635 /* ARGSUSED */ 2636 void 2637 vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 2638 boolean_t loopback) 2639 { 2640 vnet_t *vnetp = (vnet_t *)arg; 2641 vnet_pseudo_rx_ring_t *ringp = (vnet_pseudo_rx_ring_t *)mrh; 2642 2643 /* 2644 * Packets received over a hybrid resource need additional processing 2645 * to remove the tag, for the pvid case. The underlying resource is 2646 * not aware of the vnet's pvid and thus packets are received with the 2647 * vlan tag in the header; unlike packets that are received over a ldc 2648 * channel in which case the peer vnet/vsw would have already removed 2649 * the tag. 2650 */ 2651 if (vnetp->pvid != vnetp->default_vlan_id) { 2652 vnet_rx_frames_untag(vnetp->pvid, &mp); 2653 if (mp == NULL) { 2654 return; 2655 } 2656 } 2657 mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num); 2658 } 2659 2660 static int 2661 vnet_addmac(void *arg, const uint8_t *mac_addr) 2662 { 2663 vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg; 2664 vnet_t *vnetp; 2665 2666 vnetp = rx_grp->vnetp; 2667 2668 if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) { 2669 return (0); 2670 } 2671 2672 cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n", 2673 vnetp->instance, __func__); 2674 return (EINVAL); 2675 } 2676 2677 static int 2678 vnet_remmac(void *arg, const uint8_t *mac_addr) 2679 { 2680 vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg; 2681 vnet_t *vnetp; 2682 2683 vnetp = rx_grp->vnetp; 2684 2685 if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) { 2686 return (0); 2687 } 2688 2689 cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n", 2690 vnetp->instance, __func__, ether_sprintf((void *)mac_addr)); 2691 return (EINVAL); 2692 } 2693 2694 int 2695 vnet_hio_mac_init(vnet_t *vnetp, char *ifname) 2696 { 2697 mac_handle_t mh; 2698 mac_client_handle_t mch = NULL; 2699 mac_unicast_handle_t muh = NULL; 2700 mac_diag_t diag; 2701 mac_register_t *macp; 2702 char client_name[MAXNAMELEN]; 2703 int rv; 2704 uint16_t mac_flags = MAC_UNICAST_TAG_DISABLE | 2705 MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY; 2706 vio_net_callbacks_t vcb; 2707 ether_addr_t rem_addr = 2708 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 2709 uint32_t retries = 0; 2710 2711 if ((macp = mac_alloc(MAC_VERSION)) == NULL) { 2712 return (EAGAIN); 2713 } 2714 2715 do { 2716 rv = mac_open_by_linkname(ifname, &mh); 2717 if (rv == 0) { 2718 break; 2719 } 2720 if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) { 2721 mac_free(macp); 2722 return (rv); 2723 } 2724 drv_usecwait(vnet_mac_open_delay); 2725 } while (rv == ENOENT); 2726 2727 vnetp->hio_mh = mh; 2728 2729 (void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance, 2730 ifname); 2731 rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE); 2732 if (rv != 0) { 2733 goto fail; 2734 } 2735 vnetp->hio_mch = mch; 2736 2737 rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0, 2738 &diag); 2739 if (rv != 0) { 2740 goto fail; 2741 } 2742 vnetp->hio_muh = muh; 2743 2744 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 2745 macp->m_driver = vnetp; 2746 macp->m_dip = NULL; 2747 macp->m_src_addr = NULL; 2748 macp->m_callbacks = &vnet_hio_res_callbacks; 2749 macp->m_min_sdu = 0; 2750 macp->m_max_sdu = ETHERMTU; 2751 2752 rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID, 2753 vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb); 2754 if (rv != 0) { 2755 goto fail; 2756 } 2757 mac_free(macp); 2758 2759 /* add the recv callback */ 2760 mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp); 2761 2762 return (0); 2763 2764 fail: 2765 mac_free(macp); 2766 vnet_hio_mac_cleanup(vnetp); 2767 return (1); 2768 } 2769 2770 void 2771 vnet_hio_mac_cleanup(vnet_t *vnetp) 2772 { 2773 if (vnetp->hio_vhp != NULL) { 2774 vio_net_resource_unreg(vnetp->hio_vhp); 2775 vnetp->hio_vhp = NULL; 2776 } 2777 2778 if (vnetp->hio_muh != NULL) { 2779 (void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh); 2780 vnetp->hio_muh = NULL; 2781 } 2782 2783 if (vnetp->hio_mch != NULL) { 2784 mac_client_close(vnetp->hio_mch, 0); 2785 vnetp->hio_mch = NULL; 2786 } 2787 2788 if (vnetp->hio_mh != NULL) { 2789 mac_close(vnetp->hio_mh); 2790 vnetp->hio_mh = NULL; 2791 } 2792 } 2793 2794 /* Bind pseudo rings to hwrings */ 2795 static int 2796 vnet_bind_hwrings(vnet_t *vnetp) 2797 { 2798 mac_ring_handle_t hw_rh[VNET_NUM_HYBRID_RINGS]; 2799 mac_perim_handle_t mph1; 2800 vnet_pseudo_rx_group_t *rx_grp; 2801 vnet_pseudo_rx_ring_t *rx_ringp; 2802 vnet_pseudo_tx_group_t *tx_grp; 2803 vnet_pseudo_tx_ring_t *tx_ringp; 2804 int hw_ring_cnt; 2805 int i; 2806 int rv; 2807 2808 mac_perim_enter_by_mh(vnetp->hio_mh, &mph1); 2809 2810 /* Get the list of the underlying RX rings. */ 2811 hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh, 2812 MAC_RING_TYPE_RX); 2813 2814 /* We expect the the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */ 2815 if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) { 2816 cmn_err(CE_WARN, 2817 "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n", 2818 vnetp->instance, hw_ring_cnt); 2819 goto fail; 2820 } 2821 2822 if (vnetp->rx_hwgh != NULL) { 2823 /* 2824 * Quiesce the HW ring and the mac srs on the ring. Note 2825 * that the HW ring will be restarted when the pseudo ring 2826 * is started. At that time all the packets will be 2827 * directly passed up to the pseudo RX ring and handled 2828 * by mac srs created over the pseudo RX ring. 2829 */ 2830 mac_rx_client_quiesce(vnetp->hio_mch); 2831 mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE); 2832 } 2833 2834 /* 2835 * Bind the pseudo rings to the hwrings and start the hwrings. 2836 * Note we don't need to register these with the upper mac, as we have 2837 * statically exported these pseudo rxrings which are reserved for 2838 * rxrings of Hybrid resource. 2839 */ 2840 rx_grp = &vnetp->rx_grp[0]; 2841 for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { 2842 /* Pick the rxrings reserved for Hybrid resource */ 2843 rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX]; 2844 2845 /* Store the hw ring handle */ 2846 rx_ringp->hw_rh = hw_rh[i]; 2847 2848 /* Bind the pseudo ring to the underlying hwring */ 2849 mac_hwring_setup(rx_ringp->hw_rh, 2850 (mac_resource_handle_t)rx_ringp, NULL); 2851 2852 /* Start the hwring if needed */ 2853 if (rx_ringp->state & VNET_RXRING_STARTED) { 2854 rv = mac_hwring_start(rx_ringp->hw_rh); 2855 if (rv != 0) { 2856 mac_hwring_teardown(rx_ringp->hw_rh); 2857 rx_ringp->hw_rh = NULL; 2858 goto fail; 2859 } 2860 } 2861 } 2862 2863 /* Get the list of the underlying TX rings. */ 2864 hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh, 2865 MAC_RING_TYPE_TX); 2866 2867 /* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */ 2868 if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) { 2869 cmn_err(CE_WARN, 2870 "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n", 2871 vnetp->instance, hw_ring_cnt); 2872 goto fail; 2873 } 2874 2875 /* 2876 * Now map the pseudo txrings to the hw txrings. Note we don't need 2877 * to register these with the upper mac, as we have statically exported 2878 * these rings. Note that these rings will continue to be used for LDC 2879 * resources to peer vnets and vswitch (shared ring). 2880 */ 2881 tx_grp = &vnetp->tx_grp[0]; 2882 for (i = 0; i < tx_grp->ring_cnt; i++) { 2883 tx_ringp = &tx_grp->rings[i]; 2884 tx_ringp->hw_rh = hw_rh[i]; 2885 tx_ringp->state |= VNET_TXRING_HYBRID; 2886 } 2887 tx_grp->tx_notify_handle = 2888 mac_client_tx_notify(vnetp->hio_mch, vnet_tx_ring_update, vnetp); 2889 2890 mac_perim_exit(mph1); 2891 return (0); 2892 2893 fail: 2894 mac_perim_exit(mph1); 2895 vnet_unbind_hwrings(vnetp); 2896 return (1); 2897 } 2898 2899 /* Unbind pseudo rings from hwrings */ 2900 static void 2901 vnet_unbind_hwrings(vnet_t *vnetp) 2902 { 2903 mac_perim_handle_t mph1; 2904 vnet_pseudo_rx_ring_t *rx_ringp; 2905 vnet_pseudo_rx_group_t *rx_grp; 2906 vnet_pseudo_tx_group_t *tx_grp; 2907 vnet_pseudo_tx_ring_t *tx_ringp; 2908 int i; 2909 2910 mac_perim_enter_by_mh(vnetp->hio_mh, &mph1); 2911 2912 tx_grp = &vnetp->tx_grp[0]; 2913 for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { 2914 tx_ringp = &tx_grp->rings[i]; 2915 if (tx_ringp->state & VNET_TXRING_HYBRID) { 2916 tx_ringp->state &= ~VNET_TXRING_HYBRID; 2917 tx_ringp->hw_rh = NULL; 2918 } 2919 } 2920 (void) mac_client_tx_notify(vnetp->hio_mch, NULL, 2921 tx_grp->tx_notify_handle); 2922 2923 rx_grp = &vnetp->rx_grp[0]; 2924 for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { 2925 rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX]; 2926 if (rx_ringp->hw_rh != NULL) { 2927 /* Stop the hwring */ 2928 mac_hwring_stop(rx_ringp->hw_rh); 2929 2930 /* Teardown the hwring */ 2931 mac_hwring_teardown(rx_ringp->hw_rh); 2932 rx_ringp->hw_rh = NULL; 2933 } 2934 } 2935 2936 if (vnetp->rx_hwgh != NULL) { 2937 vnetp->rx_hwgh = NULL; 2938 /* 2939 * First clear the permanent-quiesced flag of the RX srs then 2940 * restart the HW ring and the mac srs on the ring. 2941 */ 2942 mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE); 2943 mac_rx_client_restart(vnetp->hio_mch); 2944 } 2945 2946 mac_perim_exit(mph1); 2947 } 2948 2949 /* Bind pseudo ring to a LDC resource */ 2950 static int 2951 vnet_bind_vgenring(vnet_res_t *vresp) 2952 { 2953 vnet_t *vnetp; 2954 vnet_pseudo_rx_group_t *rx_grp; 2955 vnet_pseudo_rx_ring_t *rx_ringp; 2956 mac_perim_handle_t mph1; 2957 int rv; 2958 int type; 2959 2960 vnetp = vresp->vnetp; 2961 type = vresp->type; 2962 rx_grp = &vnetp->rx_grp[0]; 2963 2964 if (type == VIO_NET_RES_LDC_SERVICE) { 2965 /* 2966 * Ring Index 0 is the default ring in the group and is 2967 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring 2968 * is allocated statically and is reported to the mac layer 2969 * in vnet_m_capab(). So, all we need to do here, is save a 2970 * reference to the associated vresp. 2971 */ 2972 rx_ringp = &rx_grp->rings[0]; 2973 rx_ringp->hw_rh = (mac_ring_handle_t)vresp; 2974 vresp->rx_ringp = (void *)rx_ringp; 2975 return (0); 2976 } 2977 ASSERT(type == VIO_NET_RES_LDC_GUEST); 2978 2979 mac_perim_enter_by_mh(vnetp->mh, &mph1); 2980 2981 rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp); 2982 if (rx_ringp == NULL) { 2983 cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring", 2984 vnetp->instance); 2985 goto fail; 2986 } 2987 2988 /* Store the LDC resource itself as the ring handle */ 2989 rx_ringp->hw_rh = (mac_ring_handle_t)vresp; 2990 2991 /* 2992 * Save a reference to the ring in the resource for lookup during 2993 * unbind. Note this is only done for LDC resources. We don't need this 2994 * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its 2995 * rx rings are mapped to reserved pseudo rx rings (index 1 and 2). 2996 */ 2997 vresp->rx_ringp = (void *)rx_ringp; 2998 rx_ringp->state |= VNET_RXRING_LDC_GUEST; 2999 3000 /* Register the pseudo ring with upper-mac */ 3001 rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index); 3002 if (rv != 0) { 3003 rx_ringp->state &= ~VNET_RXRING_LDC_GUEST; 3004 rx_ringp->hw_rh = NULL; 3005 vnet_free_pseudo_rx_ring(vnetp, rx_ringp); 3006 goto fail; 3007 } 3008 3009 mac_perim_exit(mph1); 3010 return (0); 3011 fail: 3012 mac_perim_exit(mph1); 3013 return (1); 3014 } 3015 3016 /* Unbind pseudo ring from a LDC resource */ 3017 static void 3018 vnet_unbind_vgenring(vnet_res_t *vresp) 3019 { 3020 vnet_t *vnetp; 3021 vnet_pseudo_rx_group_t *rx_grp; 3022 vnet_pseudo_rx_ring_t *rx_ringp; 3023 mac_perim_handle_t mph1; 3024 int type; 3025 3026 vnetp = vresp->vnetp; 3027 type = vresp->type; 3028 rx_grp = &vnetp->rx_grp[0]; 3029 3030 if (vresp->rx_ringp == NULL) { 3031 return; 3032 } 3033 3034 if (type == VIO_NET_RES_LDC_SERVICE) { 3035 /* 3036 * Ring Index 0 is the default ring in the group and is 3037 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring 3038 * is allocated statically and is reported to the mac layer 3039 * in vnet_m_capab(). So, all we need to do here, is remove its 3040 * reference to the associated vresp. 3041 */ 3042 rx_ringp = &rx_grp->rings[0]; 3043 rx_ringp->hw_rh = NULL; 3044 vresp->rx_ringp = NULL; 3045 return; 3046 } 3047 ASSERT(type == VIO_NET_RES_LDC_GUEST); 3048 3049 mac_perim_enter_by_mh(vnetp->mh, &mph1); 3050 3051 rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp; 3052 vresp->rx_ringp = NULL; 3053 3054 if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) { 3055 /* Unregister the pseudo ring with upper-mac */ 3056 mac_group_rem_ring(rx_grp->handle, rx_ringp->handle); 3057 3058 rx_ringp->hw_rh = NULL; 3059 rx_ringp->state &= ~VNET_RXRING_LDC_GUEST; 3060 3061 /* Free the pseudo rx ring */ 3062 vnet_free_pseudo_rx_ring(vnetp, rx_ringp); 3063 } 3064 3065 mac_perim_exit(mph1); 3066 } 3067 3068 static void 3069 vnet_unbind_rings(vnet_res_t *vresp) 3070 { 3071 switch (vresp->type) { 3072 3073 case VIO_NET_RES_LDC_SERVICE: 3074 case VIO_NET_RES_LDC_GUEST: 3075 vnet_unbind_vgenring(vresp); 3076 break; 3077 3078 case VIO_NET_RES_HYBRID: 3079 vnet_unbind_hwrings(vresp->vnetp); 3080 break; 3081 3082 default: 3083 break; 3084 3085 } 3086 } 3087 3088 static int 3089 vnet_bind_rings(vnet_res_t *vresp) 3090 { 3091 int rv; 3092 3093 switch (vresp->type) { 3094 3095 case VIO_NET_RES_LDC_SERVICE: 3096 case VIO_NET_RES_LDC_GUEST: 3097 rv = vnet_bind_vgenring(vresp); 3098 break; 3099 3100 case VIO_NET_RES_HYBRID: 3101 rv = vnet_bind_hwrings(vresp->vnetp); 3102 break; 3103 3104 default: 3105 rv = 1; 3106 break; 3107 3108 } 3109 3110 return (rv); 3111 } 3112 3113 /* ARGSUSED */ 3114 int 3115 vnet_hio_stat(void *arg, uint_t stat, uint64_t *val) 3116 { 3117 vnet_t *vnetp = (vnet_t *)arg; 3118 3119 *val = mac_stat_get(vnetp->hio_mh, stat); 3120 return (0); 3121 } 3122 3123 /* 3124 * The start() and stop() routines for the Hybrid resource below, are just 3125 * dummy functions. This is provided to avoid resource type specific code in 3126 * vnet_start_resources() and vnet_stop_resources(). The starting and stopping 3127 * of the Hybrid resource happens in the context of the mac_client interfaces 3128 * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup(). 3129 */ 3130 /* ARGSUSED */ 3131 static int 3132 vnet_hio_start(void *arg) 3133 { 3134 return (0); 3135 } 3136 3137 /* ARGSUSED */ 3138 static void 3139 vnet_hio_stop(void *arg) 3140 { 3141 } 3142 3143 mblk_t * 3144 vnet_hio_tx(void *arg, mblk_t *mp) 3145 { 3146 vnet_pseudo_tx_ring_t *tx_ringp; 3147 mblk_t *nextp; 3148 mblk_t *ret_mp; 3149 3150 tx_ringp = (vnet_pseudo_tx_ring_t *)arg; 3151 for (;;) { 3152 nextp = mp->b_next; 3153 mp->b_next = NULL; 3154 3155 ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp); 3156 if (ret_mp != NULL) { 3157 ret_mp->b_next = nextp; 3158 mp = ret_mp; 3159 break; 3160 } 3161 3162 if ((mp = nextp) == NULL) 3163 break; 3164 } 3165 return (mp); 3166 } 3167 3168 #ifdef VNET_IOC_DEBUG 3169 3170 /* 3171 * The ioctl entry point is used only for debugging for now. The ioctl commands 3172 * can be used to force the link state of the channel connected to vsw. 3173 */ 3174 static void 3175 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 3176 { 3177 struct iocblk *iocp; 3178 vnet_t *vnetp; 3179 3180 iocp = (struct iocblk *)(uintptr_t)mp->b_rptr; 3181 iocp->ioc_error = 0; 3182 vnetp = (vnet_t *)arg; 3183 3184 if (vnetp == NULL) { 3185 miocnak(q, mp, 0, EINVAL); 3186 return; 3187 } 3188 3189 switch (iocp->ioc_cmd) { 3190 3191 case VNET_FORCE_LINK_DOWN: 3192 case VNET_FORCE_LINK_UP: 3193 vnet_force_link_state(vnetp, q, mp); 3194 break; 3195 3196 default: 3197 iocp->ioc_error = EINVAL; 3198 miocnak(q, mp, 0, iocp->ioc_error); 3199 break; 3200 3201 } 3202 } 3203 3204 static void 3205 vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp) 3206 { 3207 mac_register_t *macp; 3208 mac_callbacks_t *cbp; 3209 vnet_res_t *vresp; 3210 3211 READ_ENTER(&vnetp->vsw_fp_rw); 3212 3213 vresp = vnetp->vsw_fp; 3214 if (vresp == NULL) { 3215 RW_EXIT(&vnetp->vsw_fp_rw); 3216 return; 3217 } 3218 3219 macp = &vresp->macreg; 3220 cbp = macp->m_callbacks; 3221 cbp->mc_ioctl(macp->m_driver, q, mp); 3222 3223 RW_EXIT(&vnetp->vsw_fp_rw); 3224 } 3225 3226 #else 3227 3228 static void 3229 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 3230 { 3231 vnet_t *vnetp; 3232 3233 vnetp = (vnet_t *)arg; 3234 3235 if (vnetp == NULL) { 3236 miocnak(q, mp, 0, EINVAL); 3237 return; 3238 } 3239 3240 /* ioctl support only for debugging */ 3241 miocnak(q, mp, 0, ENOTSUP); 3242 } 3243 3244 #endif 3245