/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/stream.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/debug.h>
#include <sys/ethernet.h>
#include <sys/dlpi.h>
#include <net/if.h>
#include <sys/mac_provider.h>
#include <sys/mac_client.h>
#include <sys/mac_client_priv.h>
#include <sys/mac_ether.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/note.h>
#include <sys/atomic.h>
#include <sys/vnet.h>
#include <sys/vlan.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/dds.h>
#include <sys/strsubr.h>
#include <sys/taskq.h>
/*
 * Function prototypes.
 */

/* DDI entrypoints */
static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);

/* MAC entrypoints */
static int vnet_m_stat(void *, uint_t, uint64_t *);
static int vnet_m_start(void *);
static void vnet_m_stop(void *);
static int vnet_m_promisc(void *, boolean_t);
static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
static int vnet_m_unicst(void *, const uint8_t *);
mblk_t *vnet_m_tx(void *, mblk_t *);
static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
#ifdef VNET_IOC_DEBUG
static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
#endif
static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
    const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
    mac_group_info_t *infop, mac_group_handle_t handle);
static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
static int vnet_ring_enable_intr(void *arg);
static int vnet_ring_disable_intr(void *arg);
static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
static int vnet_addmac(void *arg, const uint8_t *mac_addr);
static int vnet_remmac(void *arg, const uint8_t *mac_addr);

/* vnet internal functions */
static int vnet_unattach(vnet_t *vnetp);
static void vnet_ring_grp_init(vnet_t *vnetp);
static void vnet_ring_grp_uninit(vnet_t *vnetp);
static int vnet_mac_register(vnet_t *);
static int vnet_read_mac_address(vnet_t *vnetp);
static int vnet_bind_vgenring(vnet_res_t *vresp);
static void vnet_unbind_vgenring(vnet_res_t *vresp);
static int vnet_bind_hwrings(vnet_t *vnetp);
static void vnet_unbind_hwrings(vnet_t *vnetp);
static int vnet_bind_rings(vnet_res_t *vresp);
static void vnet_unbind_rings(vnet_res_t *vresp);
static int vnet_hio_stat(void *, uint_t, uint64_t *);
static int vnet_hio_start(void *);
static void vnet_hio_stop(void *);
static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type);
mblk_t *vnet_hio_tx(void *, mblk_t *);

/* Forwarding database (FDB) routines */
static void vnet_fdb_create(vnet_t *vnetp);
static void vnet_fdb_destroy(vnet_t *vnetp);
static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);

static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
static void vnet_tx_update(vio_net_handle_t vrh);
static void vnet_res_start_task(void *arg);
static void vnet_start_resources(vnet_t *vnetp);
static void vnet_stop_resources(vnet_t *vnetp);
static void vnet_dispatch_res_task(vnet_t *vnetp);
static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
/* Exported to vnet_gen */
int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
void vnet_dds_cleanup_hio(vnet_t *vnetp);

static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
    vnet_res_t *vresp);
static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
static void vnet_hio_destroy_kstats(kstat_t *ksp);

/* Exported to vnet_dds */
int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
void vnet_hio_mac_cleanup(vnet_t *vnetp);

/* Externs that are imported from vnet_gen */
extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
    const uint8_t *macaddr, void **vgenhdl);
extern int vgen_init_mdeg(void *arg);
extern void vgen_uninit(void *arg);
extern int vgen_dds_tx(void *arg, void *dmsg);
extern void vgen_mod_init(void);
extern int vgen_mod_cleanup(void);
extern void vgen_mod_fini(void);
extern int vgen_enable_intr(void *arg);
extern int vgen_disable_intr(void *arg);
extern mblk_t *vgen_poll(void *arg, int bytes_to_pickup);

/* Externs that are imported from vnet_dds */
extern void vdds_mod_init(void);
extern void vdds_mod_fini(void);
extern int vdds_init(vnet_t *vnetp);
extern void vdds_cleanup(vnet_t *vnetp);
extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
extern void vdds_cleanup_hybrid_res(void *arg);
extern void vdds_cleanup_hio(vnet_t *vnetp);

/* Externs imported from mac_impl */
extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *);

#define	DRV_NAME	"vnet"

#define	VNET_FDBE_REFHOLD(p)			\
{						\
	atomic_inc_32(&(p)->refcnt);		\
	ASSERT((p)->refcnt != 0);		\
}

#define	VNET_FDBE_REFRELE(p)			\
{						\
	ASSERT((p)->refcnt != 0);		\
	atomic_dec_32(&(p)->refcnt);		\
}
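
/*
 * Reference counting protocol (an illustrative note; the pattern below
 * is what the existing callers do): any code that looks up an fdb entry
 * must hold a reference across its use of the entry and drop it when
 * done, e.g.:
 *
 *	vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);  (takes a hold)
 *	... invoke vresp->macreg callbacks ...
 *	VNET_FDBE_REFRELE(vresp);
 *
 * vnet_fdbe_del() depends on this: it first removes the entry from the
 * hash, then waits for refcnt to drain before the entry is freed.
 */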
#ifdef VNET_IOC_DEBUG
#define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL | MC_GETCAPAB)
#else
#define	VNET_M_CALLBACK_FLAGS	(MC_GETCAPAB)
#endif

static mac_callbacks_t vnet_m_callbacks = {
	VNET_M_CALLBACK_FLAGS,
	vnet_m_stat,
	vnet_m_start,
	vnet_m_stop,
	vnet_m_promisc,
	vnet_m_multicst,
	NULL,	/* m_unicst entry must be NULL while rx rings are exposed */
	NULL,	/* m_tx entry must be NULL while tx rings are exposed */
	vnet_m_ioctl,
	vnet_m_capab,
	NULL
};

static mac_callbacks_t vnet_hio_res_callbacks = {
	0,
	vnet_hio_stat,
	vnet_hio_start,
	vnet_hio_stop,
	NULL,
	NULL,
	NULL,
	vnet_hio_tx,
	NULL,
	NULL,
	NULL
};

/*
 * Linked list of "vnet_t" structures - one per instance.
 */
static vnet_t	*vnet_headp = NULL;
static krwlock_t vnet_rw;

/* Tunables */
uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT; /* tx timeout in msec */
uint32_t vnet_ldc_mtu = VNET_LDC_MTU;	/* ldc mtu */

/* Configure tx serialization in mac layer for the vnet device */
boolean_t vnet_mac_tx_serialize = B_TRUE;

/*
 * Set this to non-zero to enable additional internal receive buffer pools
 * based on the MTU of the device for better performance at the cost of more
 * memory consumption. This is turned off by default, to use allocb(9F) for
 * receive buffer allocations of sizes > 2K.
 */
boolean_t vnet_jumbo_rxpools = B_FALSE;

/* # of chains in fdb hash table */
uint32_t vnet_fdb_nchains = VNET_NFDB_HASH;

/* Internal tunables */
uint32_t vnet_ethermtu = 1500;	/* mtu of the device */

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in vsw and also other vnets connected to
 * the same vsw.
 */
uint16_t vnet_default_vlan_id = 1;

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vnet_fdbe_refcnt_delay = 10;

static struct ether_addr etherbroadcastaddr = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

/* mac_open() retry delay in usec */
uint32_t vnet_mac_open_delay = 100;	/* 0.1 ms */

/* max # of mac_open() retries */
uint32_t vnet_mac_open_retries = 100;

/*
 * Property names
 */
static char macaddr_propname[] = "local-mac-address";

/*
 * This is the string displayed by modinfo(1m).
 */
static char vnet_ident[] = "vnet driver";

extern struct mod_ops mod_driverops;

static struct cb_ops cb_vnetops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	(int)(D_MP)		/* cb_flag */
};

static struct dev_ops vnetops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vnetattach,		/* devo_attach */
	vnetdetach,		/* devo_detach */
	nodev,			/* devo_reset */
	&cb_vnetops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_supported,	/* devo_quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	vnet_ident,		/* ID string */
	&vnetops		/* driver specific ops */
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};
#ifdef DEBUG

/*
 * Print debug messages - set to 0xf to enable all msgs
 */
int vnet_dbglevel = 0x8;

static void
debug_printf(const char *fname, void *arg, const char *fmt, ...)
{
	char	buf[512];
	va_list	ap;
	vnet_t	*vnetp = (vnet_t *)arg;
	char	*bufp = buf;

	/*
	 * Use the bounded string routines so an over-long message is
	 * truncated rather than overrunning buf[].
	 */
	if (vnetp == NULL) {
		(void) snprintf(bufp, sizeof (buf), "%s: ", fname);
		bufp += strlen(bufp);
	} else {
		(void) snprintf(bufp, sizeof (buf), "vnet%d:%s: ",
		    vnetp->instance, fname);
		bufp += strlen(bufp);
	}
	va_start(ap, fmt);
	(void) vsnprintf(bufp, sizeof (buf) - (bufp - buf), fmt, ap);
	va_end(ap);
	cmn_err(CE_CONT, "%s\n", buf);
}

#endif

/* _init(9E): initialize the loadable module */
int
_init(void)
{
	int status;

	DBG1(NULL, "enter\n");

	mac_init_ops(&vnetops, "vnet");
	status = mod_install(&modlinkage);
	if (status != 0) {
		mac_fini_ops(&vnetops);
	}
	vdds_mod_init();
	vgen_mod_init();
	DBG1(NULL, "exit(%d)\n", status);
	return (status);
}

/* _fini(9E): prepare the module for unloading. */
int
_fini(void)
{
	int status;

	DBG1(NULL, "enter\n");

	status = vgen_mod_cleanup();
	if (status != 0)
		return (status);

	status = mod_remove(&modlinkage);
	if (status != 0)
		return (status);
	mac_fini_ops(&vnetops);
	vgen_mod_fini();
	vdds_mod_fini();

	DBG1(NULL, "exit(%d)\n", status);
	return (status);
}

/* _info(9E): return information about the loadable module */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
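
/*
 * A note on the ordering above (reading aid only): _init() registers the
 * mac ops and installs the module before initializing the vdds and vgen
 * support code, while _fini() first lets vgen_mod_cleanup() veto the
 * unload and then reverses the remaining steps; mod_remove() will also
 * fail while the driver is still in use.
 */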
/*
 * attach(9E): attach a device to the system.
 * called once for each instance of the device on the system.
 */
static int
vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	vnet_t			*vnetp;
	int			status;
	int			instance;
	uint64_t		reg;
	char			qname[TASKQ_NAMELEN];
	vnet_attach_progress_t	attach_progress;

	attach_progress = AST_init;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
	case DDI_PM_RESUME:
	default:
		/* vnetp has not been allocated yet; nothing to unwind */
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	DBG1(NULL, "instance(%d) enter\n", instance);

	/* allocate vnet_t and mac_t structures */
	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
	vnetp->dip = dip;
	vnetp->instance = instance;
	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
	attach_progress |= AST_vnet_alloc;

	vnet_ring_grp_init(vnetp);
	attach_progress |= AST_ring_init;

	status = vdds_init(vnetp);
	if (status != 0) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_vdds_init;

	/* setup links to vnet_t from both devinfo and mac_t */
	ddi_set_driver_private(dip, (caddr_t)vnetp);

	/* read the mac address */
	status = vnet_read_mac_address(vnetp);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_read_macaddr;

	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "reg", -1);
	if (reg == -1) {
		goto vnet_attach_fail;
	}
	vnetp->reg = reg;

	vnet_fdb_create(vnetp);
	attach_progress |= AST_fdbh_alloc;

	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
	    TASKQ_DEFAULTPRI, 0)) == NULL) {
		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
		    instance);
		goto vnet_attach_fail;
	}
	attach_progress |= AST_taskq_create;

	/* add to the list of vnet devices */
	WRITE_ENTER(&vnet_rw);
	vnetp->nextp = vnet_headp;
	vnet_headp = vnetp;
	RW_EXIT(&vnet_rw);

	attach_progress |= AST_vnet_list;

	/*
	 * Initialize the generic vnet plugin which provides communication via
	 * sun4v LDC (logical domain channel) based resources. This involves 2
	 * steps; first, vgen_init() is invoked to read the various properties
	 * of the vnet device from its MD node (including its mtu which is
	 * needed to mac_register()) and obtain a handle to the vgen layer.
	 * After mac_register() is done and we have a mac handle, we then
	 * invoke vgen_init_mdeg() which registers with the MD event
	 * generator (mdeg) framework to allow LDC resource notifications.
	 * Note: this sequence also allows us to report the correct default #
	 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked
	 * in the context of mac_register(); and avoids conflicting with
	 * dynamic pseudo rx rings which get added/removed as a result of mdeg
	 * events in vgen.
	 */
	status = vgen_init(vnetp, reg, vnetp->dip,
	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
	if (status != DDI_SUCCESS) {
		DERR(vnetp, "vgen_init() failed\n");
		goto vnet_attach_fail;
	}
	attach_progress |= AST_vgen_init;

	status = vnet_mac_register(vnetp);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	vnetp->link_state = LINK_STATE_UNKNOWN;
	attach_progress |= AST_macreg;

	status = vgen_init_mdeg(vnetp->vgenhdl);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_init_mdeg;

	vnetp->attach_progress = attach_progress;

	DBG1(NULL, "instance(%d) exit\n", instance);
	return (DDI_SUCCESS);

vnet_attach_fail:
	vnetp->attach_progress = attach_progress;
	status = vnet_unattach(vnetp);
	ASSERT(status == 0);
	return (DDI_FAILURE);
}

/*
 * detach(9E): detach a device from the system.
 */
static int
vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vnet_t	*vnetp;
	int	instance;

	instance = ddi_get_instance(dip);
	DBG1(NULL, "instance(%d) enter\n", instance);

	vnetp = ddi_get_driver_private(dip);
	if (vnetp == NULL) {
		goto vnet_detach_fail;
	}

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
	case DDI_PM_SUSPEND:
	default:
		goto vnet_detach_fail;
	}

	if (vnet_unattach(vnetp) != 0) {
		goto vnet_detach_fail;
	}

	return (DDI_SUCCESS);

vnet_detach_fail:
	return (DDI_FAILURE);
}
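
/*
 * The attach_progress bits used by vnetattach()/vnet_unattach() form a
 * simple unwind protocol (sketched here from the code above and below):
 *
 *	attach order:	AST_vnet_alloc -> AST_ring_init -> AST_vdds_init ->
 *			AST_read_macaddr -> AST_fdbh_alloc ->
 *			AST_taskq_create -> AST_vnet_list -> AST_vgen_init ->
 *			AST_macreg -> AST_init_mdeg
 *	unattach:	tests each bit and tears down only what completed,
 *			clearing the bit as it goes.
 *
 * This is what lets the attach failure path and detach(9E) share
 * vnet_unattach() below.
 */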
/*
 * Common routine to handle vnetattach() failure and vnetdetach(). Note that
 * the only reason this function could fail is if mac_unregister() fails.
 * Otherwise, this function must ensure that all resources are freed and return
 * success.
 */
static int
vnet_unattach(vnet_t *vnetp)
{
	vnet_attach_progress_t	attach_progress;

	attach_progress = vnetp->attach_progress;

	/*
	 * Disable the mac device in the gldv3 subsystem. This can fail, in
	 * particular if there are still any open references to this mac
	 * device; in which case we just return failure without continuing to
	 * detach further.
	 * If it succeeds, we then invoke vgen_uninit() which should unregister
	 * any pseudo rings registered with the mac layer. Note we keep the
	 * AST_macreg flag on, so we can unregister with the mac layer at
	 * the end of this routine.
	 */
	if (attach_progress & AST_macreg) {
		if (mac_disable(vnetp->mh) != 0) {
			return (1);
		}
	}

	/*
	 * Now that we have disabled the device, we must finish all other steps
	 * and successfully return from this function; otherwise we will end up
	 * leaving the device in a broken/unusable state.
	 *
	 * First, release any hybrid resources assigned to this vnet device.
	 */
	if (attach_progress & AST_vdds_init) {
		vdds_cleanup(vnetp);
		attach_progress &= ~AST_vdds_init;
	}

	/*
	 * Uninit vgen. This stops further mdeg callbacks to this vnet
	 * device and/or its ports; and detaches any existing ports.
	 */
	if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
		vgen_uninit(vnetp->vgenhdl);
		attach_progress &= ~AST_vgen_init;
		attach_progress &= ~AST_init_mdeg;
	}

	/* Destroy the taskq. */
	if (attach_progress & AST_taskq_create) {
		ddi_taskq_destroy(vnetp->taskqp);
		attach_progress &= ~AST_taskq_create;
	}

	/* Destroy fdb. */
	if (attach_progress & AST_fdbh_alloc) {
		vnet_fdb_destroy(vnetp);
		attach_progress &= ~AST_fdbh_alloc;
	}

	/* Remove from the device list */
	if (attach_progress & AST_vnet_list) {
		vnet_t	**vnetpp;
		/* unlink from instance(vnet_t) list */
		WRITE_ENTER(&vnet_rw);
		for (vnetpp = &vnet_headp; *vnetpp;
		    vnetpp = &(*vnetpp)->nextp) {
			if (*vnetpp == vnetp) {
				*vnetpp = vnetp->nextp;
				break;
			}
		}
		RW_EXIT(&vnet_rw);
		attach_progress &= ~AST_vnet_list;
	}

	if (attach_progress & AST_ring_init) {
		vnet_ring_grp_uninit(vnetp);
		attach_progress &= ~AST_ring_init;
	}

	if (attach_progress & AST_macreg) {
		VERIFY(mac_unregister(vnetp->mh) == 0);
		vnetp->mh = NULL;
		attach_progress &= ~AST_macreg;
	}

	if (attach_progress & AST_vnet_alloc) {
		rw_destroy(&vnetp->vrwlock);
		rw_destroy(&vnetp->vsw_fp_rw);
		attach_progress &= ~AST_vnet_alloc;
		KMEM_FREE(vnetp);
	}

	return (0);
}

/* enable the device for transmit/receive */
static int
vnet_m_start(void *arg)
{
	vnet_t	*vnetp = arg;

	DBG1(vnetp, "enter\n");

	WRITE_ENTER(&vnetp->vrwlock);
	vnetp->flags |= VNET_STARTED;
	vnet_start_resources(vnetp);
	RW_EXIT(&vnetp->vrwlock);

	DBG1(vnetp, "exit\n");
	return (VNET_SUCCESS);
}

/* stop transmit/receive for the device */
static void
vnet_m_stop(void *arg)
{
	vnet_t	*vnetp = arg;

	DBG1(vnetp, "enter\n");

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->flags & VNET_STARTED) {
		/*
		 * Set the flags appropriately; this should prevent starting
		 * of any new resources that are added (see
		 * vnet_res_start_task()), while we release the vrwlock in
		 * vnet_stop_resources() before stopping each resource.
		 */
		vnetp->flags &= ~VNET_STARTED;
		vnetp->flags |= VNET_STOPPING;
		vnet_stop_resources(vnetp);
		vnetp->flags &= ~VNET_STOPPING;
	}
	RW_EXIT(&vnetp->vrwlock);

	DBG1(vnetp, "exit\n");
}

/* set the unicast mac address of the device */
static int
vnet_m_unicst(void *arg, const uint8_t *macaddr)
{
	_NOTE(ARGUNUSED(macaddr))

	vnet_t *vnetp = arg;

	DBG1(vnetp, "enter\n");
	/*
	 * NOTE: setting mac address dynamically is not supported.
	 */
	DBG1(vnetp, "exit\n");

	return (VNET_FAILURE);
}
/* enable/disable a multicast address */
static int
vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	vnet_t		*vnetp = arg;
	vnet_res_t	*vresp;
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	int		rv = VNET_SUCCESS;

	DBG1(vnetp, "enter\n");

	READ_ENTER(&vnetp->vsw_fp_rw);
	if (vnetp->vsw_fp == NULL) {
		RW_EXIT(&vnetp->vsw_fp_rw);
		return (EAGAIN);
	}
	VNET_FDBE_REFHOLD(vnetp->vsw_fp);
	RW_EXIT(&vnetp->vsw_fp_rw);

	vresp = vnetp->vsw_fp;
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	rv = cbp->mc_multicst(macp->m_driver, add, mca);

	VNET_FDBE_REFRELE(vnetp->vsw_fp);

	DBG1(vnetp, "exit(%d)\n", rv);
	return (rv);
}

/* set or clear promiscuous mode on the device */
static int
vnet_m_promisc(void *arg, boolean_t on)
{
	_NOTE(ARGUNUSED(on))

	vnet_t *vnetp = arg;
	DBG1(vnetp, "enter\n");
	/*
	 * NOTE: setting promiscuous mode is not supported, just return
	 * success.
	 */
	DBG1(vnetp, "exit\n");
	return (VNET_SUCCESS);
}
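
/*
 * Orientation for the transmit path below (a condensed restatement of
 * the code that follows):
 *
 *	for each packet in the chain:
 *		dest in fdb?		-> tx via that resource's mc_tx()
 *		else unicast && hio_fp?	-> tx via Hybrid resource,
 *					   inserting the pvid tag if needed
 *		else			-> tx to the vswitch via vsw_fp
 *	a resource returning residual mblks (flow control) stops the loop
 *	and the unsent remainder of the chain is handed back to the caller.
 */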
/*
 * Transmit a chain of packets. This function provides switching functionality
 * based on the destination mac address to reach other guests (within ldoms) or
 * external hosts.
 */
mblk_t *
vnet_tx_ring_send(void *arg, mblk_t *mp)
{
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_t			*vnetp;
	vnet_res_t		*vresp;
	mblk_t			*next;
	mblk_t			*resid_mp;
	mac_register_t		*macp;
	struct ether_header	*ehp;
	boolean_t		is_unicast;
	boolean_t		is_pvid;	/* non-default pvid ? */
	boolean_t		hres;		/* Hybrid resource ? */
	void			*tx_arg;

	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
	vnetp = (vnet_t *)tx_ringp->vnetp;
	DBG1(vnetp, "enter\n");
	ASSERT(mp != NULL);

	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;

	while (mp != NULL) {

		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Find fdb entry for the destination
		 * and hold a reference to it.
		 */
		ehp = (struct ether_header *)mp->b_rptr;
		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
		if (vresp != NULL) {
			/*
			 * Destination found in FDB.
			 * The destination is a vnet device within ldoms
			 * and directly reachable, invoke the tx function
			 * in the fdb entry.
			 */
			macp = &vresp->macreg;
			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed */
				mp->b_next = next;
				break;
			}
		} else {
			is_unicast = !(IS_BROADCAST(ehp) ||
			    (IS_MULTICAST(ehp)));
			/*
			 * Destination is not in FDB.
			 * If the destination is broadcast or multicast,
			 * then forward the packet to vswitch.
			 * If a Hybrid resource is available, then send the
			 * unicast packet via hybrid resource, otherwise
			 * forward it to vswitch.
			 */
			READ_ENTER(&vnetp->vsw_fp_rw);

			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
				vresp = vnetp->hio_fp;
				hres = B_TRUE;
			} else {
				vresp = vnetp->vsw_fp;
				hres = B_FALSE;
			}
			if (vresp == NULL) {
				/*
				 * no fdb entry to vsw? drop the packet.
				 */
				RW_EXIT(&vnetp->vsw_fp_rw);
				freemsg(mp);
				mp = next;
				continue;
			}

			/* ref hold the fdb entry to vsw */
			VNET_FDBE_REFHOLD(vresp);

			RW_EXIT(&vnetp->vsw_fp_rw);

			/*
			 * In the case of a hybrid resource we need to insert
			 * the tag for the pvid case here; unlike packets that
			 * are destined to a vnet/vsw in which case the vgen
			 * layer does the tagging before sending it over ldc.
			 */
			if (hres == B_TRUE) {
				/*
				 * Determine if the frame being transmitted
				 * over the hybrid resource is untagged. If so,
				 * insert the tag before transmitting.
				 */
				if (is_pvid == B_TRUE &&
				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {

					mp = vnet_vlan_insert_tag(mp,
					    vnetp->pvid);
					if (mp == NULL) {
						VNET_FDBE_REFRELE(vresp);
						mp = next;
						continue;
					}

				}

				macp = &vresp->macreg;
				tx_arg = tx_ringp;
			} else {
				macp = &vresp->macreg;
				tx_arg = macp->m_driver;
			}
			resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed */
				mp->b_next = next;
				break;
			}
		}

		mp = next;
	}

	DBG1(vnetp, "exit\n");
	return (mp);
}

/* get statistics from the device */
int
vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnet_t		*vnetp = arg;
	vnet_res_t	*vresp;
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	uint64_t	val_total = 0;

	DBG1(vnetp, "enter\n");

	/*
	 * get the specified statistic from each transport and return the
	 * aggregate val. This obviously only works for counters.
	 */
	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
		return (ENOTSUP);
	}

	READ_ENTER(&vnetp->vrwlock);
	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
		macp = &vresp->macreg;
		cbp = macp->m_callbacks;
		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
			val_total += *val;
	}
	RW_EXIT(&vnetp->vrwlock);

	*val = val_total;

	DBG1(vnetp, "exit\n");
	return (0);
}
static void
vnet_ring_grp_init(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			i;

	tx_grp = &vnetp->tx_grp[0];
	tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
	    VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
	for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
		tx_ringp[i].state |= VNET_TXRING_SHARED;
	}
	tx_grp->rings = tx_ringp;
	tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;

	rx_grp = &vnetp->rx_grp[0];
	rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
	rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
	rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
	    rx_grp->max_ring_cnt, KM_SLEEP);

	/*
	 * Setup the first 3 Pseudo RX Rings that are reserved;
	 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
	 */
	rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
	rx_ringp[0].index = 0;
	rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
	rx_ringp[1].index = 1;
	rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
	rx_ringp[2].index = 2;

	rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	rx_grp->rings = rx_ringp;

	for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	    i < rx_grp->max_ring_cnt; i++) {
		rx_ringp = &rx_grp->rings[i];
		rx_ringp->state = VNET_RXRING_FREE;
		rx_ringp->index = i;
	}
}

static void
vnet_ring_grp_uninit(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_tx_group_t	*tx_grp;

	tx_grp = &vnetp->tx_grp[0];
	if (tx_grp->rings != NULL) {
		ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
		kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
		    tx_grp->ring_cnt);
		tx_grp->rings = NULL;
	}

	rx_grp = &vnetp->rx_grp[0];
	if (rx_grp->rings != NULL) {
		ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
		ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
		kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
		    rx_grp->max_ring_cnt);
		rx_grp->rings = NULL;
	}
}

static vnet_pseudo_rx_ring_t *
vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	int			index;

	rx_grp = &vnetp->rx_grp[0];
	WRITE_ENTER(&rx_grp->lock);

	if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
		/* no rings available */
		RW_EXIT(&rx_grp->lock);
		return (NULL);
	}

	for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	    index < rx_grp->max_ring_cnt; index++) {
		rx_ringp = &rx_grp->rings[index];
		if (rx_ringp->state == VNET_RXRING_FREE) {
			rx_ringp->state |= VNET_RXRING_INUSE;
			rx_grp->ring_cnt++;
			break;
		}
	}

	RW_EXIT(&rx_grp->lock);
	return (rx_ringp);
}

static void
vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
{
	vnet_pseudo_rx_group_t	*rx_grp;

	ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
	rx_grp = &vnetp->rx_grp[0];
	WRITE_ENTER(&rx_grp->lock);

	if (ringp->state != VNET_RXRING_FREE) {
		ringp->state = VNET_RXRING_FREE;
		ringp->handle = NULL;
		rx_grp->ring_cnt--;
	}

	RW_EXIT(&rx_grp->lock);
}
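
/*
 * Resulting pseudo RX ring layout, for reference (derived from
 * vnet_ring_grp_init() above):
 *
 *	index 0			reserved: LDC resource to the vswitch
 *	index 1, 2		reserved: Hybrid resource hw rx rings
 *	index 3 .. max-1	allocated/freed dynamically (above) for
 *				LDC resources to peer vnets
 */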
/* wrapper function for mac_register() */
static int
vnet_mac_register(vnet_t *vnetp)
{
	mac_register_t	*macp;
	int		err;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
		return (DDI_FAILURE);
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vnetp;
	macp->m_dip = vnetp->dip;
	macp->m_src_addr = vnetp->curr_macaddr;
	macp->m_callbacks = &vnet_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = vnetp->mtu;
	macp->m_margin = VLAN_TAGSZ;

	/*
	 * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to
	 * work around tx lock contention issues in nxge.
	 */
	macp->m_v12n = MAC_VIRT_LEVEL1;
	if (vnet_mac_tx_serialize == B_TRUE) {
		macp->m_v12n |= MAC_VIRT_SERIALIZE;
	}

	/*
	 * Finally, we're ready to register ourselves with the MAC layer
	 * interface; if this succeeds, we're all ready to start()
	 */
	err = mac_register(macp, &vnetp->mh);
	mac_free(macp);
	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
}

/* read the mac address of the device */
static int
vnet_read_mac_address(vnet_t *vnetp)
{
	uchar_t		*macaddr;
	uint32_t	size;
	int		rv;

	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
		    macaddr_propname, rv);
		return (DDI_FAILURE);
	}
	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
	ddi_prop_free(macaddr);

	return (DDI_SUCCESS);
}

static void
vnet_fdb_create(vnet_t *vnetp)
{
	char	hashname[MAXNAMELEN];

	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
	    vnetp->instance);
	vnetp->fdb_nchains = vnet_fdb_nchains;
	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname,
	    vnetp->fdb_nchains, mod_hash_null_valdtor, sizeof (void *));
}

static void
vnet_fdb_destroy(vnet_t *vnetp)
{
	/* destroy fdb-hash-table */
	if (vnetp->fdb_hashp != NULL) {
		mod_hash_destroy_hash(vnetp->fdb_hashp);
		vnetp->fdb_hashp = NULL;
		vnetp->fdb_nchains = 0;
	}
}

/*
 * Add an entry into the fdb.
 */
void
vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
{
	uint64_t	addr = 0;
	int		rv;

	KEY_HASH(addr, vresp->rem_macaddr);

	/*
	 * If the entry being added corresponds to LDC_SERVICE resource,
	 * that is, vswitch connection, it is added to the hash and also
	 * the entry is cached, an additional reference count reflects
	 * this. The HYBRID resource is not added to the hash, but only
	 * cached, as it is only used for sending out packets for unknown
	 * unicast destinations.
	 */
	vresp->refcnt = (vresp->type == VIO_NET_RES_LDC_SERVICE) ? 1 : 0;

	/*
	 * Note: duplicate keys will be rejected by mod_hash.
	 */
	if (vresp->type != VIO_NET_RES_HYBRID) {
		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
		    (mod_hash_val_t)vresp);
		if (rv != 0) {
			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
			return;
		}
	}

	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
		/* Cache the fdb entry to vsw-port */
		WRITE_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->vsw_fp == NULL)
			vnetp->vsw_fp = vresp;
		RW_EXIT(&vnetp->vsw_fp_rw);
	} else if (vresp->type == VIO_NET_RES_HYBRID) {
		/* Cache the fdb entry to hybrid resource */
		WRITE_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->hio_fp == NULL)
			vnetp->hio_fp = vresp;
		RW_EXIT(&vnetp->vsw_fp_rw);
	}
}
/*
 * Remove an entry from fdb.
 */
static void
vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
{
	uint64_t	addr = 0;
	int		rv;
	uint32_t	refcnt;
	vnet_res_t	*tmp;

	KEY_HASH(addr, vresp->rem_macaddr);

	/*
	 * Remove the entry from fdb hash table.
	 * This prevents further references to this fdb entry.
	 */
	if (vresp->type != VIO_NET_RES_HYBRID) {
		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
		    (mod_hash_val_t *)&tmp);
		if (rv != 0) {
			/*
			 * As the resources are added to the hash only
			 * after they are started, this can occur if
			 * a resource unregisters before it is ever started.
			 */
			return;
		}
	}

	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
		WRITE_ENTER(&vnetp->vsw_fp_rw);

		ASSERT(tmp == vnetp->vsw_fp);
		vnetp->vsw_fp = NULL;

		RW_EXIT(&vnetp->vsw_fp_rw);
	} else if (vresp->type == VIO_NET_RES_HYBRID) {
		WRITE_ENTER(&vnetp->vsw_fp_rw);

		vnetp->hio_fp = NULL;

		RW_EXIT(&vnetp->vsw_fp_rw);
	}

	/*
	 * If there are threads already ref holding before the entry was
	 * removed from hash table, then wait for ref count to drop to zero.
	 */
	refcnt = (vresp->type == VIO_NET_RES_LDC_SERVICE) ? 1 : 0;
	while (vresp->refcnt > refcnt) {
		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
	}
}

/*
 * Search fdb for a given mac address. If an entry is found, hold
 * a reference to it and return the entry; else returns NULL.
 */
static vnet_res_t *
vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
{
	uint64_t	key = 0;
	vnet_res_t	*vresp;
	int		rv;

	KEY_HASH(key, addrp->ether_addr_octet);

	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);

	if (rv != 0)
		return (NULL);

	return (vresp);
}

/*
 * Callback function provided to mod_hash_find_cb(). After finding the fdb
 * entry corresponding to the key (macaddr), this callback will be invoked by
 * mod_hash_find_cb() to atomically increment the reference count on the fdb
 * entry before returning the found entry.
 */
static void
vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
{
	_NOTE(ARGUNUSED(key))
	VNET_FDBE_REFHOLD((vnet_res_t *)val);
}
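
/*
 * Why the hold is taken inside the hash callback (a note on the scheme
 * above): if vnet_fdbe_find() took the reference only after
 * mod_hash_find() returned, vnet_fdbe_del() could remove the entry and
 * see refcnt already drained in the window between lookup and hold,
 * allowing the entry to be freed while the finder still uses it. Doing
 * the REFHOLD in vnet_fdbe_find_cb(), under the hash lock, closes that
 * window.
 */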
/*
 * Frames received that are tagged with the pvid of the vnet device must be
 * untagged before sending up the stack. This function walks the chain of rx
 * frames, untags any such frames and updates the chain in place.
 *
 * Arguments:
 *	pvid:	pvid of the vnet device for which packets are being received
 *	mp:	head of pkt chain to be validated and untagged
 *
 * Returns:
 *	mp:	head of updated chain of packets, returned through the
 *		in/out mp argument
 */
static void
vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
{
	struct ether_vlan_header	*evhp;
	mblk_t				*bp;
	mblk_t				*bpt;
	mblk_t				*bph;
	mblk_t				*bpn;

	bpn = bph = bpt = NULL;

	for (bp = *mp; bp != NULL; bp = bpn) {

		bpn = bp->b_next;
		bp->b_next = bp->b_prev = NULL;

		evhp = (struct ether_vlan_header *)bp->b_rptr;

		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {

			bp = vnet_vlan_remove_tag(bp);
			if (bp == NULL) {
				continue;
			}

		}

		/* build a chain of processed packets */
		if (bph == NULL) {
			bph = bpt = bp;
		} else {
			bpt->b_next = bp;
			bpt = bp;
		}

	}

	*mp = bph;
}

static void
vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
{
	vnet_res_t		*vresp = (vnet_res_t *)vrh;
	vnet_t			*vnetp = vresp->vnetp;
	vnet_pseudo_rx_ring_t	*ringp;

	if ((vnetp == NULL) || (vnetp->mh == NULL)) {
		freemsgchain(mp);
		return;
	}

	ringp = vresp->rx_ringp;
	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
}

void
vnet_tx_update(vio_net_handle_t vrh)
{
	vnet_res_t		*vresp = (vnet_res_t *)vrh;
	vnet_t			*vnetp = vresp->vnetp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	int			i;

	if (vnetp == NULL || vnetp->mh == NULL) {
		return;
	}

	/*
	 * Currently, the tx hwring API (used to access rings that belong to
	 * a Hybrid IO resource) does not provide us a per ring flow ctrl
	 * update; also the pseudo rings are shared by the ports/ldcs in the
	 * vgen layer. Thus we can't figure out which pseudo ring is being
	 * re-enabled for transmits. To work around this, when we get a tx
	 * restart notification from below, we simply propagate that to all
	 * the tx pseudo rings registered with the mac layer above.
	 *
	 * There are a couple of side effects with this approach, but they are
	 * not harmful, as outlined below:
	 *
	 * A) We might send an invalid ring_update() for a ring that is not
	 * really flow controlled. This will not have any effect in the mac
	 * layer and packets will continue to be transmitted on that ring.
	 *
	 * B) We might end up clearing the flow control in the mac layer for
	 * a ring that is still flow controlled in the underlying resource.
	 * This will result in the mac layer restarting transmit, only to be
	 * flow controlled again on that ring.
	 */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
	}
}
/*
 * Update the new mtu of vnet into the mac layer. First check if the device has
 * been plumbed and if so fail the mtu update. Returns 0 on success.
 */
int
vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
{
	int	rv;

	if (vnetp == NULL || vnetp->mh == NULL) {
		return (EINVAL);
	}

	WRITE_ENTER(&vnetp->vrwlock);

	if (vnetp->flags & VNET_STARTED) {
		RW_EXIT(&vnetp->vrwlock);
		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
		    "update as the device is plumbed\n",
		    vnetp->instance);
		return (EBUSY);
	}

	/* update mtu in the mac layer */
	rv = mac_maxsdu_update(vnetp->mh, mtu);
	if (rv != 0) {
		RW_EXIT(&vnetp->vrwlock);
		cmn_err(CE_NOTE,
		    "!vnet%d: Unable to update mtu with mac layer\n",
		    vnetp->instance);
		return (EIO);
	}

	vnetp->mtu = mtu;

	RW_EXIT(&vnetp->vrwlock);

	return (0);
}

/*
 * Update the link state of vnet to the mac layer.
 */
void
vnet_link_update(vnet_t *vnetp, link_state_t link_state)
{
	if (vnetp == NULL || vnetp->mh == NULL) {
		return;
	}

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->link_state == link_state) {
		RW_EXIT(&vnetp->vrwlock);
		return;
	}
	vnetp->link_state = link_state;
	RW_EXIT(&vnetp->vrwlock);

	mac_link_update(vnetp->mh, link_state);
}
/*
 * vio_net_resource_reg -- An interface called to register a resource
 *	with vnet.
 *	macp -- a GLDv3 mac_register that has all the details of
 *		a resource and its callbacks etc.
 *	type -- resource type.
 *	local_macaddr -- resource's MAC address. This is used to
 *		associate a resource with a corresponding vnet.
 *	remote_macaddr -- remote side MAC address. This is ignored for
 *		the Hybrid resources.
 *	vhp -- A handle returned to the caller.
 *	vcb -- A set of callbacks provided to the callers.
 */
int
vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
    ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
    vio_net_callbacks_t *vcb)
{
	vnet_t		*vnetp;
	vnet_res_t	*vresp;

	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
	ether_copy(local_macaddr, vresp->local_macaddr);
	ether_copy(rem_macaddr, vresp->rem_macaddr);
	vresp->type = type;
	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));

	DBG1(NULL, "Resource Registering type=0x%X\n", type);

	READ_ENTER(&vnet_rw);
	vnetp = vnet_headp;
	while (vnetp != NULL) {
		if (VNET_MATCH_RES(vresp, vnetp)) {
			vresp->vnetp = vnetp;

			/* Setup kstats for hio resource */
			if (vresp->type == VIO_NET_RES_HYBRID) {
				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
				    "hio", vresp);
				if (vresp->ksp == NULL) {
					cmn_err(CE_NOTE, "!vnet%d: Cannot "
					    "create kstats for hio resource",
					    vnetp->instance);
				}
			}
			vnet_add_resource(vnetp, vresp);
			break;
		}
		vnetp = vnetp->nextp;
	}
	RW_EXIT(&vnet_rw);
	if (vresp->vnetp == NULL) {
		DWARN(NULL, "No vnet instance");
		kmem_free(vresp, sizeof (vnet_res_t));
		return (ENXIO);
	}

	*vhp = vresp;
	vcb->vio_net_rx_cb = vnet_rx;
	vcb->vio_net_tx_update = vnet_tx_update;
	vcb->vio_net_report_err = vnet_handle_res_err;

	/* Bind the resource to pseudo ring(s) */
	if (vnet_bind_rings(vresp) != 0) {
		(void) vnet_rem_resource(vnetp, vresp);
		vnet_hio_destroy_kstats(vresp->ksp);
		KMEM_FREE(vresp);
		return (1);
	}

	/* Dispatch a task to start resources */
	vnet_dispatch_res_task(vnetp);
	return (0);
}

/*
 * vio_net_resource_unreg -- An interface to unregister a resource.
 */
void
vio_net_resource_unreg(vio_net_handle_t vhp)
{
	vnet_res_t	*vresp = (vnet_res_t *)vhp;
	vnet_t		*vnetp = vresp->vnetp;

	DBG1(NULL, "Resource Unregistering hdl=0x%p", vhp);

	ASSERT(vnetp != NULL);
	/*
	 * Remove the resource from fdb; this ensures
	 * there are no references to the resource.
	 */
	vnet_fdbe_del(vnetp, vresp);

	vnet_unbind_rings(vresp);

	/* Now remove the resource from the list */
	(void) vnet_rem_resource(vnetp, vresp);

	vnet_hio_destroy_kstats(vresp->ksp);
	KMEM_FREE(vresp);
}

static void
vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
{
	WRITE_ENTER(&vnetp->vrwlock);
	vresp->nextp = vnetp->vres_list;
	vnetp->vres_list = vresp;
	RW_EXIT(&vnetp->vrwlock);
}

static vnet_res_t *
vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
{
	vnet_res_t	*vrp;

	WRITE_ENTER(&vnetp->vrwlock);
	if (vresp == vnetp->vres_list) {
		vnetp->vres_list = vresp->nextp;
	} else {
		vrp = vnetp->vres_list;
		while (vrp->nextp != NULL) {
			if (vrp->nextp == vresp) {
				vrp->nextp = vresp->nextp;
				break;
			}
			vrp = vrp->nextp;
		}
	}
	vresp->vnetp = NULL;
	vresp->nextp = NULL;

	RW_EXIT(&vnetp->vrwlock);

	return (vresp);
}
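
/*
 * Typical resource lifecycle, as seen through the interfaces above (an
 * illustrative trace of existing calls, not a new API): a provider such
 * as vgen or vdds registers a resource, vnet binds it to a pseudo ring
 * and starts it from a taskq, and teardown reverses the steps:
 *
 *	vio_net_resource_reg(&macreg, VIO_NET_RES_LDC_GUEST, ...)
 *	    -> vnet_add_resource() + vnet_bind_rings()
 *	    -> vnet_dispatch_res_task() .. vnet_start_resources()
 *	       -> mc_start() + vnet_fdbe_add()
 *	...
 *	vio_net_resource_unreg(vhp)
 *	    -> vnet_fdbe_del() + vnet_unbind_rings() + vnet_rem_resource()
 */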
/*
 * vnet_dds_rx -- an interface called by vgen to deliver DDS messages.
 */
void
vnet_dds_rx(void *arg, void *dmsg)
{
	vnet_t *vnetp = arg;
	vdds_process_dds_msg(vnetp, dmsg);
}

/*
 * vnet_send_dds_msg -- An interface provided to DDS to send
 *	DDS messages. This simply sends messages via vgen.
 */
int
vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
{
	int rv = EINVAL;	/* fail if vgen has not been initialized */

	if (vnetp->vgenhdl != NULL) {
		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
	}
	return (rv);
}

/*
 * vnet_dds_cleanup_hio -- an interface called by vgen to cleanup hio
 * resources.
 */
void
vnet_dds_cleanup_hio(vnet_t *vnetp)
{
	vdds_cleanup_hio(vnetp);
}

/*
 * vnet_handle_res_err -- A callback function called by a resource
 *	to report an error. For example, vgen can call to report
 *	an LDC down/reset event. This will trigger cleanup of associated
 *	Hybrid resource.
 */
/* ARGSUSED */
static void
vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
{
	vnet_res_t	*vresp = (vnet_res_t *)vrh;
	vnet_t		*vnetp = vresp->vnetp;

	if (vnetp == NULL) {
		return;
	}
	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
	    (vresp->type != VIO_NET_RES_HYBRID)) {
		return;
	}

	vdds_cleanup_hio(vnetp);
}

/*
 * vnet_dispatch_res_task -- A function to dispatch a task that starts
 *	resources.
 */
static void
vnet_dispatch_res_task(vnet_t *vnetp)
{
	int rv;

	/*
	 * Dispatch the task. It could be the case that vnetp->flags does
	 * not have VNET_STARTED set. This is ok as vnet_res_start_task()
	 * can abort the task when the task is started. See related comments
	 * in vnet_m_stop() and vnet_stop_resources().
	 */
	rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
	    vnetp, DDI_NOSLEEP);
	if (rv != DDI_SUCCESS) {
		cmn_err(CE_WARN,
		    "vnet%d:Can't dispatch start resource task",
		    vnetp->instance);
	}
}

/*
 * vnet_res_start_task -- A taskq callback function that starts a resource.
 */
static void
vnet_res_start_task(void *arg)
{
	vnet_t *vnetp = arg;

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->flags & VNET_STARTED) {
		vnet_start_resources(vnetp);
	}
	RW_EXIT(&vnetp->vrwlock);
}

/*
 * vnet_start_resources -- starts all resources associated with
 *	a vnet.
 */
static void
vnet_start_resources(vnet_t *vnetp)
{
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	vnet_res_t	*vresp;
	int		rv;

	DBG1(vnetp, "enter\n");

	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));

	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
		/* skip if it is already started */
		if (vresp->flags & VNET_STARTED) {
			continue;
		}
		macp = &vresp->macreg;
		cbp = macp->m_callbacks;
		rv = cbp->mc_start(macp->m_driver);
		if (rv == 0) {
			/*
			 * Successfully started the resource, so now
			 * add it to the fdb.
			 */
			vresp->flags |= VNET_STARTED;
			vnet_fdbe_add(vnetp, vresp);
		}
	}

	DBG1(vnetp, "exit\n");
}
/*
 * vnet_stop_resources -- stop all resources associated with a vnet.
 */
static void
vnet_stop_resources(vnet_t *vnetp)
{
	vnet_res_t	*vresp;
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;

	DBG1(vnetp, "enter\n");

	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));

	for (vresp = vnetp->vres_list; vresp != NULL; ) {
		if (vresp->flags & VNET_STARTED) {
			/*
			 * Release the lock while invoking mc_stop() of the
			 * underlying resource. We hold a reference to this
			 * resource to prevent being removed from the list in
			 * vio_net_resource_unreg(). Note that new resources
			 * can be added to the head of the list while the lock
			 * is released, but they won't be started, as
			 * VNET_STARTED flag has been cleared for the vnet
			 * device in vnet_m_stop(). Also, while the lock is
			 * released a resource could be removed from the list
			 * in vio_net_resource_unreg(); but that is ok, as we
			 * re-acquire the lock and only then access the forward
			 * link (vresp->nextp) to continue with the next
			 * resource.
			 */
			vresp->flags &= ~VNET_STARTED;
			vresp->flags |= VNET_STOPPING;
			macp = &vresp->macreg;
			cbp = macp->m_callbacks;
			VNET_FDBE_REFHOLD(vresp);
			RW_EXIT(&vnetp->vrwlock);

			cbp->mc_stop(macp->m_driver);

			WRITE_ENTER(&vnetp->vrwlock);
			vresp->flags &= ~VNET_STOPPING;
			VNET_FDBE_REFRELE(vresp);
		}
		vresp = vresp->nextp;
	}
	DBG1(vnetp, "exit\n");
}

/*
 * Setup kstats for the HIO statistics.
 * NOTE: the synchronization for the statistics is the
 * responsibility of the caller.
 */
kstat_t *
vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
{
	kstat_t			*ksp;
	vnet_t			*vnetp = vresp->vnetp;
	vnet_hio_kstats_t	*hiokp;
	size_t			size;

	ASSERT(vnetp != NULL);
	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
	    KSTAT_TYPE_NAMED, size, 0);
	if (ksp == NULL) {
		return (NULL);
	}

	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
	kstat_named_init(&hiokp->ipackets, "ipackets", KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->ierrors, "ierrors", KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->opackets, "opackets", KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->oerrors, "oerrors", KSTAT_DATA_ULONG);

	/* MIB II kstat variables */
	kstat_named_init(&hiokp->rbytes, "rbytes", KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->obytes, "obytes", KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->multircv, "multircv", KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->multixmt, "multixmt", KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->brdcstrcv, "brdcstrcv", KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->brdcstxmt, "brdcstxmt", KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->norcvbuf, "norcvbuf", KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->noxmtbuf, "noxmtbuf", KSTAT_DATA_ULONG);

	ksp->ks_update = vnet_hio_update_kstats;
	ksp->ks_private = (void *)vresp;
	kstat_install(ksp);
	return (ksp);
}

/*
 * Destroy kstats.
 */
static void
vnet_hio_destroy_kstats(kstat_t *ksp)
{
	if (ksp != NULL)
		kstat_delete(ksp);
}
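
/*
 * The counters installed above can be inspected from userland with
 * kstat(1M); for the first vnet instance something like the following
 * should list the hio statistics (illustrative command line):
 *
 *	# kstat -m vnet -i 0 -n hio
 */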
/*
 * Update the kstats.
 */
static int
vnet_hio_update_kstats(kstat_t *ksp, int rw)
{
	vnet_t			*vnetp;
	vnet_res_t		*vresp;
	vnet_hio_stats_t	statsp;
	vnet_hio_kstats_t	*hiokp;

	vresp = (vnet_res_t *)ksp->ks_private;
	vnetp = vresp->vnetp;

	bzero(&statsp, sizeof (vnet_hio_stats_t));

	READ_ENTER(&vnetp->vsw_fp_rw);
	if (vnetp->hio_fp == NULL) {
		/* not using hio resources, just return */
		RW_EXIT(&vnetp->vsw_fp_rw);
		return (0);
	}
	VNET_FDBE_REFHOLD(vnetp->hio_fp);
	RW_EXIT(&vnetp->vsw_fp_rw);
	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
	VNET_FDBE_REFRELE(vnetp->hio_fp);

	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;

	if (rw == KSTAT_READ) {
		/* Link Input/Output stats */
		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
		hiokp->ipackets64.value.ull	= statsp.ipackets;
		hiokp->ierrors.value.ul		= statsp.ierrors;
		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
		hiokp->opackets64.value.ull	= statsp.opackets;
		hiokp->oerrors.value.ul		= statsp.oerrors;

		/* MIB II kstat variables */
		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
		hiokp->rbytes64.value.ull	= statsp.rbytes;
		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
		hiokp->obytes64.value.ull	= statsp.obytes;
		hiokp->multircv.value.ul	= statsp.multircv;
		hiokp->multixmt.value.ul	= statsp.multixmt;
		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
	} else {
		return (EACCES);
	}

	return (0);
}

static void
vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
{
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	uint64_t	val;
	int		stat;

	/*
	 * get the specified statistics from the underlying nxge.
	 */
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
			switch (stat) {
			case MAC_STAT_IPACKETS:
				statsp->ipackets = val;
				break;

			case MAC_STAT_IERRORS:
				statsp->ierrors = val;
				break;

			case MAC_STAT_OPACKETS:
				statsp->opackets = val;
				break;

			case MAC_STAT_OERRORS:
				statsp->oerrors = val;
				break;

			case MAC_STAT_RBYTES:
				statsp->rbytes = val;
				break;

			case MAC_STAT_OBYTES:
				statsp->obytes = val;
				break;

			case MAC_STAT_MULTIRCV:
				statsp->multircv = val;
				break;

			case MAC_STAT_MULTIXMT:
				statsp->multixmt = val;
				break;

			case MAC_STAT_BRDCSTRCV:
				statsp->brdcstrcv = val;
				break;

			case MAC_STAT_BRDCSTXMT:
				statsp->brdcstxmt = val;
				break;

			case MAC_STAT_NOXMTBUF:
				statsp->noxmtbuf = val;
				break;

			case MAC_STAT_NORCVBUF:
				statsp->norcvbuf = val;
				break;

			default:
				/*
				 * stats we are not interested in.
				 */
				break;
			}
		}
	}
}
static boolean_t
vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
{
	vnet_t	*vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		return (0);
	}

	switch (cap) {

	case MAC_CAPAB_RINGS: {

		mac_capab_rings_t *cap_rings = cap_data;
		/*
		 * Rings Capability Notes:
		 * We advertise rings to make use of the rings framework in
		 * gldv3 mac layer, to improve the performance. This is
		 * specifically needed when a Hybrid resource (with multiple
		 * tx/rx hardware rings) is assigned to a vnet device. We also
		 * leverage this for the normal case when no Hybrid resource is
		 * assigned.
		 *
		 * Ring Allocation:
		 * - TX path:
		 * We expose a pseudo ring group with 2 pseudo tx rings (as
		 * currently HybridIO exports only 2 rings). In the normal
		 * case, transmit traffic that comes down to the driver through
		 * the mri_tx (vnet_tx_ring_send()) entry point goes through
		 * the distributed switching algorithm in vnet and gets
		 * transmitted over a port/LDC in the vgen layer to either the
		 * vswitch or a peer vnet. If and when a Hybrid resource is
		 * assigned to the vnet, we obtain the tx ring information of
		 * the Hybrid device (nxge) and map the pseudo rings 1:1 to the
		 * 2 hw tx rings. Traffic being sent over the Hybrid resource
		 * by the mac layer gets spread across both hw rings, as they
		 * are mapped to the 2 pseudo tx rings in vnet.
		 *
		 * - RX path:
		 * We expose a pseudo ring group with 3 pseudo rx rings (static
		 * rings) initially. The first (default) pseudo rx ring is
		 * reserved for the resource that connects to the vswitch
		 * service. The next 2 rings are reserved for a Hybrid resource
		 * that may be assigned to the vnet device. If and when a
		 * Hybrid resource is assigned to the vnet, we obtain the rx
		 * ring information of the Hybrid device (nxge) and map these
		 * pseudo rings 1:1 to the 2 hw rx rings. For each additional
		 * resource that connects to a peer vnet, we dynamically
		 * allocate a pseudo rx ring and map it to that resource, when
		 * the resource gets added; and the pseudo rx ring is
		 * dynamically registered with the upper mac layer. We do the
		 * reverse and unregister the ring with the mac layer when
		 * the resource gets removed.
		 *
		 * Synchronization notes:
		 * We don't need any lock to protect members of ring structure,
		 * specifically ringp->hw_rh, in either the TX or the RX ring,
		 * as explained below.
		 * - TX ring:
		 * ring->hw_rh is initialized only when a Hybrid resource is
		 * associated; and gets referenced only in vnet_hio_tx(). The
		 * Hybrid resource itself is available in fdb only after tx
		 * hwrings are found and mapped; i.e, in vio_net_resource_reg()
		 * we call vnet_bind_rings() first and then call
		 * vnet_start_resources() which adds an entry to fdb. For
		 * traffic going over LDC resources, we don't reference
		 * ring->hw_rh at all.
		 * - RX ring:
		 * For rings mapped to Hybrid resource ring->hw_rh is
		 * initialized and only then do we add the rx callback for
		 * the underlying Hybrid resource; we disable callbacks before
		 * we unmap ring->hw_rh. For rings mapped to LDC resources, we
		 * stop the rx callbacks (in vgen) before we remove ring->hw_rh
		 * (vio_net_resource_unreg()).
		 */

		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for rx grp is initialized in
			 * vnet_ring_grp_init(). Later, the ring_cnt gets
			 * updated dynamically whenever LDC resources are added
			 * or removed.
			 */
			cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		} else {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for tx grp is initialized in
			 * vnet_ring_grp_init() and remains constant, as we
			 * do not support dynamic tx rings for now.
			 */
			cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			/*
			 * Transmit rings are not grouped; i.e, the number of
			 * transmit ring groups advertised should be set to 0.
			 */
			cap_rings->mr_gnum = 0;

			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		}
		return (B_TRUE);

	}

	default:
		break;

	}

	return (B_FALSE);
}
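
/*
 * Summary of what vnet_m_capab() advertises (derived from the notes
 * above; the VNET_NUM_PSEUDO_* constants come from vnet.h):
 *
 *	type	groups				rings
 *	RX	1 (static)			rx_grp[0].ring_cnt (3 by
 *						default, grows/shrinks with
 *						LDC resources)
 *	TX	0 (rings not grouped)		tx_grp[0].ring_cnt (fixed, 2)
 */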

static boolean_t
vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
{
        vnet_t *vnetp = (vnet_t *)arg;

        if (vnetp == NULL) {
                return (B_FALSE);
        }

        switch (cap) {

        case MAC_CAPAB_RINGS: {

                mac_capab_rings_t *cap_rings = cap_data;
                /*
                 * Rings Capability Notes:
                 * We advertise rings to make use of the rings framework in
                 * the gldv3 mac layer, to improve performance. This is
                 * specifically needed when a Hybrid resource (with multiple
                 * tx/rx hardware rings) is assigned to a vnet device. We
                 * also leverage this for the normal case, when no Hybrid
                 * resource is assigned.
                 *
                 * Ring Allocation:
                 * - TX path:
                 * We expose a pseudo ring group with 2 pseudo tx rings (as
                 * currently HybridIO exports only 2 rings). In the normal
                 * case, transmit traffic that comes down to the driver
                 * through the mri_tx (vnet_tx_ring_send()) entry point goes
                 * through the distributed switching algorithm in vnet and
                 * gets transmitted over a port/LDC in the vgen layer to
                 * either the vswitch or a peer vnet. If and when a Hybrid
                 * resource is assigned to the vnet, we obtain the tx ring
                 * information of the Hybrid device (nxge) and map the pseudo
                 * rings 1:1 to the 2 hw tx rings. Traffic being sent over
                 * the Hybrid resource by the mac layer gets spread across
                 * both hw rings, as they are mapped to the 2 pseudo tx rings
                 * in vnet.
                 *
                 * - RX path:
                 * We expose a pseudo ring group with 3 pseudo rx rings
                 * (static rings) initially. The first (default) pseudo rx
                 * ring is reserved for the resource that connects to the
                 * vswitch service. The next 2 rings are reserved for a
                 * Hybrid resource that may be assigned to the vnet device.
                 * If and when a Hybrid resource is assigned to the vnet, we
                 * obtain the rx ring information of the Hybrid device (nxge)
                 * and map these pseudo rings 1:1 to the 2 hw rx rings. For
                 * each additional resource that connects to a peer vnet, we
                 * dynamically allocate a pseudo rx ring and map it to that
                 * resource when the resource gets added; the pseudo rx ring
                 * is dynamically registered with the upper mac layer. We do
                 * the reverse and unregister the ring from the mac layer
                 * when the resource gets removed.
                 *
                 * Synchronization notes:
                 * We don't need any lock to protect members of the ring
                 * structure, specifically ringp->hw_rh, in either the TX or
                 * the RX ring, as explained below.
                 * - TX ring:
                 * ring->hw_rh is initialized only when a Hybrid resource is
                 * associated; and gets referenced only in vnet_hio_tx(). The
                 * Hybrid resource itself is available in the fdb only after
                 * the tx hwrings are found and mapped; i.e., in
                 * vio_net_resource_reg() we call vnet_bind_rings() first and
                 * then call vnet_start_resources(), which adds an entry to
                 * the fdb. For traffic going over LDC resources, we don't
                 * reference ring->hw_rh at all.
                 * - RX ring:
                 * For rings mapped to a Hybrid resource, ring->hw_rh is
                 * initialized and only then do we add the rx callback for
                 * the underlying Hybrid resource; we disable callbacks
                 * before we unmap ring->hw_rh. For rings mapped to LDC
                 * resources, we stop the rx callbacks (in vgen) before we
                 * remove ring->hw_rh (vio_net_resource_unreg()).
                 */

                if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
                        cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

                        /*
                         * The ring_cnt for the rx grp is initialized in
                         * vnet_ring_grp_init(). Later, the ring_cnt gets
                         * updated dynamically whenever LDC resources are
                         * added or removed.
                         */
                        cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
                        cap_rings->mr_rget = vnet_get_ring;

                        cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
                        cap_rings->mr_gget = vnet_get_group;
                        cap_rings->mr_gaddring = NULL;
                        cap_rings->mr_gremring = NULL;
                } else {
                        cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

                        /*
                         * The ring_cnt for the tx grp is initialized in
                         * vnet_ring_grp_init() and remains constant, as we
                         * do not support dynamic tx rings for now.
                         */
                        cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
                        cap_rings->mr_rget = vnet_get_ring;

                        /*
                         * Transmit rings are not grouped; i.e., the number
                         * of transmit ring groups advertised should be set
                         * to 0.
                         */
                        cap_rings->mr_gnum = 0;

                        cap_rings->mr_gget = vnet_get_group;
                        cap_rings->mr_gaddring = NULL;
                        cap_rings->mr_gremring = NULL;
                }
                return (B_TRUE);

        }

        default:
                break;

        }

        return (B_FALSE);
}
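
/*
 * For reference: after the driver returns B_TRUE for MAC_CAPAB_RINGS,
 * the mac layer walks the advertised groups and rings, invoking the
 * mr_gget (vnet_get_group) and mr_rget (vnet_get_ring) callbacks set
 * above to collect per-group and per-ring entry points. The fragment
 * below is a simplified, hypothetical rendering of that sequence, not
 * actual mac layer code:
 *
 *      for (g = 0; g < mr_gnum; g++)
 *              mr_gget(driver, type, g, &group_info, group_handle);
 *      for (r = 0; r < mr_rnum; r++)
 *              mr_rget(driver, type, g_index, r, &ring_info, ring_handle);
 */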

/*
 * Callback function for the MAC layer to get ring information.
 */
static void
vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
    const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
{
        vnet_t *vnetp = arg;

        switch (rtype) {

        case MAC_RING_TYPE_RX: {

                vnet_pseudo_rx_group_t *rx_grp;
                vnet_pseudo_rx_ring_t *rx_ringp;
                mac_intr_t *mintr;

                /* We advertised only one RX group */
                ASSERT(g_index == 0);
                rx_grp = &vnetp->rx_grp[g_index];

                /* Check the current # of rings in the rx group */
                ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));

                /* Get the ring based on the index */
                rx_ringp = &rx_grp->rings[r_index];

                rx_ringp->handle = r_handle;
                /*
                 * Note: we don't need to save the incoming r_index in the
                 * rx_ring, as vnet_ring_grp_init() would have initialized
                 * the index for each ring in the array.
                 */
                rx_ringp->grp = rx_grp;
                rx_ringp->vnetp = vnetp;

                mintr = &infop->mri_intr;
                mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
                mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
                mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;

                infop->mri_driver = (mac_ring_driver_t)rx_ringp;
                infop->mri_start = vnet_rx_ring_start;
                infop->mri_stop = vnet_rx_ring_stop;

                /* Set the poll function, as this is an rx ring */
                infop->mri_poll = vnet_rx_poll;

                break;
        }

        case MAC_RING_TYPE_TX: {
                vnet_pseudo_tx_group_t *tx_grp;
                vnet_pseudo_tx_ring_t *tx_ringp;

                /*
                 * No need to check the group index; the mac layer passes -1
                 * for it.
                 */
                tx_grp = &vnetp->tx_grp[0];

                /* Check the # of rings in the tx group */
                ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));

                /* Get the ring based on the index */
                tx_ringp = &tx_grp->rings[r_index];

                tx_ringp->handle = r_handle;
                tx_ringp->index = r_index;
                tx_ringp->grp = tx_grp;
                tx_ringp->vnetp = vnetp;

                infop->mri_driver = (mac_ring_driver_t)tx_ringp;
                infop->mri_start = vnet_tx_ring_start;
                infop->mri_stop = vnet_tx_ring_stop;

                /* Set the transmit function, as this is a tx ring */
                infop->mri_tx = vnet_tx_ring_send;

                break;
        }

        default:
                break;
        }
}

/*
 * Callback function for the MAC layer to get group information.
 */
static void
vnet_get_group(void *arg, mac_ring_type_t type, const int index,
    mac_group_info_t *infop, mac_group_handle_t handle)
{
        vnet_t *vnetp = (vnet_t *)arg;

        switch (type) {

        case MAC_RING_TYPE_RX:
        {
                vnet_pseudo_rx_group_t *rx_grp;

                /* We advertised only one RX group */
                ASSERT(index == 0);

                rx_grp = &vnetp->rx_grp[index];
                rx_grp->handle = handle;
                rx_grp->index = index;
                rx_grp->vnetp = vnetp;

                infop->mgi_driver = (mac_group_driver_t)rx_grp;
                infop->mgi_start = NULL;
                infop->mgi_stop = NULL;
                infop->mgi_addmac = vnet_addmac;
                infop->mgi_remmac = vnet_remmac;
                infop->mgi_count = rx_grp->ring_cnt;

                break;
        }

        case MAC_RING_TYPE_TX:
        {
                vnet_pseudo_tx_group_t *tx_grp;

                /* We advertised only one TX group */
                ASSERT(index == 0);

                tx_grp = &vnetp->tx_grp[index];
                tx_grp->handle = handle;
                tx_grp->index = index;
                tx_grp->vnetp = vnetp;

                infop->mgi_driver = (mac_group_driver_t)tx_grp;
                infop->mgi_start = NULL;
                infop->mgi_stop = NULL;
                infop->mgi_addmac = NULL;
                infop->mgi_remmac = NULL;
                infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;

                break;
        }

        default:
                break;

        }
}
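
/*
 * Note on the mri_start/mri_stop callbacks that follow: for pseudo rings
 * backed by LDC resources they only toggle the VNET_RXRING_STARTED (or
 * VNET_TXRING_STARTED) state flag, while rings reserved for the Hybrid
 * resource additionally start or stop the bound hw ring through
 * mac_hwring_start()/mac_hwring_stop() once a hwring is bound.
 */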

static int
vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
{
        vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
        int err;

        /*
         * If this ring is mapped to an LDC resource, simply mark the state
         * to indicate the ring is started and return.
         */
        if ((rx_ringp->state &
            (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
                rx_ringp->gen_num = mr_gen_num;
                rx_ringp->state |= VNET_RXRING_STARTED;
                return (0);
        }

        ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);

        /*
         * This must be a ring reserved for a hwring. If the hwring is not
         * bound yet, simply mark the state to indicate the ring is started
         * and return. If and when a hybrid resource is activated for this
         * vnet device, we will bind the hwring and start it then. If a
         * hwring is already bound, start it now.
         */
        if (rx_ringp->hw_rh == NULL) {
                rx_ringp->gen_num = mr_gen_num;
                rx_ringp->state |= VNET_RXRING_STARTED;
                return (0);
        }

        err = mac_hwring_start(rx_ringp->hw_rh);
        if (err == 0) {
                rx_ringp->gen_num = mr_gen_num;
                rx_ringp->state |= VNET_RXRING_STARTED;
        } else {
                err = ENXIO;
        }

        return (err);
}

static void
vnet_rx_ring_stop(mac_ring_driver_t arg)
{
        vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;

        /*
         * If this ring is mapped to an LDC resource, simply mark the state
         * to indicate the ring is now stopped and return.
         */
        if ((rx_ringp->state &
            (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
                rx_ringp->state &= ~VNET_RXRING_STARTED;
                return;
        }

        ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);

        /*
         * This must be a ring reserved for a hwring. If the hwring is not
         * bound yet, simply mark the state to indicate the ring is stopped
         * and return. If a hwring is already bound, stop it now.
         */
        if (rx_ringp->hw_rh == NULL) {
                rx_ringp->state &= ~VNET_RXRING_STARTED;
                return;
        }

        mac_hwring_stop(rx_ringp->hw_rh);
        rx_ringp->state &= ~VNET_RXRING_STARTED;
}

/* ARGSUSED */
static int
vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
{
        vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;

        tx_ringp->state |= VNET_TXRING_STARTED;
        return (0);
}

static void
vnet_tx_ring_stop(mac_ring_driver_t arg)
{
        vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;

        tx_ringp->state &= ~VNET_TXRING_STARTED;
}

/*
 * Disable polling for a ring and enable its interrupt.
 */
static int
vnet_ring_enable_intr(void *arg)
{
        vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
        vnet_res_t *vresp;

        if (rx_ringp->hw_rh == NULL) {
                /*
                 * The ring enable intr func is being invoked, but the ring
                 * is not bound to any underlying resource? This must be a
                 * ring reserved for a Hybrid resource, and no such resource
                 * has been assigned to this vnet device yet. We simply
                 * return success.
                 */
                ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
                return (0);
        }

        /*
         * The rx ring has been bound to either an LDC or a Hybrid resource.
         * Call the appropriate function to enable interrupts for the ring.
         */
        if (rx_ringp->state & VNET_RXRING_HYBRID) {
                return (mac_hwring_enable_intr(rx_ringp->hw_rh));
        } else {
                vresp = (vnet_res_t *)rx_ringp->hw_rh;
                return (vgen_enable_intr(vresp->macreg.m_driver));
        }
}
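
/*
 * Note on the naming above and below: the mac layer switches a ring into
 * poll mode by calling the driver's mi_disable entry point (here
 * vnet_ring_disable_intr()) and then pulls packets via mri_poll
 * (vnet_rx_poll()); it re-enables interrupt delivery through mi_enable
 * (vnet_ring_enable_intr()) when leaving poll mode.
 */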

/*
 * Enable polling for a ring and disable its interrupt.
 */
static int
vnet_ring_disable_intr(void *arg)
{
        vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
        vnet_res_t *vresp;

        if (rx_ringp->hw_rh == NULL) {
                /*
                 * The ring disable intr func is being invoked, but the ring
                 * is not bound to any underlying resource? This must be a
                 * ring reserved for a Hybrid resource, and no such resource
                 * has been assigned to this vnet device yet. We simply
                 * return success.
                 */
                ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
                return (0);
        }

        /*
         * The rx ring has been bound to either an LDC or a Hybrid resource.
         * Call the appropriate function to disable interrupts for the ring.
         */
        if (rx_ringp->state & VNET_RXRING_HYBRID) {
                return (mac_hwring_disable_intr(rx_ringp->hw_rh));
        } else {
                vresp = (vnet_res_t *)rx_ringp->hw_rh;
                return (vgen_disable_intr(vresp->macreg.m_driver));
        }
}

/*
 * Poll for up to 'bytes_to_pickup' bytes worth of messages from the rx ring.
 */
static mblk_t *
vnet_rx_poll(void *arg, int bytes_to_pickup)
{
        vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
        mblk_t *mp = NULL;
        vnet_res_t *vresp;
        vnet_t *vnetp = rx_ringp->vnetp;

        if (rx_ringp->hw_rh == NULL) {
                return (NULL);
        }

        if (rx_ringp->state & VNET_RXRING_HYBRID) {
                mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
                /*
                 * Packets received over a hybrid resource need additional
                 * processing to remove the tag, for the pvid case. The
                 * underlying resource is not aware of the vnet's pvid and
                 * thus packets are received with the vlan tag in the header;
                 * unlike packets that are received over an LDC channel, in
                 * which case the peer vnet/vsw would have already removed
                 * the tag.
                 */
                if (vnetp->pvid != vnetp->default_vlan_id) {
                        vnet_rx_frames_untag(vnetp->pvid, &mp);
                }
        } else {
                vresp = (vnet_res_t *)rx_ringp->hw_rh;
                mp = vgen_poll(vresp->macreg.m_driver, bytes_to_pickup);
        }
        return (mp);
}

/* ARGSUSED */
void
vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t loopback)
{
        vnet_t *vnetp = (vnet_t *)arg;
        vnet_pseudo_rx_ring_t *ringp = (vnet_pseudo_rx_ring_t *)mrh;

        /*
         * Packets received over a hybrid resource need additional processing
         * to remove the tag, for the pvid case. The underlying resource is
         * not aware of the vnet's pvid and thus packets are received with
         * the vlan tag in the header; unlike packets that are received over
         * an LDC channel, in which case the peer vnet/vsw would have already
         * removed the tag.
         */
        if (vnetp->pvid != vnetp->default_vlan_id) {
                vnet_rx_frames_untag(vnetp->pvid, &mp);
                if (mp == NULL) {
                        return;
                }
        }
        mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
}
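
/*
 * A minimal sketch of the untagging performed by vnet_rx_frames_untag()
 * (defined elsewhere in this file), under the assumption suggested by the
 * comments above: frames whose VLAN ID matches the vnet's pvid have their
 * 802.1Q tag stripped before being passed up, while frames in other VLANs
 * are left intact. Conceptually, per frame:
 *
 *      if (VLAN_ID(frame) == pvid)
 *              strip the 4-byte 802.1Q header from the frame;
 */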

static int
vnet_addmac(void *arg, const uint8_t *mac_addr)
{
        vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg;
        vnet_t *vnetp;

        vnetp = rx_grp->vnetp;

        if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
                return (0);
        }

        cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
            vnetp->instance, __func__);
        return (EINVAL);
}

static int
vnet_remmac(void *arg, const uint8_t *mac_addr)
{
        vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg;
        vnet_t *vnetp;

        vnetp = rx_grp->vnetp;

        if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
                return (0);
        }

        cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
            vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
        return (EINVAL);
}

int
vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
{
        mac_handle_t mh;
        mac_client_handle_t mch = NULL;
        mac_unicast_handle_t muh = NULL;
        mac_diag_t diag;
        mac_register_t *macp;
        char client_name[MAXNAMELEN];
        int rv;
        uint16_t mac_flags = MAC_UNICAST_TAG_DISABLE |
            MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
        vio_net_callbacks_t vcb;
        ether_addr_t rem_addr =
                { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
        uint32_t retries = 0;

        if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
                return (EAGAIN);
        }

        do {
                rv = mac_open_by_linkname(ifname, &mh);
                if (rv == 0) {
                        break;
                }
                if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
                        mac_free(macp);
                        return (rv);
                }
                drv_usecwait(vnet_mac_open_delay);
        } while (rv == ENOENT);

        vnetp->hio_mh = mh;

        (void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
            ifname);
        rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
        if (rv != 0) {
                goto fail;
        }
        vnetp->hio_mch = mch;

        rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
            &diag);
        if (rv != 0) {
                goto fail;
        }
        vnetp->hio_muh = muh;

        macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
        macp->m_driver = vnetp;
        macp->m_dip = NULL;
        macp->m_src_addr = NULL;
        macp->m_callbacks = &vnet_hio_res_callbacks;
        macp->m_min_sdu = 0;
        macp->m_max_sdu = ETHERMTU;

        rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
            vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
        if (rv != 0) {
                goto fail;
        }
        mac_free(macp);

        /* add the recv callback */
        mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);

        /* add the notify callback - only tx updates for now */
        vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb,
            vnetp);

        return (0);

fail:
        mac_free(macp);
        vnet_hio_mac_cleanup(vnetp);
        return (1);
}

void
vnet_hio_mac_cleanup(vnet_t *vnetp)
{
        if (vnetp->hio_mnh != NULL) {
                (void) mac_notify_remove(vnetp->hio_mnh, B_TRUE);
                vnetp->hio_mnh = NULL;
        }

        if (vnetp->hio_vhp != NULL) {
                vio_net_resource_unreg(vnetp->hio_vhp);
                vnetp->hio_vhp = NULL;
        }

        if (vnetp->hio_muh != NULL) {
                (void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
                vnetp->hio_muh = NULL;
        }

        if (vnetp->hio_mch != NULL) {
                mac_client_close(vnetp->hio_mch, 0);
                vnetp->hio_mch = NULL;
        }

        if (vnetp->hio_mh != NULL) {
                mac_close(vnetp->hio_mh);
                vnetp->hio_mh = NULL;
        }
}
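
/*
 * Note: vnet_hio_mac_cleanup() tears down state strictly in the reverse
 * order of vnet_hio_mac_init() (notify callback, vio resource, unicast
 * address, mac client, mac handle), and each step is guarded by a NULL
 * check so the same routine also serves as the error-unwind path for a
 * partially completed init (see the fail label above).
 */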

/* Bind pseudo rings to hwrings */
static int
vnet_bind_hwrings(vnet_t *vnetp)
{
        mac_ring_handle_t hw_rh[VNET_NUM_HYBRID_RINGS];
        mac_perim_handle_t mph1;
        vnet_pseudo_rx_group_t *rx_grp;
        vnet_pseudo_rx_ring_t *rx_ringp;
        vnet_pseudo_tx_group_t *tx_grp;
        vnet_pseudo_tx_ring_t *tx_ringp;
        int hw_ring_cnt;
        int i;
        int rv;

        mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

        /* Get the list of the underlying RX rings. */
        hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
            MAC_RING_TYPE_RX);

        /* We expect the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
        if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
                cmn_err(CE_WARN,
                    "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
                    vnetp->instance, hw_ring_cnt);
                goto fail;
        }

        if (vnetp->rx_hwgh != NULL) {
                /*
                 * Quiesce the HW ring and the mac srs on the ring. Note
                 * that the HW ring will be restarted when the pseudo ring
                 * is started. At that time all the packets will be
                 * directly passed up to the pseudo RX ring and handled
                 * by the mac srs created over the pseudo RX ring.
                 */
                mac_rx_client_quiesce(vnetp->hio_mch);
                mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
        }

        /*
         * Bind the pseudo rings to the hwrings and start the hwrings.
         * Note we don't need to register these with the upper mac, as we
         * have statically exported these pseudo rxrings, which are reserved
         * for the rxrings of the Hybrid resource.
         */
        rx_grp = &vnetp->rx_grp[0];
        for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
                /* Pick the rxrings reserved for the Hybrid resource */
                rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];

                /* Store the hw ring handle */
                rx_ringp->hw_rh = hw_rh[i];

                /* Bind the pseudo ring to the underlying hwring */
                mac_hwring_setup(rx_ringp->hw_rh,
                    (mac_resource_handle_t)rx_ringp);

                /* Start the hwring if needed */
                if (rx_ringp->state & VNET_RXRING_STARTED) {
                        rv = mac_hwring_start(rx_ringp->hw_rh);
                        if (rv != 0) {
                                mac_hwring_teardown(rx_ringp->hw_rh);
                                rx_ringp->hw_rh = NULL;
                                goto fail;
                        }
                }
        }

        /* Get the list of the underlying TX rings. */
        hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
            MAC_RING_TYPE_TX);

        /* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
        if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
                cmn_err(CE_WARN,
                    "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
                    vnetp->instance, hw_ring_cnt);
                goto fail;
        }

        /*
         * Now map the pseudo txrings to the hw txrings. Note we don't need
         * to register these with the upper mac, as we have statically
         * exported these rings. Note that these rings will continue to be
         * used for LDC resources to peer vnets and the vswitch (shared
         * ring).
         */
        tx_grp = &vnetp->tx_grp[0];
        for (i = 0; i < tx_grp->ring_cnt; i++) {
                tx_ringp = &tx_grp->rings[i];
                tx_ringp->hw_rh = hw_rh[i];
                tx_ringp->state |= VNET_TXRING_HYBRID;
        }

        mac_perim_exit(mph1);
        return (0);

fail:
        mac_perim_exit(mph1);
        vnet_unbind_hwrings(vnetp);
        return (1);
}
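
/*
 * The quiesce/restart calls in vnet_bind_hwrings() and
 * vnet_unbind_hwrings() are paired: bind permanently quiesces the mac
 * client's rx srs (mac_rx_client_quiesce() plus
 * mac_srs_perm_quiesce(.., B_TRUE)) so that all packets flow through the
 * pseudo rings instead, and unbind clears the permanent-quiesce flag and
 * restarts the client (mac_srs_perm_quiesce(.., B_FALSE) plus
 * mac_rx_client_restart()).
 */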

/* Unbind pseudo rings from hwrings */
static void
vnet_unbind_hwrings(vnet_t *vnetp)
{
        mac_perim_handle_t mph1;
        vnet_pseudo_rx_ring_t *rx_ringp;
        vnet_pseudo_rx_group_t *rx_grp;
        vnet_pseudo_tx_group_t *tx_grp;
        vnet_pseudo_tx_ring_t *tx_ringp;
        int i;

        mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

        tx_grp = &vnetp->tx_grp[0];
        for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
                tx_ringp = &tx_grp->rings[i];
                if (tx_ringp->state & VNET_TXRING_HYBRID) {
                        tx_ringp->state &= ~VNET_TXRING_HYBRID;
                        tx_ringp->hw_rh = NULL;
                }
        }

        rx_grp = &vnetp->rx_grp[0];
        for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
                rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
                if (rx_ringp->hw_rh != NULL) {
                        /* Stop the hwring */
                        mac_hwring_stop(rx_ringp->hw_rh);

                        /* Teardown the hwring */
                        mac_hwring_teardown(rx_ringp->hw_rh);
                        rx_ringp->hw_rh = NULL;
                }
        }

        if (vnetp->rx_hwgh != NULL) {
                vnetp->rx_hwgh = NULL;
                /*
                 * First clear the permanent-quiesced flag of the RX srs,
                 * then restart the HW ring and the mac srs on the ring.
                 */
                mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
                mac_rx_client_restart(vnetp->hio_mch);
        }

        mac_perim_exit(mph1);
}

/* Bind pseudo ring to an LDC resource */
static int
vnet_bind_vgenring(vnet_res_t *vresp)
{
        vnet_t *vnetp;
        vnet_pseudo_rx_group_t *rx_grp;
        vnet_pseudo_rx_ring_t *rx_ringp;
        mac_perim_handle_t mph1;
        int rv;
        int type;

        vnetp = vresp->vnetp;
        type = vresp->type;
        rx_grp = &vnetp->rx_grp[0];

        if (type == VIO_NET_RES_LDC_SERVICE) {
                /*
                 * Ring Index 0 is the default ring in the group and is
                 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This
                 * ring is allocated statically and is reported to the mac
                 * layer in vnet_m_capab(). So, all we need to do here is
                 * save a reference to the associated vresp.
                 */
                rx_ringp = &rx_grp->rings[0];
                rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
                vresp->rx_ringp = (void *)rx_ringp;
                return (0);
        }
        ASSERT(type == VIO_NET_RES_LDC_GUEST);

        mac_perim_enter_by_mh(vnetp->mh, &mph1);

        rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
        if (rx_ringp == NULL) {
                cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
                    vnetp->instance);
                goto fail;
        }

        /* Store the LDC resource itself as the ring handle */
        rx_ringp->hw_rh = (mac_ring_handle_t)vresp;

        /*
         * Save a reference to the ring in the resource for lookup during
         * unbind. Note this is only done for LDC resources. We don't need
         * this in the case of a Hybrid resource (see vnet_bind_hwrings()),
         * as its rx rings are mapped to reserved pseudo rx rings (index 1
         * and 2).
         */
        vresp->rx_ringp = (void *)rx_ringp;
        rx_ringp->state |= VNET_RXRING_LDC_GUEST;

        /* Register the pseudo ring with the upper-mac */
        rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
        if (rv != 0) {
                rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
                rx_ringp->hw_rh = NULL;
                vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
                goto fail;
        }

        mac_perim_exit(mph1);
        return (0);
fail:
        mac_perim_exit(mph1);
        return (1);
}
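
/*
 * For an LDC_GUEST resource the pseudo rx ring is allocated on the fly
 * and registered with the upper mac via mac_group_add_ring() above;
 * vnet_unbind_vgenring() below performs the inverse via
 * mac_group_rem_ring(). This is the dynamic rx ring count growth
 * referred to in the Rings Capability Notes in vnet_m_capab().
 */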

/* Unbind pseudo ring from an LDC resource */
static void
vnet_unbind_vgenring(vnet_res_t *vresp)
{
        vnet_t *vnetp;
        vnet_pseudo_rx_group_t *rx_grp;
        vnet_pseudo_rx_ring_t *rx_ringp;
        mac_perim_handle_t mph1;
        int type;

        vnetp = vresp->vnetp;
        type = vresp->type;
        rx_grp = &vnetp->rx_grp[0];

        if (vresp->rx_ringp == NULL) {
                return;
        }

        if (type == VIO_NET_RES_LDC_SERVICE) {
                /*
                 * Ring Index 0 is the default ring in the group and is
                 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This
                 * ring is allocated statically and is reported to the mac
                 * layer in vnet_m_capab(). So, all we need to do here is
                 * remove its reference to the associated vresp.
                 */
                rx_ringp = &rx_grp->rings[0];
                rx_ringp->hw_rh = NULL;
                vresp->rx_ringp = NULL;
                return;
        }
        ASSERT(type == VIO_NET_RES_LDC_GUEST);

        mac_perim_enter_by_mh(vnetp->mh, &mph1);

        rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
        vresp->rx_ringp = NULL;

        if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
                /* Unregister the pseudo ring from the upper-mac */
                mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);

                rx_ringp->hw_rh = NULL;
                rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;

                /* Free the pseudo rx ring */
                vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
        }

        mac_perim_exit(mph1);
}

static void
vnet_unbind_rings(vnet_res_t *vresp)
{
        switch (vresp->type) {

        case VIO_NET_RES_LDC_SERVICE:
        case VIO_NET_RES_LDC_GUEST:
                vnet_unbind_vgenring(vresp);
                break;

        case VIO_NET_RES_HYBRID:
                vnet_unbind_hwrings(vresp->vnetp);
                break;

        default:
                break;

        }
}

static int
vnet_bind_rings(vnet_res_t *vresp)
{
        int rv;

        switch (vresp->type) {

        case VIO_NET_RES_LDC_SERVICE:
        case VIO_NET_RES_LDC_GUEST:
                rv = vnet_bind_vgenring(vresp);
                break;

        case VIO_NET_RES_HYBRID:
                rv = vnet_bind_hwrings(vresp->vnetp);
                break;

        default:
                rv = 1;
                break;

        }

        return (rv);
}

/* ARGSUSED */
int
vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
{
        vnet_t *vnetp = (vnet_t *)arg;

        *val = mac_stat_get(vnetp->hio_mh, stat);
        return (0);
}

/*
 * The start() and stop() routines for the Hybrid resource below are just
 * dummy functions. They are provided to avoid resource type specific code
 * in vnet_start_resources() and vnet_stop_resources(). The starting and
 * stopping of the Hybrid resource happens in the context of the mac_client
 * interfaces that are invoked in vnet_hio_mac_init() and
 * vnet_hio_mac_cleanup().
 */

/* ARGSUSED */
static int
vnet_hio_start(void *arg)
{
        return (0);
}

/* ARGSUSED */
static void
vnet_hio_stop(void *arg)
{
}

mblk_t *
vnet_hio_tx(void *arg, mblk_t *mp)
{
        vnet_pseudo_tx_ring_t *tx_ringp;
        mblk_t *nextp;
        mblk_t *ret_mp;

        tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
        for (;;) {
                nextp = mp->b_next;
                mp->b_next = NULL;

                ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
                if (ret_mp != NULL) {
                        ret_mp->b_next = nextp;
                        mp = ret_mp;
                        break;
                }

                if ((mp = nextp) == NULL)
                        break;
        }
        return (mp);
}

static void
vnet_hio_notify_cb(void *arg, mac_notify_type_t type)
{
        vnet_t *vnetp = (vnet_t *)arg;
        mac_perim_handle_t mph;

        mac_perim_enter_by_mh(vnetp->hio_mh, &mph);
        switch (type) {
        case MAC_NOTE_TX:
                vnet_tx_update(vnetp->hio_vhp);
                break;

        default:
                break;
        }
        mac_perim_exit(mph);
}

#ifdef VNET_IOC_DEBUG

/*
 * The ioctl entry point is used only for debugging for now. The ioctl
 * commands can be used to force the link state of the channel connected
 * to vsw.
 */
static void
vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
        struct iocblk *iocp;
        vnet_t *vnetp;

        iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
        iocp->ioc_error = 0;
        vnetp = (vnet_t *)arg;

        if (vnetp == NULL) {
                miocnak(q, mp, 0, EINVAL);
                return;
        }

        switch (iocp->ioc_cmd) {

        case VNET_FORCE_LINK_DOWN:
        case VNET_FORCE_LINK_UP:
                vnet_force_link_state(vnetp, q, mp);
                break;

        default:
                iocp->ioc_error = EINVAL;
                miocnak(q, mp, 0, iocp->ioc_error);
                break;

        }
}

static void
vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
{
        mac_register_t *macp;
        mac_callbacks_t *cbp;
        vnet_res_t *vresp;

        READ_ENTER(&vnetp->vsw_fp_rw);

        vresp = vnetp->vsw_fp;
        if (vresp == NULL) {
                RW_EXIT(&vnetp->vsw_fp_rw);
                return;
        }

        macp = &vresp->macreg;
        cbp = macp->m_callbacks;
        cbp->mc_ioctl(macp->m_driver, q, mp);

        RW_EXIT(&vnetp->vsw_fp_rw);
}

#else

static void
vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
        vnet_t *vnetp;

        vnetp = (vnet_t *)arg;

        if (vnetp == NULL) {
                miocnak(q, mp, 0, EINVAL);
                return;
        }

        /* ioctl support only for debugging */
        miocnak(q, mp, 0, ENOTSUP);
}

#endif