/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>

/*
 * Function prototypes.
 */
static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
static	int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static	void vsw_get_md_properties(vsw_t *vswp);
static	int vsw_setup_layer2(vsw_t *);
static	int vsw_setup_layer3(vsw_t *);

/* MAC layer routines */
static	int vsw_mac_attach(vsw_t *vswp);
static	void vsw_mac_detach(vsw_t *vswp);
static	void vsw_notify_cb(void *, mac_notify_type_t);
static	void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *);
static	mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
static	int vsw_mac_register(vsw_t *);
static	int vsw_mac_unregister(vsw_t *);
static	int vsw_m_stat(void *, uint_t, uint64_t *);
static	void vsw_m_stop(void *arg);
static	int vsw_m_start(void *arg);
static	int vsw_m_unicst(void *arg, const uint8_t *);
static	int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
static	int vsw_m_promisc(void *arg, boolean_t);
static	mblk_t *vsw_m_tx(void *arg, mblk_t *);

/* MDEG routines */
static	void vsw_mdeg_register(vsw_t *vswp);
static	void vsw_mdeg_unregister(vsw_t *vswp);
static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);

/* Port add/deletion routines */
static	int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
static	int vsw_port_attach(vsw_t *vswp, int p_instance,
	uint64_t *ldcids, int nids, struct ether_addr *macaddr);
static	int vsw_detach_ports(vsw_t *vswp);
static	int vsw_port_detach(vsw_t *vswp, int p_instance);
static	int vsw_port_delete(vsw_port_t *port);
static	int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static	int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
static	int vsw_init_ldcs(vsw_port_t *port);
static	int vsw_uninit_ldcs(vsw_port_t *port);
static	int vsw_ldc_init(vsw_ldc_t *ldcp);
static	int vsw_ldc_uninit(vsw_ldc_t *ldcp);
static	int vsw_drain_ldcs(vsw_port_t *port);
static	int vsw_drain_port_taskq(vsw_port_t *port);
static	void vsw_marker_task(void *);
static	vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
static	int vsw_plist_del_node(vsw_t *, vsw_port_t *port);

/* Interrupt routines */
static	uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);

/* Handshake routines */
static	void vsw_restart_handshake(vsw_ldc_t *);
static	int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static	void vsw_next_milestone(vsw_ldc_t *);
static	int vsw_supported_version(vio_ver_msg_t *);

/* Data processing routines */
static	void vsw_process_pkt(void *);
static	void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t);
static	void vsw_process_ctrl_pkt(void *);
static	void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static	void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static	void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static	void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static	void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static	void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static	void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
static	void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
static	void vsw_process_data_raw_pkt(vsw_ldc_t *, void *);
static	void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static	void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);

/* Switching/data transmit routines */
static	void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
	vsw_port_t *port, mac_resource_handle_t);
static	void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
	vsw_port_t *port, mac_resource_handle_t);
static	int vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller,
	vsw_port_t *port);
static	int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller,
	vsw_port_t *port);
static	int vsw_portsend(vsw_port_t *, mblk_t *);
static	int vsw_dringsend(vsw_ldc_t *, mblk_t *);
static	int vsw_descrsend(vsw_ldc_t *, mblk_t *);

/* Packet creation routines */
static void vsw_send_ver(vsw_ldc_t *);
static void vsw_send_attr(vsw_ldc_t *);
static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);

static void vsw_send_msg(vsw_ldc_t *, void *, int);

/* Forwarding database (FDB) routines */
static	int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
static	int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
static	vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *);
static	int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
static	int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
static	int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
static	void vsw_del_addr(uint8_t, void *, uint64_t);
static	void vsw_del_mcst_port(vsw_port_t *);
static	void vsw_del_mcst_vsw(vsw_t *);

/* Dring routines */
static dring_info_t *vsw_create_dring(vsw_ldc_t *);
static void vsw_create_privring(vsw_ldc_t *);
static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
	int *);
static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);

static void vsw_set_lane_attr(vsw_t *, lane_t *);
static int vsw_check_attr(vnet_attr_msg_t *, vsw_port_t *);
static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
static int vsw_check_dring_info(vio_dring_reg_msg_t *);

/* Misc support routines */
static	caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static int vsw_free_ring(dring_info_t *);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);

int	vsw_num_handshakes = 3;		/* # of handshake attempts */
int	vsw_wretries = 100;		/* # of write attempts */
int	vsw_chain_len = 150;		/* max # of mblks in msg chain */
int	vsw_desc_delay = 0;		/* delay in us */
int	vsw_read_attempts = 5;		/* # of reads of descriptor */

uint32_t	vsw_mblk_size = VSW_MBLK_SIZE;
uint32_t	vsw_num_mblks = VSW_NUM_MBLKS;

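/*
 * A minimal sketch of how these globals might be tuned on a deployed
 * system without rebuilding the driver (illustrative values only):
 * add lines such as the following to /etc/system and reboot.
 *
 *	set vsw:vsw_wretries = 200
 *	set vsw:vsw_desc_delay = 100
 */
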
/*
 * mode specific frame switching function
 */
void	(*vsw_switch_frame)(vsw_t *, mblk_t *, int, vsw_port_t *,
			mac_resource_handle_t);

static mac_callbacks_t vsw_m_callbacks = {
	0,
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx,
	NULL,
	NULL,
	NULL
};

static	struct	cb_ops	vsw_cb_ops = {
	nulldev,			/* cb_open */
	nulldev,			/* cb_close */
	nodev,				/* cb_strategy */
	nodev,				/* cb_print */
	nodev,				/* cb_dump */
	nodev,				/* cb_read */
	nodev,				/* cb_write */
	nodev,				/* cb_ioctl */
	nodev,				/* cb_devmap */
	nodev,				/* cb_mmap */
	nodev,				/* cb_segmap */
	nochpoll,			/* cb_chpoll */
	ddi_prop_op,			/* cb_prop_op */
	NULL,				/* cb_stream */
	D_MP,				/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};

static	struct	dev_ops	vsw_ops = {
	DEVO_REV,			/* devo_rev */
	0,				/* devo_refcnt */
	vsw_getinfo,			/* devo_getinfo */
	nulldev,			/* devo_identify */
	nulldev,			/* devo_probe */
	vsw_attach,			/* devo_attach */
	vsw_detach,			/* devo_detach */
	nodev,				/* devo_reset */
	&vsw_cb_ops,			/* devo_cb_ops */
	(struct bus_ops *)NULL,		/* devo_bus_ops */
	ddi_power			/* devo_power */
};

extern	struct	mod_ops	mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch Driver %I%",
	&vsw_ops,
};

#define	LDC_ENTER_LOCK(ldcp)	\
				mutex_enter(&((ldcp)->ldc_cblock));\
				mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
				mutex_exit(&((ldcp)->ldc_txlock));\
				mutex_exit(&((ldcp)->ldc_cblock));

/* Driver soft state ptr */
static void	*vsw_state;

/*
 * Linked list of "vsw_t" structures - one per instance.
 */
vsw_t		*vsw_head = NULL;
krwlock_t	vsw_rw;

/*
 * Property names
 */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";

/* supported versions */
static	ver_sup_t	vsw_versions[] = { {1, 0} };

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device-port' nodes identified by their
 * 'id' property.
 */
static md_prop_match_t vport_prop_match[] = {
	{ MDET_PROP_VAL,	"id"	},
	{ MDET_LIST_END,	NULL	}
};

static mdeg_node_match_t vport_match = { "virtual-device-port",
						vport_prop_match };

/*
 * Specification of an MD node passed to the MDEG to filter any
 * 'vport' nodes that do not belong to the specified node. This
 * template is copied for each vsw instance and filled in with
 * the appropriate 'cfg-handle' value before being passed to the MDEG.
 */
static mdeg_prop_spec_t vsw_prop_template[] = {
	{ MDET_PROP_STR,	"name",		vsw_propname },
	{ MDET_PROP_VAL,	"cfg-handle",	NULL	},
	{ MDET_LIST_END,	NULL,		NULL	}
};

#define	VSW_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val);

/*
 * Print debug messages - set to 0x1f to enable all msgs
 * or 0x0 to turn all off.
 */
int vswdbg = 0x0;

/*
 * debug levels:
 * 0x01:	Function entry/exit tracing
 * 0x02:	Internal function messages
 * 0x04:	Verbose internal messages
 * 0x08:	Warning messages
 * 0x10:	Error messages
 */

static void
vswdebug(vsw_t *vswp, const char *fmt, ...)
{
	char buf[512];
	va_list ap;

	va_start(ap, fmt);
	(void) vsprintf(buf, fmt, ap);
	va_end(ap);

	if (vswp == NULL)
		cmn_err(CE_CONT, "%s\n", buf);
	else
		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
}

/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

#define	DUMP_TAG(tag) \
{							\
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype);	\
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env);	\
}

#define	DUMP_TAG_PTR(tag) \
{							\
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype);	\
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env);	\
}

#define	DUMP_FLAGS(flags) dump_flags(flags);
#define	DISPLAY_STATE()	display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */

#ifdef DEBUG

#define	D1		\
if (vswdbg & 0x01)	\
	vswdebug

#define	D2		\
if (vswdbg & 0x02)	\
	vswdebug

#define	D3		\
if (vswdbg & 0x04)	\
	vswdebug

#define	DWARN		\
if (vswdbg & 0x08)	\
	vswdebug

#define	DERR		\
if (vswdbg & 0x10)	\
	vswdebug

#else

#define	DERR		if (0)	vswdebug
#define	DWARN		if (0)	vswdebug
#define	D1		if (0)	vswdebug
#define	D2		if (0)	vswdebug
#define	D3		if (0)	vswdebug

#endif	/* DEBUG */

static struct modlinkage modlinkage = {
	MODREV_1,
	&vswmodldrv,
	NULL
};

int
_init(void)
{
	int status;

	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);

	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
	if (status != 0) {
		return (status);
	}

	mac_init_ops(&vsw_ops, "vsw");
	status = mod_install(&modlinkage);
	if (status != 0) {
		ddi_soft_state_fini(&vsw_state);
	}
	return (status);
}

int
_fini(void)
{
	int status;

	status = mod_remove(&modlinkage);
	if (status != 0)
		return (status);
	mac_fini_ops(&vsw_ops);
	ddi_soft_state_fini(&vsw_state);

	rw_destroy(&vsw_rw);

	return (status);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	vsw_t		*vswp;
	int		smode, instance, i;
	char		hashname[MAXNAMELEN];
	char		qname[TASKQ_NAMELEN];
	int		rv = 1;
	enum		{ PROG_init = 0x0, PROG_if_lock = 0x1,
			    PROG_fdb = 0x2, PROG_mfdb = 0x4,
			    PROG_report_dev = 0x8, PROG_plist = 0x10,
			    PROG_taskq = 0x20 }
			progress;

	progress = PROG_init;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	case DDI_PM_RESUME:
	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
		return (DDI_FAILURE);
	}
	vswp = ddi_get_soft_state(vsw_state, instance);

	if (vswp == NULL) {
		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
		goto vsw_attach_fail;
	}

	vswp->dip = dip;
	vswp->instance = instance;
	ddi_set_driver_private(dip, (caddr_t)vswp);

	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);

	progress |= PROG_if_lock;

	/*
	 * User specifies (via MD) an array of switching modes in
	 * decreasing order of preference. Default mode is always
	 * layer 2 (mac switching), so init array with that value.
	 */
	vswp->smode_idx = 0;
	for (i = 0; i < NUM_SMODES; i++)
		vswp->smode[i] = VSW_LAYER2;

	/*
	 * Get the various properties such as physical device name
	 * (vsw-phys-dev), switch mode etc from the MD.
	 */
	vsw_get_md_properties(vswp);

	/* setup the unicast forwarding database */
	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
	    vswp->instance);
	D2(vswp, "creating unicast hash table (%s)...", hashname);
	vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
	    mod_hash_null_valdtor, sizeof (void *));

	progress |= PROG_fdb;

	/* setup the multicast forwarding database */
	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
	    vswp->instance);
	D2(vswp, "creating multicast hash table (%s)...", hashname);
	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
	vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
	    mod_hash_null_valdtor, sizeof (void *));

	progress |= PROG_mfdb;

	/*
	 * create lock protecting list of multicast addresses
	 * which could come via m_multicst() entry point when plumbed.
	 */
	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
	vswp->mcap = NULL;

	ddi_report_dev(vswp->dip);

	progress |= PROG_report_dev;

	WRITE_ENTER(&vsw_rw);
	vswp->next = vsw_head;
	vsw_head = vswp;
	RW_EXIT(&vsw_rw);

	/* setup the port list */
	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
	vswp->plist.head = NULL;

	progress |= PROG_plist;

	/*
	 * Create the taskq which will process all the VIO
	 * control messages.
	 */
	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
	    TASKQ_DEFAULTPRI, 0)) == NULL) {
		cmn_err(CE_WARN, "Unable to create task queue");
		goto vsw_attach_fail;
	}

	progress |= PROG_taskq;

	/* select best switching mode */
	for (i = 0; i < NUM_SMODES; i++) {
		smode = vswp->smode[i];
		switch (smode) {
		case VSW_LAYER2:
		case VSW_LAYER2_PROMISC:
			rv = vsw_setup_layer2(vswp);
			break;

		case VSW_LAYER3:
			rv = vsw_setup_layer3(vswp);
			break;

		default:
			DERR(vswp, "unknown switch mode");
			break;
		}

		if (rv == 0) {
			vswp->smode_idx = i;
			break;
		}
	}

	if (rv == 1) {
		cmn_err(CE_WARN, "Unable to setup switching mode");
		goto vsw_attach_fail;
	}

	D2(vswp, "Operating in mode %d", vswp->smode[vswp->smode_idx]);

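	/*
	 * Example: given vsw-switch-mode = "promiscuous", "routed" in
	 * the MD, the loop above first attempts layer 2 switching in
	 * promiscuous mode; if vsw_setup_layer2() fails (e.g. no
	 * physical device could be opened), the driver falls back to
	 * layer 3 routing.
	 */
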
	/*
	 * Register with the MAC layer as a network device so
	 * we can be plumbed if desired.
	 *
	 * Do this in both layer 2 and layer 3 mode.
	 */
	vswp->if_state &= ~VSW_IF_UP;
	if (vswp->mdprops & VSW_MD_MACADDR) {
		if (vsw_mac_register(vswp) != 0) {
			cmn_err(CE_WARN, "Unable to register as provider "
			    "with MAC layer, continuing with attach");
		}
	}

	/* prevent auto-detaching */
	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "Unable to set \"%s\" property for "
		    "instance %u", DDI_NO_AUTODETACH, instance);
	}

	/*
	 * Now we have everything setup, register for MD change
	 * events.
	 */
	vsw_mdeg_register(vswp);

	return (DDI_SUCCESS);

vsw_attach_fail:
	DERR(NULL, "vsw_attach: failed");

	if (progress & PROG_taskq)
		ddi_taskq_destroy(vswp->taskq_p);

	if (progress & PROG_plist)
		rw_destroy(&vswp->plist.lockrw);

	if (progress & PROG_report_dev) {
		ddi_remove_minor_node(dip, NULL);
		mutex_destroy(&vswp->mca_lock);
	}

	if (progress & PROG_mfdb) {
		mod_hash_destroy_hash(vswp->mfdb);
		vswp->mfdb = NULL;
		rw_destroy(&vswp->mfdbrw);
	}

	if (progress & PROG_fdb) {
		mod_hash_destroy_hash(vswp->fdb);
		vswp->fdb = NULL;
	}

	if (progress & PROG_if_lock)
		rw_destroy(&vswp->if_lockrw);

	ddi_soft_state_free(vsw_state, instance);
	return (DDI_FAILURE);
}

static int
vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vio_mblk_pool_t		*poolp, *npoolp;
	vsw_t			**vswpp, *vswp;
	int			instance;

	instance = ddi_get_instance(dip);
	vswp = ddi_get_soft_state(vsw_state, instance);

	if (vswp == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
	case DDI_PM_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	D2(vswp, "detaching instance %d", instance);

	if (vswp->mdprops & VSW_MD_MACADDR) {
		if (vsw_mac_unregister(vswp) != 0) {
			cmn_err(CE_WARN, "Unable to detach from MAC layer");
			return (DDI_FAILURE);
		}
		rw_destroy(&vswp->if_lockrw);
	}

	vsw_mdeg_unregister(vswp);

	if ((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
	    (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) {
		vsw_mac_detach(vswp);
	}

	if (vsw_detach_ports(vswp) != 0) {
		cmn_err(CE_WARN, "Unable to detach ports");
		return (DDI_FAILURE);
	}

	/*
	 * Destroy any free pools that may still exist.
	 */
	poolp = vswp->rxh;
	while (poolp != NULL) {
		npoolp = vswp->rxh = poolp->nextp;
		if (vio_destroy_mblks(poolp) != 0) {
			vswp->rxh = poolp;
			return (DDI_FAILURE);
		}
		poolp = npoolp;
	}

	/*
	 * Remove this instance from any entries it may be on in
	 * the hash table by using the list of addresses maintained
	 * in the vsw_t structure.
	 */
	vsw_del_mcst_vsw(vswp);

	vswp->mcap = NULL;
	mutex_destroy(&vswp->mca_lock);

	/*
	 * By now any pending tasks have finished and the underlying
	 * ldc's have been destroyed, so it's safe to delete the control
	 * message taskq.
	 */
	if (vswp->taskq_p != NULL)
		ddi_taskq_destroy(vswp->taskq_p);

	/*
	 * At this stage all the data pointers in the hash table
	 * should be NULL, as all the ports have been removed and will
	 * have deleted themselves from the port lists which the data
	 * pointers point to. Hence we can destroy the table using the
	 * default destructors.
	 */
	D2(vswp, "vsw_detach: destroying hash tables..");
	mod_hash_destroy_hash(vswp->fdb);
	vswp->fdb = NULL;

	WRITE_ENTER(&vswp->mfdbrw);
	mod_hash_destroy_hash(vswp->mfdb);
	vswp->mfdb = NULL;
	RW_EXIT(&vswp->mfdbrw);
	rw_destroy(&vswp->mfdbrw);

	ddi_remove_minor_node(dip, NULL);

	rw_destroy(&vswp->plist.lockrw);
	WRITE_ENTER(&vsw_rw);
	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
		if (*vswpp == vswp) {
			*vswpp = vswp->next;
			break;
		}
	}
	RW_EXIT(&vsw_rw);
	ddi_soft_state_free(vsw_state, instance);

	return (DDI_SUCCESS);
}

static int
vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	_NOTE(ARGUNUSED(dip))

	vsw_t	*vswp = NULL;
	dev_t	dev = (dev_t)arg;
	int	instance;

	instance = getminor(dev);

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
			*result = NULL;
			return (DDI_FAILURE);
		}
		*result = vswp->dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);

	default:
		*result = NULL;
		return (DDI_FAILURE);
	}
}

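/*
 * For reference, the vsw MD node parsed below looks roughly like this
 * (an illustrative sketch only; the property names match the strings
 * defined at the top of this file, the values are made up):
 *
 *	virtual-device
 *		name			"virtual-network-switch"
 *		cfg-handle		0x0
 *		local-mac-address	0x00144ffb0102
 *		vsw-phys-dev		"e1000g0"
 *		vsw-switch-mode		"switched"
 */
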
/*
 * Get the properties from our MD node.
 */
static void
vsw_get_md_properties(vsw_t *vswp)
{
	md_t		*mdp = NULL;
	int		num_nodes = 0;
	int		len = 0, listsz = 0;
	int		num_vdev = 0;
	int		i, idx;
	boolean_t	found_node = B_FALSE;
	char		*smode = NULL;
	char		*curr_mode = NULL;
	char		*physname = NULL;
	char		*node_name = NULL;
	char		*dev;
	uint64_t	macaddr = 0;
	uint64_t	md_inst, obp_inst;
	mde_cookie_t	*listp = NULL;
	mde_cookie_t	rootnode;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Further down we compare the obp 'reg' property to the
	 * 'cfg-handle' property in the vsw MD node to determine
	 * if the node refers to this particular instance. So if
	 * we can't read the obp value then there is no point
	 * in proceeding further.
	 */
	if (ddi_prop_exists(DDI_DEV_T_ANY, vswp->dip,
	    DDI_PROP_DONTPASS, reg_propname) != 1) {
		cmn_err(CE_WARN, "Unable to read %s property "
		    "from OBP device node", reg_propname);
		return;
	}

	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
	    DDI_PROP_DONTPASS, reg_propname, 0);

	D2(vswp, "%s: obp_inst 0x%llx", __func__, obp_inst);

	if ((mdp = md_get_handle()) == NULL) {
		DERR(vswp, "%s: unable to init MD", __func__);
		return;
	}

	if ((num_nodes = md_node_count(mdp)) <= 0) {
		DERR(vswp, "%s: invalid number of nodes found %d",
		    __func__, num_nodes);
		(void) md_fini_handle(mdp);
		return;
	}

	D2(vswp, "%s: %d nodes in total in MD", __func__, num_nodes);

	/* allocate enough space for node list */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

	rootnode = md_root_node(mdp);

	/* Get the list of virtual devices */
	num_vdev = md_scan_dag(mdp, rootnode,
	    md_find_name(mdp, vdev_propname),
	    md_find_name(mdp, "fwd"), listp);

	if (num_vdev <= 0) {
		DERR(vswp, "%s: didn't find any virtual-device nodes in MD",
		    __func__);
		goto md_prop_exit;
	}

	D2(vswp, "%s: %d virtual-device nodes found", __func__, num_vdev);

	/* Look for the virtual switch nodes in the list */
	for (idx = 0; idx < num_vdev; idx++) {
		if (md_get_prop_str(mdp, listp[idx],
		    "name", &node_name) != 0) {
			DERR(vswp, "%s: unable to get node name", __func__);
			continue;
		}

		if (strcmp(node_name, vsw_propname) == 0) {
			/* Virtual switch node */
			if (md_get_prop_val(mdp, listp[idx],
			    "cfg-handle", &md_inst) != 0) {
				DERR(vswp, "%s: unable to get cfg-handle from"
				    " node %d", __func__, idx);
				goto md_prop_exit;
			} else if (md_inst == obp_inst) {
				D2(vswp, "%s: found matching node (%d)"
				    " 0x%llx == 0x%llx", __func__, idx,
				    md_inst, obp_inst);
				found_node = B_TRUE;
				break;
			}
		}
	}

	if (!found_node) {
		DWARN(vswp, "%s: couldn't find correct vsw node", __func__);
		goto md_prop_exit;
	}

	/*
	 * Now, having found the correct node, get the various properties.
	 */

	if (md_get_prop_data(mdp, listp[idx], physdev_propname,
	    (uint8_t **)(&physname), &len) != 0) {
		cmn_err(CE_WARN, "%s: unable to get name(s) of physical "
		    "device(s) from MD", __func__);
	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
		cmn_err(CE_WARN, "%s is too long a device name", physname);
	} else {
		(void) strncpy(vswp->physname, physname, strlen(physname) + 1);
		vswp->mdprops |= VSW_MD_PHYSNAME;
		D2(vswp, "%s: using first device specified (%s)",
		    __func__, vswp->physname);
	}

#ifdef DEBUG
	/*
	 * As a temporary measure to aid testing we check to see if there
	 * is a vsw.conf file present. If there is we use the value of the
	 * vsw_physname property in the file as the name of the physical
	 * device, overriding the value from the MD.
	 *
	 * There may be multiple devices listed, but for the moment
	 * we just use the first one.
	 */
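	/*
	 * A hypothetical /kernel/drv/vsw.conf doing this would contain
	 * the single (global property) line:
	 *
	 *	vsw_physname="e1000g0";
	 */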
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
		if ((strlen(dev) + 1) > LIFNAMSIZ) {
			cmn_err(CE_WARN, "%s is too long a device name", dev);
		} else {
			cmn_err(CE_NOTE, "%s: using device name (%s) from "
			    "config file", __func__, dev);

			(void) strncpy(vswp->physname, dev, strlen(dev) + 1);
			vswp->mdprops |= VSW_MD_PHYSNAME;
		}

		ddi_prop_free(dev);
	}
#endif

	/* local mac address */
	if (md_get_prop_val(mdp, listp[idx],
	    macaddr_propname, &macaddr) != 0) {
		cmn_err(CE_WARN, "%s: unable to get local MAC address",
		    __func__);
	} else {
		READ_ENTER(&vswp->if_lockrw);
		for (i = ETHERADDRL - 1; i >= 0; i--) {
			vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
			macaddr >>= 8;
		}
		RW_EXIT(&vswp->if_lockrw);
		vswp->mdprops |= VSW_MD_MACADDR;
	}

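	/*
	 * Worked example: local-mac-address = 0x00144ffb0102 unpacks
	 * above, least significant byte into the last octet, to the
	 * address 00:14:4f:fb:01:02.
	 */
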
	/*
	 * Get the switch-mode property. The modes are listed in
	 * decreasing order of preference, i.e. preferred mode is
	 * first item in list.
	 */
	len = 0;
	if (md_get_prop_data(mdp, listp[idx], smode_propname,
	    (uint8_t **)(&smode), &len) != 0) {
		/*
		 * Unable to get switch-mode property, so just use
		 * default values which vswp->smode[] array has already
		 * been pre-populated with, namely layer2.
		 */
		cmn_err(CE_WARN, "%s: unable to get switch mode property, "
		    "defaulting to layer 2 mode", __func__);
	} else {
		i = 0;
		curr_mode = smode;
		/*
		 * Modes of operation:
		 * 'switched'	 - layer 2 switching, underlying HW in
		 *		   non-promiscuous mode.
		 * 'promiscuous' - layer 2 switching, underlying HW in
		 *		   promiscuous mode.
		 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
		 *		   in non-promiscuous mode.
		 */
		while ((curr_mode < (smode + len)) && (i < NUM_SMODES)) {
			D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
			if (strcmp(curr_mode, "switched") == 0)
				vswp->smode[i] = VSW_LAYER2;
			else if (strcmp(curr_mode, "promiscuous") == 0)
				vswp->smode[i] = VSW_LAYER2_PROMISC;
			else if (strcmp(curr_mode, "routed") == 0)
				vswp->smode[i] = VSW_LAYER3;
			else {
				DERR(vswp, "%s: unknown mode %s",
				    __func__, curr_mode);
				/* default to layer 2 */
				vswp->smode[i] = VSW_LAYER2;
			}
			curr_mode += strlen(curr_mode) + 1;
			i++;
		}

		vswp->mdprops |= VSW_MD_SMODE;
	}

md_prop_exit:
	(void) md_fini_handle(mdp);

	kmem_free(listp, listsz);

	D1(vswp, "%s: exit", __func__);
}

static int
vsw_setup_layer2(vsw_t *vswp)
{
	int	rv = 0;

	D1(vswp, "%s: enter", __func__);

	vsw_switch_frame = vsw_switch_l2_frame;

	/*
	 * Attempt to link into the MAC layer so we can get
	 * and send packets out over the physical adapter.
	 */
	if (vswp->mdprops & VSW_MD_PHYSNAME) {
		if (vsw_mac_attach(vswp) != 0) {
			/*
			 * Registration with the MAC layer has failed,
			 * so return 1 so that we can fall back to the
			 * next preferred switching method.
			 */
			cmn_err(CE_WARN, "!unable to join as MAC layer "
			    "client, continuing with attach");
			rv = 1;
		}
	} else {
		/* No physical device name found in MD */
		DERR(vswp, "%s: no physical device name specified", __func__);
		rv = 1;
	}

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

static int
vsw_setup_layer3(vsw_t *vswp)
{
	D1(vswp, "%s: enter", __func__);

	D2(vswp, "%s: operating in layer 3 mode", __func__);
	vsw_switch_frame = vsw_switch_l3_frame;

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Link into the MAC layer to gain access to the services provided by
 * the underlying physical device driver (which should also have
 * registered with the MAC layer).
 *
 * Only when in layer 2 mode.
 */
static int
vsw_mac_attach(vsw_t *vswp)
{
	char	drv[LIFNAMSIZ];
	uint_t	ddi_instance;

	D1(vswp, "vsw_mac_attach: enter");

	vswp->mh = NULL;
	vswp->mrh = NULL;
	vswp->mnh = NULL;

	ASSERT(vswp->mdprops & VSW_MD_PHYSNAME);

	if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "invalid device name: %s", vswp->physname);
		goto mac_fail_exit;
	}
	if ((mac_open(vswp->physname, ddi_instance, &vswp->mh)) != 0) {
		cmn_err(CE_WARN, "mac_open %s failed", vswp->physname);
		goto mac_fail_exit;
	}

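	/*
	 * For a hypothetical physical device name such as "e1000g0",
	 * ddi_parse() above splits it into driver "e1000g" and
	 * instance 0, which mac_open() then uses to locate the
	 * underlying MAC provider.
	 */
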
	D2(vswp, "vsw_mac_attach: using device %s", vswp->physname);

	/* register for changes in the interface */
	vswp->mnh = mac_notify_add(vswp->mh, vsw_notify_cb, (void *)vswp);

	/* register our rx callback function */
	vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp);

	/* get the MAC tx fn */
	vswp->txinfo = mac_tx_get(vswp->mh);

	/* start the interface */
	if (mac_start(vswp->mh) != 0) {
		cmn_err(CE_WARN, "could not start mac interface");
		goto mac_fail_exit;
	}

	/* get and store original promisc setting */
	vswp->init_promisc = mac_promisc_get(vswp->mh, MAC_DEVPROMISC);

	/*
	 * FUTURE: When we have the ability to set multiple unicast
	 * mac addresses then we won't have to set the device into
	 * promisc mode, but for the moment it's the only way we
	 * can see pkts that logical domains we are serving are
	 * interested in.
	 */
	if ((vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) &&
	    (vswp->init_promisc == B_FALSE)) {
		DERR(vswp, "vsw_mac_attach: enabling promisc mode..");

		if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) {
			DERR(vswp, "vsw_mac_attach: unable to set device"
			    " into promiscuous mode");
			goto mac_fail_exit;
		}
	}

	D1(vswp, "vsw_mac_attach: exit");
	return (0);

mac_fail_exit:
	if (vswp->mh != NULL) {
		mac_promisc_set(vswp->mh, vswp->init_promisc, MAC_DEVPROMISC);
		if (vswp->mrh != NULL)
			mac_rx_remove(vswp->mh, vswp->mrh);

		if (vswp->mnh != NULL)
			mac_notify_remove(vswp->mh, vswp->mnh);

		mac_close(vswp->mh);
	}

	vswp->mrh = NULL;
	vswp->mnh = NULL;
	vswp->mh = NULL;
	vswp->txinfo = NULL;

	D1(vswp, "vsw_mac_attach: fail exit");
	return (1);
}

static void
vsw_mac_detach(vsw_t *vswp)
{
	D1(vswp, "vsw_mac_detach: enter");

	if (vswp->mh != NULL) {
		/* restore promisc to original setting */
		mac_promisc_set(vswp->mh, vswp->init_promisc, MAC_DEVPROMISC);
		if (vswp->mrh != NULL)
			mac_rx_remove(vswp->mh, vswp->mrh);

		if (vswp->mnh != NULL)
			mac_notify_remove(vswp->mh, vswp->mnh);

		mac_close(vswp->mh);
	}

	vswp->mrh = NULL;
	vswp->mnh = NULL;
	vswp->mh = NULL;
	vswp->txinfo = NULL;

	D1(vswp, "vsw_mac_detach: exit");
}

/*
 * Get notified of changes to the interface.
 *
 * For the moment we brute force the interface back
 * into promisc mode if it is unset (e.g. by snoop).
 * When we have the ability to set multiple mac addresses,
 * we will need to see if this is necessary.
 */
static void
vsw_notify_cb(void *arg, mac_notify_type_t type)
{
	vsw_t		*vswp = (vsw_t *)arg;

	switch (type) {
	case MAC_NOTE_PROMISC:
		vswp->txinfo = mac_tx_get(vswp->mh);
		if (mac_promisc_get(vswp->mh, MAC_DEVPROMISC) == B_TRUE) {
			D2(vswp, "%s: still in PROMISC mode", __func__);
		} else {
			D2(vswp, "%s: now in NON-PROMISC mode", __func__);
			D2(vswp, "...re-enabling");
			mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC);
		}
		break;
	default:
		break;
	}
}

/*
 * Receive callback routine. Invoked by MAC layer when there
 * are pkts being passed up from physical device.
 *
 * PERF: It may be more efficient when the card is in promisc
 * mode to check the dest address of the pkts here (against
 * the FDB) rather than checking later. Needs to be investigated.
 */
static void
vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
{
	_NOTE(ARGUNUSED(mrh))

	vsw_t		*vswp = (vsw_t *)arg;

	ASSERT(vswp != NULL);

	D1(vswp, "vsw_rx_cb: enter");

	/* switch the chain of packets received */
	vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL);

	D1(vswp, "vsw_rx_cb: exit");
}

/*
 * Send a message out over the physical device via the MAC layer.
 *
 * Returns any mblks that it was unable to transmit.
 */
static mblk_t *
vsw_tx_msg(vsw_t *vswp, mblk_t *mp)
{
	const mac_txinfo_t	*mtp;
	mblk_t			*nextp;

	if (vswp->mh == NULL) {
		DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail");
		return (mp);
	} else {
		for (;;) {
			nextp = mp->b_next;
			mp->b_next = NULL;

			mtp = vswp->txinfo;
			if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) {
				mp->b_next = nextp;
				break;
			}

			if ((mp = nextp) == NULL)
				break;
		}
	}

	return (mp);
}

/*
 * Register with the MAC layer as a network device, so we
 * can be plumbed if necessary.
 */
static int
vsw_mac_register(vsw_t *vswp)
{
	mac_register_t	*macp;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vswp;
	macp->m_dip = vswp->dip;
	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
	macp->m_callbacks = &vsw_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;
	rv = mac_register(macp, &vswp->if_mh);
	mac_free(macp);
	if (rv == 0)
		vswp->if_state |= VSW_IF_REG;

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

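/*
 * Once mac_register() succeeds the instance shows up as a regular
 * network interface, so it can be plumbed in the usual way, e.g.
 * (illustrative addresses):
 *
 *	# ifconfig vsw0 plumb
 *	# ifconfig vsw0 192.168.10.1/24 up
 */
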
static int
vsw_mac_unregister(vsw_t *vswp)
{
	int	rv = 0;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);

	if (vswp->if_state & VSW_IF_REG) {
		rv = mac_unregister(vswp->if_mh);
		if (rv != 0) {
			DWARN(vswp, "%s: unable to unregister from MAC "
			    "framework", __func__);

			RW_EXIT(&vswp->if_lockrw);
			D1(vswp, "%s: fail exit", __func__);
			return (rv);
		}

		/* mark i/f as down and unregistered */
		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
	}
	RW_EXIT(&vswp->if_lockrw);

	vswp->mdprops &= ~VSW_MD_MACADDR;

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

static int
vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	if (vswp->mh == NULL)
		return (EINVAL);

	/* return stats from underlying device */
	*val = mac_stat_get(vswp->mh, stat);
	return (0);
}

static void
vsw_m_stop(void *arg)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	vswp->if_state &= ~VSW_IF_UP;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
}

static int
vsw_m_start(void *arg)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	vswp->if_state |= VSW_IF_UP;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
	return (0);
}

/*
 * Change the local interface address.
 */
static int
vsw_m_unicst(void *arg, const uint8_t *macaddr)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	ether_copy(macaddr, &vswp->if_addr);
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static int
vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	vsw_t		*vswp = (vsw_t *)arg;
	mcst_addr_t	*mcst_p = NULL;
	uint64_t	addr = 0x0;
	int		i;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Convert address into form that can be used
	 * as hash table key.
	 */
	for (i = 0; i < ETHERADDRL; i++) {
		addr = (addr << 8) | mca[i];
	}

	D2(vswp, "%s: addr = 0x%llx", __func__, addr);

	if (add) {
		D2(vswp, "%s: adding multicast", __func__);
		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
			/*
			 * Update the list of multicast addresses
			 * contained within the vsw_t structure to
			 * include this new one.
			 */
			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
			if (mcst_p == NULL) {
				DERR(vswp, "%s unable to alloc mem", __func__);
				return (1);
			}
			mcst_p->addr = addr;

			mutex_enter(&vswp->mca_lock);
			mcst_p->nextp = vswp->mcap;
			vswp->mcap = mcst_p;
			mutex_exit(&vswp->mca_lock);

			/*
			 * Call into the underlying driver to program the
			 * address into HW.
			 *
			 * Note:
			 * Can safely ignore the return value as the card
			 * will for the moment always be in promisc mode.
			 * When we can program multiple MAC addresses into the
			 * HW then we will need to care about the return
			 * value here.
			 */
			if (vswp->mh != NULL)
				(void) mac_multicst_add(vswp->mh, mca);
		}
	} else {
		D2(vswp, "%s: removing multicast", __func__);
		/*
		 * Remove the address from the hash table..
		 */
		if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {

			/*
			 * ..and then from the list maintained in the
			 * vsw_t structure.
			 */
			vsw_del_addr(VSW_LOCALDEV, vswp, addr);

			if (vswp->mh != NULL)
				(void) mac_multicst_remove(vswp->mh, mca);
		}
	}

	D1(vswp, "%s: exit", __func__);

	return (0);
}

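/*
 * Worked example: the all-hosts group address 01:00:5e:00:00:01 is
 * folded by the loop at the top of vsw_m_multicst() into the 64-bit
 * hash key 0x01005e000001.
 */
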
static int
vsw_m_promisc(void *arg, boolean_t on)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	if (on)
		vswp->if_state |= VSW_IF_PROMISC;
	else
		vswp->if_state &= ~VSW_IF_PROMISC;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static mblk_t *
vsw_m_tx(void *arg, mblk_t *mp)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);

	D1(vswp, "%s: exit", __func__);

	return (NULL);
}

/*
 * Register for machine description (MD) updates.
 */
static void
vsw_mdeg_register(vsw_t *vswp)
{
	mdeg_prop_spec_t	*pspecp;
	mdeg_node_spec_t	*inst_specp;
	mdeg_handle_t		mdeg_hdl;
	size_t			templatesz;
	int			inst, rv;

	D1(vswp, "%s: enter", __func__);

	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
	    DDI_PROP_DONTPASS, reg_propname, -1);
	if (inst == -1) {
		DERR(vswp, "%s: unable to get %s property",
		    __func__, reg_propname);
		return;
	}

	D2(vswp, "%s: instance %d registering with mdeg", __func__, inst);

	/*
	 * Allocate and initialize a per-instance copy
	 * of the global property spec array that will
	 * uniquely identify this vsw instance.
	 */
	templatesz = sizeof (vsw_prop_template);
	pspecp = kmem_zalloc(templatesz, KM_SLEEP);

	bcopy(vsw_prop_template, pspecp, templatesz);

	VSW_SET_MDEG_PROP_INST(pspecp, inst);

	/* initialize the complete prop spec structure */
	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
	inst_specp->namep = "virtual-device";
	inst_specp->specp = pspecp;

	/* perform the registration */
	rv = mdeg_register(inst_specp, &vport_match, vsw_mdeg_cb,
	    (void *)vswp, &mdeg_hdl);

	if (rv != MDEG_SUCCESS) {
		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
		kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
		kmem_free(pspecp, templatesz);
		return;
	}

	/* save off data that will be needed later */
	vswp->inst_spec = inst_specp;
	vswp->mdeg_hdl = mdeg_hdl;

	D1(vswp, "%s: exit", __func__);
}

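/*
 * For instance 0 (say), the registration above amounts to asking the
 * MDEG to watch the MD node matching
 *
 *	virtual-device { name = "virtual-network-switch", cfg-handle = 0x0 }
 *
 * and to call vsw_mdeg_cb() whenever one of its 'virtual-device-port'
 * children (matched on the 'id' property, per vport_match) is added
 * or removed.
 */
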
static void
vsw_mdeg_unregister(vsw_t *vswp)
{
	D1(vswp, "vsw_mdeg_unregister: enter");

	(void) mdeg_unregister(vswp->mdeg_hdl);

	if (vswp->inst_spec != NULL) {
		if (vswp->inst_spec->specp != NULL) {
			kmem_free(vswp->inst_spec->specp,
			    sizeof (vsw_prop_template));
			vswp->inst_spec->specp = NULL;
		}

		kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
		vswp->inst_spec = NULL;
	}

	D1(vswp, "vsw_mdeg_unregister: exit");
}

static int
vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
{
	vsw_t		*vswp;
	int		idx;
	md_t		*mdp;
	mde_cookie_t	node;
	uint64_t	inst;

	if (resp == NULL)
		return (MDEG_FAILURE);

	vswp = (vsw_t *)cb_argp;

	D1(vswp, "%s: added %d : removed %d : matched %d",
	    __func__, resp->added.nelem, resp->removed.nelem,
	    resp->match_prev.nelem);

	/* process added ports */
	for (idx = 0; idx < resp->added.nelem; idx++) {
		mdp = resp->added.mdp;
		node = resp->added.mdep[idx];

		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);

		if (vsw_port_add(vswp, mdp, &node) != 0) {
			cmn_err(CE_WARN, "Unable to add new port (0x%lx)",
			    node);
		}
	}

	/* process removed ports */
	for (idx = 0; idx < resp->removed.nelem; idx++) {
		mdp = resp->removed.mdp;
		node = resp->removed.mdep[idx];

		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
			DERR(vswp, "%s: prop(%s) not found port(%d)",
			    __func__, id_propname, idx);
			continue;
		}

		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);

		if (vsw_port_detach(vswp, inst) != 0) {
			cmn_err(CE_WARN, "Unable to remove port %ld", inst);
		}
	}

	/*
	 * Currently no support for updating already active ports.
	 * So, ignore the match_curr and match_prev arrays for now.
	 */

	D1(vswp, "%s: exit", __func__);

	return (MDEG_SUCCESS);
}

/*
 * Add a new port to the system.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
{
	uint64_t		ldc_id;
	uint8_t			*addrp;
	int			i, addrsz;
	int			num_nodes = 0, nchan = 0;
	int			listsz = 0;
	mde_cookie_t		*listp = NULL;
	struct ether_addr	ea;
	uint64_t		macaddr;
	uint64_t		inst = 0;
	vsw_port_t		*port;

	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
		DWARN(vswp, "%s: prop(%s) not found", __func__,
		    id_propname);
		return (1);
	}

	/*
	 * Find the channel endpoint node(s) (which should be under this
	 * port node) which contain the channel id(s).
	 */
	if ((num_nodes = md_node_count(mdp)) <= 0) {
		DERR(vswp, "%s: invalid number of nodes found (%d)",
		    __func__, num_nodes);
		return (1);
	}

	/* allocate enough space for node list */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

	nchan = md_scan_dag(mdp, *node,
	    md_find_name(mdp, chan_propname),
	    md_find_name(mdp, "fwd"), listp);

	if (nchan <= 0) {
		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
		kmem_free(listp, listsz);
		return (1);
	}

	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);

	/* use property from first node found */
	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
		    id_propname);
		kmem_free(listp, listsz);
		return (1);
	}

	/* don't need list any more */
	kmem_free(listp, listsz);

	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);

	/* read mac-address property */
	if (md_get_prop_data(mdp, *node, remaddr_propname,
	    &addrp, &addrsz)) {
		DWARN(vswp, "%s: prop(%s) not found",
		    __func__, remaddr_propname);
		return (1);
	}

	if (addrsz < ETHERADDRL) {
		DWARN(vswp, "%s: invalid address size", __func__);
		return (1);
	}

	macaddr = *((uint64_t *)addrp);
	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);

	for (i = ETHERADDRL - 1; i >= 0; i--) {
		ea.ether_addr_octet[i] = macaddr & 0xFF;
		macaddr >>= 8;
	}

	if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) {
		DERR(vswp, "%s: failed to attach port", __func__);
		return (1);
	}

	port = vsw_lookup_port(vswp, (int)inst);

	/* just successfully created the port, so it should exist */
	ASSERT(port != NULL);

	return (0);
}

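/*
 * An illustrative sketch of the MD port node parsed above (values
 * are made up):
 *
 *	virtual-device-port
 *		id			0x0
 *		remote-mac-address	0x00144ffb0103
 *		channel-endpoint
 *			id		0x12
 */
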
/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids,
    struct ether_addr *macaddr)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port, **prev_port;
	int		i;

	D1(vswp, "%s: enter : port %d", __func__, p_instance);

	/* port already exists? */
	READ_ENTER(&plist->lockrw);
	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			DWARN(vswp, "%s: port instance %d already attached",
			    __func__, p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}
	}
	RW_EXIT(&plist->lockrw);

	port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
	port->p_vswp = vswp;
	port->p_instance = p_instance;
	port->p_ldclist.num_ldcs = 0;
	port->p_ldclist.head = NULL;

	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);

	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&port->ref_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->ref_cv, NULL, CV_DRIVER, NULL);

	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
	port->state = VSW_PORT_INIT;

	if (nids > VSW_PORT_MAX_LDCS) {
		D2(vswp, "%s: using first of %d ldc ids",
		    __func__, nids);
		nids = VSW_PORT_MAX_LDCS;
	}

	D2(vswp, "%s: %d nids", __func__, nids);
	for (i = 0; i < nids; i++) {
		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
			DERR(vswp, "%s: ldc_attach failed", __func__);

			rw_destroy(&port->p_ldclist.lockrw);

			cv_destroy(&port->ref_cv);
			mutex_destroy(&port->ref_lock);

			cv_destroy(&port->state_cv);
			mutex_destroy(&port->state_lock);

			mutex_destroy(&port->tx_lock);
			mutex_destroy(&port->mca_lock);
			kmem_free(port, sizeof (vsw_port_t));
			return (1);
		}
	}

	ether_copy(macaddr, &port->p_macaddr);

	WRITE_ENTER(&plist->lockrw);

	/* create the fdb entry for this port/mac address */
	(void) vsw_add_fdb(vswp, port);

	/* link it into the list of ports for this vsw instance */
	prev_port = (vsw_port_t **)(&plist->head);
	port->p_next = *prev_port;
	*prev_port = port;
	plist->num_ports++;
	RW_EXIT(&plist->lockrw);

	/*
	 * Initialise the port and any ldc's under it.
	 */
	(void) vsw_init_ldcs(port);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
	vsw_port_t	*port = NULL;
	vsw_port_list_t	*plist = &vswp->plist;

	D1(vswp, "%s: enter: port id %d", __func__, p_instance);

	WRITE_ENTER(&plist->lockrw);

	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	if (vsw_plist_del_node(vswp, port)) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	/* Remove the fdb entry for this port/mac address */
	(void) vsw_del_fdb(vswp, port);

	/* Remove any multicast addresses.. */
	vsw_del_mcst_port(port);

	/*
	 * No longer need to hold lock on port list now that we
	 * have unlinked the target port from the list.
	 */
	RW_EXIT(&plist->lockrw);

	if (vsw_port_delete(port)) {
		return (1);
	}

	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
	return (0);
}

/*
 * Detach all active ports.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_detach_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port = NULL;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&plist->lockrw);

	while ((port = plist->head) != NULL) {
		if (vsw_plist_del_node(vswp, port)) {
			DERR(vswp, "%s: Error deleting port %d"
			    " from port list", __func__,
			    port->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}

		/* Remove the fdb entry for this port/mac address */
		(void) vsw_del_fdb(vswp, port);

		/* Remove any multicast addresses.. */
		vsw_del_mcst_port(port);

		/*
		 * No longer need to hold the lock on the port list
		 * now that we have unlinked the target port from the
		 * list.
		 */
		RW_EXIT(&plist->lockrw);
		if (vsw_port_delete(port)) {
			DERR(vswp, "%s: Error deleting port %d",
			    __func__, port->p_instance);
			return (1);
		}
		WRITE_ENTER(&plist->lockrw);
	}
	RW_EXIT(&plist->lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Delete the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_delete(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

	(void) vsw_uninit_ldcs(port);

	/*
	 * Wait for any pending ctrl msg tasks which reference this
	 * port to finish.
	 */
	if (vsw_drain_port_taskq(port))
		return (1);

	/*
	 * Wait for port reference count to hit zero.
	 */
	mutex_enter(&port->ref_lock);
	while (port->ref_cnt != 0)
		cv_wait(&port->ref_cv, &port->ref_lock);
	mutex_exit(&port->ref_lock);

	/*
	 * Wait for any active callbacks to finish
	 */
	if (vsw_drain_ldcs(port))
		return (1);

	ldcl = &port->p_ldclist;
	WRITE_ENTER(&ldcl->lockrw);
	while (ldcl->num_ldcs > 0) {
		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {
			cmn_err(CE_WARN, "unable to detach ldc %ld",
			    ldcl->head->ldc_id);
			RW_EXIT(&ldcl->lockrw);
			return (1);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	rw_destroy(&port->p_ldclist.lockrw);

	mutex_destroy(&port->mca_lock);
	mutex_destroy(&port->tx_lock);
	cv_destroy(&port->ref_cv);
	mutex_destroy(&port->ref_lock);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	kmem_free(port, sizeof (vsw_port_t));

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp = NULL;
	ldc_attr_t	attr;
	ldc_status_t	istatus;
	int		status = DDI_FAILURE;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		DERR(vswp, "%s: kmem_zalloc failed", __func__);
		return (1);
	}
	ldcp->ldc_id = ldc_id;

	/* allocate pool of receive mblks */
	rv = vio_create_mblks(vsw_num_mblks, vsw_mblk_size, &(ldcp->rxh));
	if (rv) {
		DWARN(vswp, "%s: unable to create free mblk pool for"
		    " channel %ld (rv %d)", __func__, ldc_id, rv);
		kmem_free(ldcp, sizeof (vsw_ldc_t));
		return (1);
	}

	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);

	/* required for handshake with peer */
	ldcp->local_session = (uint64_t)ddi_get_lbolt();
	ldcp->peer_session = 0;
	ldcp->session_status = 0;

	mutex_init(&ldcp->hss_lock, NULL, MUTEX_DRIVER, NULL);
	ldcp->hss_id = 1;	/* Initial handshake session id */

	/* only set for outbound lane, inbound set by peer */
	mutex_init(&ldcp->lane_in.seq_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->lane_out.seq_lock, NULL, MUTEX_DRIVER, NULL);
	vsw_set_lane_attr(vswp, &ldcp->lane_out);

	attr.devclass = LDC_DEV_NT_SVC;
	attr.instance = ddi_get_instance(vswp->dip);
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.qlen = VSW_LDC_QLEN;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
		    __func__, ldc_id, status);
		goto ldc_attach_fail;
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
		    __func__, ldc_id, status);
		(void) ldc_fini(ldcp->ldc_handle);
		goto ldc_attach_fail;
	}

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: ldc_status failed", __func__);
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		(void) ldc_fini(ldcp->ldc_handle);
		goto ldc_attach_fail;
	}

	ldcp->ldc_status = istatus;
	ldcp->ldc_port = port;
	ldcp->ldc_vswp = vswp;

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldcl->lockrw);
	ldcp->ldc_next = ldcl->head;
	ldcl->head = ldcp;
	ldcl->num_ldcs++;
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);

2148 */ 2149 cmn_err(CE_WARN, "Creation of ldc channel %ld failed" 2150 " and cannot destroy associated mblk pool", 2151 ldc_id); 2152 ldcp->rxh->nextp = vswp->rxh; 2153 vswp->rxh = ldcp->rxh; 2154 } 2155 } 2156 mutex_destroy(&ldcp->drain_cv_lock); 2157 mutex_destroy(&ldcp->hss_lock); 2158 2159 mutex_destroy(&ldcp->lane_in.seq_lock); 2160 mutex_destroy(&ldcp->lane_out.seq_lock); 2161 kmem_free(ldcp, sizeof (vsw_ldc_t)); 2162 2163 return (1); 2164 } 2165 2166 /* 2167 * Detach a logical domain channel (ldc) belonging to a 2168 * particular port. 2169 * 2170 * Returns 0 on success, 1 on failure. 2171 */ 2172 static int 2173 vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id) 2174 { 2175 vsw_t *vswp = port->p_vswp; 2176 vsw_ldc_t *ldcp, **prev_ldcp; 2177 vsw_ldc_list_t *ldcl = &port->p_ldclist; 2178 int rv; 2179 2180 prev_ldcp = &ldcl->head; 2181 for (; (ldcp = *prev_ldcp) != NULL; prev_ldcp = &ldcp->ldc_next) { 2182 if (ldcp->ldc_id == ldc_id) { 2183 break; 2184 } 2185 } 2186 2187 /* specified ldc id not found */ 2188 if (ldcp == NULL) { 2189 DERR(vswp, "%s: ldcp = NULL", __func__); 2190 return (1); 2191 } 2192 2193 D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id); 2194 2195 /* 2196 * Before we can close the channel we must release any mapped 2197 * resources (e.g. drings). 2198 */ 2199 vsw_free_lane_resources(ldcp, INBOUND); 2200 vsw_free_lane_resources(ldcp, OUTBOUND); 2201 2202 /* 2203 * If the close fails we are in serious trouble, as we won't 2204 * be able to delete the parent port. 2205 */ 2206 if ((rv = ldc_close(ldcp->ldc_handle)) != 0) { 2207 DERR(vswp, "%s: error %d closing channel %lld", 2208 __func__, rv, ldcp->ldc_id); 2209 return (1); 2210 } 2211 2212 (void) ldc_fini(ldcp->ldc_handle); 2213 2214 ldcp->ldc_status = LDC_INIT; 2215 ldcp->ldc_handle = NULL; 2216 ldcp->ldc_vswp = NULL; 2217 2218 if (ldcp->rxh != NULL) { 2219 if (vio_destroy_mblks(ldcp->rxh)) { 2220 /* 2221 * Most likely some mblks are still in use and 2222 * have not been returned to the pool. Add the pool 2223 * to the list maintained in the device instance. 2224 * Another attempt will be made to destroy the pool 2225 * when the device detaches. 2226 */ 2227 ldcp->rxh->nextp = vswp->rxh; 2228 vswp->rxh = ldcp->rxh; 2229 } 2230 } 2231 2232 mutex_destroy(&ldcp->ldc_txlock); 2233 mutex_destroy(&ldcp->ldc_cblock); 2234 cv_destroy(&ldcp->drain_cv); 2235 mutex_destroy(&ldcp->drain_cv_lock); 2236 mutex_destroy(&ldcp->hss_lock); 2237 mutex_destroy(&ldcp->lane_in.seq_lock); 2238 mutex_destroy(&ldcp->lane_out.seq_lock); 2239 2240 /* unlink it from the list */ 2241 *prev_ldcp = ldcp->ldc_next; 2242 ldcl->num_ldcs--; 2243 kmem_free(ldcp, sizeof (vsw_ldc_t)); 2244 2245 return (0); 2246 } 2247 2248 /* 2249 * Open and attempt to bring up the channel. Note that channel 2250 * can only be brought up if peer has also opened channel. 2251 * 2252 * Returns 0 if we can open and bring up the channel, otherwise 2253 * returns 1.
2254 */ 2255 static int 2256 vsw_ldc_init(vsw_ldc_t *ldcp) 2257 { 2258 vsw_t *vswp = ldcp->ldc_vswp; 2259 ldc_status_t istatus = 0; 2260 int rv; 2261 2262 D1(vswp, "%s: enter", __func__); 2263 2264 LDC_ENTER_LOCK(ldcp); 2265 2266 /* don't start at 0 in case clients don't like that */ 2267 ldcp->next_ident = 1; 2268 2269 rv = ldc_open(ldcp->ldc_handle); 2270 if (rv != 0) { 2271 DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)", 2272 __func__, ldcp->ldc_id, rv); 2273 LDC_EXIT_LOCK(ldcp); 2274 return (1); 2275 } 2276 2277 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 2278 DERR(vswp, "%s: unable to get status", __func__); 2279 LDC_EXIT_LOCK(ldcp); 2280 return (1); 2281 2282 } else if (istatus != LDC_OPEN && istatus != LDC_READY) { 2283 DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY", 2284 __func__, ldcp->ldc_id, istatus); 2285 LDC_EXIT_LOCK(ldcp); 2286 return (1); 2287 } 2288 2289 ldcp->ldc_status = istatus; 2290 rv = ldc_up(ldcp->ldc_handle); 2291 if (rv != 0) { 2292 /* 2293 * Not a fatal error for ldc_up() to fail, as peer 2294 * end point may simply not be ready yet. 2295 */ 2296 D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__, 2297 ldcp->ldc_id, rv); 2298 LDC_EXIT_LOCK(ldcp); 2299 return (1); 2300 } 2301 2302 /* 2303 * ldc_up() call is non-blocking so need to explicitly 2304 * check channel status to see if in fact the channel 2305 * is UP. 2306 */ 2307 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 2308 DERR(vswp, "%s: unable to get status", __func__); 2309 LDC_EXIT_LOCK(ldcp); 2310 return (1); 2311 2312 } else if (istatus != LDC_UP) { 2313 DERR(vswp, "%s: id(%lld) status(%d) is not UP", 2314 __func__, ldcp->ldc_id, istatus); 2315 } else { 2316 ldcp->ldc_status = istatus; 2317 } 2318 2319 LDC_EXIT_LOCK(ldcp); 2320 2321 D1(vswp, "%s: exit", __func__); 2322 return (0); 2323 } 2324 2325 /* disable callbacks on the channel */ 2326 static int 2327 vsw_ldc_uninit(vsw_ldc_t *ldcp) 2328 { 2329 vsw_t *vswp = ldcp->ldc_vswp; 2330 int rv; 2331 2332 D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id); 2333 2334 LDC_ENTER_LOCK(ldcp); 2335 2336 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 2337 if (rv != 0) { 2338 DERR(vswp, "vsw_ldc_uninit(%lld): error disabling " 2339 "interrupts (rv = %d)\n", ldcp->ldc_id, rv); 2340 LDC_EXIT_LOCK(ldcp); 2341 return (1); 2342 } 2343 2344 ldcp->ldc_status = LDC_INIT; 2345 2346 LDC_EXIT_LOCK(ldcp); 2347 2348 D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id); 2349 2350 return (0); 2351 } 2352 2353 static int 2354 vsw_init_ldcs(vsw_port_t *port) 2355 { 2356 vsw_ldc_list_t *ldcl = &port->p_ldclist; 2357 vsw_ldc_t *ldcp; 2358 2359 READ_ENTER(&ldcl->lockrw); 2360 ldcp = ldcl->head; 2361 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 2362 (void) vsw_ldc_init(ldcp); 2363 } 2364 RW_EXIT(&ldcl->lockrw); 2365 2366 return (0); 2367 } 2368 2369 static int 2370 vsw_uninit_ldcs(vsw_port_t *port) 2371 { 2372 vsw_ldc_list_t *ldcl = &port->p_ldclist; 2373 vsw_ldc_t *ldcp; 2374 2375 D1(NULL, "vsw_uninit_ldcs: enter\n"); 2376 2377 READ_ENTER(&ldcl->lockrw); 2378 ldcp = ldcl->head; 2379 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 2380 (void) vsw_ldc_uninit(ldcp); 2381 } 2382 RW_EXIT(&ldcl->lockrw); 2383 2384 D1(NULL, "vsw_uninit_ldcs: exit\n"); 2385 2386 return (0); 2387 } 2388 2389 /* 2390 * Wait until the callback(s) associated with the ldcs under the specified 2391 * port have completed. 2392 * 2393 * Prior to this function being invoked each channel under this port 2394 * should have been quiesced via ldc_set_cb_mode(DISABLE). 
2395 * 2396 * A short explanation of what we are doing below: 2397 * 2398 * The simplest approach would be to have a reference counter in 2399 * the ldc structure which is incremented/decremented by the callbacks as 2400 * they use the channel. The drain function could then simply disable any 2401 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately 2402 * there is a tiny window here - before the callback is able to get the lock 2403 * on the channel it is interrupted and this function gets to execute. It 2404 * sees that the ref count is zero and believes it is free to delete the 2405 * associated data structures. 2406 * 2407 * We get around this by taking advantage of the fact that before the ldc 2408 * framework invokes a callback it sets a flag to indicate that there is a 2409 * callback active (or about to become active). If we attempt to 2410 * unregister a callback while this active flag is set then the unregister 2411 * will fail with EWOULDBLOCK. 2412 * 2413 * If the unregister fails we do a cv_timedwait. We will either be signaled 2414 * by the callback as it is exiting (note we have to wait a short period to 2415 * allow the callback to return fully to the ldc framework and it to clear 2416 * the active flag), or by the timer expiring. In either case we again attempt 2417 * the unregister. We repeat this until we can successfully unregister the 2418 * callback. 2419 * 2420 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch 2421 * the case where the callback has finished but the ldc framework has not yet 2422 * cleared the active flag. In this case we would never get a cv_signal. 2423 */ 2424 static int 2425 vsw_drain_ldcs(vsw_port_t *port) 2426 { 2427 vsw_ldc_list_t *ldcl = &port->p_ldclist; 2428 vsw_ldc_t *ldcp; 2429 vsw_t *vswp = port->p_vswp; 2430 2431 D1(vswp, "%s: enter", __func__); 2432 2433 READ_ENTER(&ldcl->lockrw); 2434 2435 ldcp = ldcl->head; 2436 2437 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 2438 /* 2439 * If we can unregister the channel callback then we 2440 * know that there is no callback either running or 2441 * scheduled to run for this channel so move on to next 2442 * channel in the list. 2443 */ 2444 mutex_enter(&ldcp->drain_cv_lock); 2445 2446 /* prompt active callbacks to quit */ 2447 ldcp->drain_state = VSW_LDC_DRAINING; 2448 2449 if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) { 2450 D2(vswp, "%s: unreg callback for chan %ld", __func__, 2451 ldcp->ldc_id); 2452 mutex_exit(&ldcp->drain_cv_lock); 2453 continue; 2454 } else { 2455 /* 2456 * If we end up here we know that either 1) a callback 2457 * is currently executing, 2) is about to start (i.e. 2458 * the ldc framework has set the active flag but 2459 * has not actually invoked the callback yet), or 3) 2460 * has finished and has returned to the ldc framework 2461 * but the ldc framework has not yet cleared the 2462 * active bit. 2463 * 2464 * Wait for it to finish. 2465 */ 2466 while (ldc_unreg_callback(ldcp->ldc_handle) 2467 == EWOULDBLOCK) 2468 (void) cv_timedwait(&ldcp->drain_cv, 2469 &ldcp->drain_cv_lock, lbolt + hz); 2470 2471 mutex_exit(&ldcp->drain_cv_lock); 2472 D2(vswp, "%s: unreg callback for chan %ld after " 2473 "timeout", __func__, ldcp->ldc_id); 2474 } 2475 } 2476 RW_EXIT(&ldcl->lockrw); 2477 2478 D1(vswp, "%s: exit", __func__); 2479 return (0); 2480 } 2481
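/*
 * Illustrative sketch (a condensation, not additional driver code):
 * the generic shape of the disable/unregister/timed-wait drain pattern
 * explained above, as applied per channel in vsw_drain_ldcs():
 *
 *	mutex_enter(&ldcp->drain_cv_lock);
 *	ldcp->drain_state = VSW_LDC_DRAINING;
 *	while (ldc_unreg_callback(ldcp->ldc_handle) == EWOULDBLOCK)
 *		(void) cv_timedwait(&ldcp->drain_cv,
 *		    &ldcp->drain_cv_lock, lbolt + hz);
 *	mutex_exit(&ldcp->drain_cv_lock);
 *
 * The cv_timedwait() bounds each wait, so even if the callback has
 * already returned and will never cv_signal() us, the unregister is
 * retried within roughly a second.
 */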
2482 /* 2483 * Wait until all tasks which reference this port have completed. 2484 * 2485 * Prior to this function being invoked each channel under this port 2486 * should have been quiesced via ldc_set_cb_mode(DISABLE). 2487 */ 2488 static int 2489 vsw_drain_port_taskq(vsw_port_t *port) 2490 { 2491 vsw_t *vswp = port->p_vswp; 2492 2493 D1(vswp, "%s: enter", __func__); 2494 2495 /* 2496 * Mark the port as in the process of being detached, and 2497 * dispatch a marker task to the queue so we know when all 2498 * relevant tasks have completed. 2499 */ 2500 mutex_enter(&port->state_lock); 2501 port->state = VSW_PORT_DETACHING; 2502 2503 if ((vswp->taskq_p == NULL) || 2504 (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task, 2505 port, DDI_NOSLEEP) != DDI_SUCCESS)) { 2506 DERR(vswp, "%s: unable to dispatch marker task", 2507 __func__); 2508 mutex_exit(&port->state_lock); 2509 return (1); 2510 } 2511 2512 /* 2513 * Wait for the marker task to finish. 2514 */ 2515 while (port->state != VSW_PORT_DETACHABLE) 2516 cv_wait(&port->state_cv, &port->state_lock); 2517 2518 mutex_exit(&port->state_lock); 2519 2520 D1(vswp, "%s: exit", __func__); 2521 2522 return (0); 2523 } 2524 2525 static void 2526 vsw_marker_task(void *arg) 2527 { 2528 vsw_port_t *port = arg; 2529 vsw_t *vswp = port->p_vswp; 2530 2531 D1(vswp, "%s: enter", __func__); 2532 2533 mutex_enter(&port->state_lock); 2534 2535 /* 2536 * No further tasks should be dispatched which reference 2537 * this port so ok to mark it as safe to detach. 2538 */ 2539 port->state = VSW_PORT_DETACHABLE; 2540 2541 cv_signal(&port->state_cv); 2542 2543 mutex_exit(&port->state_lock); 2544 2545 D1(vswp, "%s: exit", __func__); 2546 } 2547 2548 static vsw_port_t * 2549 vsw_lookup_port(vsw_t *vswp, int p_instance) 2550 { 2551 vsw_port_list_t *plist = &vswp->plist; 2552 vsw_port_t *port; 2553 2554 for (port = plist->head; port != NULL; port = port->p_next) { 2555 if (port->p_instance == p_instance) { 2556 D2(vswp, "vsw_lookup_port: found p_instance\n"); 2557 return (port); 2558 } 2559 } 2560 2561 return (NULL); 2562 } 2563 2564 /* 2565 * Search for and remove the specified port from the port 2566 * list. Returns 0 if able to locate and remove port, otherwise 2567 * returns 1. 2568 */ 2569 static int 2570 vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port) 2571 { 2572 vsw_port_list_t *plist = &vswp->plist; 2573 vsw_port_t *curr_p, *prev_p; 2574 2575 if (plist->head == NULL) 2576 return (1); 2577 2578 curr_p = prev_p = plist->head; 2579 2580 while (curr_p != NULL) { 2581 if (curr_p == port) { 2582 if (prev_p == curr_p) { 2583 plist->head = curr_p->p_next; 2584 } else { 2585 prev_p->p_next = curr_p->p_next; 2586 } 2587 plist->num_ports--; 2588 return (0); 2589 } else { 2590 prev_p = curr_p; 2591 curr_p = curr_p->p_next; 2592 } 2593 } 2594 return (1); 2595 } 2596 2597 /* 2598 * Interrupt handler for ldc messages. 2599 */ 2600 static uint_t 2601 vsw_ldc_cb(uint64_t event, caddr_t arg) 2602 { 2603 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2604 vsw_t *vswp = ldcp->ldc_vswp; 2605 ldc_status_t lstatus; 2606 int rv; 2607 2608 D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2609 2610 mutex_enter(&ldcp->ldc_cblock); 2611 2612 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 2613 mutex_exit(&ldcp->ldc_cblock); 2614 return (LDC_SUCCESS); 2615 } 2616 2617 if (event & LDC_EVT_UP) { 2618 /* 2619 * Channel has come up, get the state and then start 2620 * the handshake.
2621 */ 2622 rv = ldc_status(ldcp->ldc_handle, &lstatus); 2623 if (rv != 0) { 2624 cmn_err(CE_WARN, "Unable to read channel state"); 2625 } 2626 ldcp->ldc_status = lstatus; 2627 2628 D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)", 2629 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 2630 2631 vsw_restart_handshake(ldcp); 2632 2633 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2634 } 2635 2636 if (event & LDC_EVT_READ) { 2637 /* 2638 * Data available for reading. 2639 */ 2640 D2(vswp, "%s: id(%ld) event(%llx) data READ", 2641 __func__, ldcp->ldc_id, event); 2642 2643 vsw_process_pkt(ldcp); 2644 2645 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2646 2647 goto vsw_cb_exit; 2648 } 2649 2650 if (event & LDC_EVT_RESET) { 2651 rv = ldc_status(ldcp->ldc_handle, &lstatus); 2652 if (rv != 0) { 2653 cmn_err(CE_WARN, "Unable to read channel state"); 2654 } else { 2655 ldcp->ldc_status = lstatus; 2656 } 2657 D2(vswp, "%s: id(%ld) event(%llx) RESET: status (%ld)", 2658 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 2659 } 2660 2661 if (event & LDC_EVT_DOWN) { 2662 rv = ldc_status(ldcp->ldc_handle, &lstatus); 2663 if (rv != 0) { 2664 cmn_err(CE_WARN, "Unable to read channel state"); 2665 } else { 2666 ldcp->ldc_status = lstatus; 2667 } 2668 2669 D2(vswp, "%s: id(%ld) event(%llx) DOWN: status (%ld)", 2670 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 2671 2672 } 2673 2674 /* 2675 * Catch either LDC_EVT_WRITE which we don't support or any 2676 * unknown event. 2677 */ 2678 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET 2679 | LDC_EVT_DOWN | LDC_EVT_READ)) { 2680 2681 DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 2682 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 2683 } 2684 2685 vsw_cb_exit: 2686 mutex_exit(&ldcp->ldc_cblock); 2687 2688 /* 2689 * Let the drain function know we are finishing if it 2690 * is waiting. 2691 */ 2692 mutex_enter(&ldcp->drain_cv_lock); 2693 if (ldcp->drain_state == VSW_LDC_DRAINING) 2694 cv_signal(&ldcp->drain_cv); 2695 mutex_exit(&ldcp->drain_cv_lock); 2696 2697 return (LDC_SUCCESS); 2698 } 2699 2700 /* 2701 * (Re)start a handshake with our peer by sending them 2702 * our version info. 2703 */ 2704 static void 2705 vsw_restart_handshake(vsw_ldc_t *ldcp) 2706 { 2707 vsw_t *vswp = ldcp->ldc_vswp; 2708 vsw_port_t *port; 2709 vsw_ldc_list_t *ldcl; 2710 2711 D1(vswp, "vsw_restart_handshake: enter"); 2712 2713 port = ldcp->ldc_port; 2714 ldcl = &port->p_ldclist; 2715 2716 WRITE_ENTER(&ldcl->lockrw); 2717 2718 D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 2719 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 2720 2721 vsw_free_lane_resources(ldcp, INBOUND); 2722 vsw_free_lane_resources(ldcp, OUTBOUND); 2723 RW_EXIT(&ldcl->lockrw); 2724 2725 ldcp->lane_in.lstate = 0; 2726 ldcp->lane_out.lstate = 0; 2727 2728 /* 2729 * Remove parent port from any multicast groups 2730 * it may have registered with. Client must resend 2731 * multicast add command after handshake completes. 2732 */ 2733 (void) vsw_del_fdb(vswp, port); 2734 2735 vsw_del_mcst_port(port); 2736 2737 ldcp->hphase = VSW_MILESTONE0; 2738 2739 ldcp->peer_session = 0; 2740 ldcp->session_status = 0; 2741 2742 /* 2743 * We now increment the transaction group id. This allows 2744 * us to identify and discard any tasks which are still pending 2745 * on the taskq and refer to the handshake session we are about 2746 * to restart. These stale messages no longer have any real 2747 * meaning.
2748 */ 2749 mutex_enter(&ldcp->hss_lock); 2750 ldcp->hss_id++; 2751 mutex_exit(&ldcp->hss_lock); 2752 2753 if (ldcp->hcnt++ > vsw_num_handshakes) { 2754 cmn_err(CE_WARN, "exceeded number of permitted " 2755 "handshake attempts (%d) on channel %ld", 2756 ldcp->hcnt, ldcp->ldc_id); 2757 return; 2758 } 2759 2760 vsw_send_ver(ldcp); 2761 2762 D1(vswp, "vsw_restart_handshake: exit"); 2763 } 2764
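/*
 * Consumer side of the hss_id bump above (see vsw_process_ctrl_pkt()
 * later in this file): each queued control task carries the hss_id
 * current at dispatch time, and is dropped if a newer handshake
 * session has since started:
 *
 *	mutex_enter(&ldcp->hss_lock);
 *	if (ctaskp->hss_id < ldcp->hss_id)
 *		... discard stale packet ...
 *	mutex_exit(&ldcp->hss_lock);
 */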
2765 /* 2766 * Returns 0 if it was legal for the event signified by flag to have 2767 * occurred at the time it did. Otherwise returns 1. 2768 */ 2769 int 2770 vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag) 2771 { 2772 vsw_t *vswp = ldcp->ldc_vswp; 2773 uint64_t state; 2774 uint64_t phase; 2775 2776 if (dir == INBOUND) 2777 state = ldcp->lane_in.lstate; 2778 else 2779 state = ldcp->lane_out.lstate; 2780 2781 phase = ldcp->hphase; 2782 2783 switch (flag) { 2784 case VSW_VER_INFO_RECV: 2785 if (phase > VSW_MILESTONE0) { 2786 DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV" 2787 " when in state %d\n", ldcp->ldc_id, phase); 2788 vsw_restart_handshake(ldcp); 2789 return (1); 2790 } 2791 break; 2792 2793 case VSW_VER_ACK_RECV: 2794 case VSW_VER_NACK_RECV: 2795 if (!(state & VSW_VER_INFO_SENT)) { 2796 DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK" 2797 " or VER_NACK when in state %d\n", 2798 ldcp->ldc_id, phase); 2799 vsw_restart_handshake(ldcp); 2800 return (1); 2801 } else 2802 state &= ~VSW_VER_INFO_SENT; 2803 break; 2804 2805 case VSW_ATTR_INFO_RECV: 2806 if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) { 2807 DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV" 2808 " when in state %d\n", ldcp->ldc_id, phase); 2809 vsw_restart_handshake(ldcp); 2810 return (1); 2811 } 2812 break; 2813 2814 case VSW_ATTR_ACK_RECV: 2815 case VSW_ATTR_NACK_RECV: 2816 if (!(state & VSW_ATTR_INFO_SENT)) { 2817 DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK" 2818 " or ATTR_NACK when in state %d\n", 2819 ldcp->ldc_id, phase); 2820 vsw_restart_handshake(ldcp); 2821 return (1); 2822 } else 2823 state &= ~VSW_ATTR_INFO_SENT; 2824 break; 2825 2826 case VSW_DRING_INFO_RECV: 2827 if (phase < VSW_MILESTONE1) { 2828 DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV" 2829 " when in state %d\n", ldcp->ldc_id, phase); 2830 vsw_restart_handshake(ldcp); 2831 return (1); 2832 } 2833 break; 2834 2835 case VSW_DRING_ACK_RECV: 2836 case VSW_DRING_NACK_RECV: 2837 if (!(state & VSW_DRING_INFO_SENT)) { 2838 DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK" 2839 " or DRING_NACK when in state %d\n", 2840 ldcp->ldc_id, phase); 2841 vsw_restart_handshake(ldcp); 2842 return (1); 2843 } else 2844 state &= ~VSW_DRING_INFO_SENT; 2845 break; 2846 2847 case VSW_RDX_INFO_RECV: 2848 if (phase < VSW_MILESTONE3) { 2849 DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV" 2850 " when in state %d\n", ldcp->ldc_id, phase); 2851 vsw_restart_handshake(ldcp); 2852 return (1); 2853 } 2854 break; 2855 2856 case VSW_RDX_ACK_RECV: 2857 case VSW_RDX_NACK_RECV: 2858 if (!(state & VSW_RDX_INFO_SENT)) { 2859 DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK" 2860 " or RDX_NACK when in state %d\n", 2861 ldcp->ldc_id, phase); 2862 vsw_restart_handshake(ldcp); 2863 return (1); 2864 } else 2865 state &= ~VSW_RDX_INFO_SENT; 2866 break; 2867 2868 case VSW_MCST_INFO_RECV: 2869 if (phase < VSW_MILESTONE3) { 2870 DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV" 2871 " when in state %d\n", ldcp->ldc_id, phase); 2872 vsw_restart_handshake(ldcp); 2873 return (1); 2874 } 2875 break; 2876 2877 default: 2878 DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)", 2879 ldcp->ldc_id, flag); 2880 return (1); 2881 } 2882 2883 if (dir == INBOUND) 2884 ldcp->lane_in.lstate = state; 2885 else 2886 ldcp->lane_out.lstate = state; 2887 2888 D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id); 2889 2890 return (0); 2891 } 2892
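/*
 * Summary of the handshake milestones checked in vsw_next_milestone()
 * below (derived directly from the code, for orientation):
 *
 *	MILESTONE0 -> MILESTONE1: version info exchanged (VER ACK sent
 *	    on the inbound lane and received on the outbound lane).
 *	MILESTONE1 -> MILESTONE2: attribute info exchanged (ATTR ACK
 *	    sent).
 *	MILESTONE2 -> MILESTONE3: dring info exchanged if the peer is
 *	    in VIO_DRING_MODE, then RDX sent.
 *	MILESTONE3 -> MILESTONE4: RDX exchanged in both directions;
 *	    the outbound lane is marked VSW_LANE_ACTIVE and data may
 *	    flow.
 */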
2893 void 2894 vsw_next_milestone(vsw_ldc_t *ldcp) 2895 { 2896 vsw_t *vswp = ldcp->ldc_vswp; 2897 2898 D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__, 2899 ldcp->ldc_id, ldcp->hphase); 2900 2901 DUMP_FLAGS(ldcp->lane_in.lstate); 2902 DUMP_FLAGS(ldcp->lane_out.lstate); 2903 2904 switch (ldcp->hphase) { 2905 2906 case VSW_MILESTONE0: 2907 /* 2908 * If we haven't started to handshake with our peer, 2909 * start to do so now. 2910 */ 2911 if (ldcp->lane_out.lstate == 0) { 2912 D2(vswp, "%s: (chan %lld) starting handshake " 2913 "with peer", __func__, ldcp->ldc_id); 2914 vsw_restart_handshake(ldcp); 2915 } 2916 2917 /* 2918 * Only way to pass this milestone is to have successfully 2919 * negotiated version info. 2920 */ 2921 if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) && 2922 (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) { 2923 2924 D2(vswp, "%s: (chan %lld) leaving milestone 0", 2925 __func__, ldcp->ldc_id); 2926 2927 /* 2928 * Next milestone is passed when attribute 2929 * information has been successfully exchanged. 2930 */ 2931 ldcp->hphase = VSW_MILESTONE1; 2932 vsw_send_attr(ldcp); 2933 2934 } 2935 break; 2936 2937 case VSW_MILESTONE1: 2938 /* 2939 * Only way to pass this milestone is to have successfully 2940 * negotiated attribute information. 2941 */ 2942 if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) { 2943 2944 ldcp->hphase = VSW_MILESTONE2; 2945 2946 /* 2947 * If the peer device has said it wishes to 2948 * use descriptor rings then we send it our ring 2949 * info, otherwise we just set up a private ring 2950 * which uses an internal buffer. 2951 */ 2952 if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) 2953 vsw_send_dring_info(ldcp); 2954 } 2955 break; 2956 2957 2958 case VSW_MILESTONE2: 2959 /* 2960 * If peer has indicated in its attribute message that 2961 * it wishes to use descriptor rings then the only way 2962 * to pass this milestone is for us to have received 2963 * valid dring info. 2964 * 2965 * If peer is not using descriptor rings then just fall 2966 * through. 2967 */ 2968 if ((ldcp->lane_in.xfer_mode == VIO_DRING_MODE) && 2969 (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))) 2970 break; 2971 2972 D2(vswp, "%s: (chan %lld) leaving milestone 2", 2973 __func__, ldcp->ldc_id); 2974 2975 ldcp->hphase = VSW_MILESTONE3; 2976 vsw_send_rdx(ldcp); 2977 break; 2978 2979 case VSW_MILESTONE3: 2980 /* 2981 * Pass this milestone when all parameters have been 2982 * successfully exchanged and RDX sent in both directions. 2983 * 2984 * Mark outbound lane as available to transmit data. 2985 */ 2986 if ((ldcp->lane_in.lstate & VSW_RDX_ACK_SENT) && 2987 (ldcp->lane_out.lstate & VSW_RDX_ACK_RECV)) { 2988 2989 D2(vswp, "%s: (chan %lld) leaving milestone 3", 2990 __func__, ldcp->ldc_id); 2991 D2(vswp, "%s: ** handshake complete **", __func__); 2992 ldcp->lane_out.lstate |= VSW_LANE_ACTIVE; 2993 ldcp->hphase = VSW_MILESTONE4; 2994 ldcp->hcnt = 0; 2995 DISPLAY_STATE(); 2996 } 2997 break; 2998 2999 case VSW_MILESTONE4: 3000 D2(vswp, "%s: (chan %lld) in milestone 4", __func__, 3001 ldcp->ldc_id); 3002 break; 3003 3004 default: 3005 DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__, 3006 ldcp->ldc_id, ldcp->hphase); 3007 } 3008 3009 D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id, 3010 ldcp->hphase); 3011 } 3012 3013 /* 3014 * Check if major version is supported. 3015 * 3016 * Returns 0 if it finds a supported major number, and if necessary 3017 * adjusts the minor field. 3018 * 3019 * Returns 1 if it can't match the major number exactly. Sets major/minor 3020 * to the next lowest supported values, or to zero if no other values are possible. 3021 */ 3022 static int 3023 vsw_supported_version(vio_ver_msg_t *vp) 3024 { 3025 int i; 3026 3027 D1(NULL, "vsw_supported_version: enter"); 3028 3029 for (i = 0; i < VSW_NUM_VER; i++) { 3030 if (vsw_versions[i].ver_major == vp->ver_major) { 3031 /* 3032 * Matching major version found. Update 3033 * minor number if necessary. 3034 */ 3035 if (vp->ver_minor > vsw_versions[i].ver_minor) { 3036 D2(NULL, "%s: adjusting minor value" 3037 " from %d to %d", __func__, 3038 vp->ver_minor, 3039 vsw_versions[i].ver_minor); 3040 vp->ver_minor = vsw_versions[i].ver_minor; 3041 } 3042 3043 return (0); 3044 } 3045 3046 if (vsw_versions[i].ver_major < vp->ver_major) { 3047 vp->ver_major = vsw_versions[i].ver_major; if (vp->ver_minor > vsw_versions[i].ver_minor) { 3048 D2(NULL, "%s: adjusting minor value" 3049 " from %d to %d", __func__, 3050 vp->ver_minor, 3051 vsw_versions[i].ver_minor); 3052 vp->ver_minor = vsw_versions[i].ver_minor; 3053 } 3054 return (1); 3055 } 3056 } 3057 3058 /* No match was possible, zero out fields */ 3059 vp->ver_major = 0; 3060 vp->ver_minor = 0; 3061 3062 D1(NULL, "vsw_supported_version: exit"); 3063 3064 return (1); 3065 } 3066
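/*
 * Worked example of the search above (version values are illustrative,
 * not the actual contents of vsw_versions[]): if we support {2.0, 1.3}
 * and the peer proposes 2.1, the first entry matches major 2 and the
 * minor is clamped to 0, so we return 0 with the message rewritten to
 * 2.0. If the peer proposes 3.5, no major matches; the first entry
 * with a lower major rewrites the message to 2.0 and we return 1,
 * prompting the caller to re-propose that version. If the peer
 * proposes 0.9, the table is exhausted, both fields are zeroed and 1
 * is returned.
 */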
3067 /* 3068 * Main routine for processing messages received over LDC. 3069 */ 3070 static void 3071 vsw_process_pkt(void *arg) 3072 { 3073 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 3074 vsw_t *vswp = ldcp->ldc_vswp; 3075 size_t msglen; 3076 vio_msg_tag_t tag; 3077 def_msg_t dmsg; 3078 int rv = 0; 3079 3080 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 3081 3082 /* 3083 * If channel is up read messages until channel is empty. 3084 */ 3085 do { 3086 msglen = sizeof (dmsg); 3087 rv = ldc_read(ldcp->ldc_handle, (caddr_t)&dmsg, &msglen); 3088 3089 if (rv != 0) { 3090 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) " 3091 "len(%d)\n", __func__, ldcp->ldc_id, 3092 rv, msglen); 3093 break; 3094 } 3095 3096 if (msglen == 0) { 3097 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 3098 ldcp->ldc_id); 3099 break; 3100 } 3101 3102 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 3103 ldcp->ldc_id, msglen); 3104 3105 /* 3106 * Figure out what sort of packet we have gotten by 3107 * examining the msg tag, and then switch it appropriately. 3108 */ 3109 bcopy(&dmsg, &tag, sizeof (vio_msg_tag_t)); 3110 3111 switch (tag.vio_msgtype) { 3112 case VIO_TYPE_CTRL: 3113 vsw_dispatch_ctrl_task(ldcp, &dmsg, tag); 3114 break; 3115 case VIO_TYPE_DATA: 3116 vsw_process_data_pkt(ldcp, &dmsg, tag); 3117 break; 3118 case VIO_TYPE_ERR: 3119 vsw_process_err_pkt(ldcp, &dmsg, tag); 3120 break; 3121 default: 3122 DERR(vswp, "%s: Unknown tag(%lx) " 3123 "id(%lx)\n", __func__, tag.vio_msgtype, ldcp->ldc_id); 3124 break; 3125 } 3126 } while (msglen); 3127 3128 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 3129 } 3130 3131 /* 3132 * Dispatch a task to process a VIO control message. 3133 */ 3134 static void 3135 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t tag) 3136 { 3137 vsw_ctrl_task_t *ctaskp = NULL; 3138 vsw_port_t *port = ldcp->ldc_port; 3139 vsw_t *vswp = port->p_vswp; 3140 3141 D1(vswp, "%s: enter", __func__); 3142 3143 /* 3144 * We need to handle RDX ACK messages in-band as once they 3145 * are exchanged it is possible that we will get an 3146 * immediate (legitimate) data packet. 3147 */ 3148 if ((tag.vio_subtype_env == VIO_RDX) && 3149 (tag.vio_subtype == VIO_SUBTYPE_ACK)) { 3150 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_ACK_RECV)) 3151 return; 3152 3153 ldcp->lane_out.lstate |= VSW_RDX_ACK_RECV; 3154 vsw_next_milestone(ldcp); 3155 D2(vswp, "%s (%ld) handling RDX_ACK in place", __func__, 3156 ldcp->ldc_id); 3157 return; 3158 } 3159 3160 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 3161 3162 if (ctaskp == NULL) { 3163 DERR(vswp, "%s: unable to alloc space for ctrl" 3164 " msg", __func__); 3165 vsw_restart_handshake(ldcp); 3166 return; 3167 } 3168 3169 ctaskp->ldcp = ldcp; 3170 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 3171 mutex_enter(&ldcp->hss_lock); 3172 ctaskp->hss_id = ldcp->hss_id; 3173 mutex_exit(&ldcp->hss_lock); 3174 3175 /* 3176 * Dispatch task to processing taskq if port is not in 3177 * the process of being detached. 3178 */ 3179 mutex_enter(&port->state_lock); 3180 if (port->state == VSW_PORT_INIT) { 3181 if ((vswp->taskq_p == NULL) || 3182 (ddi_taskq_dispatch(vswp->taskq_p, 3183 vsw_process_ctrl_pkt, ctaskp, DDI_NOSLEEP) 3184 != DDI_SUCCESS)) { 3185 DERR(vswp, "%s: unable to dispatch task to taskq", 3186 __func__); 3187 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 3188 mutex_exit(&port->state_lock); 3189 vsw_restart_handshake(ldcp); 3190 return; 3191 } 3192 } else { 3193 DWARN(vswp, "%s: port %d detaching, not dispatching " 3194 "task", __func__, port->p_instance); kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 3195 } 3196 3197 mutex_exit(&port->state_lock); 3198 3199 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 3200 ldcp->ldc_id); 3201 D1(vswp, "%s: exit", __func__); 3202 } 3203
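/*
 * Why the RDX ACK is handled in-band in vsw_dispatch_ctrl_task()
 * rather than via the taskq - a worked interleaving (illustrative):
 * if the ACK were queued, the peer, which considers the handshake
 * complete the moment it sends the ACK, could transmit a data message
 * that the callback processes *before* the queued task has moved us
 * to MILESTONE4, and vsw_process_data_pkt() would then wrongly
 * restart the handshake. Handling the ACK in the callback itself
 * keeps the milestone update ordered ahead of any subsequent data.
 */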
3204 /* 3205 * Process a VIO ctrl message. Invoked from taskq. 3206 */ 3207 static void 3208 vsw_process_ctrl_pkt(void *arg) 3209 { 3210 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 3211 vsw_ldc_t *ldcp = ctaskp->ldcp; 3212 vsw_t *vswp = ldcp->ldc_vswp; 3213 vio_msg_tag_t tag; 3214 uint16_t env; 3215 3216 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3217 3218 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 3219 env = tag.vio_subtype_env; 3220 3221 /* stale pkt check */ 3222 mutex_enter(&ldcp->hss_lock); 3223 if (ctaskp->hss_id < ldcp->hss_id) { 3224 DWARN(vswp, "%s: discarding stale packet belonging to" 3225 " earlier (%ld) handshake session", __func__, 3226 ctaskp->hss_id); 3227 mutex_exit(&ldcp->hss_lock); kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 3228 return; 3229 } 3230 mutex_exit(&ldcp->hss_lock); 3231 3232 /* session id check */ 3233 if (ldcp->session_status & VSW_PEER_SESSION) { 3234 if (ldcp->peer_session != tag.vio_sid) { 3235 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3236 __func__, ldcp->ldc_id, tag.vio_sid); 3237 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 3238 vsw_restart_handshake(ldcp); 3239 return; 3240 } 3241 } 3242 3243 /* 3244 * Switch on vio_subtype envelope, then let lower routines 3245 * decide if it's an INFO, ACK or NACK packet. 3246 */ 3247 switch (env) { 3248 case VIO_VER_INFO: 3249 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 3250 break; 3251 case VIO_DRING_REG: 3252 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 3253 break; 3254 case VIO_DRING_UNREG: 3255 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 3256 break; 3257 case VIO_ATTR_INFO: 3258 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 3259 break; 3260 case VNET_MCAST_INFO: 3261 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 3262 break; 3263 case VIO_RDX: 3264 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 3265 break; 3266 default: 3267 DERR(vswp, "%s : unknown vio_subtype_env (%x)\n", 3268 __func__, env); 3269 } 3270 3271 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 3272 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3273 } 3274 3275 /* 3276 * Version negotiation. We can end up here either because our peer 3277 * has responded to a handshake message we have sent it, or our peer 3278 * has initiated a handshake with us. If it's the former then it can only 3279 * be an ACK or NACK; if it's the latter it can only be an INFO. 3280 * 3281 * If it's an ACK we move to the next stage of the handshake, namely 3282 * attribute exchange. If it's a NACK we see if we can specify another 3283 * version; if we can't we stop. 3284 * 3285 * If it is an INFO we reset all params associated with communication 3286 * in that direction over this channel (remember the connection is 3287 * essentially 2 independent simplex channels). 3288 */ 3289 void 3290 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 3291 { 3292 vio_ver_msg_t *ver_pkt; 3293 vsw_t *vswp = ldcp->ldc_vswp; 3294 3295 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3296 3297 /* 3298 * We know this is a ctrl/version packet so 3299 * cast it into the correct structure. 3300 */ 3301 ver_pkt = (vio_ver_msg_t *)pkt; 3302 3303 switch (ver_pkt->tag.vio_subtype) { 3304 case VIO_SUBTYPE_INFO: 3305 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 3306 3307 /* 3308 * Record the session id, which we will use from now 3309 * until we see another VER_INFO msg. Even then the 3310 * session id in most cases will be unchanged, except 3311 * if the channel was reset.
3312 */ 3313 if ((ldcp->session_status & VSW_PEER_SESSION) && 3314 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 3315 DERR(vswp, "%s: updating session id for chan %lld " 3316 "from %llx to %llx", __func__, ldcp->ldc_id, 3317 ldcp->peer_session, ver_pkt->tag.vio_sid); 3318 } 3319 3320 ldcp->peer_session = ver_pkt->tag.vio_sid; 3321 ldcp->session_status |= VSW_PEER_SESSION; 3322 3323 /* Legal message at this time ? */ 3324 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 3325 return; 3326 3327 /* 3328 * First check the device class. Currently we only expect 3329 * to be talking to a network device. In the future we may 3330 * also talk to another switch. 3331 */ 3332 if (ver_pkt->dev_class != VDEV_NETWORK) { 3333 DERR(vswp, "%s: illegal device class %d", __func__, 3334 ver_pkt->dev_class); 3335 3336 ver_pkt->tag.vio_sid = ldcp->local_session; 3337 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3338 3339 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 3340 3341 vsw_send_msg(ldcp, (void *)ver_pkt, 3342 sizeof (vio_ver_msg_t)); 3343 3344 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 3345 vsw_next_milestone(ldcp); 3346 return; 3347 } else { 3348 ldcp->dev_class = ver_pkt->dev_class; 3349 } 3350 3351 /* 3352 * Now check the version. 3353 */ 3354 if (vsw_supported_version(ver_pkt) == 0) { 3355 /* 3356 * We support this major version and possibly an 3357 * adjusted minor version. 3358 */ 3359 3360 D2(vswp, "%s: accepted ver %d:%d", __func__, 3361 ver_pkt->ver_major, ver_pkt->ver_minor); 3362 3363 /* Store accepted values */ 3364 ldcp->lane_in.ver_major = ver_pkt->ver_major; 3365 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 3366 3367 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3368 3369 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 3370 } else { 3371 /* 3372 * NACK back with the next lower major/minor 3373 * pairing we support (if we don't support any more 3374 * versions then they will be set to zero). 3375 */ 3376 3377 D2(vswp, "%s: replying with ver %d:%d", __func__, 3378 ver_pkt->ver_major, ver_pkt->ver_minor); 3379 3380 /* Store updated values */ 3381 ldcp->lane_in.ver_major = ver_pkt->ver_major; 3382 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 3383 3384 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3385 3386 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 3387 } 3388 3389 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 3390 ver_pkt->tag.vio_sid = ldcp->local_session; 3391 vsw_send_msg(ldcp, (void *)ver_pkt, sizeof (vio_ver_msg_t)); 3392 3393 vsw_next_milestone(ldcp); 3394 break; 3395 3396 case VIO_SUBTYPE_ACK: 3397 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 3398 3399 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 3400 return; 3401 3402 /* Store updated values */ 3403 ldcp->lane_in.ver_major = ver_pkt->ver_major; 3404 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 3405 3406 3407 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 3408 vsw_next_milestone(ldcp); 3409 3410 break; 3411 3412 case VIO_SUBTYPE_NACK: 3413 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 3414 3415 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 3416 return; 3417 3418 /* 3419 * If our peer sent us a NACK with the ver fields set to 3420 * zero then there is nothing more we can do. Otherwise see 3421 * if we support either the version suggested, or a lesser 3422 * one.
3423 */ 3424 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 3425 DERR(vswp, "%s: peer unable to negotiate any " 3426 "further.", __func__); 3427 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 3428 vsw_next_milestone(ldcp); 3429 return; 3430 } 3431 3432 /* 3433 * Check to see if we support this major version or 3434 * a lower one. If we don't then maj/min will be set 3435 * to zero. 3436 */ 3437 (void) vsw_supported_version(ver_pkt); 3438 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 3439 /* Nothing more we can do */ 3440 DERR(vswp, "%s: version negotiation failed.\n", 3441 __func__); 3442 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 3443 vsw_next_milestone(ldcp); 3444 } else { 3445 /* found a supported major version */ 3446 ldcp->lane_out.ver_major = ver_pkt->ver_major; 3447 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 3448 3449 D2(vswp, "%s: resending with updated values (%x, %x)", 3450 __func__, ver_pkt->ver_major, 3451 ver_pkt->ver_minor); 3452 3453 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 3454 ver_pkt->tag.vio_sid = ldcp->local_session; 3455 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3456 3457 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 3458 3459 vsw_send_msg(ldcp, (void *)ver_pkt, 3460 sizeof (vio_ver_msg_t)); 3461 3462 vsw_next_milestone(ldcp); 3463 3464 } 3465 break; 3466 3467 default: 3468 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3469 ver_pkt->tag.vio_subtype); 3470 } 3471 3472 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 3473 } 3474 3475 /* 3476 * Process an attribute packet. We can end up here either because our peer 3477 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 3478 * peer has sent us an attribute INFO message. 3479 * 3480 * If it's an ACK we then move to the next stage of the handshake which 3481 * is to send our descriptor ring info to our peer. If it's a NACK then 3482 * there is nothing more we can (currently) do. 3483 * 3484 * If we get a valid/acceptable INFO packet (and we have already negotiated 3485 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 3486 * NACK back and reset channel state to INACTIVE. 3487 * 3488 * FUTURE: in time we will probably negotiate over attributes, but for 3489 * the moment unacceptable attributes are regarded as a fatal error. 3490 * 3491 */ 3492 void 3493 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 3494 { 3495 vnet_attr_msg_t *attr_pkt; 3496 vsw_t *vswp = ldcp->ldc_vswp; 3497 vsw_port_t *port = ldcp->ldc_port; 3498 uint64_t macaddr = 0; 3499 int i; 3500 3501 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3502 3503 /* 3504 * We know this is a ctrl/attr packet so 3505 * cast it into the correct structure. 3506 */ 3507 attr_pkt = (vnet_attr_msg_t *)pkt; 3508 3509 switch (attr_pkt->tag.vio_subtype) { 3510 case VIO_SUBTYPE_INFO: 3511 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3512 3513 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 3514 return; 3515 3516 /* 3517 * If the attributes are unacceptable then we NACK back.
3518 */ 3519 if (vsw_check_attr(attr_pkt, ldcp->ldc_port)) { 3520 3521 DERR(vswp, "%s (chan %d): invalid attributes", 3522 __func__, ldcp->ldc_id); 3523 3524 vsw_free_lane_resources(ldcp, INBOUND); 3525 3526 attr_pkt->tag.vio_sid = ldcp->local_session; 3527 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3528 3529 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 3530 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 3531 vsw_send_msg(ldcp, (void *)attr_pkt, 3532 sizeof (vnet_attr_msg_t)); 3533 3534 vsw_next_milestone(ldcp); 3535 return; 3536 } 3537 3538 /* 3539 * Otherwise store attributes for this lane and update 3540 * lane state. 3541 */ 3542 ldcp->lane_in.mtu = attr_pkt->mtu; 3543 ldcp->lane_in.addr = attr_pkt->addr; 3544 ldcp->lane_in.addr_type = attr_pkt->addr_type; 3545 ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode; 3546 ldcp->lane_in.ack_freq = attr_pkt->ack_freq; 3547 3548 macaddr = ldcp->lane_in.addr; 3549 for (i = ETHERADDRL - 1; i >= 0; i--) { 3550 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 3551 macaddr >>= 8; 3552 } 3553 3554 /* create the fdb entry for this port/mac address */ 3555 (void) vsw_add_fdb(vswp, port); 3556 3557 /* set up device specific xmit routines */ 3558 mutex_enter(&port->tx_lock); 3559 if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) { 3560 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 3561 port->transmit = vsw_dringsend; 3562 } else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) { 3563 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 3564 vsw_create_privring(ldcp); 3565 port->transmit = vsw_descrsend; 3566 } 3567 mutex_exit(&port->tx_lock); 3568 3569 attr_pkt->tag.vio_sid = ldcp->local_session; 3570 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3571 3572 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 3573 3574 ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT; 3575 3576 vsw_send_msg(ldcp, (void *)attr_pkt, 3577 sizeof (vnet_attr_msg_t)); 3578 3579 vsw_next_milestone(ldcp); 3580 break; 3581 3582 case VIO_SUBTYPE_ACK: 3583 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3584 3585 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 3586 return; 3587 3588 ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV; 3589 vsw_next_milestone(ldcp); 3590 break; 3591 3592 case VIO_SUBTYPE_NACK: 3593 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3594 3595 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 3596 return; 3597 3598 ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV; 3599 vsw_next_milestone(ldcp); 3600 break; 3601 3602 default: 3603 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3604 attr_pkt->tag.vio_subtype); 3605 } 3606 3607 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3608 } 3609
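/*
 * Worked example of the address unpacking loop in the INFO case above:
 * the attr message carries the MAC address as a uint64_t, e.g.
 * 0x0003BA123456. The loop peels one byte per iteration from the least
 * significant end, filling ether_addr_octet[] from the tail, so the
 * resulting p_macaddr is 00:03:ba:12:34:56 (octet[5] = 0x56, ...,
 * octet[0] = 0x00).
 */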
3610 /* 3611 * Process a dring info packet. We can end up here either because our peer 3612 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 3613 * peer has sent us a dring INFO message. 3614 * 3615 * If we get a valid/acceptable INFO packet (and we have already negotiated 3616 * a version) we ACK back and update the lane state, otherwise we NACK back. 3617 * 3618 * FUTURE: nothing to stop client from sending us info on multiple drings 3619 * but for the moment we will just use the first one we are given. 3620 * 3621 */ 3622 void 3623 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 3624 { 3625 vio_dring_reg_msg_t *dring_pkt; 3626 vsw_t *vswp = ldcp->ldc_vswp; 3627 ldc_mem_info_t minfo; 3628 dring_info_t *dp, *dbp; 3629 int dring_found = 0; 3630 3631 /* 3632 * We know this is a ctrl/dring packet so 3633 * cast it into the correct structure. 3634 */ 3635 dring_pkt = (vio_dring_reg_msg_t *)pkt; 3636 3637 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3638 3639 switch (dring_pkt->tag.vio_subtype) { 3640 case VIO_SUBTYPE_INFO: 3641 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3642 3643 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3644 return; 3645 3646 /* 3647 * If the dring params are unacceptable then we NACK back. 3648 */ 3649 if (vsw_check_dring_info(dring_pkt)) { 3650 3651 DERR(vswp, "%s (%lld): invalid dring info", 3652 __func__, ldcp->ldc_id); 3653 3654 vsw_free_lane_resources(ldcp, INBOUND); 3655 3656 dring_pkt->tag.vio_sid = ldcp->local_session; 3657 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3658 3659 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 3660 3661 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 3662 3663 vsw_send_msg(ldcp, (void *)dring_pkt, 3664 sizeof (vio_dring_reg_msg_t)); 3665 3666 vsw_next_milestone(ldcp); 3667 return; 3668 } 3669 3670 /* 3671 * Otherwise, attempt to map in the dring using the 3672 * cookie. If that succeeds we send back a unique dring 3673 * identifier that the sending side will use in future 3674 * to refer to this descriptor ring. 3675 */ 3676 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 3677 3678 dp->num_descriptors = dring_pkt->num_descriptors; 3679 dp->descriptor_size = dring_pkt->descriptor_size; 3680 dp->options = dring_pkt->options; 3681 dp->ncookies = dring_pkt->ncookies; 3682 3683 /* 3684 * Note: should only get one cookie. Enforced in 3685 * the ldc layer. 3686 */ 3687 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 3688 sizeof (ldc_mem_cookie_t)); 3689 3690 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 3691 dp->num_descriptors, dp->descriptor_size); 3692 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 3693 dp->options, dp->ncookies); 3694 3695 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 3696 dp->ncookies, dp->num_descriptors, 3697 dp->descriptor_size, LDC_SHADOW_MAP, 3698 &(dp->handle))) != 0) { 3699 3700 DERR(vswp, "%s: dring_map failed\n", __func__); 3701 3702 kmem_free(dp, sizeof (dring_info_t)); 3703 vsw_free_lane_resources(ldcp, INBOUND); 3704 3705 dring_pkt->tag.vio_sid = ldcp->local_session; 3706 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3707 3708 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 3709 3710 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 3711 vsw_send_msg(ldcp, (void *)dring_pkt, 3712 sizeof (vio_dring_reg_msg_t)); 3713 3714 vsw_next_milestone(ldcp); 3715 return; 3716 } 3717 3718 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 3719 3720 DERR(vswp, "%s: ldc_mem_dring_info failed\n", __func__); 3721 3722 kmem_free(dp, sizeof (dring_info_t)); 3723 vsw_free_lane_resources(ldcp, INBOUND); 3724 3725 dring_pkt->tag.vio_sid = ldcp->local_session; 3726 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3727 3728 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 3729 3730 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 3731 vsw_send_msg(ldcp, (void *)dring_pkt, 3732 sizeof (vio_dring_reg_msg_t)); 3733 3734 vsw_next_milestone(ldcp); 3735 return; 3736 } else { 3737 /* store the address of the pub part of ring */ 3738 dp->pub_addr = minfo.vaddr; 3739 } 3740 3741 /* no private section as we are importing */ 3742 dp->priv_addr = NULL; 3743 3744 /* 3745 * Using a simple monotonically increasing int for ident at 3746 * the moment. 3747 */ 3748 dp->ident = ldcp->next_ident; 3749 ldcp->next_ident++; 3750 3751 dp->end_idx = 0; 3752 dp->next = NULL; 3753 3754 /* 3755 * Link it onto the end of the list of drings 3756 * for this lane.
3757 */ 3758 if (ldcp->lane_in.dringp == NULL) { 3759 D2(vswp, "%s: adding first INBOUND dring", __func__); 3760 ldcp->lane_in.dringp = dp; 3761 } else { 3762 dbp = ldcp->lane_in.dringp; 3763 3764 while (dbp->next != NULL) 3765 dbp = dbp->next; 3766 3767 dbp->next = dp; 3768 } 3769 3770 /* acknowledge it */ 3771 dring_pkt->tag.vio_sid = ldcp->local_session; 3772 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3773 dring_pkt->dring_ident = dp->ident; 3774 3775 vsw_send_msg(ldcp, (void *)dring_pkt, 3776 sizeof (vio_dring_reg_msg_t)); 3777 3778 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 3779 vsw_next_milestone(ldcp); 3780 break; 3781 3782 case VIO_SUBTYPE_ACK: 3783 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3784 3785 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 3786 return; 3787 3788 /* 3789 * Peer is acknowledging our dring info and will have 3790 * sent us a dring identifier which we will use to 3791 * refer to this ring w.r.t. our peer. 3792 */ 3793 dp = ldcp->lane_out.dringp; 3794 if (dp != NULL) { 3795 /* 3796 * Find the ring this ident should be associated 3797 * with. 3798 */ 3799 while (dp != NULL) { 3800 if (vsw_dring_match(dp, dring_pkt)) { 3801 dring_found = 1; 3802 break; 3803 } 3804 dp = dp->next; 3805 } 3806 3807 3808 3809 3810 if (dring_found == 0) { 3811 DERR(NULL, "%s: unrecognised ring cookie", 3812 __func__); 3813 vsw_restart_handshake(ldcp); 3814 return; 3815 } 3816 3817 } else { 3818 DERR(vswp, "%s: DRING ACK received but no drings " 3819 "allocated", __func__); 3820 vsw_restart_handshake(ldcp); 3821 return; 3822 } 3823 3824 /* store ident */ 3825 dp->ident = dring_pkt->dring_ident; 3826 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 3827 vsw_next_milestone(ldcp); 3828 break; 3829 3830 case VIO_SUBTYPE_NACK: 3831 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3832 3833 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 3834 return; 3835 3836 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 3837 vsw_next_milestone(ldcp); 3838 break; 3839 3840 default: 3841 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3842 dring_pkt->tag.vio_subtype); 3843 } 3844 3845 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3846 } 3847
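/*
 * For orientation, a rough sketch of what the exporting peer is
 * assumed to have done before the DRING_REG INFO handled above
 * arrives. This is not vsw code; it is the standard LDC descriptor
 * ring export sequence (error handling omitted), so treat the exact
 * arguments as illustrative:
 *
 *	ldc_dring_handle_t dhandle;
 *	ldc_mem_cookie_t cookie;
 *	uint32_t ncookies;
 *
 *	(void) ldc_mem_dring_create(ndesc, dsize, &dhandle);
 *	(void) ldc_mem_dring_bind(ldc_handle, dhandle, LDC_SHADOW_MAP,
 *	    LDC_MEM_RW, &cookie, &ncookies);
 *	... send a vio_dring_reg_msg_t carrying cookie/ndesc/dsize ...
 */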
3848 /* 3849 * Process a request from peer to unregister a dring. 3850 * 3851 * For the moment we just restart the handshake if our 3852 * peer endpoint attempts to unregister a dring. 3853 */ 3854 void 3855 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 3856 { 3857 vsw_t *vswp = ldcp->ldc_vswp; 3858 vio_dring_unreg_msg_t *dring_pkt; 3859 3860 /* 3861 * We know this is a ctrl/dring packet so 3862 * cast it into the correct structure. 3863 */ 3864 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 3865 3866 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3867 3868 switch (dring_pkt->tag.vio_subtype) { 3869 case VIO_SUBTYPE_INFO: 3870 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3871 3872 DWARN(vswp, "%s: restarting handshake..", __func__); 3873 vsw_restart_handshake(ldcp); 3874 break; 3875 3876 case VIO_SUBTYPE_ACK: 3877 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3878 3879 DWARN(vswp, "%s: restarting handshake..", __func__); 3880 vsw_restart_handshake(ldcp); 3881 break; 3882 3883 case VIO_SUBTYPE_NACK: 3884 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3885 3886 DWARN(vswp, "%s: restarting handshake..", __func__); 3887 vsw_restart_handshake(ldcp); 3888 break; 3889 3890 default: 3891 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3892 dring_pkt->tag.vio_subtype); 3893 vsw_restart_handshake(ldcp); 3894 } 3895 3896 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3897 } 3898 3899 #define SND_MCST_NACK(ldcp, pkt) \ 3900 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3901 pkt->tag.vio_sid = ldcp->local_session; \ 3902 vsw_send_msg(ldcp, (void *)pkt, sizeof (vnet_mcast_msg_t)); 3903 3904 /* 3905 * Process a multicast request from a vnet. 3906 * 3907 * Vnets specify a multicast address that they are interested in. This 3908 * address is used as a key into the hash table which forms the multicast 3909 * forwarding database (mFDB). 3910 * 3911 * The table keys are the multicast addresses, while the table entries 3912 * are pointers to lists of ports which wish to receive packets for the 3913 * specified multicast address. 3914 * 3915 * When a multicast packet is being switched we use the address as a key 3916 * into the hash table, and then walk the appropriate port list forwarding 3917 * the pkt to each port in turn. 3918 * 3919 * If a vnet is no longer interested in a particular multicast grouping 3920 * we simply find the correct location in the hash table and then delete 3921 * the relevant port from the port list. 3922 * 3923 * To deal with the case whereby a port is being deleted without first 3924 * removing itself from the lists in the hash table, we maintain a list 3925 * of multicast addresses the port has registered an interest in, within 3926 * the port structure itself. We then simply walk that list of addresses 3927 * using them as keys into the hash table and remove the port from the 3928 * appropriate lists. 3929 */ 3930 static void 3931 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 3932 { 3933 vnet_mcast_msg_t *mcst_pkt; 3934 vsw_port_t *port = ldcp->ldc_port; 3935 vsw_t *vswp = ldcp->ldc_vswp; 3936 int i; 3937 3938 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3939 3940 /* 3941 * We know this is a ctrl/mcast packet so 3942 * cast it into the correct structure. 3943 */ 3944 mcst_pkt = (vnet_mcast_msg_t *)pkt; 3945 3946 switch (mcst_pkt->tag.vio_subtype) { 3947 case VIO_SUBTYPE_INFO: 3948 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3949 3950 /* 3951 * Check if in correct state to receive a multicast 3952 * message (i.e. handshake complete). If not reset 3953 * the handshake. 3954 */ 3955 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 3956 return; 3957 3958 /* 3959 * Before attempting to add or remove addresses check 3960 * that they are valid multicast addresses. 3961 * If not, then NACK back.
3962 */ 3963 for (i = 0; i < mcst_pkt->count; i++) { 3964 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3965 DERR(vswp, "%s: invalid multicast address", 3966 __func__); 3967 SND_MCST_NACK(ldcp, mcst_pkt); 3968 return; 3969 } 3970 } 3971 3972 /* 3973 * Now add/remove the addresses. If this fails we 3974 * NACK back. 3975 */ 3976 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3977 SND_MCST_NACK(ldcp, mcst_pkt); 3978 return; 3979 } 3980 3981 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3982 mcst_pkt->tag.vio_sid = ldcp->local_session; 3983 3984 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3985 3986 vsw_send_msg(ldcp, (void *)mcst_pkt, 3987 sizeof (vnet_mcast_msg_t)); 3988 break; 3989 3990 case VIO_SUBTYPE_ACK: 3991 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3992 3993 /* 3994 * We shouldn't ever get a multicast ACK message as 3995 * at the moment we never request multicast addresses 3996 * to be set on some other device. This may change in 3997 * the future if we have cascading switches. 3998 */ 3999 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 4000 return; 4001 4002 /* Do nothing */ 4003 break; 4004 4005 case VIO_SUBTYPE_NACK: 4006 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4007 4008 /* 4009 * We shouldn't get a multicast NACK packet for the 4010 * same reasons as we shouldn't get an ACK packet. 4011 */ 4012 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 4013 return; 4014 4015 /* Do nothing */ 4016 break; 4017 4018 default: 4019 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 4020 mcst_pkt->tag.vio_subtype); 4021 } 4022 4023 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4024 } 4025 4026 static void 4027 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 4028 { 4029 vio_rdx_msg_t *rdx_pkt; 4030 vsw_t *vswp = ldcp->ldc_vswp; 4031 4032 /* 4033 * We know this is a ctrl/rdx packet so 4034 * cast it into the correct structure. 4035 */ 4036 rdx_pkt = (vio_rdx_msg_t *)pkt; 4037 4038 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 4039 4040 switch (rdx_pkt->tag.vio_subtype) { 4041 case VIO_SUBTYPE_INFO: 4042 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 4043 4044 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_INFO_RECV)) 4045 return; 4046 4047 rdx_pkt->tag.vio_sid = ldcp->local_session; 4048 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 4049 4050 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 4051 4052 ldcp->lane_in.lstate |= VSW_RDX_ACK_SENT; 4053 4054 vsw_send_msg(ldcp, (void *)rdx_pkt, 4055 sizeof (vio_rdx_msg_t)); 4056 4057 vsw_next_milestone(ldcp); 4058 break; 4059 4060 case VIO_SUBTYPE_ACK: 4061 /* 4062 * Should be handled in-band by callback handler.
4063 */ 4064 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 4065 vsw_restart_handshake(ldcp); 4066 break; 4067 4068 case VIO_SUBTYPE_NACK: 4069 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4070 4071 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_NACK_RECV)) 4072 return; 4073 4074 ldcp->lane_out.lstate |= VSW_RDX_NACK_RECV; 4075 vsw_next_milestone(ldcp); 4076 break; 4077 4078 default: 4079 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 4080 rdx_pkt->tag.vio_subtype); 4081 } 4082 4083 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4084 } 4085 4086 static void 4087 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t tag) 4088 { 4089 uint16_t env = tag.vio_subtype_env; 4090 vsw_t *vswp = ldcp->ldc_vswp; 4091 4092 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4093 4094 /* session id check */ 4095 if (ldcp->session_status & VSW_PEER_SESSION) { 4096 if (ldcp->peer_session != tag.vio_sid) { 4097 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 4098 __func__, ldcp->ldc_id, tag.vio_sid); 4099 vsw_restart_handshake(ldcp); 4100 return; 4101 } 4102 } 4103 4104 /* 4105 * It is an error for us to be getting data packets 4106 * before the handshake has completed. 4107 */ 4108 if (ldcp->hphase != VSW_MILESTONE4) { 4109 DERR(vswp, "%s: got data packet before handshake complete " 4110 "hphase %d (%x: %x)", __func__, ldcp->hphase, 4111 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 4112 DUMP_FLAGS(ldcp->lane_in.lstate); 4113 DUMP_FLAGS(ldcp->lane_out.lstate); 4114 vsw_restart_handshake(ldcp); 4115 return; 4116 } 4117 4118 /* 4119 * Switch on vio_subtype envelope, then let lower routines 4120 * decide if it's an INFO, ACK or NACK packet. 4121 */ 4122 if (env == VIO_DRING_DATA) { 4123 vsw_process_data_dring_pkt(ldcp, dpkt); 4124 } else if (env == VIO_PKT_DATA) { 4125 vsw_process_data_raw_pkt(ldcp, dpkt); 4126 } else if (env == VIO_DESC_DATA) { 4127 vsw_process_data_ibnd_pkt(ldcp, dpkt); 4128 } else { 4129 DERR(vswp, "%s : unknown vio_subtype_env (%x)\n", 4130 __func__, env); 4131 } 4132 4133 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4134 } 4135 4136 #define SND_DRING_NACK(ldcp, pkt) \ 4137 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 4138 pkt->tag.vio_sid = ldcp->local_session; \ 4139 vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_dring_msg_t)); 4140
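/*
 * Worked example of the descriptor-count arithmetic in
 * vsw_process_data_dring_pkt() below: with len = 512 descriptors,
 * start_idx = 510 and end_idx = 1, the range wraps, so
 * num = (len - pos + 1) + end = (512 - 510 + 1) + 1 = 4, covering
 * descriptors 510, 511, 0 and 1. An end_idx of -1 means no bound:
 * we consume descriptors until one is found that is not READY.
 */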
4178 */
4179 switch (dring_pkt->tag.vio_subtype) {
4180 case VIO_SUBTYPE_INFO:
4181 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);
4182
4183 if ((dp = vsw_ident2dring(&ldcp->lane_in,
4184 dring_pkt->dring_ident)) == NULL) {
4185
4186 DERR(vswp, "%s(%lld): unable to find dring from "
4187 "ident 0x%llx", __func__, ldcp->ldc_id,
4188 dring_pkt->dring_ident);
4189
4190 SND_DRING_NACK(ldcp, dring_pkt);
4191 return;
4192 }
4193
4194 start = pos = dring_pkt->start_idx;
4195 end = dring_pkt->end_idx;
4196 len = dp->num_descriptors;
4197
4198 range_start = range_end = pos;
4199
4200 D2(vswp, "%s(%lld): start index %ld : end %ld\n",
4201 __func__, ldcp->ldc_id, start, end);
4202
4203 if (end == -1) {
4204 num = -1;
4205 } else if (end >= 0) {
4206 num = end >= pos ?
4207 end - pos + 1: (len - pos + 1) + end;
4208
4209 /* basic sanity check */
4210 if (end > len) {
4211 DERR(vswp, "%s(%lld): endpoint %lld outside "
4212 "ring length %lld", __func__,
4213 ldcp->ldc_id, end, len);
4214
4215 SND_DRING_NACK(ldcp, dring_pkt);
4216 return;
4217 }
4218 } else {
4219 DERR(vswp, "%s(%lld): invalid endpoint %lld",
4220 __func__, ldcp->ldc_id, end);
4221 SND_DRING_NACK(ldcp, dring_pkt);
4222 return;
4223 }
4224
4225 while (cnt != num) {
4226 vsw_recheck_desc:
4227 if ((rv = ldc_mem_dring_acquire(dp->handle,
4228 pos, pos)) != 0) {
4229 DERR(vswp, "%s(%lld): unable to acquire "
4230 "descriptor at pos %d: err %d",
4231 __func__, ldcp->ldc_id, pos, rv);
4232 SND_DRING_NACK(ldcp, dring_pkt);
4233 return;
4234 }
4235
4236 pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos;
4237
4238 /*
4239 * When given a bounded range of descriptors
4240 * to process, it's an error to hit a descriptor
4241 * which is not ready. In the non-bounded case
4242 * (end_idx == -1) this simply indicates we have
4243 * reached the end of the current active range.
4244 */
4245 if (pub_addr->hdr.dstate != VIO_DESC_READY) {
4246 /* unbound - no error */
4247 if (end == -1) {
4248 if (read_attempts == vsw_read_attempts)
4249 break;
4250
4251 delay(drv_usectohz(vsw_desc_delay));
4252 read_attempts++;
4253 goto vsw_recheck_desc;
4254 }
4255
4256 /* bounded - error - so NACK back */
4257 DERR(vswp, "%s(%lld): descriptor not READY "
4258 "(%d)", __func__, ldcp->ldc_id,
4259 pub_addr->hdr.dstate);
4260 SND_DRING_NACK(ldcp, dring_pkt);
4261 return;
4262 }
4263
4264 DTRACE_PROBE1(read_attempts, int, read_attempts);
4265
4266 range_end = pos;
4267
4268 /*
4269 * If we ACK'd the previous descriptor then now
4270 * record the new range start position for later
4271 * ACK's.
4272 */
4273 if (prev_desc_ack) {
4274 range_start = pos;
4275
4276 D2(vswp, "%s(%lld): updating range start "
4277 "to be %d", __func__, ldcp->ldc_id,
4278 range_start);
4279
4280 prev_desc_ack = B_FALSE;
4281 }
4282
4283 /*
4284 * Data is padded to align on an 8 byte boundary,
4285 * datalen is actual data length, i.e. minus that
4286 * padding.
4287 */
4288 datalen = pub_addr->nbytes;
4289
4290 /*
4291 * Does peer wish us to ACK when we have finished
4292 * with this descriptor ?
4293 */
4294 if (pub_addr->hdr.ack)
4295 ack_needed = B_TRUE;
4296
4297 D2(vswp, "%s(%lld): processing desc %lld at pos"
4298 " 0x%llx : dstate 0x%lx : datalen 0x%lx",
4299 __func__, ldcp->ldc_id, pos, pub_addr,
4300 pub_addr->hdr.dstate, datalen);
4301
4302 /*
4303 * Mark that we are starting to process descriptor.
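 * ACCEPTED tells the exporting peer that this side is using
 * the descriptor; it moves to DONE once the data has been
 * copied out (or the copy has failed) below.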
4304 */
4305 pub_addr->hdr.dstate = VIO_DESC_ACCEPTED;
4306
4307 mp = vio_allocb(ldcp->rxh);
4308 if (mp == NULL) {
4309 /*
4310 * No free receive buffers available, so
4311 * fallback onto allocb(9F). Make sure that
4312 * we get a data buffer which is a multiple
4313 * of 8 as this is required by ldc_mem_copy.
4314 */
4315 DTRACE_PROBE(allocb);
4316 mp = allocb(datalen + VNET_IPALIGN + 8,
4317 BPRI_MED);
4318 }

if (mp == NULL) {
/*
 * The allocb(9F) fallback also failed, so there
 * is nothing to copy the data into; release the
 * descriptor and stop processing rather than
 * dereference a NULL block below.
 */
DERR(vswp, "%s(%lld): allocb failed",
__func__, ldcp->ldc_id);
pub_addr->hdr.dstate = VIO_DESC_DONE;
(void) ldc_mem_dring_release(dp->handle,
pos, pos);
break;
}

4319
4320 /*
4321 * Ensure that we ask ldc for an aligned
4322 * number of bytes.
4323 */
4324 nbytes = datalen + VNET_IPALIGN;
4325 if (nbytes & 0x7) {
4326 off = 8 - (nbytes & 0x7);
4327 nbytes += off;
4328 }
4329
4330 ncookies = pub_addr->ncookies;
4331 rv = ldc_mem_copy(ldcp->ldc_handle,
4332 (caddr_t)mp->b_rptr, 0, &nbytes,
4333 pub_addr->memcookie, ncookies,
4334 LDC_COPY_IN);
4335
4336 if (rv != 0) {
4337 DERR(vswp, "%s(%d): unable to copy in "
4338 "data from %d cookies in desc %d"
4339 " (rv %d)", __func__, ldcp->ldc_id,
4340 ncookies, pos, rv);
4341 freemsg(mp);
4342
4343 pub_addr->hdr.dstate = VIO_DESC_DONE;
4344 (void) ldc_mem_dring_release(dp->handle,
4345 pos, pos);
4346 break;
4347 } else {
4348 D2(vswp, "%s(%d): copied in %ld bytes"
4349 " using %d cookies", __func__,
4350 ldcp->ldc_id, nbytes, ncookies);
4351 }
4352
4353 /* adjust the read pointer to skip over the padding */
4354 mp->b_rptr += VNET_IPALIGN;
4355
4356 /* point to the actual end of data */
4357 mp->b_wptr = mp->b_rptr + datalen;
4358
4359 /* build a chain of received packets */
4360 if (bp == NULL) {
4361 /* first pkt */
4362 bp = mp;
4363 bp->b_next = bp->b_prev = NULL;
4364 bpt = bp;
4365 chain = 1;
4366 } else {
4367 mp->b_next = NULL;
4368 mp->b_prev = bpt;
4369 bpt->b_next = mp;
4370 bpt = mp;
4371 chain++;
4372 }
4373
4374 /* mark we are finished with this descriptor */
4375 pub_addr->hdr.dstate = VIO_DESC_DONE;
4376
4377 (void) ldc_mem_dring_release(dp->handle, pos, pos);
4378
4379 /*
4380 * Send an ACK back to peer if requested.
4381 */
4382 if (ack_needed) {
4383 ack_needed = B_FALSE;
4384
4385 dring_pkt->start_idx = range_start;
4386 dring_pkt->end_idx = range_end;
4387
4388 DERR(vswp, "%s(%lld): processed %d %d, ACK"
4389 " requested", __func__, ldcp->ldc_id,
4390 dring_pkt->start_idx,
4391 dring_pkt->end_idx);
4392
4393 dring_pkt->dring_process_state = VIO_DP_ACTIVE;
4394 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
4395 dring_pkt->tag.vio_sid = ldcp->local_session;
4396 vsw_send_msg(ldcp, (void *)dring_pkt,
4397 sizeof (vio_dring_msg_t));
4398
4399 prev_desc_ack = B_TRUE;
4400 range_start = pos;
4401 }
4402
4403 /* next descriptor */
4404 pos = (pos + 1) % len;
4405 cnt++;
4406
4407 /*
4408 * Break out of loop here and stop processing to
4409 * allow some other network device (or disk) to
4410 * get access to the cpu.
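 * vsw_chain_len bounds how many packets are batched up before
 * the chain is handed to vsw_switch_frame(), so a busy ring
 * cannot monopolise the servicing thread.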
4411 */ 4412 /* send the chain of packets to be switched */ 4413 if (chain > vsw_chain_len) { 4414 D3(vswp, "%s(%lld): switching chain of %d " 4415 "msgs", __func__, ldcp->ldc_id, chain); 4416 vsw_switch_frame(vswp, bp, VSW_VNETPORT, 4417 ldcp->ldc_port, NULL); 4418 bp = NULL; 4419 break; 4420 } 4421 } 4422 4423 /* send the chain of packets to be switched */ 4424 if (bp != NULL) { 4425 D3(vswp, "%s(%lld): switching chain of %d msgs", 4426 __func__, ldcp->ldc_id, chain); 4427 vsw_switch_frame(vswp, bp, VSW_VNETPORT, 4428 ldcp->ldc_port, NULL); 4429 } 4430 4431 DTRACE_PROBE1(msg_cnt, int, cnt); 4432 4433 /* 4434 * We are now finished so ACK back with the state 4435 * set to STOPPING so our peer knows we are finished 4436 */ 4437 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 4438 dring_pkt->tag.vio_sid = ldcp->local_session; 4439 4440 dring_pkt->dring_process_state = VIO_DP_STOPPED; 4441 4442 DTRACE_PROBE(stop_process_sent); 4443 4444 /* 4445 * We have not processed any more descriptors beyond 4446 * the last one we ACK'd. 4447 */ 4448 if (prev_desc_ack) 4449 range_start = range_end; 4450 4451 dring_pkt->start_idx = range_start; 4452 dring_pkt->end_idx = range_end; 4453 4454 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 4455 __func__, ldcp->ldc_id, dring_pkt->start_idx, 4456 dring_pkt->end_idx); 4457 4458 vsw_send_msg(ldcp, (void *)dring_pkt, 4459 sizeof (vio_dring_msg_t)); 4460 break; 4461 4462 case VIO_SUBTYPE_ACK: 4463 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 4464 /* 4465 * Verify that the relevant descriptors are all 4466 * marked as DONE 4467 */ 4468 if ((dp = vsw_ident2dring(&ldcp->lane_out, 4469 dring_pkt->dring_ident)) == NULL) { 4470 DERR(vswp, "%s: unknown ident in ACK", __func__); 4471 return; 4472 } 4473 4474 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 4475 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 4476 4477 start = end = 0; 4478 start = dring_pkt->start_idx; 4479 end = dring_pkt->end_idx; 4480 len = dp->num_descriptors; 4481 4482 j = num = 0; 4483 /* calculate # descriptors taking into a/c wrap around */ 4484 num = end >= start ? end - start + 1: (len - start + 1) + end; 4485 4486 D2(vswp, "%s(%lld): start index %ld : end %ld : num %ld\n", 4487 __func__, ldcp->ldc_id, start, end, num); 4488 4489 mutex_enter(&dp->dlock); 4490 dp->last_ack_recv = end; 4491 mutex_exit(&dp->dlock); 4492 4493 for (i = start; j < num; i = (i + 1) % len, j++) { 4494 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 4495 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 4496 4497 /* 4498 * If the last descriptor in a range has the ACK 4499 * bit set then we will get two messages from our 4500 * peer relating to it. The normal ACK msg and then 4501 * a subsequent STOP msg. The first message will have 4502 * resulted in the descriptor being reclaimed and 4503 * its state set to FREE so when we encounter a non 4504 * DONE descriptor we need to check to see if its 4505 * because we have just reclaimed it. 
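 * For example, if the peer ACKs descriptors 5..9 with the ACK
 * bit set on 9, the first ACK reclaims 5..9 and marks them
 * FREE; when the subsequent STOPPED message covering the same
 * range arrives, those descriptors are legitimately no longer
 * DONE.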
4506 */
4507 mutex_enter(&priv_addr->dstate_lock);
4508 if (pub_addr->hdr.dstate == VIO_DESC_DONE) {
4509 /* clear all the fields */
4510 bzero(priv_addr->datap, priv_addr->datalen);
4511 priv_addr->datalen = 0;
4512
4513 pub_addr->hdr.dstate = VIO_DESC_FREE;
4514 pub_addr->hdr.ack = 0;
4515
4516 priv_addr->dstate = VIO_DESC_FREE;
4517 mutex_exit(&priv_addr->dstate_lock);
4518
4519 D3(vswp, "clearing descp %d : pub state "
4520 "0x%llx : priv state 0x%llx", i,
4521 pub_addr->hdr.dstate,
4522 priv_addr->dstate);
4523
4524 } else {
4525 mutex_exit(&priv_addr->dstate_lock);
4526
4527 if (dring_pkt->dring_process_state !=
4528 VIO_DP_STOPPED) {
4529 DERR(vswp, "%s: descriptor %lld at pos "
4530 " 0x%llx not DONE (0x%lx)\n",
4531 __func__, i, pub_addr,
4532 pub_addr->hdr.dstate);
4533 return;
4534 }
4535 }
4536 }
4537
4538 /*
4539 * If our peer is stopping processing descriptors then
4540 * we check to make sure it has processed all the descriptors
4541 * we have updated. If not then we send it a new message
4542 * to prompt it to restart.
4543 */
4544 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) {
4545 DTRACE_PROBE(stop_process_recv);
4546 D2(vswp, "%s(%lld): got stopping msg : %d : %d",
4547 __func__, ldcp->ldc_id, dring_pkt->start_idx,
4548 dring_pkt->end_idx);
4549
4550 /*
4551 * Check next descriptor in public section of ring.
4552 * If it's marked as READY then we need to prompt our
4553 * peer to start processing the ring again.
4554 */
4555 i = (end + 1) % len;
4556 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
4557 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
4558
4559 /*
4560 * Hold the restart lock across all of this to
4561 * make sure that it's not possible for us to
4562 * decide that a msg needs to be sent in the future
4563 * while the sending code, having already checked,
4564 * is about to exit.
4565 */
4566 mutex_enter(&dp->restart_lock);
4567 mutex_enter(&priv_addr->dstate_lock);
4568 if (pub_addr->hdr.dstate == VIO_DESC_READY) {
4569
4570 mutex_exit(&priv_addr->dstate_lock);
4571
4572 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
4573 dring_pkt->tag.vio_sid = ldcp->local_session;
4574
4575 mutex_enter(&ldcp->lane_out.seq_lock);
4576 dring_pkt->seq_num = ldcp->lane_out.seq_num++;
4577 mutex_exit(&ldcp->lane_out.seq_lock);
4578
4579 dring_pkt->start_idx = (end + 1) % len;
4580 dring_pkt->end_idx = -1;
4581
4582 D2(vswp, "%s(%lld) : sending restart msg:"
4583 " %d : %d", __func__, ldcp->ldc_id,
4584 dring_pkt->start_idx,
4585 dring_pkt->end_idx);
4586
4587 vsw_send_msg(ldcp, (void *)dring_pkt,
4588 sizeof (vio_dring_msg_t));
4589 } else {
4590 mutex_exit(&priv_addr->dstate_lock);
4591 dp->restart_reqd = B_TRUE;
4592 }
4593 mutex_exit(&dp->restart_lock);
4594 }
4595 break;
4596
4597 case VIO_SUBTYPE_NACK:
4598 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK",
4599 __func__, ldcp->ldc_id);
4600 /*
4601 * Something is badly wrong if we are getting NACK's
4602 * for our data pkts. So reset the channel.
4603 */
4604 vsw_restart_handshake(ldcp);
4605
4606 break;
4607
4608 default:
4609 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
4610 ldcp->ldc_id, dring_pkt->tag.vio_subtype);
4611 }
4612
4613 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
4614 }
4615
4616 /*
4617 * VIO_PKT_DATA (a.k.a. raw data mode)
4618 *
4619 * Note - currently not supported. Do nothing.
4620 */
4621 static void
4622 vsw_process_data_raw_pkt(vsw_ldc_t *ldcp, void *dpkt)
4623 {
4624 _NOTE(ARGUNUSED(dpkt))
4625
4626 D1(NULL, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
4627
4628 DERR(NULL, "%s (%lld): currently not supported",
4629 __func__, ldcp->ldc_id);
4630
4631 D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
4632 }
4633
4634 #define SND_IBND_DESC_NACK(ldcp, pkt) \
	do { \
4635 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
4636 pkt->tag.vio_sid = ldcp->local_session; \
4637 vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_ibnd_desc_t)); \
	} while (0)
4638
4639 /*
4640 * Process an in-band descriptor message (most likely from
4641 * OBP).
4642 */
4643 static void
4644 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt)
4645 {
4646 vio_ibnd_desc_t *ibnd_desc;
4647 dring_info_t *dp = NULL;
4648 vsw_private_desc_t *priv_addr = NULL;
4649 vsw_t *vswp = ldcp->ldc_vswp;
4650 mblk_t *mp = NULL;
4651 size_t nbytes = 0;
4652 size_t off = 0;
4653 uint64_t idx = 0;
4654 uint32_t datalen = 0;
4655 uint64_t ncookies = 0;
4656 int rv;
4657
4658 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
4659
4660 ibnd_desc = (vio_ibnd_desc_t *)pkt;
4661
4662 switch (ibnd_desc->hdr.tag.vio_subtype) {
4663 case VIO_SUBTYPE_INFO:
4664 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
4665
4666 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
4667 return;
4668
4669 /*
4670 * Data is padded to align on an 8 byte boundary,
4671 * nbytes is actual data length, i.e. minus that
4672 * padding.
4673 */
4674 datalen = ibnd_desc->nbytes;
4675
4676 D2(vswp, "%s(%lld): processing inband desc : "
4677 "datalen 0x%lx", __func__, ldcp->ldc_id, datalen);
4678
4679 ncookies = ibnd_desc->ncookies;
4680
4681 /*
4682 * allocb(9F) returns an aligned data block. We
4683 * need to ensure that we ask ldc for an aligned
4684 * number of bytes also.
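 * For example, a datalen of 62 is rounded up to an nbytes of
 * 64 (off = 8 - (62 & 0x7) = 2), and the data block allocated
 * below must be big enough for that rounded-up copy.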
4685 */
4686 nbytes = datalen;
4687 if (nbytes & 0x7) {
4688 off = 8 - (nbytes & 0x7);
4689 nbytes += off;
4690 }
4691
4692 mp = allocb(nbytes, BPRI_MED);
4693 if (mp == NULL) {
4694 DERR(vswp, "%s(%lld): allocb failed",
4695 __func__, ldcp->ldc_id);
4696 return;
4697 }
4698
4699 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr,
4700 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies,
4701 LDC_COPY_IN);
4702
4703 if (rv != 0) {
4704 DERR(vswp, "%s(%d): unable to copy in data from "
4705 "%d cookie(s)", __func__,
4706 ldcp->ldc_id, ncookies);
4707 freemsg(mp);
4708 return;
4709 } else {
4710 D2(vswp, "%s(%d): copied in %ld bytes using %d "
4711 "cookies", __func__, ldcp->ldc_id, nbytes,
4712 ncookies);
4713 }
4714
4715 /* point to the actual end of data */
4716 mp->b_wptr = mp->b_rptr + datalen;
4717
4718 /*
4719 * We ACK back every in-band descriptor message we process
4720 */
4721 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK;
4722 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session;
4723 vsw_send_msg(ldcp, (void *)ibnd_desc,
4724 sizeof (vio_ibnd_desc_t));
4725
4726 /* send the packet to be switched */
4727 vsw_switch_frame(vswp, mp, VSW_VNETPORT,
4728 ldcp->ldc_port, NULL);
4729
4730 break;
4731
4732 case VIO_SUBTYPE_ACK:
4733 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
4734
4735 /* Verify the ACK is valid */
4736 idx = ibnd_desc->hdr.desc_handle;
4737
4738 if (idx >= VSW_RING_NUM_EL) {
4739 cmn_err(CE_WARN, "%s: corrupted ACK received "
4740 "(idx %ld)", __func__, idx);
4741 return;
4742 }
4743
4744 if ((dp = ldcp->lane_out.dringp) == NULL) {
4745 DERR(vswp, "%s: no dring found", __func__);
4746 return;
4747 }
4748
4749 priv_addr = (vsw_private_desc_t *)dp->priv_addr;
4750
4751 /* move to correct location in ring */
4752 priv_addr += idx;
4753
4754 /*
4755 * When we sent the in-band message to our peer we
4756 * marked the copy in our private ring as READY. We now
4757 * check that the descriptor we are being ACK'ed for is in
4758 * fact READY, i.e. it is one we have shared with our peer.
4759 */
4760 mutex_enter(&priv_addr->dstate_lock);
4761 if (priv_addr->dstate != VIO_DESC_READY) {
4762 mutex_exit(&priv_addr->dstate_lock);
4763 cmn_err(CE_WARN, "%s: (%ld) desc at index %ld not "
4764 "READY (0x%lx)", __func__, ldcp->ldc_id, idx,
4765 priv_addr->dstate);
4766 cmn_err(CE_CONT, "%s: bound %d: ncookies %ld\n",
4767 __func__, priv_addr->bound,
4768 priv_addr->ncookies);
4769 cmn_err(CE_CONT, "datalen %ld\n", priv_addr->datalen);
4770 return;
4771 } else {
4772 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__,
4773 ldcp->ldc_id, idx);
4774
4775 /* release resources associated with sent msg */
4776 bzero(priv_addr->datap, priv_addr->datalen);
4777 priv_addr->datalen = 0;
4778 priv_addr->dstate = VIO_DESC_FREE;
4779 mutex_exit(&priv_addr->dstate_lock);
4780 }
4781 break;
4782
4783 case VIO_SUBTYPE_NACK:
4784 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
4785
4786 /*
4787 * We should only get a NACK if our peer doesn't like
4788 * something about a message we have sent it. If this
4789 * happens we just release the resources associated with
4790 * the message. (We are relying on higher layers to decide
4791 * whether or not to resend.)
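 * The descriptor is simply returned to the FREE pool below,
 * mirroring the reclaim done in the ACK case above.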
4792 */ 4793 4794 /* limit check */ 4795 idx = ibnd_desc->hdr.desc_handle; 4796 4797 if (idx >= VSW_RING_NUM_EL) { 4798 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 4799 __func__, idx); 4800 return; 4801 } 4802 4803 if ((dp = ldcp->lane_out.dringp) == NULL) { 4804 DERR(vswp, "%s: no dring found", __func__); 4805 return; 4806 } 4807 4808 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 4809 4810 /* move to correct location in ring */ 4811 priv_addr += idx; 4812 4813 /* release resources associated with sent msg */ 4814 mutex_enter(&priv_addr->dstate_lock); 4815 bzero(priv_addr->datap, priv_addr->datalen); 4816 priv_addr->datalen = 0; 4817 priv_addr->dstate = VIO_DESC_FREE; 4818 mutex_exit(&priv_addr->dstate_lock); 4819 4820 break; 4821 4822 default: 4823 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 4824 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 4825 } 4826 4827 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4828 } 4829 4830 static void 4831 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t tag) 4832 { 4833 _NOTE(ARGUNUSED(epkt)) 4834 4835 vsw_t *vswp = ldcp->ldc_vswp; 4836 uint16_t env = tag.vio_subtype_env; 4837 4838 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 4839 4840 /* 4841 * Error vio_subtypes have yet to be defined. So for 4842 * the moment we can't do anything. 4843 */ 4844 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 4845 4846 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 4847 } 4848 4849 /* 4850 * Switch the given ethernet frame when operating in layer 2 mode. 4851 * 4852 * vswp: pointer to the vsw instance 4853 * mp: pointer to chain of ethernet frame(s) to be switched 4854 * caller: identifies the source of this frame as: 4855 * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 4856 * 2. VSW_PHYSDEV - the physical ethernet device 4857 * 3. VSW_LOCALDEV - vsw configured as a virtual interface 4858 * arg: argument provided by the caller. 4859 * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 4860 * 2. for PHYSDEV - NULL 4861 * 3. for LOCALDEV - pointer to to this vsw_t(self) 4862 */ 4863 void 4864 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 4865 vsw_port_t *arg, mac_resource_handle_t mrh) 4866 { 4867 struct ether_header *ehp; 4868 vsw_port_t *port = NULL; 4869 mblk_t *bp, *ret_m; 4870 mblk_t *nmp = NULL; 4871 vsw_port_list_t *plist = &vswp->plist; 4872 4873 D1(vswp, "%s: enter (caller %d)", __func__, caller); 4874 4875 /* 4876 * PERF: rather than breaking up the chain here, scan it 4877 * to find all mblks heading to same destination and then 4878 * pass that sub-chain to the lower transmit functions. 4879 */ 4880 4881 /* process the chain of packets */ 4882 bp = mp; 4883 while (bp) { 4884 mp = bp; 4885 bp = bp->b_next; 4886 mp->b_next = mp->b_prev = NULL; 4887 ehp = (struct ether_header *)mp->b_rptr; 4888 4889 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 4890 __func__, MBLKSIZE(mp), MBLKL(mp)); 4891 4892 READ_ENTER(&vswp->if_lockrw); 4893 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 4894 /* 4895 * If destination is VSW_LOCALDEV (vsw as an eth 4896 * interface) and if the device is up & running, 4897 * send the packet up the stack on this host. 4898 * If the virtual interface is down, drop the packet. 
4899 */
4900 if (caller != VSW_LOCALDEV) {
4901 if (vswp->if_state & VSW_IF_UP) {
4902 RW_EXIT(&vswp->if_lockrw);
4903 mac_rx(vswp->if_mh, mrh, mp);
4904 } else {
4905 RW_EXIT(&vswp->if_lockrw);
4906 /* Interface down, drop pkt */
4907 freemsg(mp);
4908 }
4909 } else {
4910 RW_EXIT(&vswp->if_lockrw);
4911 freemsg(mp);
4912 }
4913 continue;
4914 }
4915 RW_EXIT(&vswp->if_lockrw);
4916
4917 READ_ENTER(&plist->lockrw);
4918 port = vsw_lookup_fdb(vswp, ehp);
4919 if (port) {
4920 /*
4921 * Mark the port as in-use.
4922 */
4923 mutex_enter(&port->ref_lock);
4924 port->ref_cnt++;
4925 mutex_exit(&port->ref_lock);
4926 RW_EXIT(&plist->lockrw);
4927
4928 /*
4929 * If plumbed and in promisc mode then copy msg
4930 * and send up the stack.
4931 */
4932 READ_ENTER(&vswp->if_lockrw);
4933 if (VSW_U_P(vswp->if_state)) {
4934 RW_EXIT(&vswp->if_lockrw);
4935 nmp = copymsg(mp);
4936 if (nmp)
4937 mac_rx(vswp->if_mh, mrh, nmp);
4938 } else {
4939 RW_EXIT(&vswp->if_lockrw);
4940 }
4941
4942 /*
4943 * If the destination is in FDB, the packet
4944 * should be forwarded to the corresponding
4945 * vsw_port (connected to a vnet device -
4946 * VSW_VNETPORT)
4947 */
4948 (void) vsw_portsend(port, mp);
4949
4950 /*
4951 * Decrement use count in port and check if
4952 * should wake delete thread.
4953 */
4954 mutex_enter(&port->ref_lock);
4955 port->ref_cnt--;
4956 if (port->ref_cnt == 0)
4957 cv_signal(&port->ref_cv);
4958 mutex_exit(&port->ref_lock);
4959 } else {
4960 RW_EXIT(&plist->lockrw);
4961 /*
4962 * Destination not in FDB.
4963 *
4964 * If the destination is broadcast or
4965 * multicast forward the packet to all
4966 * (VNETPORTs, PHYSDEV, LOCALDEV),
4967 * except the caller.
4968 */
4969 if (IS_BROADCAST(ehp)) {
4970 D3(vswp, "%s: BROADCAST pkt", __func__);
4971 (void) vsw_forward_all(vswp, mp,
4972 caller, arg);
4973 } else if (IS_MULTICAST(ehp)) {
4974 D3(vswp, "%s: MULTICAST pkt", __func__);
4975 (void) vsw_forward_grp(vswp, mp,
4976 caller, arg);
4977 } else {
4978 /*
4979 * If the destination is unicast, and came
4980 * from either a logical network device or
4981 * the switch itself when it is plumbed, then
4982 * send it out on the physical device and also
4983 * up the stack if the logical interface is
4984 * in promiscuous mode.
4985 *
4986 * NOTE: The assumption here is that if we
4987 * cannot find the destination in our fdb, it's
4988 * a unicast address which came from either a
4989 * vnet or down the stack (when plumbed) and
4990 * must be destined for an ethernet device
4991 * outside our ldoms.
4992 */
4993 if (caller == VSW_VNETPORT) {
4994 READ_ENTER(&vswp->if_lockrw);
4995 if (VSW_U_P(vswp->if_state)) {
4996 RW_EXIT(&vswp->if_lockrw);
4997 nmp = copymsg(mp);
4998 if (nmp)
4999 mac_rx(vswp->if_mh,
5000 mrh, nmp);
5001 } else {
5002 RW_EXIT(&vswp->if_lockrw);
5003 }
5004 if ((ret_m = vsw_tx_msg(vswp, mp))
5005 != NULL) {
5006 DERR(vswp, "%s: drop mblks to "
5007 "phys dev", __func__);
5008 freemsg(ret_m);
5009 }
5010
5011 } else if (caller == VSW_PHYSDEV) {
5012 /*
5013 * Pkt seen because card in promisc
5014 * mode. Send up stack if plumbed in
5015 * promisc mode, else drop it.
5016 */
5017 READ_ENTER(&vswp->if_lockrw);
5018 if (VSW_U_P(vswp->if_state)) {
5019 RW_EXIT(&vswp->if_lockrw);
5020 mac_rx(vswp->if_mh, mrh, mp);
5021 } else {
5022 RW_EXIT(&vswp->if_lockrw);
5023 freemsg(mp);
5024 }
5025
5026 } else if (caller == VSW_LOCALDEV) {
5027 /*
5028 * Pkt came down the stack, send out
5029 * over physical device.
5030 */
5031 if ((ret_m = vsw_tx_msg(vswp, mp))
5032 != NULL) {
5033 DERR(vswp, "%s: drop mblks to "
5034 "phys dev", __func__);
5035 freemsg(ret_m);
5036 }
5037 }
5038 }
5039 }
5040 }
5041 D1(vswp, "%s: exit\n", __func__);
5042 }
5043
5044 /*
5045 * Switch ethernet frame when in layer 3 mode (i.e. using IP
5046 * layer to do the routing).
5047 *
5048 * There is a large amount of overlap between this function and
5049 * vsw_switch_l2_frame. At some stage we need to revisit and refactor
5050 * both these functions.
5051 */
5052 void
5053 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
5054 vsw_port_t *arg, mac_resource_handle_t mrh)
5055 {
5056 struct ether_header *ehp;
5057 vsw_port_t *port = NULL;
5058 mblk_t *bp = NULL;
5059 vsw_port_list_t *plist = &vswp->plist;
5060
5061 D1(vswp, "%s: enter (caller %d)", __func__, caller);
5062
5063 /*
5064 * In layer 3 mode we should only ever be switching packets
5065 * between the IP layer and vnet devices. So make sure that's
5066 * who is invoking us.
5067 */
5068 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) {
5069 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller);
5070 freemsgchain(mp);
5071 return;
5072 }
5073
5074 /* process the chain of packets */
5075 bp = mp;
5076 while (bp) {
5077 mp = bp;
5078 bp = bp->b_next;
5079 mp->b_next = mp->b_prev = NULL;
5080 ehp = (struct ether_header *)mp->b_rptr;
5081
5082 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
5083 __func__, MBLKSIZE(mp), MBLKL(mp));
5084
5085 READ_ENTER(&plist->lockrw);
5086 port = vsw_lookup_fdb(vswp, ehp);
5087 if (port) {
5088 /*
5089 * Mark port as in-use.
5090 */
5091 mutex_enter(&port->ref_lock);
5092 port->ref_cnt++;
5093 mutex_exit(&port->ref_lock);
5094 RW_EXIT(&plist->lockrw);
5095
5096 D2(vswp, "%s: sending to target port", __func__);
5097 (void) vsw_portsend(port, mp);
5098
5099 /*
5100 * Finished with port so decrement ref count and
5101 * check if should wake delete thread.
5102 */
5103 mutex_enter(&port->ref_lock);
5104 port->ref_cnt--;
5105 if (port->ref_cnt == 0)
5106 cv_signal(&port->ref_cv);
5107 mutex_exit(&port->ref_lock);
5108 } else {
5109 RW_EXIT(&plist->lockrw);
5110 /*
5111 * Destination not in FDB
5112 *
5113 * If the destination is broadcast or
5114 * multicast forward the packet to all
5115 * (VNETPORTs, PHYSDEV, LOCALDEV),
5116 * except the caller.
5117 */
5118 if (IS_BROADCAST(ehp)) {
5119 D2(vswp, "%s: BROADCAST pkt", __func__);
5120 (void) vsw_forward_all(vswp, mp,
5121 caller, arg);
5122 } else if (IS_MULTICAST(ehp)) {
5123 D2(vswp, "%s: MULTICAST pkt", __func__);
5124 (void) vsw_forward_grp(vswp, mp,
5125 caller, arg);
5126 } else {
5127 /*
5128 * Unicast pkt from a vnet that we don't have
5129 * an FDB entry for, so it must be destined for
5130 * the outside world. Attempt to send it up to
5131 * the IP layer to allow it to deal with it.
5132 */
5133 if (caller == VSW_VNETPORT) {
5134 READ_ENTER(&vswp->if_lockrw);
5135 if (vswp->if_state & VSW_IF_UP) {
5136 RW_EXIT(&vswp->if_lockrw);
5137 D2(vswp, "%s: sending up",
5138 __func__);
5139 mac_rx(vswp->if_mh, mrh, mp);
5140 } else {
5141 RW_EXIT(&vswp->if_lockrw);
5142 /* Interface down, drop pkt */
5143 D2(vswp, "%s I/F down",
5144 __func__);
5145 freemsg(mp);
5146 }
5147 } else {
/*
 * Pkt came down the stack for an
 * unknown unicast destination; in
 * layer 3 mode there is nothing
 * else to try, so drop it rather
 * than leak the mblk.
 */
freemsg(mp);
}
5148 }
5149 }
5150 }
5151
5152 D1(vswp, "%s: exit", __func__);
5153 }
5154
5155 /*
5156 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
5157 * except the caller (port on which frame arrived).
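 * Note the copies: frames handed to ports or the physical
 * device are duplicated with dupmsg(9F) (shared data block),
 * while the copy sent up the stack uses copymsg(9F) so the
 * stack gets its own writable copy.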
5158 */ 5159 static int 5160 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 5161 { 5162 vsw_port_list_t *plist = &vswp->plist; 5163 vsw_port_t *portp; 5164 mblk_t *nmp = NULL; 5165 mblk_t *ret_m = NULL; 5166 int skip_port = 0; 5167 5168 D1(vswp, "vsw_forward_all: enter\n"); 5169 5170 /* 5171 * Broadcast message from inside ldoms so send to outside 5172 * world if in either of layer 2 modes. 5173 */ 5174 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 5175 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 5176 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 5177 5178 nmp = dupmsg(mp); 5179 if (nmp) { 5180 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 5181 DERR(vswp, "%s: dropping pkt(s) " 5182 "consisting of %ld bytes of data for" 5183 " physical device", __func__, MBLKL(ret_m)); 5184 freemsg(ret_m); 5185 } 5186 } 5187 } 5188 5189 if (caller == VSW_VNETPORT) 5190 skip_port = 1; 5191 5192 /* 5193 * Broadcast message from other vnet (layer 2 or 3) or outside 5194 * world (layer 2 only), send up stack if plumbed. 5195 */ 5196 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 5197 READ_ENTER(&vswp->if_lockrw); 5198 if (vswp->if_state & VSW_IF_UP) { 5199 RW_EXIT(&vswp->if_lockrw); 5200 nmp = copymsg(mp); 5201 if (nmp) 5202 mac_rx(vswp->if_mh, NULL, nmp); 5203 } else { 5204 RW_EXIT(&vswp->if_lockrw); 5205 } 5206 } 5207 5208 /* send it to all VNETPORTs */ 5209 READ_ENTER(&plist->lockrw); 5210 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 5211 D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 5212 /* 5213 * Caution ! - don't reorder these two checks as arg 5214 * will be NULL if the caller is PHYSDEV. skip_port is 5215 * only set if caller is VNETPORT. 5216 */ 5217 if ((skip_port) && (portp == arg)) 5218 continue; 5219 else { 5220 nmp = dupmsg(mp); 5221 if (nmp) { 5222 (void) vsw_portsend(portp, nmp); 5223 } else { 5224 DERR(vswp, "vsw_forward_all: nmp NULL"); 5225 } 5226 } 5227 } 5228 RW_EXIT(&plist->lockrw); 5229 5230 freemsg(mp); 5231 5232 D1(vswp, "vsw_forward_all: exit\n"); 5233 return (0); 5234 } 5235 5236 /* 5237 * Forward pkts to any devices or interfaces which have registered 5238 * an interest in them (i.e. multicast groups). 5239 */ 5240 static int 5241 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 5242 { 5243 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 5244 mfdb_ent_t *entp = NULL; 5245 mfdb_ent_t *tpp = NULL; 5246 vsw_port_t *port; 5247 uint64_t key = 0; 5248 mblk_t *nmp = NULL; 5249 mblk_t *ret_m = NULL; 5250 boolean_t check_if = B_TRUE; 5251 5252 /* 5253 * Convert address to hash table key 5254 */ 5255 KEY_HASH(key, ehp->ether_dhost); 5256 5257 D1(vswp, "%s: key 0x%llx", __func__, key); 5258 5259 /* 5260 * If pkt came from either a vnet or down the stack (if we are 5261 * plumbed) and we are in layer 2 mode, then we send the pkt out 5262 * over the physical adapter, and then check to see if any other 5263 * vnets are interested in it. 
5264 */ 5265 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 5266 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 5267 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 5268 nmp = dupmsg(mp); 5269 if (nmp) { 5270 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 5271 DERR(vswp, "%s: dropping pkt(s) " 5272 "consisting of %ld bytes of " 5273 "data for physical device", 5274 __func__, MBLKL(ret_m)); 5275 freemsg(ret_m); 5276 } 5277 } 5278 } 5279 5280 READ_ENTER(&vswp->mfdbrw); 5281 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 5282 (mod_hash_val_t *)&entp) != 0) { 5283 D3(vswp, "%s: no table entry found for addr 0x%llx", 5284 __func__, key); 5285 } else { 5286 /* 5287 * Send to list of devices associated with this address... 5288 */ 5289 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 5290 5291 /* dont send to ourselves */ 5292 if ((caller == VSW_VNETPORT) && 5293 (tpp->d_addr == (void *)arg)) { 5294 port = (vsw_port_t *)tpp->d_addr; 5295 D3(vswp, "%s: not sending to ourselves" 5296 " : port %d", __func__, 5297 port->p_instance); 5298 continue; 5299 5300 } else if ((caller == VSW_LOCALDEV) && 5301 (tpp->d_type == VSW_LOCALDEV)) { 5302 D3(vswp, "%s: not sending back up stack", 5303 __func__); 5304 continue; 5305 } 5306 5307 if (tpp->d_type == VSW_VNETPORT) { 5308 port = (vsw_port_t *)tpp->d_addr; 5309 D3(vswp, "%s: sending to port %ld for " 5310 " addr 0x%llx", __func__, 5311 port->p_instance, key); 5312 5313 nmp = dupmsg(mp); 5314 if (nmp) 5315 (void) vsw_portsend(port, nmp); 5316 } else { 5317 if (vswp->if_state & VSW_IF_UP) { 5318 nmp = copymsg(mp); 5319 if (nmp) 5320 mac_rx(vswp->if_mh, NULL, nmp); 5321 check_if = B_FALSE; 5322 D3(vswp, "%s: sending up stack" 5323 " for addr 0x%llx", __func__, 5324 key); 5325 } 5326 } 5327 } 5328 } 5329 5330 RW_EXIT(&vswp->mfdbrw); 5331 5332 /* 5333 * If the pkt came from either a vnet or from physical device, 5334 * and if we havent already sent the pkt up the stack then we 5335 * check now if we can/should (i.e. the interface is plumbed 5336 * and in promisc mode). 5337 */ 5338 if ((check_if) && 5339 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 5340 READ_ENTER(&vswp->if_lockrw); 5341 if (VSW_U_P(vswp->if_state)) { 5342 RW_EXIT(&vswp->if_lockrw); 5343 D3(vswp, "%s: (caller %d) finally sending up stack" 5344 " for addr 0x%llx", __func__, caller, key); 5345 nmp = copymsg(mp); 5346 if (nmp) 5347 mac_rx(vswp->if_mh, NULL, nmp); 5348 } else { 5349 RW_EXIT(&vswp->if_lockrw); 5350 } 5351 } 5352 5353 freemsg(mp); 5354 5355 D1(vswp, "%s: exit", __func__); 5356 5357 return (0); 5358 } 5359 5360 /* transmit the packet over the given port */ 5361 static int 5362 vsw_portsend(vsw_port_t *port, mblk_t *mp) 5363 { 5364 vsw_ldc_list_t *ldcl = &port->p_ldclist; 5365 vsw_ldc_t *ldcp; 5366 int status = 0; 5367 5368 5369 READ_ENTER(&ldcl->lockrw); 5370 /* 5371 * Note for now, we have a single channel. 5372 */ 5373 ldcp = ldcl->head; 5374 if (ldcp == NULL) { 5375 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 5376 freemsg(mp); 5377 RW_EXIT(&ldcl->lockrw); 5378 return (1); 5379 } 5380 5381 /* 5382 * Send the message out using the appropriate 5383 * transmit function which will free mblock when it 5384 * is finished with it. 
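 * (port->transmit is expected to point at either
 * vsw_dringsend() or vsw_descrsend() below, according to the
 * transfer mode negotiated with the peer during the
 * handshake.)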
5385 */ 5386 mutex_enter(&port->tx_lock); 5387 if (port->transmit != NULL) 5388 status = (*port->transmit)(ldcp, mp); 5389 else { 5390 freemsg(mp); 5391 } 5392 mutex_exit(&port->tx_lock); 5393 5394 RW_EXIT(&ldcl->lockrw); 5395 5396 return (status); 5397 } 5398 5399 /* 5400 * Send packet out via descriptor ring to a logical device. 5401 */ 5402 static int 5403 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 5404 { 5405 vio_dring_msg_t dring_pkt; 5406 dring_info_t *dp = NULL; 5407 vsw_private_desc_t *priv_desc = NULL; 5408 vnet_public_desc_t *pub = NULL; 5409 vsw_t *vswp = ldcp->ldc_vswp; 5410 mblk_t *bp; 5411 size_t n, size; 5412 caddr_t bufp; 5413 int idx; 5414 int status = LDC_TX_SUCCESS; 5415 5416 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 5417 5418 /* TODO: make test a macro */ 5419 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 5420 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 5421 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 5422 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 5423 ldcp->lane_out.lstate); 5424 freemsg(mp); 5425 return (LDC_TX_FAILURE); 5426 } 5427 5428 /* 5429 * Note - using first ring only, this may change 5430 * in the future. 5431 */ 5432 if ((dp = ldcp->lane_out.dringp) == NULL) { 5433 DERR(vswp, "%s(%lld): no dring for outbound lane on" 5434 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 5435 freemsg(mp); 5436 return (LDC_TX_FAILURE); 5437 } 5438 5439 size = msgsize(mp); 5440 if (size > (size_t)ETHERMAX) { 5441 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 5442 ldcp->ldc_id, size); 5443 freemsg(mp); 5444 return (LDC_TX_FAILURE); 5445 } 5446 5447 /* 5448 * Find a free descriptor 5449 * 5450 * Note: for the moment we are assuming that we will only 5451 * have one dring going from the switch to each of its 5452 * peers. This may change in the future. 5453 */ 5454 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 5455 D2(vswp, "%s(%lld): no descriptor available for ring " 5456 "at 0x%llx", __func__, ldcp->ldc_id, dp); 5457 5458 /* nothing more we can do */ 5459 status = LDC_TX_NORESOURCES; 5460 goto vsw_dringsend_free_exit; 5461 } else { 5462 D2(vswp, "%s(%lld): free private descriptor found at pos " 5463 "%ld addr 0x%llx\n", __func__, ldcp->ldc_id, idx, 5464 priv_desc); 5465 } 5466 5467 /* copy data into the descriptor */ 5468 bufp = priv_desc->datap; 5469 bufp += VNET_IPALIGN; 5470 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 5471 n = MBLKL(bp); 5472 bcopy(bp->b_rptr, bufp, n); 5473 bufp += n; 5474 } 5475 5476 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 5477 5478 pub = priv_desc->descp; 5479 pub->nbytes = priv_desc->datalen; 5480 5481 mutex_enter(&priv_desc->dstate_lock); 5482 pub->hdr.dstate = VIO_DESC_READY; 5483 mutex_exit(&priv_desc->dstate_lock); 5484 5485 /* 5486 * Determine whether or not we need to send a message to our 5487 * peer prompting them to read our newly updated descriptor(s). 5488 */ 5489 mutex_enter(&dp->restart_lock); 5490 if (dp->restart_reqd) { 5491 dp->restart_reqd = B_FALSE; 5492 mutex_exit(&dp->restart_lock); 5493 5494 /* 5495 * Send a vio_dring_msg to peer to prompt them to read 5496 * the updated descriptor ring. 
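 * The start index picks up just after the last descriptor the
 * peer ACK'd; e.g. if last_ack_recv is 7 the message asks the
 * peer to begin at descriptor 8, and an end_idx of -1 tells it
 * to keep reading until it hits a descriptor which is not
 * READY.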
5497 */ 5498 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 5499 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 5500 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 5501 dring_pkt.tag.vio_sid = ldcp->local_session; 5502 5503 /* Note - for now using first ring */ 5504 dring_pkt.dring_ident = dp->ident; 5505 5506 mutex_enter(&ldcp->lane_out.seq_lock); 5507 dring_pkt.seq_num = ldcp->lane_out.seq_num++; 5508 mutex_exit(&ldcp->lane_out.seq_lock); 5509 5510 /* 5511 * If last_ack_recv is -1 then we know we've not 5512 * received any ack's yet, so this must be the first 5513 * msg sent, so set the start to the begining of the ring. 5514 */ 5515 mutex_enter(&dp->dlock); 5516 if (dp->last_ack_recv == -1) { 5517 dring_pkt.start_idx = 0; 5518 } else { 5519 dring_pkt.start_idx = (dp->last_ack_recv + 1) % 5520 dp->num_descriptors; 5521 } 5522 dring_pkt.end_idx = -1; 5523 mutex_exit(&dp->dlock); 5524 5525 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 5526 ldcp->ldc_id, dp, dring_pkt.dring_ident); 5527 D3(vswp, "%s(%lld): start %lld : end %lld : seq %lld\n", 5528 __func__, ldcp->ldc_id, dring_pkt.start_idx, 5529 dring_pkt.end_idx, dring_pkt.seq_num); 5530 5531 vsw_send_msg(ldcp, (void *)&dring_pkt, 5532 sizeof (vio_dring_msg_t)); 5533 } else { 5534 mutex_exit(&dp->restart_lock); 5535 D2(vswp, "%s(%lld): updating descp %d", __func__, 5536 ldcp->ldc_id, idx); 5537 } 5538 5539 vsw_dringsend_free_exit: 5540 5541 /* free the message block */ 5542 freemsg(mp); 5543 5544 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 5545 return (status); 5546 } 5547 5548 /* 5549 * Send an in-band descriptor message over ldc. 5550 */ 5551 static int 5552 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 5553 { 5554 vsw_t *vswp = ldcp->ldc_vswp; 5555 vio_ibnd_desc_t ibnd_msg; 5556 vsw_private_desc_t *priv_desc = NULL; 5557 dring_info_t *dp = NULL; 5558 size_t n, size = 0; 5559 caddr_t bufp; 5560 mblk_t *bp; 5561 int idx, i; 5562 int status = LDC_TX_SUCCESS; 5563 static int warn_msg = 1; 5564 5565 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5566 5567 ASSERT(mp != NULL); 5568 5569 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 5570 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 5571 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 5572 __func__, ldcp->ldc_id, ldcp->ldc_status, 5573 ldcp->lane_out.lstate); 5574 freemsg(mp); 5575 return (LDC_TX_FAILURE); 5576 } 5577 5578 /* 5579 * only expect single dring to exist, which we use 5580 * as an internal buffer, rather than a transfer channel. 
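 * In in-band mode the peer never reads this ring directly:
 * the data is described by the memory cookies carried in the
 * vio_ibnd_desc_t message and is pulled across the channel by
 * the peer using ldc_mem_copy().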
5581 */ 5582 if ((dp = ldcp->lane_out.dringp) == NULL) { 5583 DERR(vswp, "%s(%lld): no dring for outbound lane", 5584 __func__, ldcp->ldc_id); 5585 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", 5586 __func__, ldcp->ldc_id, ldcp->ldc_status, 5587 ldcp->lane_out.lstate); 5588 freemsg(mp); 5589 return (LDC_TX_FAILURE); 5590 } 5591 5592 size = msgsize(mp); 5593 if (size > (size_t)ETHERMAX) { 5594 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 5595 ldcp->ldc_id, size); 5596 freemsg(mp); 5597 return (LDC_TX_FAILURE); 5598 } 5599 5600 /* 5601 * Find a free descriptor in our buffer ring 5602 */ 5603 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 5604 if (warn_msg) { 5605 DERR(vswp, "%s(%lld): no descriptor available for ring " 5606 "at 0x%llx", __func__, ldcp->ldc_id, dp); 5607 warn_msg = 0; 5608 } 5609 5610 /* nothing more we can do */ 5611 status = LDC_TX_NORESOURCES; 5612 goto vsw_descrsend_free_exit; 5613 } else { 5614 D2(vswp, "%s(%lld): free private descriptor found at pos " 5615 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, 5616 priv_desc); 5617 warn_msg = 1; 5618 } 5619 5620 /* copy data into the descriptor */ 5621 bufp = priv_desc->datap; 5622 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 5623 n = MBLKL(bp); 5624 bcopy(bp->b_rptr, bufp, n); 5625 bufp += n; 5626 } 5627 5628 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 5629 5630 /* create and send the in-band descp msg */ 5631 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 5632 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 5633 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 5634 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 5635 5636 mutex_enter(&ldcp->lane_out.seq_lock); 5637 ibnd_msg.hdr.seq_num = ldcp->lane_out.seq_num++; 5638 mutex_exit(&ldcp->lane_out.seq_lock); 5639 5640 /* 5641 * Copy the mem cookies describing the data from the 5642 * private region of the descriptor ring into the inband 5643 * descriptor. 
5644 */
5645 for (i = 0; i < priv_desc->ncookies; i++) {
5646 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i],
5647 sizeof (ldc_mem_cookie_t));
5648 }
5649
5650 ibnd_msg.hdr.desc_handle = idx;
5651 ibnd_msg.ncookies = priv_desc->ncookies;
5652 ibnd_msg.nbytes = size;
5653
5654 vsw_send_msg(ldcp, (void *)&ibnd_msg, sizeof (vio_ibnd_desc_t));
5655
5656 vsw_descrsend_free_exit:
5657
5658 /* free the allocated message blocks */
5659 freemsg(mp);
5660
5661 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
5662 return (status);
5663 }
5664
5665 static void
5666 vsw_send_ver(vsw_ldc_t *ldcp)
5667 {
5668 vsw_t *vswp = ldcp->ldc_vswp;
5669 lane_t *lp = &ldcp->lane_out;
5670 vio_ver_msg_t ver_msg;
5671
5672 D1(vswp, "%s enter", __func__);
5673
5674 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
5675 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
5676 ver_msg.tag.vio_subtype_env = VIO_VER_INFO;
5677 ver_msg.tag.vio_sid = ldcp->local_session;
5678
5679 ver_msg.ver_major = vsw_versions[0].ver_major;
5680 ver_msg.ver_minor = vsw_versions[0].ver_minor;
5681 ver_msg.dev_class = VDEV_NETWORK_SWITCH;
5682
5683 lp->lstate |= VSW_VER_INFO_SENT;
5684 lp->ver_major = ver_msg.ver_major;
5685 lp->ver_minor = ver_msg.ver_minor;
5686
5687 DUMP_TAG(ver_msg.tag);
5688
5689 vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t));
5690
5691 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id);
5692 }
5693
5694 static void
5695 vsw_send_attr(vsw_ldc_t *ldcp)
5696 {
5697 vsw_t *vswp = ldcp->ldc_vswp;
5698 lane_t *lp = &ldcp->lane_out;
5699 vnet_attr_msg_t attr_msg;
5700
5701 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
5702
5703 /*
5704 * Subtype is set to INFO by default
5705 */
5706 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
5707 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
5708 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO;
5709 attr_msg.tag.vio_sid = ldcp->local_session;
5710
5711 /* payload copied from default settings for lane */
5712 attr_msg.mtu = lp->mtu;
5713 attr_msg.addr_type = lp->addr_type;
5714 attr_msg.xfer_mode = lp->xfer_mode;
5715 attr_msg.ack_freq = lp->ack_freq;
5716
5717 READ_ENTER(&vswp->if_lockrw);
5718 bcopy(&(vswp->if_addr), &(attr_msg.addr), ETHERADDRL);
5719 RW_EXIT(&vswp->if_lockrw);
5720
5721 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT;
5722
5723 DUMP_TAG(attr_msg.tag);
5724
5725 vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t));
5726
5727 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
5728 }
5729
5730 /*
5731 * Create dring info msg (which also results in the creation of
5732 * a dring).
5733 */
5734 static vio_dring_reg_msg_t *
5735 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp)
5736 {
5737 vio_dring_reg_msg_t *mp;
5738 dring_info_t *dp;
5739 vsw_t *vswp = ldcp->ldc_vswp;
5740
5741 D1(vswp, "vsw_create_dring_info_pkt enter\n");
5742
5743 /*
5744 * If we can't create a dring, obviously no point sending
5745 * a message.
5746 */
5747 if ((dp = vsw_create_dring(ldcp)) == NULL)
5748 return (NULL);
5749
5750 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP);
5751
5752 mp->tag.vio_msgtype = VIO_TYPE_CTRL;
5753 mp->tag.vio_subtype = VIO_SUBTYPE_INFO;
5754 mp->tag.vio_subtype_env = VIO_DRING_REG;
5755 mp->tag.vio_sid = ldcp->local_session;
5756
5757 /* payload */
5758 mp->num_descriptors = dp->num_descriptors;
5759 mp->descriptor_size = dp->descriptor_size;
5760 mp->options = dp->options;
5761 mp->ncookies = dp->ncookies;
5762 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t));
5763
5764 mp->dring_ident = 0;
5765
5766 D1(vswp, "vsw_create_dring_info_pkt exit\n");
5767
5768 return (mp);
5769 }
5770
5771 static void
5772 vsw_send_dring_info(vsw_ldc_t *ldcp)
5773 {
5774 vio_dring_reg_msg_t *dring_msg;
5775 vsw_t *vswp = ldcp->ldc_vswp;
5776
5777 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id);
5778
5779 dring_msg = vsw_create_dring_info_pkt(ldcp);
5780 if (dring_msg == NULL) {
5781 cmn_err(CE_WARN, "vsw_send_dring_info: error creating msg");
5782 return;
5783 }
5784
5785 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT;
5786
5787 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg);
5788
5789 vsw_send_msg(ldcp, dring_msg,
5790 sizeof (vio_dring_reg_msg_t));
5791
5792 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t));
5793
5794 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id);
5795 }
5796
5797 static void
5798 vsw_send_rdx(vsw_ldc_t *ldcp)
5799 {
5800 vsw_t *vswp = ldcp->ldc_vswp;
5801 vio_rdx_msg_t rdx_msg;
5802
5803 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
5804
5805 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
5806 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
5807 rdx_msg.tag.vio_subtype_env = VIO_RDX;
5808 rdx_msg.tag.vio_sid = ldcp->local_session;
5809
5810 ldcp->lane_out.lstate |= VSW_RDX_INFO_SENT;
5811
5812 DUMP_TAG(rdx_msg.tag);
5813
5814 vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t));
5815
5816 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
5817 }
5818
5819 /*
5820 * Generic routine to send message out over ldc channel.
5821 */
5822 static void
5823 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size)
5824 {
5825 int rv;
int retries = vsw_wretries;
5826 size_t msglen = size;
5827 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp;
5828 vsw_t *vswp = ldcp->ldc_vswp;
5829
5830 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes",
5831 ldcp->ldc_id, size);
5832
5833 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype);
5834 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype);
5835 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env);
5836
5837 mutex_enter(&ldcp->ldc_txlock);
/*
 * Retry on a local copy of vsw_wretries; decrementing the
 * global tunable itself would permanently consume the retry
 * budget for all later callers.
 */
5838 do {
5839 msglen = size;
5840 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen);
5841 } while (rv == EWOULDBLOCK && --retries > 0);
5842
5843 mutex_exit(&ldcp->ldc_txlock);
5844
5845 if ((rv != 0) || (msglen != size)) {
5846 DERR(vswp, "vsw_send_msg: ldc_write failed: chan(%lld) "
5847 "rv(%d) size (%d) msglen(%d)\n", ldcp->ldc_id,
5848 rv, size, msglen);
5849 }
5850
5851 D1(vswp, "vsw_send_msg (%lld) exit : sent %d bytes",
5852 ldcp->ldc_id, msglen);
5853 }
5854
5855 /*
5856 * Add an entry into FDB, for the given mac address and port_id.
5857 * Returns 0 on success, 1 on failure.
5858 *
5859 * Lock protecting FDB must be held by calling process.
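 * (KEY_HASH folds the 6 byte MAC address into the 64 bit key
 * used for the mod_hash table, so inserts, lookups and deletes
 * all hash the address the same way.)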
5860 */
5861 static int
5862 vsw_add_fdb(vsw_t *vswp, vsw_port_t *port)
5863 {
5864 uint64_t addr = 0;
5865
5866 D1(vswp, "%s: enter", __func__);
5867
5868 KEY_HASH(addr, port->p_macaddr);
5869
5870 D2(vswp, "%s: key = 0x%llx", __func__, addr);
5871
5872 /*
5873 * Note: duplicate keys will be rejected by mod_hash.
5874 */
5875 if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)addr,
5876 (mod_hash_val_t)port) != 0) {
5877 DERR(vswp, "%s: unable to add entry into fdb.", __func__);
5878 return (1);
5879 }
5880
5881 D1(vswp, "%s: exit", __func__);
5882 return (0);
5883 }
5884
5885 /*
5886 * Remove an entry from FDB.
5887 * Returns 0 on success, 1 on failure.
5888 */
5889 static int
5890 vsw_del_fdb(vsw_t *vswp, vsw_port_t *port)
5891 {
5892 uint64_t addr = 0;
5893
5894 D1(vswp, "%s: enter", __func__);
5895
5896 KEY_HASH(addr, port->p_macaddr);
5897
5898 D2(vswp, "%s: key = 0x%llx", __func__, addr);
5899
5900 (void) mod_hash_destroy(vswp->fdb, (mod_hash_key_t)addr);
5901
5902 D1(vswp, "%s: exit", __func__);
5903
5904 return (0);
5905 }
5906
5907 /*
5908 * Search fdb for a given mac address.
5909 * Returns pointer to the entry if found, else returns NULL.
5910 */
5911 static vsw_port_t *
5912 vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp)
5913 {
5914 uint64_t key = 0;
5915 vsw_port_t *port = NULL;
5916
5917 D1(vswp, "%s: enter", __func__);
5918
5919 KEY_HASH(key, ehp->ether_dhost);
5920
5921 D2(vswp, "%s: key = 0x%llx", __func__, key);
5922
5923 if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key,
5924 (mod_hash_val_t *)&port) != 0) {
5925 return (NULL);
5926 }
5927
5928 D1(vswp, "%s: exit", __func__);
5929
5930 return (port);
5931 }
5932
5933 /*
5934 * Add or remove multicast address(es).
5935 *
5936 * Returns 0 on success, 1 on failure.
5937 */
5938 static int
5939 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
5940 {
5941 mcst_addr_t *mcst_p = NULL;
5942 vsw_t *vswp = port->p_vswp;
5943 uint64_t addr = 0x0;
5944 int i;
5945
5946 D1(vswp, "%s: enter", __func__);
5947
5948 D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);
5949
5950 for (i = 0; i < mcst_pkt->count; i++) {
5951 /*
5952 * Convert address into form that can be used
5953 * as hash table key.
5954 */
5955 KEY_HASH(addr, mcst_pkt->mca[i]);
5956
5957 /*
5958 * Add or delete the specified address/port combination.
5959 */
5960 if (mcst_pkt->set == 0x1) {
5961 D3(vswp, "%s: adding multicast address 0x%llx for "
5962 "port %ld", __func__, addr, port->p_instance);
5963 if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
5964 /*
5965 * Update the list of multicast
5966 * addresses contained within the
5967 * port structure to include this new
5968 * one.
5969 */
5970 mcst_p = kmem_alloc(sizeof (mcst_addr_t),
5971 KM_NOSLEEP);
5972 if (mcst_p == NULL) {
5973 DERR(vswp, "%s: unable to alloc mem",
5974 __func__);
5975 return (1);
5976 }
5977
5978 mcst_p->nextp = NULL;
5979 mcst_p->addr = addr;
5980
5981 mutex_enter(&port->mca_lock);
5982 mcst_p->nextp = port->mcap;
5983 port->mcap = mcst_p;
5984 mutex_exit(&port->mca_lock);
5985
5986 /*
5987 * Program the address into HW. If the addr
5988 * has already been programmed then the MAC
5989 * just increments a ref counter (which is
5990 * used when the address is being deleted)
5991 *
5992 * Note:
5993 * For the moment we don't care if this
5994 * succeeds because the card must be in
5995 * promisc mode. When we have the ability
5996 * to program multiple unicast addresses into
5997 * the card then we will need to check this
5998 * return value.
5999 */ 6000 if (vswp->mh != NULL) 6001 (void) mac_multicst_add(vswp->mh, 6002 (uchar_t *)&mcst_pkt->mca[i]); 6003 6004 } else { 6005 DERR(vswp, "%s: error adding multicast " 6006 "address 0x%llx for port %ld", 6007 __func__, addr, port->p_instance); 6008 return (1); 6009 } 6010 } else { 6011 /* 6012 * Delete an entry from the multicast hash 6013 * table and update the address list 6014 * appropriately. 6015 */ 6016 if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 6017 D3(vswp, "%s: deleting multicast address " 6018 "0x%llx for port %ld", __func__, addr, 6019 port->p_instance); 6020 6021 vsw_del_addr(VSW_VNETPORT, port, addr); 6022 6023 /* 6024 * Remove the address from HW. The address 6025 * will actually only be removed once the ref 6026 * count within the MAC layer has dropped to 6027 * zero. I.e. we can safely call this fn even 6028 * if other ports are interested in this 6029 * address. 6030 */ 6031 if (vswp->mh != NULL) 6032 (void) mac_multicst_remove(vswp->mh, 6033 (uchar_t *)&mcst_pkt->mca[i]); 6034 6035 } else { 6036 DERR(vswp, "%s: error deleting multicast " 6037 "addr 0x%llx for port %ld", 6038 __func__, addr, port->p_instance); 6039 return (1); 6040 } 6041 } 6042 } 6043 D1(vswp, "%s: exit", __func__); 6044 return (0); 6045 } 6046 6047 /* 6048 * Add a new multicast entry. 6049 * 6050 * Search hash table based on address. If match found then 6051 * update associated val (which is chain of ports), otherwise 6052 * create new key/val (addr/port) pair and insert into table. 6053 */ 6054 static int 6055 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 6056 { 6057 int dup = 0; 6058 int rv = 0; 6059 mfdb_ent_t *ment = NULL; 6060 mfdb_ent_t *tmp_ent = NULL; 6061 mfdb_ent_t *new_ent = NULL; 6062 void *tgt = NULL; 6063 6064 if (devtype == VSW_VNETPORT) { 6065 /* 6066 * Being invoked from a vnet. 6067 */ 6068 ASSERT(arg != NULL); 6069 tgt = arg; 6070 D2(NULL, "%s: port %d : address 0x%llx", __func__, 6071 ((vsw_port_t *)arg)->p_instance, addr); 6072 } else { 6073 /* 6074 * We are being invoked via the m_multicst mac entry 6075 * point. 6076 */ 6077 D2(NULL, "%s: address 0x%llx", __func__, addr); 6078 tgt = (void *)vswp; 6079 } 6080 6081 WRITE_ENTER(&vswp->mfdbrw); 6082 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 6083 (mod_hash_val_t *)&ment) != 0) { 6084 6085 /* address not currently in table */ 6086 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 6087 ment->d_addr = (void *)tgt; 6088 ment->d_type = devtype; 6089 ment->nextp = NULL; 6090 6091 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr, 6092 (mod_hash_val_t)ment) != 0) { 6093 DERR(vswp, "%s: hash table insertion failed", __func__); 6094 kmem_free(ment, sizeof (mfdb_ent_t)); 6095 rv = 1; 6096 } else { 6097 D2(vswp, "%s: added initial entry for 0x%llx to " 6098 "table", __func__, addr); 6099 } 6100 } else { 6101 /* 6102 * Address in table. Check to see if specified port 6103 * is already associated with the address. If not add 6104 * it now. 6105 */ 6106 tmp_ent = ment; 6107 while (tmp_ent != NULL) { 6108 if (tmp_ent->d_addr == (void *)tgt) { 6109 if (devtype == VSW_VNETPORT) { 6110 DERR(vswp, "%s: duplicate port entry " 6111 "found for portid %ld and key " 6112 "0x%llx", __func__, 6113 ((vsw_port_t *)arg)->p_instance, 6114 addr); 6115 } else { 6116 DERR(vswp, "%s: duplicate entry found" 6117 "for key 0x%llx", 6118 __func__, addr); 6119 } 6120 rv = 1; 6121 dup = 1; 6122 break; 6123 } 6124 tmp_ent = tmp_ent->nextp; 6125 } 6126 6127 /* 6128 * Port not on list so add it to end now. 
6129 */ 6130 if (0 == dup) { 6131 D2(vswp, "%s: added entry for 0x%llx to table", 6132 __func__, addr); 6133 new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 6134 new_ent->d_addr = (void *)tgt; 6135 new_ent->d_type = devtype; 6136 new_ent->nextp = NULL; 6137 6138 tmp_ent = ment; 6139 while (tmp_ent->nextp != NULL) 6140 tmp_ent = tmp_ent->nextp; 6141 6142 tmp_ent->nextp = new_ent; 6143 } 6144 } 6145 6146 RW_EXIT(&vswp->mfdbrw); 6147 return (rv); 6148 } 6149 6150 /* 6151 * Remove a multicast entry from the hashtable. 6152 * 6153 * Search hash table based on address. If match found, scan 6154 * list of ports associated with address. If specified port 6155 * found remove it from list. 6156 */ 6157 static int 6158 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 6159 { 6160 mfdb_ent_t *ment = NULL; 6161 mfdb_ent_t *curr_p, *prev_p; 6162 void *tgt = NULL; 6163 6164 D1(vswp, "%s: enter", __func__); 6165 6166 if (devtype == VSW_VNETPORT) { 6167 tgt = (vsw_port_t *)arg; 6168 D2(vswp, "%s: removing port %d from mFDB for address" 6169 " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, 6170 addr); 6171 } else { 6172 D2(vswp, "%s: removing entry", __func__); 6173 tgt = (void *)vswp; 6174 } 6175 6176 WRITE_ENTER(&vswp->mfdbrw); 6177 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 6178 (mod_hash_val_t *)&ment) != 0) { 6179 D2(vswp, "%s: address 0x%llx not in table", __func__, addr); 6180 RW_EXIT(&vswp->mfdbrw); 6181 return (1); 6182 } 6183 6184 prev_p = curr_p = ment; 6185 6186 while (curr_p != NULL) { 6187 if (curr_p->d_addr == (void *)tgt) { 6188 if (devtype == VSW_VNETPORT) { 6189 D2(vswp, "%s: port %d found", __func__, 6190 ((vsw_port_t *)tgt)->p_instance); 6191 } else { 6192 D2(vswp, "%s: instance found", __func__); 6193 } 6194 6195 if (prev_p == curr_p) { 6196 /* 6197 * head of list, if no other element is in 6198 * list then destroy this entry, otherwise 6199 * just replace it with updated value. 6200 */ 6201 ment = curr_p->nextp; 6202 kmem_free(curr_p, sizeof (mfdb_ent_t)); 6203 if (ment == NULL) { 6204 (void) mod_hash_destroy(vswp->mfdb, 6205 (mod_hash_val_t)addr); 6206 } else { 6207 (void) mod_hash_replace(vswp->mfdb, 6208 (mod_hash_key_t)addr, 6209 (mod_hash_val_t)ment); 6210 } 6211 } else { 6212 /* 6213 * Not head of list, no need to do 6214 * replacement, just adjust list pointers. 6215 */ 6216 prev_p->nextp = curr_p->nextp; 6217 kmem_free(curr_p, sizeof (mfdb_ent_t)); 6218 } 6219 break; 6220 } 6221 6222 prev_p = curr_p; 6223 curr_p = curr_p->nextp; 6224 } 6225 6226 RW_EXIT(&vswp->mfdbrw); 6227 6228 D1(vswp, "%s: exit", __func__); 6229 6230 return (0); 6231 } 6232 6233 /* 6234 * Port is being deleted, but has registered an interest in one 6235 * or more multicast groups. Using the list of addresses maintained 6236 * within the port structure find the appropriate entry in the hash 6237 * table and remove this port from the list of interested ports. 
/*
 * Port is being deleted, but has registered an interest in one
 * or more multicast groups. Using the list of addresses maintained
 * within the port structure find the appropriate entry in the hash
 * table and remove this port from the list of interested ports.
 */
static void
vsw_del_mcst_port(vsw_port_t *port)
{
    mcst_addr_t *mcst_p = NULL;
    vsw_t       *vswp = port->p_vswp;

    D1(vswp, "%s: enter", __func__);

    mutex_enter(&port->mca_lock);
    while (port->mcap != NULL) {
        (void) vsw_del_mcst(vswp, VSW_VNETPORT,
            port->mcap->addr, port);

        mcst_p = port->mcap->nextp;
        kmem_free(port->mcap, sizeof (mcst_addr_t));
        port->mcap = mcst_p;
    }
    mutex_exit(&port->mca_lock);

    D1(vswp, "%s: exit", __func__);
}

/*
 * This vsw instance is detaching, but has registered an interest in one
 * or more multicast groups. Using the list of addresses maintained
 * within the vsw structure find the appropriate entry in the hash
 * table and remove this instance from the list of interested ports.
 */
static void
vsw_del_mcst_vsw(vsw_t *vswp)
{
    mcst_addr_t *next_p = NULL;

    D1(vswp, "%s: enter", __func__);

    mutex_enter(&vswp->mca_lock);

    while (vswp->mcap != NULL) {
        DERR(vswp, "%s: deleting addr 0x%llx",
            __func__, vswp->mcap->addr);
        (void) vsw_del_mcst(vswp, VSW_LOCALDEV,
            vswp->mcap->addr, NULL);

        next_p = vswp->mcap->nextp;
        kmem_free(vswp->mcap, sizeof (mcst_addr_t));
        vswp->mcap = next_p;
    }

    mutex_exit(&vswp->mca_lock);

    D1(vswp, "%s: exit", __func__);
}

/*
 * Remove the specified address from the list of addresses maintained
 * in the given port node or vsw instance.
 */
static void
vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr)
{
    vsw_t       *vswp = NULL;
    vsw_port_t  *port = NULL;
    mcst_addr_t *prev_p = NULL;
    mcst_addr_t *curr_p = NULL;

    D1(NULL, "%s: enter : devtype %d : addr 0x%llx",
        __func__, devtype, addr);

    if (devtype == VSW_VNETPORT) {
        port = (vsw_port_t *)arg;
        mutex_enter(&port->mca_lock);
        prev_p = curr_p = port->mcap;
    } else {
        vswp = (vsw_t *)arg;
        mutex_enter(&vswp->mca_lock);
        prev_p = curr_p = vswp->mcap;
    }

    while (curr_p != NULL) {
        if (curr_p->addr == addr) {
            D2(NULL, "%s: address found", __func__);
            /* match found */
            if (prev_p == curr_p) {
                /* list head */
                if (devtype == VSW_VNETPORT)
                    port->mcap = curr_p->nextp;
                else
                    vswp->mcap = curr_p->nextp;
            } else {
                prev_p->nextp = curr_p->nextp;
            }
            kmem_free(curr_p, sizeof (mcst_addr_t));
            break;
        } else {
            prev_p = curr_p;
            curr_p = curr_p->nextp;
        }
    }

    if (devtype == VSW_VNETPORT)
        mutex_exit(&port->mca_lock);
    else
        mutex_exit(&vswp->mca_lock);

    D1(NULL, "%s: exit", __func__);
}
/*
 * Creates a descriptor ring (dring) and links it into the
 * list of outbound drings for this channel.
 *
 * Returns NULL if creation failed.
 */
static dring_info_t *
vsw_create_dring(vsw_ldc_t *ldcp)
{
    vsw_private_desc_t  *priv_addr = NULL;
    vsw_t               *vswp = ldcp->ldc_vswp;
    ldc_mem_info_t      minfo;
    dring_info_t        *dp, *tp;
    int                 i;

    dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);

    mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);

    /* create public section of ring */
    if ((ldc_mem_dring_create(VSW_RING_NUM_EL,
        VSW_PUB_SIZE, &dp->handle)) != 0) {

        DERR(vswp, "vsw_create_dring(%lld): ldc dring create "
            "failed", ldcp->ldc_id);
        goto create_fail_exit;
    }

    ASSERT(dp->handle != NULL);

    /*
     * Get the base address of the public section of the ring.
     */
    if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
        DERR(vswp, "vsw_create_dring(%lld): dring info failed\n",
            ldcp->ldc_id);
        goto dring_fail_exit;
    } else {
        ASSERT(minfo.vaddr != 0);
        dp->pub_addr = minfo.vaddr;
    }

    dp->num_descriptors = VSW_RING_NUM_EL;
    dp->descriptor_size = VSW_PUB_SIZE;
    dp->options = VIO_TX_DRING;
    dp->ncookies = 1;    /* guaranteed by ldc */

    /*
     * create private portion of ring
     */
    dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc(
        (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP);

    if (vsw_setup_ring(ldcp, dp)) {
        DERR(vswp, "%s: unable to setup ring", __func__);
        goto dring_fail_exit;
    }

    /* haven't used any descriptors yet */
    dp->end_idx = 0;
    dp->last_ack_recv = -1;

    /* bind dring to the channel */
    if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle,
        LDC_SHADOW_MAP, LDC_MEM_RW,
        &dp->cookie[0], &dp->ncookies)) != 0) {
        DERR(vswp, "vsw_create_dring: unable to bind to channel "
            "%lld", ldcp->ldc_id);
        goto dring_fail_exit;
    }

    mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
    dp->restart_reqd = B_TRUE;

    /*
     * Only ever create rings for outgoing lane. Link it onto
     * end of list.
     */
    if (ldcp->lane_out.dringp == NULL) {
        D2(vswp, "vsw_create_dring: adding first outbound ring");
        ldcp->lane_out.dringp = dp;
    } else {
        tp = ldcp->lane_out.dringp;
        while (tp->next != NULL)
            tp = tp->next;

        tp->next = dp;
    }

    return (dp);

dring_fail_exit:
    (void) ldc_mem_dring_destroy(dp->handle);

create_fail_exit:
    if (dp->priv_addr != NULL) {
        priv_addr = dp->priv_addr;
        for (i = 0; i < VSW_RING_NUM_EL; i++) {
            if (priv_addr->memhandle != NULL)
                (void) ldc_mem_free_handle(
                    priv_addr->memhandle);
            priv_addr++;
        }
        kmem_free(dp->priv_addr,
            (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
    }
    mutex_destroy(&dp->dlock);

    kmem_free(dp, sizeof (dring_info_t));
    return (NULL);
}
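/*
 * For orientation, the exported-dring lifecycle used above, sketched
 * against the LDC memory API (illustrative only, error handling
 * omitted; see vsw_free_ring() for the matching teardown):
 *
 *    ldc_dring_handle_t  dh;
 *    ldc_mem_info_t      minfo;
 *    ldc_mem_cookie_t    cookie;
 *    uint32_t            ncookies;
 *
 *    (void) ldc_mem_dring_create(nelem, esize, &dh);
 *    (void) ldc_mem_dring_info(dh, &minfo);    ... minfo.vaddr => ring
 *    (void) ldc_mem_dring_bind(ldc_handle, dh, LDC_SHADOW_MAP,
 *        LDC_MEM_RW, &cookie, &ncookies);      ... export to peer
 *    ...
 *    (void) ldc_mem_dring_unbind(dh);
 *    (void) ldc_mem_dring_destroy(dh);
 */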
/*
 * Create a ring consisting of just a private portion and link
 * it into the list of rings for the outbound lane.
 *
 * These types of rings are used primarily for temporary data
 * storage (i.e. as data buffers).
 */
void
vsw_create_privring(vsw_ldc_t *ldcp)
{
    dring_info_t    *dp, *tp;
    vsw_t           *vswp = ldcp->ldc_vswp;

    D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

    dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);

    mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);

    /* no public section */
    dp->pub_addr = NULL;

    dp->priv_addr = kmem_zalloc((sizeof (vsw_private_desc_t) *
        VSW_RING_NUM_EL), KM_SLEEP);

    if (vsw_setup_ring(ldcp, dp)) {
        DERR(vswp, "%s: setup of ring failed", __func__);
        kmem_free(dp->priv_addr,
            (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
        mutex_destroy(&dp->dlock);
        kmem_free(dp, sizeof (dring_info_t));
        return;
    }

    /* haven't used any descriptors yet */
    dp->end_idx = 0;

    mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
    dp->restart_reqd = B_TRUE;

    /*
     * Only ever create rings for outgoing lane. Link it onto
     * end of list.
     */
    if (ldcp->lane_out.dringp == NULL) {
        D2(vswp, "%s: adding first outbound privring", __func__);
        ldcp->lane_out.dringp = dp;
    } else {
        tp = ldcp->lane_out.dringp;
        while (tp->next != NULL)
            tp = tp->next;

        tp->next = dp;
    }

    D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}
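/*
 * Usage note (a hedged sketch, not a statement of the full call graph):
 * a privring has no public section, so no descriptor ring is exported
 * to the peer; the per-descriptor data buffers are still bound by
 * vsw_setup_ring() so their memory cookies can be quoted in in-band
 * descriptor messages. The expected trigger is the peer negotiating
 * in-band transfers, along the lines of:
 *
 *    if (ldcp->lane_out.xfer_mode == VIO_DESC_MODE)
 *        vsw_create_privring(ldcp);
 */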
/*
 * Setup the descriptors in the dring. Returns 0 on success, 1 on
 * failure.
 */
int
vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp)
{
    vnet_public_desc_t  *pub_addr = NULL;
    vsw_private_desc_t  *priv_addr = NULL;
    vsw_t               *vswp = ldcp->ldc_vswp;
    uint64_t            *tmpp;
    uint64_t            offset = 0;
    uint32_t            ncookies = 0;
    static char         *name = "vsw_setup_ring";
    int                 i, j, nc, rv;

    priv_addr = dp->priv_addr;
    pub_addr = dp->pub_addr;

    /* public section may be null but private should never be */
    ASSERT(priv_addr != NULL);

    /*
     * Allocate the region of memory which will be used to hold
     * the data the descriptors will refer to.
     */
    dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ);
    dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP);

    D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name,
        dp->data_sz, dp->data_addr);

    tmpp = (uint64_t *)dp->data_addr;
    /* stride between buffers, in units of uint64_t elements */
    offset = VSW_RING_EL_DATA_SZ / sizeof (*tmpp);

    /*
     * Initialise some of the private and public (if they exist)
     * descriptor fields.
     */
    for (i = 0; i < VSW_RING_NUM_EL; i++) {
        mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL);

        if ((ldc_mem_alloc_handle(ldcp->ldc_handle,
            &priv_addr->memhandle)) != 0) {
            DERR(vswp, "%s: alloc mem handle failed", name);
            goto setup_ring_cleanup;
        }

        priv_addr->datap = (void *)tmpp;

        rv = ldc_mem_bind_handle(priv_addr->memhandle,
            (caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ,
            LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W,
            &(priv_addr->memcookie[0]), &ncookies);
        if (rv != 0) {
            DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed "
                "(rv %d)", name, ldcp->ldc_id, rv);
            goto setup_ring_cleanup;
        }
        priv_addr->bound = 1;

        D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx",
            name, i, priv_addr->memcookie[0].addr,
            priv_addr->memcookie[0].size);

        if (ncookies > (uint32_t)VSW_MAX_COOKIES) {
            DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned "
                "invalid num of cookies (%d) for size 0x%llx",
                name, ldcp->ldc_id, ncookies,
                VSW_RING_EL_DATA_SZ);

            goto setup_ring_cleanup;
        } else {
            for (j = 1; j < ncookies; j++) {
                rv = ldc_mem_nextcookie(priv_addr->memhandle,
                    &(priv_addr->memcookie[j]));
                if (rv != 0) {
                    DERR(vswp, "%s: ldc_mem_nextcookie "
                        "failed rv (%d)", name, rv);
                    goto setup_ring_cleanup;
                }
                D3(vswp, "%s: memcookie %d : addr 0x%llx : "
                    "size 0x%llx", name, j,
                    priv_addr->memcookie[j].addr,
                    priv_addr->memcookie[j].size);
            }
        }
        priv_addr->ncookies = ncookies;
        priv_addr->dstate = VIO_DESC_FREE;

        if (pub_addr != NULL) {

            /* link pub and private sides */
            priv_addr->descp = pub_addr;

            pub_addr->ncookies = priv_addr->ncookies;

            for (nc = 0; nc < pub_addr->ncookies; nc++) {
                bcopy(&priv_addr->memcookie[nc],
                    &pub_addr->memcookie[nc],
                    sizeof (ldc_mem_cookie_t));
            }

            pub_addr->hdr.dstate = VIO_DESC_FREE;
            pub_addr++;
        }

        /*
         * move to next element in the dring and the next
         * position in the data buffer.
         */
        priv_addr++;
        tmpp += offset;
    }

    return (0);

setup_ring_cleanup:
    /*
     * Clean up the fully initialised descriptors (0 .. i-1) as well
     * as any mutex/handle state left behind by the partially
     * initialised descriptor at index i.
     */
    priv_addr = dp->priv_addr;

    for (j = 0; j <= i && j < VSW_RING_NUM_EL; j++) {
        if (priv_addr->memhandle != NULL) {
            if (priv_addr->bound == 1) {
                (void) ldc_mem_unbind_handle(
                    priv_addr->memhandle);
                priv_addr->bound = 0;
            }
            (void) ldc_mem_free_handle(priv_addr->memhandle);
            priv_addr->memhandle = NULL;
        }
        mutex_destroy(&priv_addr->dstate_lock);
        priv_addr++;
    }
    kmem_free(dp->data_addr, dp->data_sz);

    return (1);
}
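/*
 * A minimal sketch of the per-descriptor export pattern used above
 * (assumptions: a valid channel handle, buffer and length). Binding
 * returns the first memory cookie plus a count; any remaining cookies
 * are fetched one at a time:
 *
 *    ldc_mem_handle_t  mh;
 *    ldc_mem_cookie_t  mc[VSW_MAX_COOKIES];
 *    uint32_t          nc, j;
 *
 *    (void) ldc_mem_alloc_handle(ldc_handle, &mh);
 *    (void) ldc_mem_bind_handle(mh, buf, len, LDC_SHADOW_MAP,
 *        LDC_MEM_R | LDC_MEM_W, &mc[0], &nc);
 *    for (j = 1; j < nc; j++)
 *        (void) ldc_mem_nextcookie(mh, &mc[j]);
 */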
/*
 * Searches the private section of a ring for a free descriptor,
 * starting at the location of the last free descriptor found
 * previously.
 *
 * Returns 0 if a free descriptor is available, and updates the state
 * of the private descriptor to VIO_DESC_READY, otherwise returns 1.
 *
 * FUTURE: might need to return contiguous range of descriptors
 * as dring info msg assumes all will be contiguous.
 */
static int
vsw_dring_find_free_desc(dring_info_t *dringp,
    vsw_private_desc_t **priv_p, int *idx)
{
    vsw_private_desc_t  *addr = NULL;
    int                 num = VSW_RING_NUM_EL;
    int                 ret = 1;

    D1(NULL, "%s enter\n", __func__);

    ASSERT(dringp->priv_addr != NULL);

    D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld",
        __func__, dringp, dringp->end_idx);

    addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx;

    mutex_enter(&addr->dstate_lock);
    if (addr->dstate == VIO_DESC_FREE) {
        addr->dstate = VIO_DESC_READY;
        *priv_p = addr;
        *idx = dringp->end_idx;
        dringp->end_idx = (dringp->end_idx + 1) % num;
        ret = 0;
    }
    mutex_exit(&addr->dstate_lock);

    /* ring full */
    if (ret == 1) {
        D2(NULL, "%s: no descriptors free: started at %d", __func__,
            dringp->end_idx);
    }

    D1(NULL, "%s: exit\n", __func__);

    return (ret);
}

/*
 * Map from a dring identifier to the ring itself. Returns
 * pointer to ring or NULL if no match found.
 */
static dring_info_t *
vsw_ident2dring(lane_t *lane, uint64_t ident)
{
    dring_info_t    *dp;

    for (dp = lane->dringp; dp != NULL; dp = dp->next) {
        if (dp->ident == ident)
            break;
    }

    return (dp);
}

/*
 * Set the default lane attributes. These are copied into
 * the attr msg we send to our peer. If they are not acceptable
 * then (currently) the handshake ends.
 */
static void
vsw_set_lane_attr(vsw_t *vswp, lane_t *lp)
{
    bzero(lp, sizeof (lane_t));

    READ_ENTER(&vswp->if_lockrw);
    ether_copy(&(vswp->if_addr), &(lp->addr));
    RW_EXIT(&vswp->if_lockrw);

    lp->mtu = VSW_MTU;
    lp->addr_type = ADDR_TYPE_MAC;
    lp->xfer_mode = VIO_DRING_MODE;
    lp->ack_freq = 0;    /* for shared mode */

    mutex_enter(&lp->seq_lock);
    lp->seq_num = VNET_ISS;
    mutex_exit(&lp->seq_lock);
}
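/*
 * For reference, a hedged sketch of how these defaults feed the
 * handshake: the attr-info sender (elsewhere in this file, not shown
 * here) copies the lane_t fields into the vnet_attr_msg_t that
 * vsw_check_attr() below validates on the receive side. Field names
 * follow the usage in vsw_check_attr():
 *
 *    vnet_attr_msg_t  msg;
 *
 *    msg.xfer_mode = lp->xfer_mode;    ... VIO_DRING_MODE
 *    msg.addr_type = lp->addr_type;    ... ADDR_TYPE_MAC
 *    msg.ack_freq = lp->ack_freq;      ... 0 in shared (dring) mode
 *    msg.mtu = lp->mtu;                ... VSW_MTU
 *    msg.addr = <MAC address packed into a uint64_t>;
 */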
/*
 * Verify that the attributes are acceptable.
 *
 * FUTURE: If some attributes are not acceptable, change them
 * to our desired values.
 */
static int
vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port)
{
    int ret = 0;

    D1(NULL, "vsw_check_attr enter\n");

    /*
     * Note we currently only support in-band descriptors
     * and descriptor rings, not packet based transfer (VIO_PKT_MODE)
     */
    if ((pkt->xfer_mode != VIO_DESC_MODE) &&
        (pkt->xfer_mode != VIO_DRING_MODE)) {
        D2(NULL, "vsw_check_attr: unknown mode %x\n",
            pkt->xfer_mode);
        ret = 1;
    }

    /* Only support MAC addresses at moment. */
    if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) {
        D2(NULL, "vsw_check_attr: invalid addr_type %x, "
            "or address 0x%llx\n", pkt->addr_type,
            pkt->addr);
        ret = 1;
    }

    /*
     * MAC address supplied by device should match that stored
     * in the vsw-port OBP node. Need to decide what to do if they
     * don't match, for the moment just warn but don't fail.
     */
    if (bcmp(&pkt->addr, &port->p_macaddr, ETHERADDRL) != 0) {
        DERR(NULL, "vsw_check_attr: device supplied address "
            "0x%llx doesn't match node address 0x%llx\n",
            pkt->addr, port->p_macaddr);
    }

    /*
     * Ack freq only makes sense in pkt mode, in shared
     * mode the ring descriptors say whether or not to
     * send back an ACK.
     */
    if ((pkt->xfer_mode == VIO_DRING_MODE) &&
        (pkt->ack_freq > 0)) {
        D2(NULL, "vsw_check_attr: non zero ack freq "
            " in SHM mode\n");
        ret = 1;
    }

    /*
     * Note: for the moment we only support ETHER
     * frames. This may change in the future.
     */
    if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) {
        D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n",
            pkt->mtu);
        ret = 1;
    }

    D1(NULL, "vsw_check_attr exit\n");

    return (ret);
}

/*
 * Returns 1 if there is a problem, 0 otherwise.
 */
static int
vsw_check_dring_info(vio_dring_reg_msg_t *pkt)
{
    int ret = 0;

    D1(NULL, "vsw_check_dring_info enter\n");

    if ((pkt->num_descriptors == 0) ||
        (pkt->descriptor_size == 0) ||
        (pkt->ncookies != 1)) {
        DERR(NULL, "vsw_check_dring_info: invalid dring msg");
        ret = 1;
    }

    D1(NULL, "vsw_check_dring_info exit\n");

    return (ret);
}

/*
 * Returns 1 if the two memory cookies match. Otherwise returns 0.
 */
static int
vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2)
{
    if ((m1->addr != m2->addr) ||
        (m1->size != m2->size)) {
        return (0);
    } else {
        return (1);
    }
}

/*
 * Returns 1 if ring described in reg message matches that
 * described by dring_info structure. Otherwise returns 0.
 */
static int
vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg)
{
    if ((msg->descriptor_size != dp->descriptor_size) ||
        (msg->num_descriptors != dp->num_descriptors) ||
        (msg->ncookies != dp->ncookies) ||
        !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) {
        return (0);
    } else {
        return (1);
    }
}

static caddr_t
vsw_print_ethaddr(uint8_t *a, char *ebuf)
{
    (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
        a[0], a[1], a[2], a[3], a[4], a[5]);
    return (ebuf);
}
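/*
 * Usage sketch for vsw_print_ethaddr(): the caller supplies the
 * buffer, which must hold the worst-case formatted address
 * (6 x 2 hex digits + 5 colons + NUL = 18 bytes). Illustrative only:
 *
 *    char    ebuf[18];
 *
 *    DERR(vswp, "%s: port mac %s", __func__,
 *        vsw_print_ethaddr((uint8_t *)&port->p_macaddr, ebuf));
 */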
/*
 * Reset and free all the resources associated with the
 * specified lane of a channel.
 */
static void
vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir)
{
    dring_info_t    *dp, *dpp;
    lane_t          *lp = NULL;
    int             rv = 0;

    ASSERT(ldcp != NULL);

    D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id);

    if (dir == INBOUND) {
        D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane"
            " of channel %lld", __func__, ldcp->ldc_id);
        lp = &ldcp->lane_in;
    } else {
        D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane"
            " of channel %lld", __func__, ldcp->ldc_id);
        lp = &ldcp->lane_out;
    }

    lp->lstate = VSW_LANE_INACTIV;
    mutex_enter(&lp->seq_lock);
    lp->seq_num = VNET_ISS;
    mutex_exit(&lp->seq_lock);
    if (lp->dringp) {
        if (dir == INBOUND) {
            /*
             * Inbound rings were mapped in from the peer,
             * so just unmap them and free the dring structs.
             */
            dp = lp->dringp;
            while (dp != NULL) {
                dpp = dp->next;
                if (dp->handle != NULL)
                    (void) ldc_mem_dring_unmap(dp->handle);
                kmem_free(dp, sizeof (dring_info_t));
                dp = dpp;
            }
        } else {
            /*
             * unbind, destroy exported dring, free dring struct
             */
            dp = lp->dringp;
            rv = vsw_free_ring(dp);
        }
        if (rv == 0) {
            lp->dringp = NULL;
        }
    }

    D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
}
/*
 * Free the ring and all associated resources.
 *
 * Returns 0 on success, otherwise the non-zero error returned by the
 * LDC unbind/free operation which failed.
 */
static int
vsw_free_ring(dring_info_t *dp)
{
    vsw_private_desc_t  *paddr = NULL;
    dring_info_t        *dpp;
    int                 i, rv = 1;

    while (dp != NULL) {
        mutex_enter(&dp->dlock);
        dpp = dp->next;
        if (dp->priv_addr != NULL) {
            /*
             * First unbind and free the memory handles
             * stored in each descriptor within the ring.
             */
            for (i = 0; i < VSW_RING_NUM_EL; i++) {
                paddr = (vsw_private_desc_t *)
                    dp->priv_addr + i;
                if (paddr->memhandle != NULL) {
                    if (paddr->bound == 1) {
                        rv = ldc_mem_unbind_handle(
                            paddr->memhandle);

                        if (rv != 0) {
                            DERR(NULL, "error "
                                "unbinding handle for "
                                "ring 0x%llx at pos %d",
                                dp, i);
                            mutex_exit(&dp->dlock);
                            return (rv);
                        }
                        paddr->bound = 0;
                    }

                    rv = ldc_mem_free_handle(
                        paddr->memhandle);
                    if (rv != 0) {
                        DERR(NULL, "error freeing "
                            "handle for ring "
                            "0x%llx at pos %d",
                            dp, i);
                        mutex_exit(&dp->dlock);
                        return (rv);
                    }
                    paddr->memhandle = NULL;
                }
                mutex_destroy(&paddr->dstate_lock);
            }
            kmem_free(dp->priv_addr, (sizeof (vsw_private_desc_t)
                * VSW_RING_NUM_EL));
        }

        /*
         * Now unbind and destroy the ring itself.
         */
        if (dp->handle != NULL) {
            (void) ldc_mem_dring_unbind(dp->handle);
            (void) ldc_mem_dring_destroy(dp->handle);
        }

        if (dp->data_addr != NULL) {
            kmem_free(dp->data_addr, dp->data_sz);
        }

        mutex_exit(&dp->dlock);
        mutex_destroy(&dp->dlock);
        mutex_destroy(&dp->restart_lock);
        kmem_free(dp, sizeof (dring_info_t));

        dp = dpp;
    }
    return (0);
}

/*
 * Debugging routines
 */
static void
display_state(void)
{
    vsw_t           *vswp;
    vsw_port_list_t *plist;
    vsw_port_t      *port;
    vsw_ldc_list_t  *ldcl;
    vsw_ldc_t       *ldcp;

    cmn_err(CE_NOTE, "***** system state *****");

    for (vswp = vsw_head; vswp; vswp = vswp->next) {
        plist = &vswp->plist;
        READ_ENTER(&plist->lockrw);
        cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
            vswp->instance, plist->num_ports);

        for (port = plist->head; port != NULL; port = port->p_next) {
            ldcl = &port->p_ldclist;
            cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
                port->p_instance, ldcl->num_ldcs);
            READ_ENTER(&ldcl->lockrw);
            ldcp = ldcl->head;
            for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
                cmn_err(CE_CONT, "chan %lu : dev %d : "
                    "status %d : phase %u\n",
                    ldcp->ldc_id, ldcp->dev_class,
                    ldcp->ldc_status, ldcp->hphase);
                cmn_err(CE_CONT, "chan %lu : lsession %lu : "
                    "psession %lu\n",
                    ldcp->ldc_id,
                    ldcp->local_session,
                    ldcp->peer_session);

                cmn_err(CE_CONT, "Inbound lane:\n");
                display_lane(&ldcp->lane_in);
                cmn_err(CE_CONT, "Outbound lane:\n");
                display_lane(&ldcp->lane_out);
            }
            RW_EXIT(&ldcl->lockrw);
        }
        RW_EXIT(&plist->lockrw);
    }
    cmn_err(CE_NOTE, "***** system state *****");
}

static void
display_lane(lane_t *lp)
{
    dring_info_t    *drp;

    cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
        lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
    cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
        lp->addr_type, lp->addr, lp->xfer_mode);
    cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);

    cmn_err(CE_CONT, "Dring info:\n");
    for (drp = lp->dringp; drp != NULL; drp = drp->next) {
        cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
            drp->num_descriptors, drp->descriptor_size);
        cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
        cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
            (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
        cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
            drp->ident, drp->end_idx);
        display_ring(drp);
    }
}

static void
display_ring(dring_info_t *dringp)
{
    uint64_t            i;
    uint64_t            priv_count = 0;
    uint64_t            pub_count = 0;
    vnet_public_desc_t  *pub_addr = NULL;
    vsw_private_desc_t  *priv_addr = NULL;

    for (i = 0; i < VSW_RING_NUM_EL; i++) {
        if (dringp->pub_addr != NULL) {
            pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;

            if (pub_addr->hdr.dstate == VIO_DESC_FREE)
                pub_count++;
        }

        if (dringp->priv_addr != NULL) {
            priv_addr =
                (vsw_private_desc_t *)dringp->priv_addr + i;

            if (priv_addr->dstate == VIO_DESC_FREE)
                priv_count++;
        }
    }
    cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
        i, priv_count, pub_count);
}
static void
dump_flags(uint64_t state)
{
    int i;

    typedef struct flag_name {
        int     flag_val;
        char    *flag_name;
    } flag_name_t;

    flag_name_t flags[] = {
        { VSW_VER_INFO_SENT,    "VSW_VER_INFO_SENT" },
        { VSW_VER_INFO_RECV,    "VSW_VER_INFO_RECV" },
        { VSW_VER_ACK_RECV,     "VSW_VER_ACK_RECV" },
        { VSW_VER_ACK_SENT,     "VSW_VER_ACK_SENT" },
        { VSW_VER_NACK_RECV,    "VSW_VER_NACK_RECV" },
        { VSW_VER_NACK_SENT,    "VSW_VER_NACK_SENT" },
        { VSW_ATTR_INFO_SENT,   "VSW_ATTR_INFO_SENT" },
        { VSW_ATTR_INFO_RECV,   "VSW_ATTR_INFO_RECV" },
        { VSW_ATTR_ACK_SENT,    "VSW_ATTR_ACK_SENT" },
        { VSW_ATTR_ACK_RECV,    "VSW_ATTR_ACK_RECV" },
        { VSW_ATTR_NACK_SENT,   "VSW_ATTR_NACK_SENT" },
        { VSW_ATTR_NACK_RECV,   "VSW_ATTR_NACK_RECV" },
        { VSW_DRING_INFO_SENT,  "VSW_DRING_INFO_SENT" },
        { VSW_DRING_INFO_RECV,  "VSW_DRING_INFO_RECV" },
        { VSW_DRING_ACK_SENT,   "VSW_DRING_ACK_SENT" },
        { VSW_DRING_ACK_RECV,   "VSW_DRING_ACK_RECV" },
        { VSW_DRING_NACK_SENT,  "VSW_DRING_NACK_SENT" },
        { VSW_DRING_NACK_RECV,  "VSW_DRING_NACK_RECV" },
        { VSW_RDX_INFO_SENT,    "VSW_RDX_INFO_SENT" },
        { VSW_RDX_INFO_RECV,    "VSW_RDX_INFO_RECV" },
        { VSW_RDX_ACK_SENT,     "VSW_RDX_ACK_SENT" },
        { VSW_RDX_ACK_RECV,     "VSW_RDX_ACK_RECV" },
        { VSW_RDX_NACK_SENT,    "VSW_RDX_NACK_SENT" },
        { VSW_RDX_NACK_RECV,    "VSW_RDX_NACK_RECV" },
        { VSW_MCST_INFO_SENT,   "VSW_MCST_INFO_SENT" },
        { VSW_MCST_INFO_RECV,   "VSW_MCST_INFO_RECV" },
        { VSW_MCST_ACK_SENT,    "VSW_MCST_ACK_SENT" },
        { VSW_MCST_ACK_RECV,    "VSW_MCST_ACK_RECV" },
        { VSW_MCST_NACK_SENT,   "VSW_MCST_NACK_SENT" },
        { VSW_MCST_NACK_RECV,   "VSW_MCST_NACK_RECV" },
        { VSW_LANE_ACTIVE,      "VSW_LANE_ACTIVE" }
    };

    DERR(NULL, "DUMP_FLAGS: %llx\n", state);
    for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
        if (state & flags[i].flag_val)
            DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
    }
}
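/*
 * Usage sketch: dump_flags() decodes a lane's handshake-state bitmask,
 * e.g. when diagnosing a stalled handshake (illustrative only):
 *
 *    dump_flags(ldcp->lane_in.lstate);
 *    dump_flags(ldcp->lane_out.lstate);
 */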