/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>

/*
 * Function prototypes.
 */
static int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
static int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
static int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static void vsw_get_md_properties(vsw_t *vswp);
static int vsw_setup_layer2(vsw_t *);
static int vsw_setup_layer3(vsw_t *);

/* MAC layer routines */
static int vsw_mac_attach(vsw_t *vswp);
static void vsw_mac_detach(vsw_t *vswp);
static void vsw_notify_cb(void *, mac_notify_type_t);
static void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *);
static mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
static int vsw_mac_register(vsw_t *);
static int vsw_mac_unregister(vsw_t *);
static int vsw_m_stat(void *, uint_t, uint64_t *);
static void vsw_m_stop(void *arg);
static int vsw_m_start(void *arg);
static int vsw_m_unicst(void *arg, const uint8_t *);
static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
static int vsw_m_promisc(void *arg, boolean_t);
static mblk_t *vsw_m_tx(void *arg, mblk_t *);

/* MDEG routines */
static void vsw_mdeg_register(vsw_t *vswp);
static void vsw_mdeg_unregister(vsw_t *vswp);
static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);

/* Port add/deletion routines */
static int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
static int vsw_port_attach(vsw_t *vswp, int p_instance,
	uint64_t *ldcids, int nids, struct ether_addr *macaddr);
static int vsw_detach_ports(vsw_t *vswp);
static int vsw_port_detach(vsw_t *vswp, int p_instance);
static int vsw_port_delete(vsw_port_t *port);
static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_init_ldcs(vsw_port_t *port);
static int vsw_uninit_ldcs(vsw_port_t *port);
static int vsw_ldc_init(vsw_ldc_t *ldcp);
static int vsw_ldc_uninit(vsw_ldc_t *ldcp);
static int vsw_drain_ldcs(vsw_port_t *port);
static int vsw_drain_port_taskq(vsw_port_t *port);
static void vsw_marker_task(void *);
static vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
static int vsw_plist_del_node(vsw_t *, vsw_port_t *port);

/* Interrupt routines */
static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);

/* Handshake routines */
static void vsw_restart_handshake(vsw_ldc_t *);
static int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static void vsw_next_milestone(vsw_ldc_t *);
static int vsw_supported_version(vio_ver_msg_t *);

/* Data processing routines */
static void vsw_process_pkt(void *);
static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t);
static void vsw_process_ctrl_pkt(void *);
static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_raw_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);

/* Switching/data transmit routines */
static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
	vsw_port_t *port, mac_resource_handle_t);
static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
	vsw_port_t *port, mac_resource_handle_t);
static int vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller,
	vsw_port_t *port);
static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller,
	vsw_port_t *port);
static int vsw_portsend(vsw_port_t *, mblk_t *);
static int vsw_dringsend(vsw_ldc_t *, mblk_t *);
static int vsw_descrsend(vsw_ldc_t *, mblk_t *);

/* Packet creation routines */
static void vsw_send_ver(vsw_ldc_t *);
static void vsw_send_attr(vsw_ldc_t *);
static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);

static void vsw_send_msg(vsw_ldc_t *, void *, int);

/* Forwarding database (FDB) routines */
static int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
static int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
static vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *);
static int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
static int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
static int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
static void vsw_del_addr(uint8_t, void *, uint64_t);
static void vsw_del_mcst_port(vsw_port_t *);
static void vsw_del_mcst_vsw(vsw_t *);

/* Dring routines */
static dring_info_t *vsw_create_dring(vsw_ldc_t *);
static void vsw_create_privring(vsw_ldc_t *);
static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
	int *);
static void vsw_dring_priv2pub(vsw_private_desc_t *);
static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);

static void vsw_set_lane_attr(vsw_t *, lane_t *);
static int vsw_check_attr(vnet_attr_msg_t *, vsw_port_t *);
static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
static int vsw_check_dring_info(vio_dring_reg_msg_t *);

/* Misc support routines */
static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);

static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static int vsw_free_ring(dring_info_t *);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);

int vsw_num_handshakes = 3;	/* # of handshake attempts */
int vsw_wretries = 100;		/* # of write attempts */

/*
 * mode specific frame switching function
 */
void (*vsw_switch_frame)(vsw_t *, mblk_t *, int, vsw_port_t *,
			mac_resource_handle_t);

static mac_callbacks_t vsw_m_callbacks = {
	0,
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx,
	NULL,
	NULL,
	NULL
};
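/*
 * Note (descriptive only): the leading 0 above is the mc_callbacks mask;
 * the trailing NULL entries are the optional callbacks, none of which
 * this driver advertises to the MAC layer.
 */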
static struct cb_ops vsw_cb_ops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_MP,			/* cb_flag */
	CB_REV,			/* rev */
	nodev,			/* int (*cb_aread)() */
	nodev			/* int (*cb_awrite)() */
};

static struct dev_ops vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	vsw_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};

extern struct mod_ops mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch Driver %I%",
	&vsw_ops,
};

#define	LDC_ENTER_LOCK(ldcp)	\
				mutex_enter(&((ldcp)->ldc_cblock));\
				mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
				mutex_exit(&((ldcp)->ldc_txlock));\
				mutex_exit(&((ldcp)->ldc_cblock));
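/*
 * Lock ordering: the callback lock (ldc_cblock) is always taken before
 * the transmit lock (ldc_txlock), and the two are dropped in the reverse
 * order, as the macros above enforce.
 */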
/* Driver soft state ptr */
static void	*vsw_state;

/*
 * Linked list of "vsw_t" structures - one per instance.
 */
vsw_t		*vsw_head = NULL;
krwlock_t	vsw_rw;

/*
 * Property names
 */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";

/* supported versions */
static ver_sup_t vsw_versions[] = { {1, 0} };

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device-port' nodes identified by their
 * 'id' property.
 */
static md_prop_match_t vport_prop_match[] = {
	{ MDET_PROP_VAL,	"id" },
	{ MDET_LIST_END,	NULL }
};

static mdeg_node_match_t vport_match = { "virtual-device-port",
						vport_prop_match };

/*
 * Specification of an MD node passed to the MDEG to filter any
 * 'vport' nodes that do not belong to the specified node. This
 * template is copied for each vsw instance and filled in with
 * the appropriate 'cfg-handle' value before being passed to the MDEG.
 */
static mdeg_prop_spec_t vsw_prop_template[] = {
	{ MDET_PROP_STR,	"name",		vsw_propname },
	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
	{ MDET_LIST_END,	NULL,		NULL }
};

#define	VSW_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val);

/*
 * Print debug messages - set to 0x1f to enable all msgs
 * or 0x0 to turn all off.
 */
int vswdbg = 0x0;

/*
 * debug levels:
 * 0x01:	Function entry/exit tracing
 * 0x02:	Internal function messages
 * 0x04:	Verbose internal messages
 * 0x08:	Warning messages
 * 0x10:	Error messages
 */

static void
vswdebug(vsw_t *vswp, const char *fmt, ...)
{
	char	buf[512];
	va_list	ap;

	va_start(ap, fmt);
	(void) vsprintf(buf, fmt, ap);
	va_end(ap);

	if (vswp == NULL)
		cmn_err(CE_CONT, "%s\n", buf);
	else
		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
}
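/*
 * For example (illustrative only), all debug messages could be enabled
 * at boot by adding the following line to /etc/system:
 *
 *	set vsw:vswdbg = 0x1f
 *
 * or the variable could be patched at runtime with a kernel debugger.
 */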
/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

#define	DUMP_TAG(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \
}

#define	DUMP_TAG_PTR(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \
}

#define	DUMP_FLAGS(flags)	dump_flags(flags);
#define	DISPLAY_STATE()		display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */

#ifdef DEBUG

#define	D1		\
if (vswdbg & 0x01)	\
	vswdebug

#define	D2		\
if (vswdbg & 0x02)	\
	vswdebug

#define	D3		\
if (vswdbg & 0x04)	\
	vswdebug

#define	DWARN		\
if (vswdbg & 0x08)	\
	vswdebug

#define	DERR		\
if (vswdbg & 0x10)	\
	vswdebug

#else

#define	DERR	if (0)	vswdebug
#define	DWARN	if (0)	vswdebug
#define	D1	if (0)	vswdebug
#define	D2	if (0)	vswdebug
#define	D3	if (0)	vswdebug

#endif	/* DEBUG */

static struct modlinkage modlinkage = {
	MODREV_1,
	&vswmodldrv,
	NULL
};

int
_init(void)
{
	int status;

	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);

	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
	if (status != 0) {
		return (status);
	}

	mac_init_ops(&vsw_ops, "vsw");
	status = mod_install(&modlinkage);
	if (status != 0) {
		ddi_soft_state_fini(&vsw_state);
	}
	return (status);
}

int
_fini(void)
{
	int status;

	status = mod_remove(&modlinkage);
	if (status != 0)
		return (status);
	mac_fini_ops(&vsw_ops);
	ddi_soft_state_fini(&vsw_state);

	rw_destroy(&vsw_rw);

	return (status);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	vsw_t		*vswp;
	int		smode, instance, i;
	char		hashname[MAXNAMELEN];
	char		qname[TASKQ_NAMELEN];
	int		rv = 1;
	enum		{ PROG_init = 0x0, PROG_if_lock = 0x1,
				PROG_fdb = 0x2, PROG_mfdb = 0x4,
				PROG_report_dev = 0x8, PROG_plist = 0x10,
				PROG_taskq = 0x20}
			progress;

	progress = PROG_init;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	case DDI_PM_RESUME:
	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
		return (DDI_FAILURE);
	}
	vswp = ddi_get_soft_state(vsw_state, instance);

	if (vswp == NULL) {
		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
		goto vsw_attach_fail;
	}

	vswp->dip = dip;
	vswp->instance = instance;
	ddi_set_driver_private(dip, (caddr_t)vswp);

	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);

	progress |= PROG_if_lock;

	/*
	 * User specifies (via MD) an array of switching modes in
	 * decreasing order of preference. Default mode is always
	 * layer 2 (mac switching), so init array with that value.
	 */
	vswp->smode_idx = 0;
	for (i = 0; i < NUM_SMODES; i++)
		vswp->smode[i] = VSW_LAYER2;

	/*
	 * Get the various properties such as physical device name
	 * (vsw-phys-dev), switch mode etc from the MD.
	 */
	vsw_get_md_properties(vswp);

	/* setup the unicast forwarding database */
	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
							vswp->instance);
	D2(vswp, "creating unicast hash table (%s)...", hashname);
	vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
		mod_hash_null_valdtor, sizeof (void *));

	progress |= PROG_fdb;

	/* setup the multicast forwarding database */
	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
							vswp->instance);
	D2(vswp, "creating multicast hash table (%s)...", hashname);
	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
	vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
			mod_hash_null_valdtor, sizeof (void *));

	progress |= PROG_mfdb;

	/*
	 * create lock protecting list of multicast addresses
	 * which could come via m_multicst() entry point when plumbed.
	 */
	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
	vswp->mcap = NULL;

	ddi_report_dev(vswp->dip);

	progress |= PROG_report_dev;

	WRITE_ENTER(&vsw_rw);
	vswp->next = vsw_head;
	vsw_head = vswp;
	RW_EXIT(&vsw_rw);

	/* setup the port list */
	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
	vswp->plist.head = NULL;

	progress |= PROG_plist;

	/*
	 * Create the taskq which will process all the VIO
	 * control messages.
	 */
	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
					TASKQ_DEFAULTPRI, 0)) == NULL) {
		cmn_err(CE_WARN, "Unable to create task queue");
		goto vsw_attach_fail;
	}

	progress |= PROG_taskq;

	/* select best switching mode */
	for (i = 0; i < NUM_SMODES; i++) {
		smode = vswp->smode[i];
		switch (smode) {
		case VSW_LAYER2:
			rv = vsw_setup_layer2(vswp);
			break;

		case VSW_LAYER2_PROMISC:
			rv = vsw_setup_layer2(vswp);
			break;

		case VSW_LAYER3:
			rv = vsw_setup_layer3(vswp);
			break;

		default:
			DERR(vswp, "unknown switch mode");
			break;
		}

		if (rv == 0) {
			vswp->smode_idx = i;
			break;
		}
	}
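	/*
	 * For example (illustrative only), with smode[] = { VSW_LAYER2,
	 * VSW_LAYER3 } a failure to attach to the physical device in
	 * layer 2 mode causes the loop above to fall back to layer 3
	 * (IP routed) mode.
	 */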
	if (rv == 1) {
		cmn_err(CE_WARN, "Unable to setup switching mode");
		goto vsw_attach_fail;
	}

	D2(vswp, "Operating in mode %d", vswp->smode[vswp->smode_idx]);

	/*
	 * Register with the MAC layer as a network device so
	 * we can be plumbed if desired.
	 *
	 * Do this in both layer 2 and layer 3 mode.
	 */
	vswp->if_state &= ~VSW_IF_UP;
	if (vswp->mdprops & VSW_MD_MACADDR) {
		if (vsw_mac_register(vswp) != 0) {
			cmn_err(CE_WARN, "Unable to register as provider "
				"with MAC layer, continuing with attach");
		}
	}

	/*
	 * Now we have everything setup, register for MD change
	 * events.
	 */
	vsw_mdeg_register(vswp);

	return (DDI_SUCCESS);

vsw_attach_fail:
	DERR(NULL, "vsw_attach: failed");

	if (progress & PROG_taskq)
		ddi_taskq_destroy(vswp->taskq_p);

	if (progress & PROG_plist)
		rw_destroy(&vswp->plist.lockrw);

	if (progress & PROG_report_dev) {
		ddi_remove_minor_node(dip, NULL);
		mutex_destroy(&vswp->mca_lock);
	}

	if (progress & PROG_mfdb) {
		mod_hash_destroy_hash(vswp->mfdb);
		vswp->mfdb = NULL;
		rw_destroy(&vswp->mfdbrw);
	}

	if (progress & PROG_fdb) {
		mod_hash_destroy_hash(vswp->fdb);
		vswp->fdb = NULL;
	}

	if (progress & PROG_if_lock)
		rw_destroy(&vswp->if_lockrw);

	ddi_soft_state_free(vsw_state, instance);
	return (DDI_FAILURE);
}

static int
vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vsw_t	**vswpp, *vswp;
	int	instance;

	instance = ddi_get_instance(dip);
	vswp = ddi_get_soft_state(vsw_state, instance);

	if (vswp == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
	case DDI_PM_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	D2(vswp, "detaching instance %d", instance);

	if (vswp->mdprops & VSW_MD_MACADDR) {
		if (vsw_mac_unregister(vswp) != 0) {
			cmn_err(CE_WARN, "Unable to detach from MAC layer");
			return (DDI_FAILURE);
		}
	}
	rw_destroy(&vswp->if_lockrw);

	vsw_mdeg_unregister(vswp);

	if ((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
		(vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) {
		vsw_mac_detach(vswp);
	}

	if (vsw_detach_ports(vswp) != 0) {
		cmn_err(CE_WARN, "Unable to detach ports");
		return (DDI_FAILURE);
	}

	/*
	 * Remove this instance from any entries it may be on in
	 * the hash table by using the list of addresses maintained
	 * in the vsw_t structure.
	 */
	vsw_del_mcst_vsw(vswp);

	vswp->mcap = NULL;
	mutex_destroy(&vswp->mca_lock);

	/*
	 * By now any pending tasks have finished and the underlying
	 * ldc's have been destroyed, so it's safe to delete the control
	 * message taskq.
	 */
	if (vswp->taskq_p != NULL)
		ddi_taskq_destroy(vswp->taskq_p);

	/*
	 * At this stage all the data pointers in the hash table
	 * should be NULL, as all the ports have been removed and will
	 * have deleted themselves from the port lists which the data
	 * pointers point to. Hence we can destroy the table using the
	 * default destructors.
	 */
	D2(vswp, "vsw_detach: destroying hash tables..");
	mod_hash_destroy_hash(vswp->fdb);
	vswp->fdb = NULL;

	WRITE_ENTER(&vswp->mfdbrw);
	mod_hash_destroy_hash(vswp->mfdb);
	vswp->mfdb = NULL;
	RW_EXIT(&vswp->mfdbrw);
	rw_destroy(&vswp->mfdbrw);

	ddi_remove_minor_node(dip, NULL);

	rw_destroy(&vswp->plist.lockrw);
	WRITE_ENTER(&vsw_rw);
	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
		if (*vswpp == vswp) {
			*vswpp = vswp->next;
			break;
		}
	}
	RW_EXIT(&vsw_rw);
	ddi_soft_state_free(vsw_state, instance);

	return (DDI_SUCCESS);
}

static int
vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	_NOTE(ARGUNUSED(dip))

	vsw_t	*vswp = NULL;
	dev_t	dev = (dev_t)arg;
	int	instance;

	instance = getminor(dev);

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
			*result = NULL;
			return (DDI_FAILURE);
		}
		*result = vswp->dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);

	default:
		*result = NULL;
		return (DDI_FAILURE);
	}
}

/*
 * Get the properties from our MD node.
 */
static void
vsw_get_md_properties(vsw_t *vswp)
{
	md_t		*mdp = NULL;
	int		num_nodes = 0;
	int		len = 0, listsz = 0;
	int		num_vdev = 0;
	int		i, idx;
	boolean_t	found_node = B_FALSE;
	char		*smode = NULL;
	char		*curr_mode = NULL;
	char		*physname = NULL;
	char		*node_name = NULL;
	char		*dev;
	uint64_t	macaddr = 0;
	uint64_t	md_inst, obp_inst;
	mde_cookie_t	*listp = NULL;
	mde_cookie_t	rootnode;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Further down we compare the obp 'reg' property to the
	 * 'cfg-handle' property in the vsw MD node to determine
	 * if the node refers to this particular instance. So if
	 * we can't read the obp value then there is no point
	 * in proceeding further.
	 */
	if (ddi_prop_exists(DDI_DEV_T_ANY, vswp->dip,
			DDI_PROP_DONTPASS, reg_propname) != 1) {
		cmn_err(CE_WARN, "Unable to read %s property "
			"from OBP device node", reg_propname);
		return;
	}

	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
		DDI_PROP_DONTPASS, reg_propname, 0);

	D2(vswp, "%s: obp_inst 0x%llx", __func__, obp_inst);

	if ((mdp = md_get_handle()) == NULL) {
		DERR(vswp, "%s: unable to init MD", __func__);
		return;
	}

	if ((num_nodes = md_node_count(mdp)) <= 0) {
		DERR(vswp, "%s: invalid number of nodes found %d",
			__func__, num_nodes);
		(void) md_fini_handle(mdp);
		return;
	}

	D2(vswp, "%s: %d nodes in total in MD", __func__, num_nodes);

	/* allocate enough space for node list */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

	rootnode = md_root_node(mdp);

	/* Get the list of virtual devices */
	num_vdev = md_scan_dag(mdp, rootnode,
		md_find_name(mdp, vdev_propname),
		md_find_name(mdp, "fwd"), listp);

	if (num_vdev <= 0) {
		DERR(vswp, "%s: didn't find any virtual-device nodes in MD",
			__func__);
		goto md_prop_exit;
	}

	D2(vswp, "%s: %d virtual-device nodes found", __func__, num_vdev);

	/* Look for the virtual switch nodes in the list */
	for (idx = 0; idx < num_vdev; idx++) {
		if (md_get_prop_str(mdp, listp[idx],
				"name", &node_name) != 0) {
			DERR(vswp, "%s: unable to get node name", __func__);
			continue;
		}

		if (strcmp(node_name, vsw_propname) == 0) {
			/* Virtual switch node */
			if (md_get_prop_val(mdp, listp[idx],
				"cfg-handle", &md_inst) != 0) {
				DERR(vswp, "%s: unable to get cfg-handle from"
					" node %d", __func__, idx);
				goto md_prop_exit;
			} else if (md_inst == obp_inst) {
				D2(vswp, "%s: found matching node (%d)"
					" 0x%llx == 0x%llx", __func__, idx,
					md_inst, obp_inst);
				found_node = B_TRUE;
				break;
			}
		}
	}

	if (!found_node) {
		DWARN(vswp, "%s: couldn't find correct vsw node", __func__);
		goto md_prop_exit;
	}

	/*
	 * Now, having found the correct node, get the various properties.
	 */

	if (md_get_prop_data(mdp, listp[idx], physdev_propname,
			(uint8_t **)(&physname), &len) != 0) {
		cmn_err(CE_WARN, "%s: unable to get name(s) of physical "
			"device(s) from MD", __func__);
	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
		cmn_err(CE_WARN, "%s is too long a device name", physname);
	} else {
		(void) strncpy(vswp->physname, physname, strlen(physname) + 1);
		vswp->mdprops |= VSW_MD_PHYSNAME;
		D2(vswp, "%s: using first device specified (%s)",
			__func__, vswp->physname);
	}

#ifdef DEBUG
	/*
	 * As a temporary measure to aid testing we check to see if there
	 * is a vsw.conf file present. If there is we use the value of the
	 * vsw_physname property in the file as the name of the physical
	 * device, overriding the value from the MD.
	 *
	 * There may be multiple devices listed, but for the moment
	 * we just use the first one.
	 */
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
		"vsw_physname", &dev) == DDI_PROP_SUCCESS) {
		if ((strlen(dev) + 1) > LIFNAMSIZ) {
			cmn_err(CE_WARN, "%s is too long a device name", dev);
		} else {
			cmn_err(CE_NOTE, "%s: using device name (%s) from "
				"config file", __func__, dev);

			(void) strncpy(vswp->physname, dev, strlen(dev) + 1);
			vswp->mdprops |= VSW_MD_PHYSNAME;
		}

		ddi_prop_free(dev);
	}
#endif

	/* local mac address */
	if (md_get_prop_val(mdp, listp[idx],
			macaddr_propname, &macaddr) != 0) {
		cmn_err(CE_WARN, "%s: unable to get local MAC address",
								__func__);
	} else {
		READ_ENTER(&vswp->if_lockrw);
		for (i = ETHERADDRL - 1; i >= 0; i--) {
			vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
			macaddr >>= 8;
		}
		RW_EXIT(&vswp->if_lockrw);
		vswp->mdprops |= VSW_MD_MACADDR;
	}

	/*
	 * Get the switch-mode property. The modes are listed in
	 * decreasing order of preference, i.e. the preferred mode is
	 * the first item in the list.
	 */
	len = 0;
	if (md_get_prop_data(mdp, listp[idx], smode_propname,
			(uint8_t **)(&smode), &len) != 0) {
		/*
		 * Unable to get switch-mode property, so just use
		 * the default values which the vswp->smode[] array has
		 * already been pre-populated with, namely layer 2.
		 */
		cmn_err(CE_WARN, "%s: unable to get switch mode property, "
			"defaulting to layer 2 mode", __func__);
	} else {
		i = 0;
		curr_mode = smode;
		/*
		 * Modes of operation:
		 * 'switched'	 - layer 2 switching, underlying HW in
		 *			non-promiscuous mode.
		 * 'promiscuous' - layer 2 switching, underlying HW in
		 *			promiscuous mode.
		 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
		 *			in non-promiscuous mode.
		 */
		while ((curr_mode < (smode + len)) && (i < NUM_SMODES)) {
			D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
			if (strcmp(curr_mode, "switched") == 0)
				vswp->smode[i] = VSW_LAYER2;
			else if (strcmp(curr_mode, "promiscuous") == 0)
				vswp->smode[i] = VSW_LAYER2_PROMISC;
			else if (strcmp(curr_mode, "routed") == 0)
				vswp->smode[i] = VSW_LAYER3;
			else {
				DERR(vswp, "%s: unknown mode %s",
					__func__, curr_mode);
				/* default to layer 2 */
				vswp->smode[i] = VSW_LAYER2;
			}
			curr_mode += strlen(curr_mode) + 1;
			i++;
		}

		vswp->mdprops |= VSW_MD_SMODE;
	}
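	/*
	 * For example (illustrative only), a vsw-switch-mode property
	 * holding the packed strings "promiscuous\0routed\0" yields
	 * smode[0] = VSW_LAYER2_PROMISC and smode[1] = VSW_LAYER3, with
	 * any remaining entries left at their default of VSW_LAYER2.
	 */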
md_prop_exit:
	(void) md_fini_handle(mdp);

	kmem_free(listp, listsz);

	D1(vswp, "%s: exit", __func__);
}

static int
vsw_setup_layer2(vsw_t *vswp)
{
	int	rv = 0;

	D1(vswp, "%s: enter", __func__);

	vsw_switch_frame = vsw_switch_l2_frame;

	/*
	 * Attempt to link into the MAC layer so we can get
	 * and send packets out over the physical adapter.
	 */
	if (vswp->mdprops & VSW_MD_PHYSNAME) {
		if (vsw_mac_attach(vswp) != 0) {
			/*
			 * Registration with the MAC layer has failed,
			 * so return 1 so that we can fall back to the
			 * next preferred switching method.
			 */
			cmn_err(CE_WARN, "!unable to join as MAC layer "
				"client, continuing with attach");
			rv = 1;
		}
	} else {
		/* No physical device name found in MD */
		DERR(vswp, "%s: no physical device name specified", __func__);
		rv = 1;
	}

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

static int
vsw_setup_layer3(vsw_t *vswp)
{
	D1(vswp, "%s: enter", __func__);

	D2(vswp, "%s: operating in layer 3 mode", __func__);
	vsw_switch_frame = vsw_switch_l3_frame;

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Link into the MAC layer to gain access to the services provided by
 * the underlying physical device driver (which should also have
 * registered with the MAC layer).
 *
 * Only when in layer 2 mode.
 */
static int
vsw_mac_attach(vsw_t *vswp)
{
	char	drv[LIFNAMSIZ];
	uint_t	ddi_instance;

	D1(vswp, "vsw_mac_attach: enter");

	vswp->mh = NULL;
	vswp->mrh = NULL;
	vswp->mnh = NULL;

	ASSERT(vswp->mdprops & VSW_MD_PHYSNAME);

	if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "invalid device name: %s", vswp->physname);
		goto mac_fail_exit;
	}
	if ((mac_open(vswp->physname, ddi_instance, &vswp->mh)) != 0) {
		cmn_err(CE_WARN, "mac_open %s failed", vswp->physname);
		goto mac_fail_exit;
	}

	D2(vswp, "vsw_mac_attach: using device %s", vswp->physname);

	/* register for changes in the interface */
	vswp->mnh = mac_notify_add(vswp->mh, vsw_notify_cb, (void *)vswp);

	/* register our rx callback function */
	vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp);

	/* get the MAC tx fn */
	vswp->txinfo = mac_tx_get(vswp->mh);

	/* start the interface */
	if (mac_start(vswp->mh) != 0) {
		cmn_err(CE_WARN, "could not start mac interface");
		goto mac_fail_exit;
	}

	/* get and store original promisc setting */
	vswp->init_promisc = mac_promisc_get(vswp->mh, MAC_DEVPROMISC);

	/*
	 * FUTURE: When we have the ability to set multiple unicast
	 * mac addresses then we won't have to set the device into
	 * promisc mode, but for the moment it's the only way we
	 * can see pkts that the logical domains we are serving are
	 * interested in.
	 */
	if ((vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) &&
			(vswp->init_promisc == B_FALSE)) {
		DERR(vswp, "vsw_mac_attach: enabling promisc mode..");

		if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) {
			DERR(vswp, "vsw_mac_attach: unable to set device"
				" into promiscuous mode");
			goto mac_fail_exit;
		}
	}

	D1(vswp, "vsw_mac_attach: exit");
	return (0);

mac_fail_exit:
	if (vswp->mh != NULL) {
		mac_promisc_set(vswp->mh, vswp->init_promisc, MAC_DEVPROMISC);
		if (vswp->mrh != NULL)
			mac_rx_remove(vswp->mh, vswp->mrh);

		if (vswp->mnh != NULL)
			mac_notify_remove(vswp->mh, vswp->mnh);

		mac_close(vswp->mh);
	}

	vswp->mrh = NULL;
	vswp->mnh = NULL;
	vswp->mh = NULL;
	vswp->txinfo = NULL;

	D1(vswp, "vsw_mac_attach: fail exit");
	return (1);
}

static void
vsw_mac_detach(vsw_t *vswp)
{
	D1(vswp, "vsw_mac_detach: enter");

	if (vswp->mh != NULL) {
		/* restore promisc to original setting */
		mac_promisc_set(vswp->mh, vswp->init_promisc, MAC_DEVPROMISC);
		if (vswp->mrh != NULL)
			mac_rx_remove(vswp->mh, vswp->mrh);

		if (vswp->mnh != NULL)
			mac_notify_remove(vswp->mh, vswp->mnh);

		mac_close(vswp->mh);
	}

	vswp->mrh = NULL;
	vswp->mnh = NULL;
	vswp->mh = NULL;
	vswp->txinfo = NULL;

	D1(vswp, "vsw_mac_detach: exit");
}

/*
 * Get notified of changes to the interface.
 *
 * For the moment we brute force the interface back
 * into promisc mode if it is unset (e.g. by snoop).
 * When we have the ability to set multiple mac addresses,
 * we will need to see if this is necessary.
 */
static void
vsw_notify_cb(void *arg, mac_notify_type_t type)
{
	vsw_t		*vswp = (vsw_t *)arg;

	switch (type) {
	case MAC_NOTE_PROMISC:
		vswp->txinfo = mac_tx_get(vswp->mh);
		if (mac_promisc_get(vswp->mh, MAC_DEVPROMISC) == B_TRUE) {
			D2(vswp, "%s: still in PROMISC mode", __func__);
		} else {
			D2(vswp, "%s: now in NON-PROMISC mode", __func__);
			D2(vswp, "...re-enabling");
			mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC);
		}
		break;
	default:
		break;
	}
}

/*
 * receive callback routine. Invoked by MAC layer when there
 * are pkts being passed up from physical device.
 *
 * PERF: It may be more efficient when the card is in promisc
 * mode to check the dest address of the pkts here (against
 * the FDB) rather than checking later. Needs to be investigated.
 */
static void
vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
{
	_NOTE(ARGUNUSED(mrh))

	vsw_t		*vswp = (vsw_t *)arg;

	ASSERT(vswp != NULL);

	D1(vswp, "vsw_rx_cb: enter");

	/* switch the chain of packets received */
	vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL);

	D1(vswp, "vsw_rx_cb: exit");
}

/*
 * Send a message out over the physical device via the MAC layer.
 *
 * Returns any mblks that it was unable to transmit.
 */
static mblk_t *
vsw_tx_msg(vsw_t *vswp, mblk_t *mp)
{
	const mac_txinfo_t	*mtp;
	mblk_t			*nextp;

	if (vswp->mh == NULL) {
		DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail");
		return (mp);
	} else {
		for (;;) {
			nextp = mp->b_next;
			mp->b_next = NULL;

			mtp = vswp->txinfo;
			if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) {
				mp->b_next = nextp;
				break;
			}

			if ((mp = nextp) == NULL)
				break;
		}
	}

	return (mp);
}

/*
 * Register with the MAC layer as a network device, so we
 * can be plumbed if necessary.
 */
static int
vsw_mac_register(vsw_t *vswp)
{
	mac_register_t	*macp;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vswp;
	macp->m_dip = vswp->dip;
	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
	macp->m_callbacks = &vsw_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;
	rv = mac_register(macp, &vswp->if_mh);
	mac_free(macp);
	if (rv == 0)
		vswp->if_state |= VSW_IF_REG;

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

static int
vsw_mac_unregister(vsw_t *vswp)
{
	int	rv = 0;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);

	if (vswp->if_state & VSW_IF_REG) {
		rv = mac_unregister(vswp->if_mh);
		if (rv != 0) {
			DWARN(vswp, "%s: unable to unregister from MAC "
				"framework", __func__);

			RW_EXIT(&vswp->if_lockrw);
			D1(vswp, "%s: fail exit", __func__);
			return (rv);
		}

		/* mark i/f as down and unregistered */
		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
	}
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

static int
vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vsw_t	*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	if (vswp->mh == NULL)
		return (EINVAL);

	/* return stats from underlying device */
	*val = mac_stat_get(vswp->mh, stat);
	return (0);
}

static void
vsw_m_stop(void *arg)
{
	vsw_t	*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	vswp->if_state &= ~VSW_IF_UP;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
}

static int
vsw_m_start(void *arg)
{
	vsw_t	*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	vswp->if_state |= VSW_IF_UP;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
	return (0);
}

/*
 * Change the local interface address.
 */
static int
vsw_m_unicst(void *arg, const uint8_t *macaddr)
{
	vsw_t	*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	ether_copy(macaddr, &vswp->if_addr);
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static int
vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	vsw_t		*vswp = (vsw_t *)arg;
	mcst_addr_t	*mcst_p = NULL;
	uint64_t	addr = 0x0;
	int		i;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Convert address into form that can be used
	 * as hash table key.
	 */
	for (i = 0; i < ETHERADDRL; i++) {
		addr = (addr << 8) | mca[i];
	}

	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
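	/*
	 * e.g. (illustrative only) the multicast address 01:00:5e:00:00:01
	 * packs into the key 0x01005e000001.
	 */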
	if (add) {
		D2(vswp, "%s: adding multicast", __func__);
		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
			/*
			 * Update the list of multicast addresses
			 * contained within the vsw_t structure to
			 * include this new one.
			 */
			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
			if (mcst_p == NULL) {
				DERR(vswp, "%s unable to alloc mem", __func__);
				return (1);
			}
			mcst_p->addr = addr;

			mutex_enter(&vswp->mca_lock);
			mcst_p->nextp = vswp->mcap;
			vswp->mcap = mcst_p;
			mutex_exit(&vswp->mca_lock);

			/*
			 * Call into the underlying driver to program the
			 * address into HW.
			 *
			 * Note:
			 * Can safely ignore the return value as the card
			 * will for the moment always be in promisc mode.
			 * When we can program multiple MAC addresses into the
			 * HW then we will need to care about the return
			 * value here.
			 */
			if (vswp->mh != NULL)
				(void) mac_multicst_add(vswp->mh, mca);
		}
	} else {
		D2(vswp, "%s: removing multicast", __func__);
		/*
		 * Remove the address from the hash table..
		 */
		if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {

			/*
			 * ..and then from the list maintained in the
			 * vsw_t structure.
			 */
			vsw_del_addr(VSW_LOCALDEV, vswp, addr);

			if (vswp->mh != NULL)
				(void) mac_multicst_remove(vswp->mh, mca);
		}
	}

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static int
vsw_m_promisc(void *arg, boolean_t on)
{
	vsw_t	*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	if (on)
		vswp->if_state |= VSW_IF_PROMISC;
	else
		vswp->if_state &= ~VSW_IF_PROMISC;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static mblk_t *
vsw_m_tx(void *arg, mblk_t *mp)
{
	vsw_t	*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);

	D1(vswp, "%s: exit", __func__);

	return (NULL);
}

/*
 * Register for machine description (MD) updates.
 */
static void
vsw_mdeg_register(vsw_t *vswp)
{
	mdeg_prop_spec_t	*pspecp;
	mdeg_node_spec_t	*inst_specp;
	mdeg_handle_t		mdeg_hdl;
	size_t			templatesz;
	int			inst, rv;

	D1(vswp, "%s: enter", __func__);

	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
		DDI_PROP_DONTPASS, reg_propname, -1);
	if (inst == -1) {
		DERR(vswp, "%s: unable to get %s property",
						__func__, reg_propname);
		return;
	}

	D2(vswp, "%s: instance %d registering with mdeg", __func__, inst);

	/*
	 * Allocate and initialize a per-instance copy
	 * of the global property spec array that will
	 * uniquely identify this vsw instance.
	 */
	templatesz = sizeof (vsw_prop_template);
	pspecp = kmem_zalloc(templatesz, KM_SLEEP);

	bcopy(vsw_prop_template, pspecp, templatesz);

	VSW_SET_MDEG_PROP_INST(pspecp, inst);

	/* initialize the complete prop spec structure */
	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
	inst_specp->namep = "virtual-device";
	inst_specp->specp = pspecp;

	/* perform the registration */
	rv = mdeg_register(inst_specp, &vport_match, vsw_mdeg_cb,
	    (void *)vswp, &mdeg_hdl);

	if (rv != MDEG_SUCCESS) {
		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
		kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
		kmem_free(pspecp, templatesz);
		return;
	}

	/* save off data that will be needed later */
	vswp->inst_spec = inst_specp;
	vswp->mdeg_hdl = mdeg_hdl;

	D1(vswp, "%s: exit", __func__);
}

static void
vsw_mdeg_unregister(vsw_t *vswp)
{
	D1(vswp, "vsw_mdeg_unregister: enter");

	(void) mdeg_unregister(vswp->mdeg_hdl);

	if (vswp->inst_spec != NULL) {
		if (vswp->inst_spec->specp != NULL) {
			kmem_free(vswp->inst_spec->specp,
				sizeof (vsw_prop_template));
			vswp->inst_spec->specp = NULL;
		}

		kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
		vswp->inst_spec = NULL;
	}

	D1(vswp, "vsw_mdeg_unregister: exit");
}

static int
vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
{
	vsw_t		*vswp;
	int		idx;
	md_t		*mdp;
	mde_cookie_t	node;
	uint64_t	inst;

	if (resp == NULL)
		return (MDEG_FAILURE);

	vswp = (vsw_t *)cb_argp;

	D1(vswp, "%s: added %d : removed %d : matched %d",
		__func__, resp->added.nelem, resp->removed.nelem,
		resp->match_prev.nelem);

	/* process added ports */
	for (idx = 0; idx < resp->added.nelem; idx++) {
		mdp = resp->added.mdp;
		node = resp->added.mdep[idx];

		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);

		if (vsw_port_add(vswp, mdp, &node) != 0) {
			cmn_err(CE_WARN, "Unable to add new port (0x%lx)",
					node);
		}
	}

	/* process removed ports */
	for (idx = 0; idx < resp->removed.nelem; idx++) {
		mdp = resp->removed.mdp;
		node = resp->removed.mdep[idx];

		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
			DERR(vswp, "%s: prop(%s) not found port(%d)",
				__func__, id_propname, idx);
			continue;
		}

		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);

		if (vsw_port_detach(vswp, inst) != 0) {
			cmn_err(CE_WARN, "Unable to remove port %ld", inst);
		}
	}

	/*
	 * Currently no support for updating already active ports.
	 * So, ignore the match_curr and match_prev arrays for now.
	 */
	D1(vswp, "%s: exit", __func__);

	return (MDEG_SUCCESS);
}

/*
 * Add a new port to the system.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
{
	uint64_t		ldc_id;
	uint8_t			*addrp;
	int			i, addrsz;
	int			num_nodes = 0, nchan = 0;
	int			listsz = 0;
	mde_cookie_t		*listp = NULL;
	struct ether_addr	ea;
	uint64_t		macaddr;
	uint64_t		inst = 0;
	vsw_port_t		*port;

	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
		DWARN(vswp, "%s: prop(%s) not found", __func__,
			id_propname);
		return (1);
	}

	/*
	 * Find the channel endpoint node(s) (which should be under this
	 * port node) which contain the channel id(s).
	 */
	if ((num_nodes = md_node_count(mdp)) <= 0) {
		DERR(vswp, "%s: invalid number of nodes found (%d)",
			__func__, num_nodes);
		return (1);
	}

	/* allocate enough space for node list */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

	nchan = md_scan_dag(mdp, *node,
		md_find_name(mdp, chan_propname),
		md_find_name(mdp, "fwd"), listp);

	if (nchan <= 0) {
		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
		kmem_free(listp, listsz);
		return (1);
	}

	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);

	/* use property from first node found */
	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
			id_propname);
		kmem_free(listp, listsz);
		return (1);
	}

	/* don't need list any more */
	kmem_free(listp, listsz);

	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);

	/* read mac-address property */
	if (md_get_prop_data(mdp, *node, remaddr_propname,
					&addrp, &addrsz)) {
		DWARN(vswp, "%s: prop(%s) not found",
				__func__, remaddr_propname);
		return (1);
	}

	if (addrsz < ETHERADDRL) {
		DWARN(vswp, "%s: invalid address size", __func__);
		return (1);
	}

	macaddr = *((uint64_t *)addrp);
	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);

	for (i = ETHERADDRL - 1; i >= 0; i--) {
		ea.ether_addr_octet[i] = macaddr & 0xFF;
		macaddr >>= 8;
	}
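	/*
	 * e.g. (illustrative only) a remote-mac-address value of
	 * 0x00144fabcdef unpacks to ea = 00:14:4f:ab:cd:ef.
	 */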
	if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) {
		DERR(vswp, "%s: failed to attach port", __func__);
		return (1);
	}

	port = vsw_lookup_port(vswp, (int)inst);

	/* just successfully created the port, so it should exist */
	ASSERT(port != NULL);

	return (0);
}

/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids,
	struct ether_addr *macaddr)
{
	vsw_port_list_t		*plist = &vswp->plist;
	vsw_port_t		*port, **prev_port;
	int			i;

	D1(vswp, "%s: enter : port %d", __func__, p_instance);

	/* port already exists? */
	READ_ENTER(&plist->lockrw);
	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			DWARN(vswp, "%s: port instance %d already attached",
				__func__, p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}
	}
	RW_EXIT(&plist->lockrw);

	port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
	port->p_vswp = vswp;
	port->p_instance = p_instance;
	port->p_ldclist.num_ldcs = 0;
	port->p_ldclist.head = NULL;

	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);

	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&port->ref_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->ref_cv, NULL, CV_DRIVER, NULL);

	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
	port->state = VSW_PORT_INIT;

	if (nids > VSW_PORT_MAX_LDCS) {
		D2(vswp, "%s: using first of %d ldc ids",
			__func__, nids);
		nids = VSW_PORT_MAX_LDCS;
	}

	D2(vswp, "%s: %d nids", __func__, nids);
	for (i = 0; i < nids; i++) {
		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
			DERR(vswp, "%s: ldc_attach failed", __func__);

			rw_destroy(&port->p_ldclist.lockrw);

			cv_destroy(&port->ref_cv);
			mutex_destroy(&port->ref_lock);

			cv_destroy(&port->state_cv);
			mutex_destroy(&port->state_lock);

			mutex_destroy(&port->tx_lock);
			mutex_destroy(&port->mca_lock);
			kmem_free(port, sizeof (vsw_port_t));
			return (1);
		}
	}

	ether_copy(macaddr, &port->p_macaddr);

	WRITE_ENTER(&plist->lockrw);

	/* create the fdb entry for this port/mac address */
	(void) vsw_add_fdb(vswp, port);

	/* link it into the list of ports for this vsw instance */
	prev_port = (vsw_port_t **)(&plist->head);
	port->p_next = *prev_port;
	*prev_port = port;
	plist->num_ports++;
	RW_EXIT(&plist->lockrw);

	/*
	 * Initialise the port and any ldc's under it.
	 */
	(void) vsw_init_ldcs(port);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
	vsw_port_t	*port = NULL;
	vsw_port_list_t	*plist = &vswp->plist;

	D1(vswp, "%s: enter: port id %d", __func__, p_instance);

	WRITE_ENTER(&plist->lockrw);

	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	if (vsw_plist_del_node(vswp, port)) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	/* Remove the fdb entry for this port/mac address */
	(void) vsw_del_fdb(vswp, port);

	/* Remove any multicast addresses.. */
	vsw_del_mcst_port(port);

	/*
	 * No longer need to hold lock on port list now that we
	 * have unlinked the target port from the list.
	 */
	RW_EXIT(&plist->lockrw);

	if (vsw_port_delete(port)) {
		return (1);
	}

	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
	return (0);
}

/*
 * Detach all active ports.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_detach_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port = NULL;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&plist->lockrw);

	while ((port = plist->head) != NULL) {
		if (vsw_plist_del_node(vswp, port)) {
			DERR(vswp, "%s: Error deleting port %d"
				" from port list", __func__,
				port->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}

		/* Remove the fdb entry for this port/mac address */
		(void) vsw_del_fdb(vswp, port);

		/* Remove any multicast addresses.. */
		vsw_del_mcst_port(port);

		/*
		 * No longer need to hold the lock on the port list
		 * now that we have unlinked the target port from the
		 * list.
		 */
		RW_EXIT(&plist->lockrw);
		if (vsw_port_delete(port)) {
			DERR(vswp, "%s: Error deleting port %d",
				__func__, port->p_instance);
			return (1);
		}
		WRITE_ENTER(&plist->lockrw);
	}
	RW_EXIT(&plist->lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Delete the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_delete(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

	(void) vsw_uninit_ldcs(port);

	/*
	 * Wait for any pending ctrl msg tasks which reference this
	 * port to finish.
	 */
	if (vsw_drain_port_taskq(port))
		return (1);

	/*
	 * Wait for port reference count to hit zero.
	 */
	mutex_enter(&port->ref_lock);
	while (port->ref_cnt != 0)
		cv_wait(&port->ref_cv, &port->ref_lock);
	mutex_exit(&port->ref_lock);

	/*
	 * Wait for any active callbacks to finish
	 */
	if (vsw_drain_ldcs(port))
		return (1);

	ldcl = &port->p_ldclist;
	WRITE_ENTER(&ldcl->lockrw);
	while (ldcl->num_ldcs > 0) {
		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {
			cmn_err(CE_WARN, "unable to detach ldc %ld",
					ldcl->head->ldc_id);
			RW_EXIT(&ldcl->lockrw);
			return (1);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	rw_destroy(&port->p_ldclist.lockrw);

	mutex_destroy(&port->mca_lock);
	mutex_destroy(&port->tx_lock);
	cv_destroy(&port->ref_cv);
	mutex_destroy(&port->ref_lock);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	kmem_free(port, sizeof (vsw_port_t));

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp = NULL;
	ldc_attr_t	attr;
	ldc_status_t	istatus;
	int		status = DDI_FAILURE;

	D1(vswp, "%s: enter", __func__);

	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		DERR(vswp, "%s: kmem_zalloc failed", __func__);
		return (1);
	}
	ldcp->ldc_id = ldc_id;

	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);

	/* required for handshake with peer */
	ldcp->local_session = (uint64_t)ddi_get_lbolt();
	ldcp->peer_session = 0;
	ldcp->session_status = 0;

	mutex_init(&ldcp->hss_lock, NULL, MUTEX_DRIVER, NULL);
	ldcp->hss_id = 1;	/* Initial handshake session id */

	/* only set for outbound lane, inbound set by peer */
	vsw_set_lane_attr(vswp, &ldcp->lane_out);

	attr.devclass = LDC_DEV_NT_SVC;
	attr.instance = ddi_get_instance(vswp->dip);
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.qlen = VSW_LDC_QLEN;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
			__func__, ldc_id, status);
		mutex_destroy(&ldcp->ldc_txlock);
		mutex_destroy(&ldcp->ldc_cblock);
		cv_destroy(&ldcp->drain_cv);
		mutex_destroy(&ldcp->drain_cv_lock);
		mutex_destroy(&ldcp->hss_lock);
		kmem_free(ldcp, sizeof (vsw_ldc_t));
		return (1);
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
			__func__, ldc_id, status);
		mutex_destroy(&ldcp->ldc_txlock);
		mutex_destroy(&ldcp->ldc_cblock);
		cv_destroy(&ldcp->drain_cv);
		mutex_destroy(&ldcp->drain_cv_lock);
		mutex_destroy(&ldcp->hss_lock);
		(void) ldc_fini(ldcp->ldc_handle);
		kmem_free(ldcp, sizeof (vsw_ldc_t));
		return (1);
	}

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: ldc_status failed", __func__);
		return (1);
	}

	ldcp->ldc_status = istatus;
	ldcp->ldc_port = port;
	ldcp->ldc_vswp = vswp;

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldcl->lockrw);
	ldcp->ldc_next = ldcl->head;
	ldcl->head = ldcp;
	ldcl->num_ldcs++;
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 *
 * Returns 0 on success, 1 on failure.
 */
2107 */ 2108 static int 2109 vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id) 2110 { 2111 vsw_t *vswp = port->p_vswp; 2112 vsw_ldc_t *ldcp, *prev_ldcp; 2113 vsw_ldc_list_t *ldcl = &port->p_ldclist; 2114 int rv; 2115 2116 prev_ldcp = ldcl->head; 2117 for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) { 2118 if (ldcp->ldc_id == ldc_id) { 2119 break; 2120 } 2121 } 2122 2123 /* specified ldc id not found */ 2124 if (ldcp == NULL) { 2125 DERR(vswp, "%s: ldcp = NULL", __func__); 2126 return (1); 2127 } 2128 2129 D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id); 2130 2131 /* 2132 * Before we can close the channel we must release any mapped 2133 * resources (e.g. drings). 2134 */ 2135 vsw_free_lane_resources(ldcp, INBOUND); 2136 vsw_free_lane_resources(ldcp, OUTBOUND); 2137 2138 /* 2139 * If the close fails we are in serious trouble, as won't 2140 * be able to delete the parent port. 2141 */ 2142 if ((rv = ldc_close(ldcp->ldc_handle)) != 0) { 2143 DERR(vswp, "%s: error %d closing channel %lld", 2144 __func__, rv, ldcp->ldc_id); 2145 return (1); 2146 } 2147 2148 (void) ldc_fini(ldcp->ldc_handle); 2149 2150 ldcp->ldc_status = LDC_INIT; 2151 ldcp->ldc_handle = NULL; 2152 ldcp->ldc_vswp = NULL; 2153 mutex_destroy(&ldcp->ldc_txlock); 2154 mutex_destroy(&ldcp->ldc_cblock); 2155 cv_destroy(&ldcp->drain_cv); 2156 mutex_destroy(&ldcp->drain_cv_lock); 2157 mutex_destroy(&ldcp->hss_lock); 2158 2159 /* unlink it from the list */ 2160 prev_ldcp = ldcp->ldc_next; 2161 ldcl->num_ldcs--; 2162 kmem_free(ldcp, sizeof (vsw_ldc_t)); 2163 2164 return (0); 2165 } 2166 2167 /* 2168 * Open and attempt to bring up the channel. Note that channel 2169 * can only be brought up if peer has also opened channel. 2170 * 2171 * Returns 0 if can open and bring up channel, otherwise 2172 * returns 1. 2173 */ 2174 static int 2175 vsw_ldc_init(vsw_ldc_t *ldcp) 2176 { 2177 vsw_t *vswp = ldcp->ldc_vswp; 2178 ldc_status_t istatus = 0; 2179 int rv; 2180 2181 D1(vswp, "%s: enter", __func__); 2182 2183 LDC_ENTER_LOCK(ldcp); 2184 2185 /* don't start at 0 in case clients don't like that */ 2186 ldcp->next_ident = 1; 2187 2188 rv = ldc_open(ldcp->ldc_handle); 2189 if (rv != 0) { 2190 DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)", 2191 __func__, ldcp->ldc_id, rv); 2192 LDC_EXIT_LOCK(ldcp); 2193 return (1); 2194 } 2195 2196 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 2197 DERR(vswp, "%s: unable to get status", __func__); 2198 LDC_EXIT_LOCK(ldcp); 2199 return (1); 2200 2201 } else if (istatus != LDC_OPEN && istatus != LDC_READY) { 2202 DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY", 2203 __func__, ldcp->ldc_id, istatus); 2204 LDC_EXIT_LOCK(ldcp); 2205 return (1); 2206 } 2207 2208 ldcp->ldc_status = istatus; 2209 rv = ldc_up(ldcp->ldc_handle); 2210 if (rv != 0) { 2211 /* 2212 * Not a fatal error for ldc_up() to fail, as peer 2213 * end point may simply not be ready yet. 2214 */ 2215 D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__, 2216 ldcp->ldc_id, rv); 2217 LDC_EXIT_LOCK(ldcp); 2218 return (1); 2219 } 2220 2221 /* 2222 * ldc_up() call is non-blocking so need to explicitly 2223 * check channel status to see if in fact the channel 2224 * is UP. 
2225 */ 2226 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 2227 DERR(vswp, "%s: unable to get status", __func__); 2228 LDC_EXIT_LOCK(ldcp); 2229 return (1); 2230 2231 } else if (istatus != LDC_UP) { 2232 DERR(vswp, "%s: id(%lld) status(%d) is not UP", 2233 __func__, ldcp->ldc_id, istatus); 2234 } else { 2235 ldcp->ldc_status = istatus; 2236 } 2237 2238 LDC_EXIT_LOCK(ldcp); 2239 2240 D1(vswp, "%s: exit", __func__); 2241 return (0); 2242 } 2243 2244 /* disable callbacks on the channel */ 2245 static int 2246 vsw_ldc_uninit(vsw_ldc_t *ldcp) 2247 { 2248 vsw_t *vswp = ldcp->ldc_vswp; 2249 int rv; 2250 2251 D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id); 2252 2253 LDC_ENTER_LOCK(ldcp); 2254 2255 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 2256 if (rv != 0) { 2257 DERR(vswp, "vsw_ldc_uninit(%lld): error disabling " 2258 "interrupts (rv = %d)\n", ldcp->ldc_id, rv); 2259 LDC_EXIT_LOCK(ldcp); 2260 return (1); 2261 } 2262 2263 ldcp->ldc_status = LDC_INIT; 2264 2265 LDC_EXIT_LOCK(ldcp); 2266 2267 D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id); 2268 2269 return (0); 2270 } 2271 2272 static int 2273 vsw_init_ldcs(vsw_port_t *port) 2274 { 2275 vsw_ldc_list_t *ldcl = &port->p_ldclist; 2276 vsw_ldc_t *ldcp; 2277 2278 READ_ENTER(&ldcl->lockrw); 2279 ldcp = ldcl->head; 2280 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 2281 (void) vsw_ldc_init(ldcp); 2282 } 2283 RW_EXIT(&ldcl->lockrw); 2284 2285 return (0); 2286 } 2287 2288 static int 2289 vsw_uninit_ldcs(vsw_port_t *port) 2290 { 2291 vsw_ldc_list_t *ldcl = &port->p_ldclist; 2292 vsw_ldc_t *ldcp; 2293 2294 D1(NULL, "vsw_uninit_ldcs: enter\n"); 2295 2296 READ_ENTER(&ldcl->lockrw); 2297 ldcp = ldcl->head; 2298 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 2299 (void) vsw_ldc_uninit(ldcp); 2300 } 2301 RW_EXIT(&ldcl->lockrw); 2302 2303 D1(NULL, "vsw_uninit_ldcs: exit\n"); 2304 2305 return (0); 2306 } 2307 2308 /* 2309 * Wait until the callback(s) associated with the ldcs under the specified 2310 * port have completed. 2311 * 2312 * Prior to this function being invoked each channel under this port 2313 * should have been quiesced via ldc_set_cb_mode(DISABLE). 2314 * 2315 * A short explaination of what we are doing below.. 2316 * 2317 * The simplest approach would be to have a reference counter in 2318 * the ldc structure which is increment/decremented by the callbacks as 2319 * they use the channel. The drain function could then simply disable any 2320 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately 2321 * there is a tiny window here - before the callback is able to get the lock 2322 * on the channel it is interrupted and this function gets to execute. It 2323 * sees that the ref count is zero and believes its free to delete the 2324 * associated data structures. 2325 * 2326 * We get around this by taking advantage of the fact that before the ldc 2327 * framework invokes a callback it sets a flag to indicate that there is a 2328 * callback active (or about to become active). If when we attempt to 2329 * unregister a callback when this active flag is set then the unregister 2330 * will fail with EWOULDBLOCK. 2331 * 2332 * If the unregister fails we do a cv_timedwait. We will either be signaled 2333 * by the callback as it is exiting (note we have to wait a short period to 2334 * allow the callback to return fully to the ldc framework and it to clear 2335 * the active flag), or by the timer expiring. In either case we again attempt 2336 * the unregister. 
We repeat this until we can succesfully unregister the 2337 * callback. 2338 * 2339 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch 2340 * the case where the callback has finished but the ldc framework has not yet 2341 * cleared the active flag. In this case we would never get a cv_signal. 2342 */ 2343 static int 2344 vsw_drain_ldcs(vsw_port_t *port) 2345 { 2346 vsw_ldc_list_t *ldcl = &port->p_ldclist; 2347 vsw_ldc_t *ldcp; 2348 vsw_t *vswp = port->p_vswp; 2349 2350 D1(vswp, "%s: enter", __func__); 2351 2352 READ_ENTER(&ldcl->lockrw); 2353 2354 ldcp = ldcl->head; 2355 2356 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 2357 /* 2358 * If we can unregister the channel callback then we 2359 * know that there is no callback either running or 2360 * scheduled to run for this channel so move on to next 2361 * channel in the list. 2362 */ 2363 mutex_enter(&ldcp->drain_cv_lock); 2364 2365 /* prompt active callbacks to quit */ 2366 ldcp->drain_state = VSW_LDC_DRAINING; 2367 2368 if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) { 2369 D2(vswp, "%s: unreg callback for chan %ld", __func__, 2370 ldcp->ldc_id); 2371 mutex_exit(&ldcp->drain_cv_lock); 2372 continue; 2373 } else { 2374 /* 2375 * If we end up here we know that either 1) a callback 2376 * is currently executing, 2) is about to start (i.e. 2377 * the ldc framework has set the active flag but 2378 * has not actually invoked the callback yet, or 3) 2379 * has finished and has returned to the ldc framework 2380 * but the ldc framework has not yet cleared the 2381 * active bit. 2382 * 2383 * Wait for it to finish. 2384 */ 2385 while (ldc_unreg_callback(ldcp->ldc_handle) 2386 == EWOULDBLOCK) 2387 (void) cv_timedwait(&ldcp->drain_cv, 2388 &ldcp->drain_cv_lock, lbolt + hz); 2389 2390 mutex_exit(&ldcp->drain_cv_lock); 2391 D2(vswp, "%s: unreg callback for chan %ld after " 2392 "timeout", __func__, ldcp->ldc_id); 2393 } 2394 } 2395 RW_EXIT(&ldcl->lockrw); 2396 2397 D1(vswp, "%s: exit", __func__); 2398 return (0); 2399 } 2400 2401 /* 2402 * Wait until all tasks which reference this port have completed. 2403 * 2404 * Prior to this function being invoked each channel under this port 2405 * should have been quiesced via ldc_set_cb_mode(DISABLE). 2406 */ 2407 static int 2408 vsw_drain_port_taskq(vsw_port_t *port) 2409 { 2410 vsw_t *vswp = port->p_vswp; 2411 2412 D1(vswp, "%s: enter", __func__); 2413 2414 /* 2415 * Mark the port as in the process of being detached, and 2416 * dispatch a marker task to the queue so we know when all 2417 * relevant tasks have completed. 2418 */ 2419 mutex_enter(&port->state_lock); 2420 port->state = VSW_PORT_DETACHING; 2421 2422 if ((vswp->taskq_p == NULL) || 2423 (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task, 2424 port, DDI_NOSLEEP) != DDI_SUCCESS)) { 2425 DERR(vswp, "%s: unable to dispatch marker task", 2426 __func__); 2427 mutex_exit(&port->state_lock); 2428 return (1); 2429 } 2430 2431 /* 2432 * Wait for the marker task to finish. 2433 */ 2434 while (port->state != VSW_PORT_DETACHABLE) 2435 cv_wait(&port->state_cv, &port->state_lock); 2436 2437 mutex_exit(&port->state_lock); 2438 2439 D1(vswp, "%s: exit", __func__); 2440 2441 return (0); 2442 } 2443 2444 static void 2445 vsw_marker_task(void *arg) 2446 { 2447 vsw_port_t *port = arg; 2448 vsw_t *vswp = port->p_vswp; 2449 2450 D1(vswp, "%s: enter", __func__); 2451 2452 mutex_enter(&port->state_lock); 2453 2454 /* 2455 * No further tasks should be dispatched which reference 2456 * this port so ok to mark it as safe to detach. 
2457 */ 2458 port->state = VSW_PORT_DETACHABLE; 2459 2460 cv_signal(&port->state_cv); 2461 2462 mutex_exit(&port->state_lock); 2463 2464 D1(vswp, "%s: exit", __func__); 2465 } 2466 2467 static vsw_port_t * 2468 vsw_lookup_port(vsw_t *vswp, int p_instance) 2469 { 2470 vsw_port_list_t *plist = &vswp->plist; 2471 vsw_port_t *port; 2472 2473 for (port = plist->head; port != NULL; port = port->p_next) { 2474 if (port->p_instance == p_instance) { 2475 D2(vswp, "vsw_lookup_port: found p_instance\n"); 2476 return (port); 2477 } 2478 } 2479 2480 return (NULL); 2481 } 2482 2483 /* 2484 * Search for and remove the specified port from the port 2485 * list. Returns 0 if able to locate and remove port, otherwise 2486 * returns 1. 2487 */ 2488 static int 2489 vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port) 2490 { 2491 vsw_port_list_t *plist = &vswp->plist; 2492 vsw_port_t *curr_p, *prev_p; 2493 2494 if (plist->head == NULL) 2495 return (1); 2496 2497 curr_p = prev_p = plist->head; 2498 2499 while (curr_p != NULL) { 2500 if (curr_p == port) { 2501 if (prev_p == curr_p) { 2502 plist->head = curr_p->p_next; 2503 } else { 2504 prev_p->p_next = curr_p->p_next; 2505 } 2506 plist->num_ports--; 2507 break; 2508 } else { 2509 prev_p = curr_p; 2510 curr_p = curr_p->p_next; 2511 } 2512 } 2513 return (0); 2514 } 2515 2516 /* 2517 * Interrupt handler for ldc messages. 2518 */ 2519 static uint_t 2520 vsw_ldc_cb(uint64_t event, caddr_t arg) 2521 { 2522 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2523 vsw_t *vswp = ldcp->ldc_vswp; 2524 ldc_status_t lstatus; 2525 int rv; 2526 2527 D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2528 2529 mutex_enter(&ldcp->ldc_cblock); 2530 2531 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 2532 mutex_exit(&ldcp->ldc_cblock); 2533 return (LDC_SUCCESS); 2534 } 2535 2536 if (event & LDC_EVT_UP) { 2537 /* 2538 * Channel has come up, get the state and then start 2539 * the handshake. 2540 */ 2541 rv = ldc_status(ldcp->ldc_handle, &lstatus); 2542 if (rv != 0) { 2543 cmn_err(CE_WARN, "Unable to read channel state"); 2544 } 2545 ldcp->ldc_status = lstatus; 2546 2547 D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)", 2548 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 2549 2550 vsw_restart_handshake(ldcp); 2551 2552 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2553 } 2554 2555 if (event & LDC_EVT_READ) { 2556 /* 2557 * Data available for reading. 2558 */ 2559 D2(vswp, "%s: id(ld) event(%llx) data READ", 2560 __func__, ldcp->ldc_id, event); 2561 2562 vsw_process_pkt(ldcp); 2563 2564 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2565 2566 goto vsw_cb_exit; 2567 } 2568 2569 if (event & LDC_EVT_RESET) { 2570 rv = ldc_status(ldcp->ldc_handle, &lstatus); 2571 if (rv != 0) { 2572 cmn_err(CE_WARN, "Unable to read channel state"); 2573 } else { 2574 ldcp->ldc_status = lstatus; 2575 } 2576 D2(vswp, "%s: id(%ld) event(%llx) RESET: status (%ld)", 2577 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 2578 } 2579 2580 if (event & LDC_EVT_DOWN) { 2581 rv = ldc_status(ldcp->ldc_handle, &lstatus); 2582 if (rv != 0) { 2583 cmn_err(CE_WARN, "Unable to read channel state"); 2584 } else { 2585 ldcp->ldc_status = lstatus; 2586 } 2587 2588 D2(vswp, "%s: id(%ld) event(%llx) DOWN: status (%ld)", 2589 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 2590 2591 } 2592 2593 /* 2594 * Catch either LDC_EVT_WRITE which we don't support or any 2595 * unknown event. 
2596 */ 2597 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET 2598 | LDC_EVT_DOWN | LDC_EVT_READ)) { 2599 2600 DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 2601 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 2602 } 2603 2604 vsw_cb_exit: 2605 mutex_exit(&ldcp->ldc_cblock); 2606 2607 /* 2608 * Let the drain function know we are finishing if it 2609 * is waiting. 2610 */ 2611 mutex_enter(&ldcp->drain_cv_lock); 2612 if (ldcp->drain_state == VSW_LDC_DRAINING) 2613 cv_signal(&ldcp->drain_cv); 2614 mutex_exit(&ldcp->drain_cv_lock); 2615 2616 return (LDC_SUCCESS); 2617 } 2618 2619 /* 2620 * (Re)start a handshake with our peer by sending them 2621 * our version info. 2622 */ 2623 static void 2624 vsw_restart_handshake(vsw_ldc_t *ldcp) 2625 { 2626 vsw_t *vswp = ldcp->ldc_vswp; 2627 vsw_port_t *port; 2628 vsw_ldc_list_t *ldcl; 2629 2630 D1(vswp, "vsw_restart_handshake: enter"); 2631 2632 port = ldcp->ldc_port; 2633 ldcl = &port->p_ldclist; 2634 2635 WRITE_ENTER(&ldcl->lockrw); 2636 2637 D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 2638 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 2639 2640 vsw_free_lane_resources(ldcp, INBOUND); 2641 vsw_free_lane_resources(ldcp, OUTBOUND); 2642 RW_EXIT(&ldcl->lockrw); 2643 2644 ldcp->lane_in.lstate = 0; 2645 ldcp->lane_out.lstate = 0; 2646 2647 /* 2648 * Remove parent port from any multicast groups 2649 * it may have registered with. Client must resend 2650 * multicast add command after handshake completes. 2651 */ 2652 (void) vsw_del_fdb(vswp, port); 2653 2654 vsw_del_mcst_port(port); 2655 2656 ldcp->hphase = VSW_MILESTONE0; 2657 2658 ldcp->peer_session = 0; 2659 ldcp->session_status = 0; 2660 2661 /* 2662 * We now increment the transaction group id. This allows 2663 * us to identify and disard any tasks which are still pending 2664 * on the taskq and refer to the handshake session we are about 2665 * to restart. These stale messages no longer have any real 2666 * meaning. 2667 */ 2668 mutex_enter(&ldcp->hss_lock); 2669 ldcp->hss_id++; 2670 mutex_exit(&ldcp->hss_lock); 2671 2672 if (ldcp->hcnt++ > vsw_num_handshakes) { 2673 cmn_err(CE_WARN, "exceeded number of permitted " 2674 "handshake attempts (%d) on channel %ld", 2675 ldcp->hcnt, ldcp->ldc_id); 2676 return; 2677 } 2678 2679 vsw_send_ver(ldcp); 2680 2681 D1(vswp, "vsw_restart_handshake: exit"); 2682 } 2683 2684 /* 2685 * returns 0 if legal for event signified by flag to have 2686 * occured at the time it did. Otherwise returns 1. 
2687 */ 2688 int 2689 vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag) 2690 { 2691 vsw_t *vswp = ldcp->ldc_vswp; 2692 uint64_t state; 2693 uint64_t phase; 2694 2695 if (dir == INBOUND) 2696 state = ldcp->lane_in.lstate; 2697 else 2698 state = ldcp->lane_out.lstate; 2699 2700 phase = ldcp->hphase; 2701 2702 switch (flag) { 2703 case VSW_VER_INFO_RECV: 2704 if (phase > VSW_MILESTONE0) { 2705 DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV" 2706 " when in state %d\n", ldcp->ldc_id, phase); 2707 vsw_restart_handshake(ldcp); 2708 return (1); 2709 } 2710 break; 2711 2712 case VSW_VER_ACK_RECV: 2713 case VSW_VER_NACK_RECV: 2714 if (!(state & VSW_VER_INFO_SENT)) { 2715 DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK" 2716 " or VER_NACK when in state %d\n", 2717 ldcp->ldc_id, phase); 2718 vsw_restart_handshake(ldcp); 2719 return (1); 2720 } else 2721 state &= ~VSW_VER_INFO_SENT; 2722 break; 2723 2724 case VSW_ATTR_INFO_RECV: 2725 if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) { 2726 DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV" 2727 " when in state %d\n", ldcp->ldc_id, phase); 2728 vsw_restart_handshake(ldcp); 2729 return (1); 2730 } 2731 break; 2732 2733 case VSW_ATTR_ACK_RECV: 2734 case VSW_ATTR_NACK_RECV: 2735 if (!(state & VSW_ATTR_INFO_SENT)) { 2736 DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK" 2737 " or ATTR_NACK when in state %d\n", 2738 ldcp->ldc_id, phase); 2739 vsw_restart_handshake(ldcp); 2740 return (1); 2741 } else 2742 state &= ~VSW_ATTR_INFO_SENT; 2743 break; 2744 2745 case VSW_DRING_INFO_RECV: 2746 if (phase < VSW_MILESTONE1) { 2747 DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV" 2748 " when in state %d\n", ldcp->ldc_id, phase); 2749 vsw_restart_handshake(ldcp); 2750 return (1); 2751 } 2752 break; 2753 2754 case VSW_DRING_ACK_RECV: 2755 case VSW_DRING_NACK_RECV: 2756 if (!(state & VSW_DRING_INFO_SENT)) { 2757 DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK" 2758 " or DRING_NACK when in state %d\n", 2759 ldcp->ldc_id, phase); 2760 vsw_restart_handshake(ldcp); 2761 return (1); 2762 } else 2763 state &= ~VSW_DRING_INFO_SENT; 2764 break; 2765 2766 case VSW_RDX_INFO_RECV: 2767 if (phase < VSW_MILESTONE3) { 2768 DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV" 2769 " when in state %d\n", ldcp->ldc_id, phase); 2770 vsw_restart_handshake(ldcp); 2771 return (1); 2772 } 2773 break; 2774 2775 case VSW_RDX_ACK_RECV: 2776 case VSW_RDX_NACK_RECV: 2777 if (!(state & VSW_RDX_INFO_SENT)) { 2778 DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK" 2779 " or RDX_NACK when in state %d\n", 2780 ldcp->ldc_id, phase); 2781 vsw_restart_handshake(ldcp); 2782 return (1); 2783 } else 2784 state &= ~VSW_RDX_INFO_SENT; 2785 break; 2786 2787 case VSW_MCST_INFO_RECV: 2788 if (phase < VSW_MILESTONE3) { 2789 DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV" 2790 " when in state %d\n", ldcp->ldc_id, phase); 2791 vsw_restart_handshake(ldcp); 2792 return (1); 2793 } 2794 break; 2795 2796 default: 2797 DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)", 2798 ldcp->ldc_id, flag); 2799 return (1); 2800 } 2801 2802 if (dir == INBOUND) 2803 ldcp->lane_in.lstate = state; 2804 else 2805 ldcp->lane_out.lstate = state; 2806 2807 D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id); 2808 2809 return (0); 2810 } 2811 2812 void 2813 vsw_next_milestone(vsw_ldc_t *ldcp) 2814 { 2815 vsw_t *vswp = ldcp->ldc_vswp; 2816 2817 D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__, 2818 ldcp->ldc_id, ldcp->hphase); 2819 2820 DUMP_FLAGS(ldcp->lane_in.lstate); 2821 
DUMP_FLAGS(ldcp->lane_out.lstate); 2822 2823 switch (ldcp->hphase) { 2824 2825 case VSW_MILESTONE0: 2826 /* 2827 * If we haven't started to handshake with our peer, 2828 * start to do so now. 2829 */ 2830 if (ldcp->lane_out.lstate == 0) { 2831 D2(vswp, "%s: (chan %lld) starting handshake " 2832 "with peer", __func__, ldcp->ldc_id); 2833 vsw_restart_handshake(ldcp); 2834 } 2835 2836 /* 2837 * Only way to pass this milestone is to have successfully 2838 * negotiated version info. 2839 */ 2840 if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) && 2841 (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) { 2842 2843 D2(vswp, "%s: (chan %lld) leaving milestone 0", 2844 __func__, ldcp->ldc_id); 2845 2846 /* 2847 * Next milestone is passed when attribute 2848 * information has been successfully exchanged. 2849 */ 2850 ldcp->hphase = VSW_MILESTONE1; 2851 vsw_send_attr(ldcp); 2852 2853 } 2854 break; 2855 2856 case VSW_MILESTONE1: 2857 /* 2858 * Only way to pass this milestone is to have successfully 2859 * negotiated attribute information. 2860 */ 2861 if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) { 2862 2863 ldcp->hphase = VSW_MILESTONE2; 2864 2865 /* 2866 * If the peer device has said it wishes to 2867 * use descriptor rings then we send it our ring 2868 * info, otherwise we just set up a private ring 2869 * which we use an internal buffer 2870 */ 2871 if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) 2872 vsw_send_dring_info(ldcp); 2873 } 2874 break; 2875 2876 2877 case VSW_MILESTONE2: 2878 /* 2879 * If peer has indicated in its attribute message that 2880 * it wishes to use descriptor rings then the only way 2881 * to pass this milestone is for us to have received 2882 * valid dring info. 2883 * 2884 * If peer is not using descriptor rings then just fall 2885 * through. 2886 */ 2887 if ((ldcp->lane_in.xfer_mode == VIO_DRING_MODE) && 2888 (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))) 2889 break; 2890 2891 D2(vswp, "%s: (chan %lld) leaving milestone 2", 2892 __func__, ldcp->ldc_id); 2893 2894 ldcp->hphase = VSW_MILESTONE3; 2895 vsw_send_rdx(ldcp); 2896 break; 2897 2898 case VSW_MILESTONE3: 2899 /* 2900 * Pass this milestone when all paramaters have been 2901 * successfully exchanged and RDX sent in both directions. 2902 * 2903 * Mark outbound lane as available to transmit data. 2904 */ 2905 if ((ldcp->lane_in.lstate & VSW_RDX_ACK_SENT) && 2906 (ldcp->lane_out.lstate & VSW_RDX_ACK_RECV)) { 2907 2908 D2(vswp, "%s: (chan %lld) leaving milestone 3", 2909 __func__, ldcp->ldc_id); 2910 D2(vswp, "%s: ** handshake complete **", __func__); 2911 ldcp->lane_out.lstate |= VSW_LANE_ACTIVE; 2912 ldcp->hphase = VSW_MILESTONE4; 2913 ldcp->hcnt = 0; 2914 DISPLAY_STATE(); 2915 } 2916 break; 2917 2918 case VSW_MILESTONE4: 2919 D2(vswp, "%s: (chan %lld) in milestone 4", __func__, 2920 ldcp->ldc_id); 2921 break; 2922 2923 default: 2924 DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__, 2925 ldcp->ldc_id, ldcp->hphase); 2926 } 2927 2928 D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id, 2929 ldcp->hphase); 2930 } 2931 2932 /* 2933 * Check if major version is supported. 2934 * 2935 * Returns 0 if finds supported major number, and if necessary 2936 * adjusts the minor field. 2937 * 2938 * Returns 1 if can't match major number exactly. Sets mjor/minor 2939 * to next lowest support values, or to zero if no other values possible. 
2940 */ 2941 static int 2942 vsw_supported_version(vio_ver_msg_t *vp) 2943 { 2944 int i; 2945 2946 D1(NULL, "vsw_supported_version: enter"); 2947 2948 for (i = 0; i < VSW_NUM_VER; i++) { 2949 if (vsw_versions[i].ver_major == vp->ver_major) { 2950 /* 2951 * Matching or lower major version found. Update 2952 * minor number if necessary. 2953 */ 2954 if (vp->ver_minor > vsw_versions[i].ver_minor) { 2955 D2(NULL, "%s: adjusting minor value" 2956 " from %d to %d", __func__, 2957 vp->ver_minor, 2958 vsw_versions[i].ver_minor); 2959 vp->ver_minor = vsw_versions[i].ver_minor; 2960 } 2961 2962 return (0); 2963 } 2964 2965 if (vsw_versions[i].ver_major < vp->ver_major) { 2966 if (vp->ver_minor > vsw_versions[i].ver_minor) { 2967 D2(NULL, "%s: adjusting minor value" 2968 " from %d to %d", __func__, 2969 vp->ver_minor, 2970 vsw_versions[i].ver_minor); 2971 vp->ver_minor = vsw_versions[i].ver_minor; 2972 } 2973 return (1); 2974 } 2975 } 2976 2977 /* No match was possible, zero out fields */ 2978 vp->ver_major = 0; 2979 vp->ver_minor = 0; 2980 2981 D1(NULL, "vsw_supported_version: exit"); 2982 2983 return (1); 2984 } 2985 2986 /* 2987 * Main routine for processing messages received over LDC. 2988 */ 2989 static void 2990 vsw_process_pkt(void *arg) 2991 { 2992 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2993 vsw_t *vswp = ldcp->ldc_vswp; 2994 size_t msglen; 2995 vio_msg_tag_t tag; 2996 def_msg_t dmsg; 2997 int rv = 0; 2998 2999 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 3000 3001 /* 3002 * If channel is up read messages until channel is empty. 3003 */ 3004 do { 3005 msglen = sizeof (dmsg); 3006 rv = ldc_read(ldcp->ldc_handle, (caddr_t)&dmsg, &msglen); 3007 3008 if (rv != 0) { 3009 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) " 3010 "len(%d)\n", __func__, ldcp->ldc_id, 3011 rv, msglen); 3012 break; 3013 } 3014 3015 if (msglen == 0) { 3016 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 3017 ldcp->ldc_id); 3018 break; 3019 } 3020 3021 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 3022 ldcp->ldc_id, msglen); 3023 3024 /* 3025 * Figure out what sort of packet we have gotten by 3026 * examining the msg tag, and then switch it appropriately. 3027 */ 3028 bcopy(&dmsg, &tag, sizeof (vio_msg_tag_t)); 3029 3030 switch (tag.vio_msgtype) { 3031 case VIO_TYPE_CTRL: 3032 vsw_dispatch_ctrl_task(ldcp, &dmsg, tag); 3033 break; 3034 case VIO_TYPE_DATA: 3035 vsw_process_data_pkt(ldcp, &dmsg, tag); 3036 break; 3037 case VIO_TYPE_ERR: 3038 vsw_process_err_pkt(ldcp, &dmsg, tag); 3039 break; 3040 default: 3041 DERR(vswp, "%s: Unknown tag(%lx) ", __func__, 3042 "id(%lx)\n", tag.vio_msgtype, ldcp->ldc_id); 3043 break; 3044 } 3045 } while (msglen); 3046 3047 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 3048 } 3049 3050 /* 3051 * Dispatch a task to process a VIO control message. 3052 */ 3053 static void 3054 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t tag) 3055 { 3056 vsw_ctrl_task_t *ctaskp = NULL; 3057 vsw_port_t *port = ldcp->ldc_port; 3058 vsw_t *vswp = port->p_vswp; 3059 3060 D1(vswp, "%s: enter", __func__); 3061 3062 /* 3063 * We need to handle RDX ACK messages in-band as once they 3064 * are exchanged it is possible that we will get an 3065 * immediate (legitimate) data packet. 
3066 */ 3067 if ((tag.vio_subtype_env == VIO_RDX) && 3068 (tag.vio_subtype == VIO_SUBTYPE_ACK)) { 3069 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_ACK_RECV)) 3070 return; 3071 3072 ldcp->lane_out.lstate |= VSW_RDX_ACK_RECV; 3073 vsw_next_milestone(ldcp); 3074 D2(vswp, "%s (%ld) handling RDX_ACK in place", __func__, 3075 ldcp->ldc_id); 3076 return; 3077 } 3078 3079 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 3080 3081 if (ctaskp == NULL) { 3082 DERR(vswp, "%s: unable to alloc space for ctrl" 3083 " msg", __func__); 3084 vsw_restart_handshake(ldcp); 3085 return; 3086 } 3087 3088 ctaskp->ldcp = ldcp; 3089 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 3090 mutex_enter(&ldcp->hss_lock); 3091 ctaskp->hss_id = ldcp->hss_id; 3092 mutex_exit(&ldcp->hss_lock); 3093 3094 /* 3095 * Dispatch task to processing taskq if port is not in 3096 * the process of being detached. 3097 */ 3098 mutex_enter(&port->state_lock); 3099 if (port->state == VSW_PORT_INIT) { 3100 if ((vswp->taskq_p == NULL) || 3101 (ddi_taskq_dispatch(vswp->taskq_p, 3102 vsw_process_ctrl_pkt, ctaskp, DDI_NOSLEEP) 3103 != DDI_SUCCESS)) { 3104 DERR(vswp, "%s: unable to dispatch task to taskq", 3105 __func__); 3106 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 3107 mutex_exit(&port->state_lock); 3108 vsw_restart_handshake(ldcp); 3109 return; 3110 } 3111 } else { 3112 DWARN(vswp, "%s: port %d detaching, not dispatching " 3113 "task", __func__, port->p_instance); 3114 } 3115 3116 mutex_exit(&port->state_lock); 3117 3118 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 3119 ldcp->ldc_id); 3120 D1(vswp, "%s: exit", __func__); 3121 } 3122 3123 /* 3124 * Process a VIO ctrl message. Invoked from taskq. 3125 */ 3126 static void 3127 vsw_process_ctrl_pkt(void *arg) 3128 { 3129 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 3130 vsw_ldc_t *ldcp = ctaskp->ldcp; 3131 vsw_t *vswp = ldcp->ldc_vswp; 3132 vio_msg_tag_t tag; 3133 uint16_t env; 3134 3135 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3136 3137 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 3138 env = tag.vio_subtype_env; 3139 3140 /* stale pkt check */ 3141 mutex_enter(&ldcp->hss_lock); 3142 if (ctaskp->hss_id < ldcp->hss_id) { 3143 DWARN(vswp, "%s: discarding stale packet belonging to" 3144 " earlier (%ld) handshake session", __func__, 3145 ctaskp->hss_id); 3146 mutex_exit(&ldcp->hss_lock); 3147 return; 3148 } 3149 mutex_exit(&ldcp->hss_lock); 3150 3151 /* session id check */ 3152 if (ldcp->session_status & VSW_PEER_SESSION) { 3153 if (ldcp->peer_session != tag.vio_sid) { 3154 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3155 __func__, ldcp->ldc_id, tag.vio_sid); 3156 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 3157 vsw_restart_handshake(ldcp); 3158 return; 3159 } 3160 } 3161 3162 /* 3163 * Switch on vio_subtype envelope, then let lower routines 3164 * decide if its an INFO, ACK or NACK packet. 
3165 */ 3166 switch (env) { 3167 case VIO_VER_INFO: 3168 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 3169 break; 3170 case VIO_DRING_REG: 3171 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 3172 break; 3173 case VIO_DRING_UNREG: 3174 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 3175 break; 3176 case VIO_ATTR_INFO: 3177 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 3178 break; 3179 case VNET_MCAST_INFO: 3180 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 3181 break; 3182 case VIO_RDX: 3183 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 3184 break; 3185 default: 3186 DERR(vswp, "%s : unknown vio_subtype_env (%x)\n", 3187 __func__, env); 3188 } 3189 3190 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 3191 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3192 } 3193 3194 /* 3195 * Version negotiation. We can end up here either because our peer 3196 * has responded to a handshake message we have sent it, or our peer 3197 * has initiated a handshake with us. If its the former then can only 3198 * be ACK or NACK, if its the later can only be INFO. 3199 * 3200 * If its an ACK we move to the next stage of the handshake, namely 3201 * attribute exchange. If its a NACK we see if we can specify another 3202 * version, if we can't we stop. 3203 * 3204 * If it is an INFO we reset all params associated with communication 3205 * in that direction over this channel (remember connection is 3206 * essentially 2 independent simplex channels). 3207 */ 3208 void 3209 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 3210 { 3211 vio_ver_msg_t *ver_pkt; 3212 vsw_t *vswp = ldcp->ldc_vswp; 3213 3214 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3215 3216 /* 3217 * We know this is a ctrl/version packet so 3218 * cast it into the correct structure. 3219 */ 3220 ver_pkt = (vio_ver_msg_t *)pkt; 3221 3222 switch (ver_pkt->tag.vio_subtype) { 3223 case VIO_SUBTYPE_INFO: 3224 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 3225 3226 /* 3227 * Record the session id, which we will use from now 3228 * until we see another VER_INFO msg. Even then the 3229 * session id in most cases will be unchanged, execpt 3230 * if channel was reset. 3231 */ 3232 if ((ldcp->session_status & VSW_PEER_SESSION) && 3233 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 3234 DERR(vswp, "%s: updating session id for chan %lld " 3235 "from %llx to %llx", __func__, ldcp->ldc_id, 3236 ldcp->peer_session, ver_pkt->tag.vio_sid); 3237 } 3238 3239 ldcp->peer_session = ver_pkt->tag.vio_sid; 3240 ldcp->session_status |= VSW_PEER_SESSION; 3241 3242 /* Legal message at this time ? */ 3243 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 3244 return; 3245 3246 /* 3247 * First check the device class. Currently only expect 3248 * to be talking to a network device. In the future may 3249 * also talk to another switch. 3250 */ 3251 if (ver_pkt->dev_class != VDEV_NETWORK) { 3252 DERR(vswp, "%s: illegal device class %d", __func__, 3253 ver_pkt->dev_class); 3254 3255 ver_pkt->tag.vio_sid = ldcp->local_session; 3256 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3257 3258 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 3259 3260 vsw_send_msg(ldcp, (void *)ver_pkt, 3261 sizeof (vio_ver_msg_t)); 3262 3263 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 3264 vsw_next_milestone(ldcp); 3265 return; 3266 } else { 3267 ldcp->dev_class = ver_pkt->dev_class; 3268 } 3269 3270 /* 3271 * Now check the version. 3272 */ 3273 if (vsw_supported_version(ver_pkt) == 0) { 3274 /* 3275 * Support this major version and possibly 3276 * adjusted minor version. 
3277 */ 3278 3279 D2(vswp, "%s: accepted ver %d:%d", __func__, 3280 ver_pkt->ver_major, ver_pkt->ver_minor); 3281 3282 /* Store accepted values */ 3283 ldcp->lane_in.ver_major = ver_pkt->ver_major; 3284 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 3285 3286 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3287 3288 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 3289 } else { 3290 /* 3291 * NACK back with the next lower major/minor 3292 * pairing we support (if don't suuport any more 3293 * versions then they will be set to zero. 3294 */ 3295 3296 D2(vswp, "%s: replying with ver %d:%d", __func__, 3297 ver_pkt->ver_major, ver_pkt->ver_minor); 3298 3299 /* Store updated values */ 3300 ldcp->lane_in.ver_major = ver_pkt->ver_major; 3301 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 3302 3303 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3304 3305 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 3306 } 3307 3308 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 3309 ver_pkt->tag.vio_sid = ldcp->local_session; 3310 vsw_send_msg(ldcp, (void *)ver_pkt, sizeof (vio_ver_msg_t)); 3311 3312 vsw_next_milestone(ldcp); 3313 break; 3314 3315 case VIO_SUBTYPE_ACK: 3316 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 3317 3318 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 3319 return; 3320 3321 /* Store updated values */ 3322 ldcp->lane_in.ver_major = ver_pkt->ver_major; 3323 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 3324 3325 3326 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 3327 vsw_next_milestone(ldcp); 3328 3329 break; 3330 3331 case VIO_SUBTYPE_NACK: 3332 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 3333 3334 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 3335 return; 3336 3337 /* 3338 * If our peer sent us a NACK with the ver fields set to 3339 * zero then there is nothing more we can do. Otherwise see 3340 * if we support either the version suggested, or a lesser 3341 * one. 3342 */ 3343 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 3344 DERR(vswp, "%s: peer unable to negotiate any " 3345 "further.", __func__); 3346 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 3347 vsw_next_milestone(ldcp); 3348 return; 3349 } 3350 3351 /* 3352 * Check to see if we support this major version or 3353 * a lower one. If we don't then maj/min will be set 3354 * to zero. 3355 */ 3356 (void) vsw_supported_version(ver_pkt); 3357 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 3358 /* Nothing more we can do */ 3359 DERR(vswp, "%s: version negotiation failed.\n", 3360 __func__); 3361 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 3362 vsw_next_milestone(ldcp); 3363 } else { 3364 /* found a supported major version */ 3365 ldcp->lane_out.ver_major = ver_pkt->ver_major; 3366 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 3367 3368 D2(vswp, "%s: resending with updated values (%x, %x)", 3369 __func__, ver_pkt->ver_major, 3370 ver_pkt->ver_minor); 3371 3372 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 3373 ver_pkt->tag.vio_sid = ldcp->local_session; 3374 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3375 3376 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 3377 3378 vsw_send_msg(ldcp, (void *)ver_pkt, 3379 sizeof (vio_ver_msg_t)); 3380 3381 vsw_next_milestone(ldcp); 3382 3383 } 3384 break; 3385 3386 default: 3387 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3388 ver_pkt->tag.vio_subtype); 3389 } 3390 3391 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 3392 } 3393 3394 /* 3395 * Process an attribute packet. 
We can end up here either because our peer 3396 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 3397 * peer has sent us an attribute INFO message 3398 * 3399 * If its an ACK we then move to the next stage of the handshake which 3400 * is to send our descriptor ring info to our peer. If its a NACK then 3401 * there is nothing more we can (currently) do. 3402 * 3403 * If we get a valid/acceptable INFO packet (and we have already negotiated 3404 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 3405 * NACK back and reset channel state to INACTIV. 3406 * 3407 * FUTURE: in time we will probably negotiate over attributes, but for 3408 * the moment unacceptable attributes are regarded as a fatal error. 3409 * 3410 */ 3411 void 3412 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 3413 { 3414 vnet_attr_msg_t *attr_pkt; 3415 vsw_t *vswp = ldcp->ldc_vswp; 3416 vsw_port_t *port = ldcp->ldc_port; 3417 uint64_t macaddr = 0; 3418 int i; 3419 3420 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3421 3422 /* 3423 * We know this is a ctrl/attr packet so 3424 * cast it into the correct structure. 3425 */ 3426 attr_pkt = (vnet_attr_msg_t *)pkt; 3427 3428 switch (attr_pkt->tag.vio_subtype) { 3429 case VIO_SUBTYPE_INFO: 3430 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3431 3432 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 3433 return; 3434 3435 /* 3436 * If the attributes are unacceptable then we NACK back. 3437 */ 3438 if (vsw_check_attr(attr_pkt, ldcp->ldc_port)) { 3439 3440 DERR(vswp, "%s (chan %d): invalid attributes", 3441 __func__, ldcp->ldc_id); 3442 3443 vsw_free_lane_resources(ldcp, INBOUND); 3444 3445 attr_pkt->tag.vio_sid = ldcp->local_session; 3446 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3447 3448 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 3449 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 3450 vsw_send_msg(ldcp, (void *)attr_pkt, 3451 sizeof (vnet_attr_msg_t)); 3452 3453 vsw_next_milestone(ldcp); 3454 return; 3455 } 3456 3457 /* 3458 * Otherwise store attributes for this lane and update 3459 * lane state. 
3460 */ 3461 ldcp->lane_in.mtu = attr_pkt->mtu; 3462 ldcp->lane_in.addr = attr_pkt->addr; 3463 ldcp->lane_in.addr_type = attr_pkt->addr_type; 3464 ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode; 3465 ldcp->lane_in.ack_freq = attr_pkt->ack_freq; 3466 3467 macaddr = ldcp->lane_in.addr; 3468 for (i = ETHERADDRL - 1; i >= 0; i--) { 3469 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 3470 macaddr >>= 8; 3471 } 3472 3473 /* create the fdb entry for this port/mac address */ 3474 (void) vsw_add_fdb(vswp, port); 3475 3476 /* setup device specifc xmit routines */ 3477 mutex_enter(&port->tx_lock); 3478 if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) { 3479 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 3480 port->transmit = vsw_dringsend; 3481 } else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) { 3482 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 3483 vsw_create_privring(ldcp); 3484 port->transmit = vsw_descrsend; 3485 } 3486 mutex_exit(&port->tx_lock); 3487 3488 attr_pkt->tag.vio_sid = ldcp->local_session; 3489 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3490 3491 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 3492 3493 ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT; 3494 3495 vsw_send_msg(ldcp, (void *)attr_pkt, 3496 sizeof (vnet_attr_msg_t)); 3497 3498 vsw_next_milestone(ldcp); 3499 break; 3500 3501 case VIO_SUBTYPE_ACK: 3502 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3503 3504 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 3505 return; 3506 3507 ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV; 3508 vsw_next_milestone(ldcp); 3509 break; 3510 3511 case VIO_SUBTYPE_NACK: 3512 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3513 3514 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 3515 return; 3516 3517 ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV; 3518 vsw_next_milestone(ldcp); 3519 break; 3520 3521 default: 3522 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3523 attr_pkt->tag.vio_subtype); 3524 } 3525 3526 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3527 } 3528 3529 /* 3530 * Process a dring info packet. We can end up here either because our peer 3531 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 3532 * peer has sent us a dring INFO message. 3533 * 3534 * If we get a valid/acceptable INFO packet (and we have already negotiated 3535 * a version) we ACK back and update the lane state, otherwise we NACK back. 3536 * 3537 * FUTURE: nothing to stop client from sending us info on multiple dring's 3538 * but for the moment we will just use the first one we are given. 3539 * 3540 */ 3541 void 3542 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 3543 { 3544 vio_dring_reg_msg_t *dring_pkt; 3545 vsw_t *vswp = ldcp->ldc_vswp; 3546 ldc_mem_info_t minfo; 3547 dring_info_t *dp, *dbp; 3548 int dring_found = 0; 3549 3550 /* 3551 * We know this is a ctrl/dring packet so 3552 * cast it into the correct structure. 3553 */ 3554 dring_pkt = (vio_dring_reg_msg_t *)pkt; 3555 3556 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3557 3558 switch (dring_pkt->tag.vio_subtype) { 3559 case VIO_SUBTYPE_INFO: 3560 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3561 3562 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3563 return; 3564 3565 /* 3566 * If the dring params are unacceptable then we NACK back. 
3567 */ 3568 if (vsw_check_dring_info(dring_pkt)) { 3569 3570 DERR(vswp, "%s (%lld): invalid dring info", 3571 __func__, ldcp->ldc_id); 3572 3573 vsw_free_lane_resources(ldcp, INBOUND); 3574 3575 dring_pkt->tag.vio_sid = ldcp->local_session; 3576 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3577 3578 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 3579 3580 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 3581 3582 vsw_send_msg(ldcp, (void *)dring_pkt, 3583 sizeof (vio_dring_reg_msg_t)); 3584 3585 vsw_next_milestone(ldcp); 3586 return; 3587 } 3588 3589 /* 3590 * Otherwise, attempt to map in the dring using the 3591 * cookie. If that succeeds we send back a unique dring 3592 * identifier that the sending side will use in future 3593 * to refer to this descriptor ring. 3594 */ 3595 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 3596 3597 dp->num_descriptors = dring_pkt->num_descriptors; 3598 dp->descriptor_size = dring_pkt->descriptor_size; 3599 dp->options = dring_pkt->options; 3600 dp->ncookies = dring_pkt->ncookies; 3601 3602 /* 3603 * Note: should only get one cookie. Enforced in 3604 * the ldc layer. 3605 */ 3606 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 3607 sizeof (ldc_mem_cookie_t)); 3608 3609 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 3610 dp->num_descriptors, dp->descriptor_size); 3611 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 3612 dp->options, dp->ncookies); 3613 3614 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 3615 dp->ncookies, dp->num_descriptors, 3616 dp->descriptor_size, LDC_SHADOW_MAP, 3617 &(dp->handle))) != 0) { 3618 3619 DERR(vswp, "%s: dring_map failed\n", __func__); 3620 3621 kmem_free(dp, sizeof (dring_info_t)); 3622 vsw_free_lane_resources(ldcp, INBOUND); 3623 3624 dring_pkt->tag.vio_sid = ldcp->local_session; 3625 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3626 3627 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 3628 3629 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 3630 vsw_send_msg(ldcp, (void *)dring_pkt, 3631 sizeof (vio_dring_reg_msg_t)); 3632 3633 vsw_next_milestone(ldcp); 3634 return; 3635 } 3636 3637 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 3638 3639 DERR(vswp, "%s: dring_addr failed\n", __func__); 3640 3641 kmem_free(dp, sizeof (dring_info_t)); 3642 vsw_free_lane_resources(ldcp, INBOUND); 3643 3644 dring_pkt->tag.vio_sid = ldcp->local_session; 3645 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3646 3647 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 3648 3649 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 3650 vsw_send_msg(ldcp, (void *)dring_pkt, 3651 sizeof (vio_dring_reg_msg_t)); 3652 3653 vsw_next_milestone(ldcp); 3654 return; 3655 } else { 3656 /* store the address of the pub part of ring */ 3657 dp->pub_addr = minfo.vaddr; 3658 } 3659 3660 /* no private section as we are importing */ 3661 dp->priv_addr = NULL; 3662 3663 /* 3664 * Using simple mono increasing int for ident at 3665 * the moment. 3666 */ 3667 dp->ident = ldcp->next_ident; 3668 ldcp->next_ident++; 3669 3670 dp->end_idx = 0; 3671 dp->next = NULL; 3672 3673 /* 3674 * Link it onto the end of the list of drings 3675 * for this lane. 
3676 */ 3677 if (ldcp->lane_in.dringp == NULL) { 3678 D2(vswp, "%s: adding first INBOUND dring", __func__); 3679 ldcp->lane_in.dringp = dp; 3680 } else { 3681 dbp = ldcp->lane_in.dringp; 3682 3683 while (dbp->next != NULL) 3684 dbp = dbp->next; 3685 3686 dbp->next = dp; 3687 } 3688 3689 /* acknowledge it */ 3690 dring_pkt->tag.vio_sid = ldcp->local_session; 3691 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3692 dring_pkt->dring_ident = dp->ident; 3693 3694 vsw_send_msg(ldcp, (void *)dring_pkt, 3695 sizeof (vio_dring_reg_msg_t)); 3696 3697 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 3698 vsw_next_milestone(ldcp); 3699 break; 3700 3701 case VIO_SUBTYPE_ACK: 3702 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3703 3704 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 3705 return; 3706 3707 /* 3708 * Peer is acknowledging our dring info and will have 3709 * sent us a dring identifier which we will use to 3710 * refer to this ring w.r.t. our peer. 3711 */ 3712 dp = ldcp->lane_out.dringp; 3713 if (dp != NULL) { 3714 /* 3715 * Find the ring this ident should be associated 3716 * with. 3717 */ 3718 if (vsw_dring_match(dp, dring_pkt)) { 3719 dring_found = 1; 3720 3721 } else while (dp != NULL) { 3722 if (vsw_dring_match(dp, dring_pkt)) { 3723 dring_found = 1; 3724 break; 3725 } 3726 dp = dp->next; 3727 } 3728 3729 if (dring_found == 0) { 3730 DERR(NULL, "%s: unrecognised ring cookie", 3731 __func__); 3732 vsw_restart_handshake(ldcp); 3733 return; 3734 } 3735 3736 } else { 3737 DERR(vswp, "%s: DRING ACK received but no drings " 3738 "allocated", __func__); 3739 vsw_restart_handshake(ldcp); 3740 return; 3741 } 3742 3743 /* store ident */ 3744 dp->ident = dring_pkt->dring_ident; 3745 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 3746 vsw_next_milestone(ldcp); 3747 break; 3748 3749 case VIO_SUBTYPE_NACK: 3750 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3751 3752 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 3753 return; 3754 3755 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 3756 vsw_next_milestone(ldcp); 3757 break; 3758 3759 default: 3760 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3761 dring_pkt->tag.vio_subtype); 3762 } 3763 3764 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3765 } 3766 3767 /* 3768 * Process a request from peer to unregister a dring. 3769 * 3770 * For the moment we just restart the handshake if our 3771 * peer endpoint attempts to unregister a dring. 3772 */ 3773 void 3774 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 3775 { 3776 vsw_t *vswp = ldcp->ldc_vswp; 3777 vio_dring_unreg_msg_t *dring_pkt; 3778 3779 /* 3780 * We know this is a ctrl/dring packet so 3781 * cast it into the correct structure. 
3782 */ 3783 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 3784 3785 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3786 3787 switch (dring_pkt->tag.vio_subtype) { 3788 case VIO_SUBTYPE_INFO: 3789 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3790 3791 DWARN(vswp, "%s: restarting handshake..", __func__); 3792 vsw_restart_handshake(ldcp); 3793 break; 3794 3795 case VIO_SUBTYPE_ACK: 3796 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3797 3798 DWARN(vswp, "%s: restarting handshake..", __func__); 3799 vsw_restart_handshake(ldcp); 3800 break; 3801 3802 case VIO_SUBTYPE_NACK: 3803 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3804 3805 DWARN(vswp, "%s: restarting handshake..", __func__); 3806 vsw_restart_handshake(ldcp); 3807 break; 3808 3809 default: 3810 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3811 dring_pkt->tag.vio_subtype); 3812 vsw_restart_handshake(ldcp); 3813 } 3814 3815 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3816 } 3817 3818 #define SND_MCST_NACK(ldcp, pkt) \ 3819 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3820 pkt->tag.vio_sid = ldcp->local_session; \ 3821 vsw_send_msg(ldcp, (void *)pkt, sizeof (vnet_mcast_msg_t)); 3822 3823 /* 3824 * Process a multicast request from a vnet. 3825 * 3826 * Vnet's specify a multicast address that they are interested in. This 3827 * address is used as a key into the hash table which forms the multicast 3828 * forwarding database (mFDB). 3829 * 3830 * The table keys are the multicast addresses, while the table entries 3831 * are pointers to lists of ports which wish to receive packets for the 3832 * specified multicast address. 3833 * 3834 * When a multicast packet is being switched we use the address as a key 3835 * into the hash table, and then walk the appropriate port list forwarding 3836 * the pkt to each port in turn. 3837 * 3838 * If a vnet is no longer interested in a particular multicast grouping 3839 * we simply find the correct location in the hash table and then delete 3840 * the relevant port from the port list. 3841 * 3842 * To deal with the case whereby a port is being deleted without first 3843 * removing itself from the lists in the hash table, we maintain a list 3844 * of multicast addresses the port has registered an interest in, within 3845 * the port structure itself. We then simply walk that list of addresses 3846 * using them as keys into the hash table and remove the port from the 3847 * appropriate lists. 3848 */ 3849 static void 3850 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 3851 { 3852 vnet_mcast_msg_t *mcst_pkt; 3853 vsw_port_t *port = ldcp->ldc_port; 3854 vsw_t *vswp = ldcp->ldc_vswp; 3855 int i; 3856 3857 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3858 3859 /* 3860 * We know this is a ctrl/mcast packet so 3861 * cast it into the correct structure. 3862 */ 3863 mcst_pkt = (vnet_mcast_msg_t *)pkt; 3864 3865 switch (mcst_pkt->tag.vio_subtype) { 3866 case VIO_SUBTYPE_INFO: 3867 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3868 3869 /* 3870 * Check if in correct state to receive a multicast 3871 * message (i.e. handshake complete). If not reset 3872 * the handshake. 3873 */ 3874 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 3875 return; 3876 3877 /* 3878 * Before attempting to add or remove address check 3879 * that they are valid multicast addresses. 3880 * If not, then NACK back. 
3881 */ 3882 for (i = 0; i < mcst_pkt->count; i++) { 3883 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3884 DERR(vswp, "%s: invalid multicast address", 3885 __func__); 3886 SND_MCST_NACK(ldcp, mcst_pkt); 3887 return; 3888 } 3889 } 3890 3891 /* 3892 * Now add/remove the addresses. If this fails we 3893 * NACK back. 3894 */ 3895 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3896 SND_MCST_NACK(ldcp, mcst_pkt); 3897 return; 3898 } 3899 3900 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3901 mcst_pkt->tag.vio_sid = ldcp->local_session; 3902 3903 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3904 3905 vsw_send_msg(ldcp, (void *)mcst_pkt, 3906 sizeof (vnet_mcast_msg_t)); 3907 break; 3908 3909 case VIO_SUBTYPE_ACK: 3910 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3911 3912 /* 3913 * We shouldn't ever get a multicast ACK message as 3914 * at the moment we never request multicast addresses 3915 * to be set on some other device. This may change in 3916 * the future if we have cascading switches. 3917 */ 3918 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3919 return; 3920 3921 /* Do nothing */ 3922 break; 3923 3924 case VIO_SUBTYPE_NACK: 3925 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3926 3927 /* 3928 * We shouldn't get a multicast NACK packet for the 3929 * same reasons as we shouldn't get a ACK packet. 3930 */ 3931 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3932 return; 3933 3934 /* Do nothing */ 3935 break; 3936 3937 default: 3938 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3939 mcst_pkt->tag.vio_subtype); 3940 } 3941 3942 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3943 } 3944 3945 static void 3946 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 3947 { 3948 vio_rdx_msg_t *rdx_pkt; 3949 vsw_t *vswp = ldcp->ldc_vswp; 3950 3951 /* 3952 * We know this is a ctrl/rdx packet so 3953 * cast it into the correct structure. 3954 */ 3955 rdx_pkt = (vio_rdx_msg_t *)pkt; 3956 3957 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3958 3959 switch (rdx_pkt->tag.vio_subtype) { 3960 case VIO_SUBTYPE_INFO: 3961 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3962 3963 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_INFO_RECV)) 3964 return; 3965 3966 rdx_pkt->tag.vio_sid = ldcp->local_session; 3967 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3968 3969 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3970 3971 ldcp->lane_in.lstate |= VSW_RDX_ACK_SENT; 3972 3973 vsw_send_msg(ldcp, (void *)rdx_pkt, 3974 sizeof (vio_rdx_msg_t)); 3975 3976 vsw_next_milestone(ldcp); 3977 break; 3978 3979 case VIO_SUBTYPE_ACK: 3980 /* 3981 * Should be handled in-band by callback handler. 
3982 */ 3983 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 3984 vsw_restart_handshake(ldcp); 3985 break; 3986 3987 case VIO_SUBTYPE_NACK: 3988 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3989 3990 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_NACK_RECV)) 3991 return; 3992 3993 ldcp->lane_out.lstate |= VSW_RDX_NACK_RECV; 3994 vsw_next_milestone(ldcp); 3995 break; 3996 3997 default: 3998 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3999 rdx_pkt->tag.vio_subtype); 4000 } 4001 4002 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4003 } 4004 4005 static void 4006 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t tag) 4007 { 4008 uint16_t env = tag.vio_subtype_env; 4009 vsw_t *vswp = ldcp->ldc_vswp; 4010 4011 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4012 4013 /* session id check */ 4014 if (ldcp->session_status & VSW_PEER_SESSION) { 4015 if (ldcp->peer_session != tag.vio_sid) { 4016 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 4017 __func__, ldcp->ldc_id, tag.vio_sid); 4018 vsw_restart_handshake(ldcp); 4019 return; 4020 } 4021 } 4022 4023 /* 4024 * It is an error for us to be getting data packets 4025 * before the handshake has completed. 4026 */ 4027 if (ldcp->hphase != VSW_MILESTONE4) { 4028 DERR(vswp, "%s: got data packet before handshake complete " 4029 "hphase %d (%x: %x)", __func__, ldcp->hphase, 4030 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 4031 DUMP_FLAGS(ldcp->lane_in.lstate); 4032 DUMP_FLAGS(ldcp->lane_out.lstate); 4033 vsw_restart_handshake(ldcp); 4034 return; 4035 } 4036 4037 /* 4038 * Switch on vio_subtype envelope, then let lower routines 4039 * decide if its an INFO, ACK or NACK packet. 4040 */ 4041 if (env == VIO_DRING_DATA) { 4042 vsw_process_data_dring_pkt(ldcp, dpkt); 4043 } else if (env == VIO_PKT_DATA) { 4044 vsw_process_data_raw_pkt(ldcp, dpkt); 4045 } else if (env == VIO_DESC_DATA) { 4046 vsw_process_data_ibnd_pkt(ldcp, dpkt); 4047 } else { 4048 DERR(vswp, "%s : unknown vio_subtype_env (%x)\n", 4049 __func__, env); 4050 } 4051 4052 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4053 } 4054 4055 #define SND_DRING_NACK(ldcp, pkt) \ 4056 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 4057 pkt->tag.vio_sid = ldcp->local_session; \ 4058 vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_dring_msg_t)); 4059 4060 static void 4061 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 4062 { 4063 vio_dring_msg_t *dring_pkt; 4064 vnet_public_desc_t *pub_addr = NULL; 4065 vsw_private_desc_t *priv_addr = NULL; 4066 dring_info_t *dp = NULL; 4067 vsw_t *vswp = ldcp->ldc_vswp; 4068 mblk_t *mp = NULL; 4069 mblk_t *bp = NULL; 4070 mblk_t *bpt = NULL; 4071 size_t nbytes = 0; 4072 size_t off = 0; 4073 uint64_t ncookies = 0; 4074 uint64_t chain = 0; 4075 uint64_t j, len, num; 4076 uint32_t start, end, datalen; 4077 int i, last_sync, rv; 4078 boolean_t ack_needed = B_FALSE; 4079 boolean_t sync_needed = B_TRUE; 4080 4081 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4082 4083 /* 4084 * We know this is a data/dring packet so 4085 * cast it into the correct structure. 4086 */ 4087 dring_pkt = (vio_dring_msg_t *)dpkt; 4088 4089 /* 4090 * Switch on the vio_subtype. If its INFO then we need to 4091 * process the data. If its an ACK we need to make sure 4092 * it makes sense (i.e did we send an earlier data/info), 4093 * and if its a NACK then we maybe attempt a retry. 
4094 */ 4095 switch (dring_pkt->tag.vio_subtype) { 4096 case VIO_SUBTYPE_INFO: 4097 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 4098 4099 if ((dp = vsw_ident2dring(&ldcp->lane_in, 4100 dring_pkt->dring_ident)) == NULL) { 4101 4102 DERR(vswp, "%s(%lld): unable to find dring from " 4103 "ident 0x%llx", __func__, ldcp->ldc_id, 4104 dring_pkt->dring_ident); 4105 4106 SND_DRING_NACK(ldcp, dring_pkt); 4107 return; 4108 } 4109 4110 start = end = 0; 4111 start = dring_pkt->start_idx; 4112 end = dring_pkt->end_idx; 4113 4114 D3(vswp, "%s(%lld): start index %ld : end %ld\n", 4115 __func__, ldcp->ldc_id, start, end); 4116 4117 /* basic sanity check */ 4118 len = dp->num_descriptors; 4119 if (end > len) { 4120 DERR(vswp, "%s(%lld): endpoint %lld outside ring" 4121 " length %lld", __func__, ldcp->ldc_id, 4122 end, len); 4123 4124 SND_DRING_NACK(ldcp, dring_pkt); 4125 return; 4126 } 4127 4128 /* sync data */ 4129 if ((rv = ldc_mem_dring_acquire(dp->handle, 4130 start, end)) != 0) { 4131 DERR(vswp, "%s(%lld): unable to acquire dring : err %d", 4132 __func__, ldcp->ldc_id, rv); 4133 return; 4134 } 4135 4136 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 4137 4138 j = num = 0; 4139 4140 /* calculate # descriptors taking into a/c wrap around */ 4141 num = end >= start ? end - start + 1: (len - start + 1) + end; 4142 4143 last_sync = start; 4144 4145 for (i = start; j < num; i = (i + 1) % len, j++) { 4146 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 4147 4148 /* 4149 * Data is padded to align on 8 byte boundary, 4150 * datalen is actual data length, i.e. minus that 4151 * padding. 4152 */ 4153 datalen = pub_addr->nbytes; 4154 4155 /* 4156 * Does peer wish us to ACK when we have finished 4157 * with this descriptor ? 4158 */ 4159 if (pub_addr->hdr.ack) 4160 ack_needed = B_TRUE; 4161 4162 D2(vswp, "%s(%lld): processing desc %lld at pos" 4163 " 0x%llx : dstate 0x%lx : datalen 0x%lx", 4164 __func__, ldcp->ldc_id, i, pub_addr, 4165 pub_addr->hdr.dstate, datalen); 4166 4167 /* 4168 * XXXX : Is it a fatal error to be told to 4169 * process a packet when the READY bit is not 4170 * set ? 4171 */ 4172 if (pub_addr->hdr.dstate != VIO_DESC_READY) { 4173 DERR(vswp, "%s(%d): descriptor %lld at pos " 4174 " 0x%llx not READY (0x%lx)", __func__, 4175 ldcp->ldc_id, i, pub_addr, 4176 pub_addr->hdr.dstate); 4177 4178 SND_DRING_NACK(ldcp, dring_pkt); 4179 (void) ldc_mem_dring_release(dp->handle, 4180 start, end); 4181 return; 4182 } 4183 4184 /* 4185 * Mark that we are starting to process descriptor. 4186 */ 4187 pub_addr->hdr.dstate = VIO_DESC_ACCEPTED; 4188 4189 /* 4190 * allocb(9F) returns an aligned data block. We 4191 * need to ensure that we ask ldc for an aligned 4192 * number of bytes also. 
4193 */
4194 nbytes = datalen;
4195 if (nbytes & 0x7) {
4196 off = 8 - (nbytes & 0x7);
4197 nbytes += off;
4198 }
4199 mp = allocb(nbytes, BPRI_MED);
4200 if (mp == NULL) {
4201 DERR(vswp, "%s(%lld): allocb failed",
4202 __func__, ldcp->ldc_id);
4203 (void) ldc_mem_dring_release(dp->handle,
4204 start, end);
4205 return;
4206 }
4207
4208 ncookies = pub_addr->ncookies;
4209 rv = ldc_mem_copy(ldcp->ldc_handle,
4210 (caddr_t)mp->b_rptr, 0, &nbytes,
4211 pub_addr->memcookie, ncookies,
4212 LDC_COPY_IN);
4213
4214 if (rv != 0) {
4215 DERR(vswp, "%s(%lld): unable to copy in "
4216 "data from %ld cookies", __func__,
4217 ldcp->ldc_id, ncookies);
4218 freemsg(mp);
4219 (void) ldc_mem_dring_release(dp->handle,
4220 start, end);
4221 return;
4222 } else {
4223 D2(vswp, "%s(%lld): copied in %ld bytes"
4224 " using %ld cookies", __func__,
4225 ldcp->ldc_id, nbytes, ncookies);
4226 }
4227
4228 /* point to the actual end of data */
4229 mp->b_wptr = mp->b_rptr + datalen;
4230
4231 /* build a chain of received packets */
4232 if (bp == NULL) {
4233 /* first pkt */
4234 bp = mp;
4235 bp->b_next = bp->b_prev = NULL;
4236 bpt = bp;
4237 chain = 1;
4238 } else {
4239 mp->b_next = NULL;
4240 mp->b_prev = bpt;
4241 bpt->b_next = mp;
4242 bpt = mp;
4243 chain++;
4244 }
4245
4246 /* mark we are finished with this descriptor */
4247 pub_addr->hdr.dstate = VIO_DESC_DONE;
4248
4249 /*
4250 * Send an ACK back to peer if requested, and sync
4251 * the rings up to this point so the remote side sees
4252 * the descriptor flag in a consistent state.
4253 */
4254 if (ack_needed) {
4255 if ((rv = ldc_mem_dring_release(
4256 dp->handle, last_sync, i)) != 0) {
4257 DERR(vswp, "%s(%lld): unable to sync"
4258 " from %d to %d", __func__,
4259 ldcp->ldc_id, last_sync, i);
4260 }
4261
4262 ack_needed = B_FALSE;
4263
4264 if (i == end)
4265 sync_needed = B_FALSE;
4266 else
4267 sync_needed = B_TRUE;
4268
4269 last_sync = (i + 1) % len;
4270
4271 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
4272 dring_pkt->tag.vio_sid = ldcp->local_session;
4273 vsw_send_msg(ldcp, (void *)dring_pkt,
4274 sizeof (vio_dring_msg_t));
4275 }
4276 }
4277
4278 if (sync_needed) {
4279 if ((rv = ldc_mem_dring_release(dp->handle,
4280 last_sync, end)) != 0) {
4281 DERR(vswp, "%s(%lld): unable to sync"
4282 " from %d to %d", __func__,
4283 ldcp->ldc_id, last_sync, end);
4284 }
4285 }
4286
4287 /* send the chain of packets to be switched */
4288 D3(vswp, "%s(%lld): switching chain of %ld msgs", __func__,
4289 ldcp->ldc_id, chain);
4290 vsw_switch_frame(vswp, bp, VSW_VNETPORT,
4291 ldcp->ldc_port, NULL);
4292
4293 break;
4294
4295 case VIO_SUBTYPE_ACK:
4296 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id);
4297 /*
4298 * Verify that the relevant descriptors are all
4299 * marked as DONE
4300 */
4301 if ((dp = vsw_ident2dring(&ldcp->lane_out,
4302 dring_pkt->dring_ident)) == NULL) {
4303 DERR(vswp, "%s: unknown ident in ACK", __func__);
4304 return;
4305 }
4306
4307 pub_addr = (vnet_public_desc_t *)dp->pub_addr;
4308 priv_addr = (vsw_private_desc_t *)dp->priv_addr;
4309
4310 start = end = 0;
4311 start = dring_pkt->start_idx;
4312 end = dring_pkt->end_idx;
4313 len = dp->num_descriptors;
4314
4315
4316 j = num = 0;
4317 /* calculate # descriptors taking into account wrap around */
4318 num = end >= start ?
end - start + 1: (len - start + 1) + end; 4319 4320 D2(vswp, "%s(%lld): start index %ld : end %ld : num %ld\n", 4321 __func__, ldcp->ldc_id, start, end, num); 4322 4323 for (i = start; j < num; i = (i + 1) % len, j++) { 4324 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 4325 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 4326 4327 if (pub_addr->hdr.dstate != VIO_DESC_DONE) { 4328 DERR(vswp, "%s: descriptor %lld at pos " 4329 " 0x%llx not DONE (0x%lx)\n", __func__, 4330 i, pub_addr, pub_addr->hdr.dstate); 4331 return; 4332 } else { 4333 /* clear all the fields */ 4334 bzero(priv_addr->datap, priv_addr->datalen); 4335 priv_addr->datalen = 0; 4336 4337 pub_addr->hdr.dstate = VIO_DESC_FREE; 4338 pub_addr->hdr.ack = 0; 4339 priv_addr->dstate = VIO_DESC_FREE; 4340 4341 D3(vswp, "clearing descp %d : pub state " 4342 "0x%llx : priv state 0x%llx", i, 4343 pub_addr->hdr.dstate, 4344 priv_addr->dstate); 4345 } 4346 } 4347 4348 break; 4349 4350 case VIO_SUBTYPE_NACK: 4351 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 4352 __func__, ldcp->ldc_id); 4353 /* 4354 * Something is badly wrong if we are getting NACK's 4355 * for our data pkts. So reset the channel. 4356 */ 4357 vsw_restart_handshake(ldcp); 4358 4359 break; 4360 4361 default: 4362 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 4363 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 4364 } 4365 4366 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4367 } 4368 4369 /* 4370 * VIO_PKT_DATA (a.k.a raw data mode ) 4371 * 4372 * Note - currently not supported. Do nothing. 4373 */ 4374 static void 4375 vsw_process_data_raw_pkt(vsw_ldc_t *ldcp, void *dpkt) 4376 { 4377 _NOTE(ARGUNUSED(dpkt)) 4378 4379 D1(NULL, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 4380 4381 DERR(NULL, "%s (%lld): currently not supported", 4382 __func__, ldcp->ldc_id); 4383 4384 D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 4385 } 4386 4387 #define SND_IBND_DESC_NACK(ldcp, pkt) \ 4388 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 4389 pkt->tag.vio_sid = ldcp->local_session; \ 4390 vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_ibnd_desc_t)); 4391 4392 /* 4393 * Process an in-band descriptor message (most likely from 4394 * OBP). 4395 */ 4396 static void 4397 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 4398 { 4399 vio_ibnd_desc_t *ibnd_desc; 4400 dring_info_t *dp = NULL; 4401 vsw_private_desc_t *priv_addr = NULL; 4402 vsw_t *vswp = ldcp->ldc_vswp; 4403 mblk_t *mp = NULL; 4404 size_t nbytes = 0; 4405 size_t off = 0; 4406 uint64_t idx = 0; 4407 uint32_t datalen = 0; 4408 uint64_t ncookies = 0; 4409 int rv; 4410 4411 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4412 4413 ibnd_desc = (vio_ibnd_desc_t *)pkt; 4414 4415 switch (ibnd_desc->hdr.tag.vio_subtype) { 4416 case VIO_SUBTYPE_INFO: 4417 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 4418 4419 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 4420 return; 4421 4422 /* 4423 * Data is padded to align on a 8 byte boundary, 4424 * nbytes is actual data length, i.e. minus that 4425 * padding. 4426 */ 4427 datalen = ibnd_desc->nbytes; 4428 4429 D2(vswp, "%s(%lld): processing inband desc : " 4430 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 4431 4432 ncookies = ibnd_desc->ncookies; 4433 4434 /* 4435 * allocb(9F) returns an aligned data block. We 4436 * need to ensure that we ask ldc for an aligned 4437 * number of bytes also. 
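 * (This is the same 8 byte round-up used in vsw_process_data_dring_pkt()
 * above; the LDC copy length is assumed to require this alignment.)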
4438 */
4439 nbytes = datalen;
4440 if (nbytes & 0x7) {
4441 off = 8 - (nbytes & 0x7);
4442 nbytes += off;
4443 }
4444
4445 mp = allocb(nbytes, BPRI_MED);
4446 if (mp == NULL) {
4447 DERR(vswp, "%s(%lld): allocb failed",
4448 __func__, ldcp->ldc_id);
4449 return;
4450 }
4451
4452 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr,
4453 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies,
4454 LDC_COPY_IN);
4455
4456 if (rv != 0) {
4457 DERR(vswp, "%s(%lld): unable to copy in data from "
4458 "%ld cookie(s)", __func__,
4459 ldcp->ldc_id, ncookies);
4460 freemsg(mp);
4461 return;
4462 } else {
4463 D2(vswp, "%s(%lld): copied in %ld bytes using %ld "
4464 "cookies", __func__, ldcp->ldc_id, nbytes,
4465 ncookies);
4466 }
4467
4468 /* point to the actual end of data */
4469 mp->b_wptr = mp->b_rptr + datalen;
4470
4471 /*
4472 * We ACK back every in-band descriptor message we process
4473 */
4474 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK;
4475 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session;
4476 vsw_send_msg(ldcp, (void *)ibnd_desc,
4477 sizeof (vio_ibnd_desc_t));
4478
4479 /* send the packet to be switched */
4480 vsw_switch_frame(vswp, mp, VSW_VNETPORT,
4481 ldcp->ldc_port, NULL);
4482
4483 break;
4484
4485 case VIO_SUBTYPE_ACK:
4486 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
4487
4488 /* Verify the ACK is valid */
4489 idx = ibnd_desc->hdr.desc_handle;
4490
4491 if (idx >= VSW_RING_NUM_EL) {
4492 cmn_err(CE_WARN, "%s: corrupted ACK received "
4493 "(idx %ld)", __func__, idx);
4494 return;
4495 }
4496
4497 if ((dp = ldcp->lane_out.dringp) == NULL) {
4498 DERR(vswp, "%s: no dring found", __func__);
4499 return;
4500 }
4501
4502 priv_addr = (vsw_private_desc_t *)dp->priv_addr;
4503
4504 /* move to correct location in ring */
4505 priv_addr += idx;
4506
4507 /*
4508 * When we sent the in-band message to our peer we
4509 * marked the copy in our private ring as READY. We now
4510 * check that the descriptor we are being ACK'ed for is in
4511 * fact READY, i.e. it is one we have shared with our peer.
4512 */
4513 if (priv_addr->dstate != VIO_DESC_READY) {
4514 cmn_err(CE_WARN, "%s: (%ld) desc at index %ld not "
4515 "READY (0x%lx)", __func__, ldcp->ldc_id, idx,
4516 priv_addr->dstate);
4517 cmn_err(CE_CONT, "%s: bound %d: ncookies %ld\n",
4518 __func__, priv_addr->bound,
4519 priv_addr->ncookies);
4520 cmn_err(CE_CONT, "datalen %ld\n", priv_addr->datalen);
4521 return;
4522 } else {
4523 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__,
4524 ldcp->ldc_id, idx);
4525
4526 /* release resources associated with sent msg */
4527 bzero(priv_addr->datap, priv_addr->datalen);
4528 priv_addr->datalen = 0;
4529 priv_addr->dstate = VIO_DESC_FREE;
4530 }
4531 break;
4532
4533 case VIO_SUBTYPE_NACK:
4534 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
4535
4536 /*
4537 * We should only get a NACK if our peer doesn't like
4538 * something about a message we have sent it. If this
4539 * happens we just release the resources associated with
4540 * the message. (We are relying on higher layers to decide
4541 * whether or not to resend.)
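 * (Releasing the resources below simply returns the private descriptor
 * to the FREE state, making it available to vsw_descrsend() again.)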
4542 */ 4543 4544 /* limit check */ 4545 idx = ibnd_desc->hdr.desc_handle; 4546 4547 if (idx >= VSW_RING_NUM_EL) { 4548 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 4549 __func__, idx); 4550 return; 4551 } 4552 4553 if ((dp = ldcp->lane_out.dringp) == NULL) { 4554 DERR(vswp, "%s: no dring found", __func__); 4555 return; 4556 } 4557 4558 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 4559 4560 /* move to correct location in ring */ 4561 priv_addr += idx; 4562 4563 /* release resources associated with sent msg */ 4564 bzero(priv_addr->datap, priv_addr->datalen); 4565 priv_addr->datalen = 0; 4566 priv_addr->dstate = VIO_DESC_FREE; 4567 4568 break; 4569 4570 default: 4571 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 4572 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 4573 } 4574 4575 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4576 } 4577 4578 static void 4579 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t tag) 4580 { 4581 _NOTE(ARGUNUSED(epkt)) 4582 4583 vsw_t *vswp = ldcp->ldc_vswp; 4584 uint16_t env = tag.vio_subtype_env; 4585 4586 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 4587 4588 /* 4589 * Error vio_subtypes have yet to be defined. So for 4590 * the moment we can't do anything. 4591 */ 4592 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 4593 4594 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 4595 } 4596 4597 /* 4598 * Switch the given ethernet frame when operating in layer 2 mode. 4599 * 4600 * vswp: pointer to the vsw instance 4601 * mp: pointer to chain of ethernet frame(s) to be switched 4602 * caller: identifies the source of this frame as: 4603 * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 4604 * 2. VSW_PHYSDEV - the physical ethernet device 4605 * 3. VSW_LOCALDEV - vsw configured as a virtual interface 4606 * arg: argument provided by the caller. 4607 * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 4608 * 2. for PHYSDEV - NULL 4609 * 3. for LOCALDEV - pointer to to this vsw_t(self) 4610 */ 4611 void 4612 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 4613 vsw_port_t *arg, mac_resource_handle_t mrh) 4614 { 4615 struct ether_header *ehp; 4616 vsw_port_t *port = NULL; 4617 mblk_t *bp, *ret_m; 4618 mblk_t *nmp = NULL; 4619 vsw_port_list_t *plist = &vswp->plist; 4620 4621 D1(vswp, "%s: enter (caller %d)", __func__, caller); 4622 4623 /* 4624 * PERF: rather than breaking up the chain here, scan it 4625 * to find all mblks heading to same destination and then 4626 * pass that sub-chain to the lower transmit functions. 4627 */ 4628 4629 /* process the chain of packets */ 4630 bp = mp; 4631 while (bp) { 4632 mp = bp; 4633 bp = bp->b_next; 4634 mp->b_next = mp->b_prev = NULL; 4635 ehp = (struct ether_header *)mp->b_rptr; 4636 4637 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 4638 __func__, MBLKSIZE(mp), MBLKL(mp)); 4639 4640 READ_ENTER(&vswp->if_lockrw); 4641 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 4642 /* 4643 * If destination is VSW_LOCALDEV (vsw as an eth 4644 * interface) and if the device is up & running, 4645 * send the packet up the stack on this host. 4646 * If the virtual interface is down, drop the packet. 
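 * (Note that a frame which both originated from and is addressed to
 * the local device is dropped outright, preventing it looping back.)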
4647 */
4648 if (caller != VSW_LOCALDEV) {
4649 if (vswp->if_state & VSW_IF_UP) {
4650 RW_EXIT(&vswp->if_lockrw);
4651 mac_rx(vswp->if_mh, mrh, mp);
4652 } else {
4653 RW_EXIT(&vswp->if_lockrw);
4654 /* Interface down, drop pkt */
4655 freemsg(mp);
4656 }
4657 } else {
4658 RW_EXIT(&vswp->if_lockrw);
4659 freemsg(mp);
4660 }
4661 continue;
4662 }
4663 RW_EXIT(&vswp->if_lockrw);
4664
4665 READ_ENTER(&plist->lockrw);
4666 port = vsw_lookup_fdb(vswp, ehp);
4667 if (port) {
4668 /*
4669 * Mark the port as in-use.
4670 */
4671 mutex_enter(&port->ref_lock);
4672 port->ref_cnt++;
4673 mutex_exit(&port->ref_lock);
4674 RW_EXIT(&plist->lockrw);
4675
4676 /*
4677 * If plumbed and in promisc mode then copy msg
4678 * and send up the stack.
4679 */
4680 READ_ENTER(&vswp->if_lockrw);
4681 if (VSW_U_P(vswp->if_state)) {
4682 RW_EXIT(&vswp->if_lockrw);
4683 nmp = copymsg(mp);
4684 if (nmp)
4685 mac_rx(vswp->if_mh, mrh, nmp);
4686 } else {
4687 RW_EXIT(&vswp->if_lockrw);
4688 }
4689
4690 /*
4691 * If the destination is in FDB, the packet
4692 * should be forwarded to the corresponding
4693 * vsw_port (connected to a vnet device -
4694 * VSW_VNETPORT)
4695 */
4696 (void) vsw_portsend(port, mp);
4697
4698 /*
4699 * Decrement use count in port and check if
4700 * should wake delete thread.
4701 */
4702 mutex_enter(&port->ref_lock);
4703 port->ref_cnt--;
4704 if (port->ref_cnt == 0)
4705 cv_signal(&port->ref_cv);
4706 mutex_exit(&port->ref_lock);
4707 } else {
4708 RW_EXIT(&plist->lockrw);
4709 /*
4710 * Destination not in FDB.
4711 *
4712 * If the destination is broadcast or
4713 * multicast forward the packet to all
4714 * (VNETPORTs, PHYSDEV, LOCALDEV),
4715 * except the caller.
4716 */
4717 if (IS_BROADCAST(ehp)) {
4718 D3(vswp, "%s: BROADCAST pkt", __func__);
4719 (void) vsw_forward_all(vswp, mp,
4720 caller, arg);
4721 } else if (IS_MULTICAST(ehp)) {
4722 D3(vswp, "%s: MULTICAST pkt", __func__);
4723 (void) vsw_forward_grp(vswp, mp,
4724 caller, arg);
4725 } else {
4726 /*
4727 * If the destination is unicast, and came
4728 * from either a logical network device or
4729 * the switch itself when it is plumbed, then
4730 * send it out on the physical device and also
4731 * up the stack if the logical interface is
4732 * in promiscuous mode.
4733 *
4734 * NOTE: The assumption here is that if we
4735 * cannot find the destination in our fdb, it is
4736 * a unicast address which came from either a
4737 * vnet or down the stack (when plumbed), so it
4738 * must be destined for an ethernet device
4739 * outside our ldoms.
4740 */
4741 if (caller == VSW_VNETPORT) {
4742 READ_ENTER(&vswp->if_lockrw);
4743 if (VSW_U_P(vswp->if_state)) {
4744 RW_EXIT(&vswp->if_lockrw);
4745 nmp = copymsg(mp);
4746 if (nmp)
4747 mac_rx(vswp->if_mh,
4748 mrh, nmp);
4749 } else {
4750 RW_EXIT(&vswp->if_lockrw);
4751 }
4752 if ((ret_m = vsw_tx_msg(vswp, mp))
4753 != NULL) {
4754 DERR(vswp, "%s: drop mblks to "
4755 "phys dev", __func__);
4756 freemsg(ret_m);
4757 }
4758
4759 } else if (caller == VSW_PHYSDEV) {
4760 /*
4761 * Pkt seen because card in promisc
4762 * mode. Send up stack if plumbed in
4763 * promisc mode, else drop it.
4764 */
4765 READ_ENTER(&vswp->if_lockrw);
4766 if (VSW_U_P(vswp->if_state)) {
4767 RW_EXIT(&vswp->if_lockrw);
4768 mac_rx(vswp->if_mh, mrh, mp);
4769 } else {
4770 RW_EXIT(&vswp->if_lockrw);
4771 freemsg(mp);
4772 }
4773
4774 } else if (caller == VSW_LOCALDEV) {
4775 /*
4776 * Pkt came down the stack, send out
4777 * over physical device.
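 * (vsw_tx_msg() returns any mblks it could not hand to the physical
 * MAC; the caller logs and frees them.)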
4778 */
4779 if ((ret_m = vsw_tx_msg(vswp, mp))
4780 != NULL) {
4781 DERR(vswp, "%s: drop mblks to "
4782 "phys dev", __func__);
4783 freemsg(ret_m);
4784 }
4785 }
4786 }
4787 }
4788 }
4789 D1(vswp, "%s: exit\n", __func__);
4790 }
4791
4792 /*
4793 * Switch ethernet frame when in layer 3 mode (i.e. using IP
4794 * layer to do the routing).
4795 *
4796 * There is a large amount of overlap between this function and
4797 * vsw_switch_l2_frame. At some stage we need to revisit and refactor
4798 * both these functions.
4799 */
4800 void
4801 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
4802 vsw_port_t *arg, mac_resource_handle_t mrh)
4803 {
4804 struct ether_header *ehp;
4805 vsw_port_t *port = NULL;
4806 mblk_t *bp = NULL;
4807 vsw_port_list_t *plist = &vswp->plist;
4808
4809 D1(vswp, "%s: enter (caller %d)", __func__, caller);
4810
4811 /*
4812 * In layer 3 mode should only ever be switching packets
4813 * between IP layer and vnet devices. So make sure that's
4814 * who is invoking us.
4815 */
4816 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) {
4817 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller);
4818 freemsgchain(mp);
4819 return;
4820 }
4821
4822 /* process the chain of packets */
4823 bp = mp;
4824 while (bp) {
4825 mp = bp;
4826 bp = bp->b_next;
4827 mp->b_next = mp->b_prev = NULL;
4828 ehp = (struct ether_header *)mp->b_rptr;
4829
4830 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
4831 __func__, MBLKSIZE(mp), MBLKL(mp));
4832
4833 READ_ENTER(&plist->lockrw);
4834 port = vsw_lookup_fdb(vswp, ehp);
4835 if (port) {
4836 /*
4837 * Mark port as in-use.
4838 */
4839 mutex_enter(&port->ref_lock);
4840 port->ref_cnt++;
4841 mutex_exit(&port->ref_lock);
4842 RW_EXIT(&plist->lockrw);
4843
4844 D2(vswp, "%s: sending to target port", __func__);
4845 (void) vsw_portsend(port, mp);
4846
4847 /*
4848 * Finished with port so decrement ref count and
4849 * check if should wake delete thread.
4850 */
4851 mutex_enter(&port->ref_lock);
4852 port->ref_cnt--;
4853 if (port->ref_cnt == 0)
4854 cv_signal(&port->ref_cv);
4855 mutex_exit(&port->ref_lock);
4856 } else {
4857 RW_EXIT(&plist->lockrw);
4858 /*
4859 * Destination not in FDB
4860 *
4861 * If the destination is broadcast or
4862 * multicast forward the packet to all
4863 * (VNETPORTs, PHYSDEV, LOCALDEV),
4864 * except the caller.
4865 */
4866 if (IS_BROADCAST(ehp)) {
4867 D2(vswp, "%s: BROADCAST pkt", __func__);
4868 (void) vsw_forward_all(vswp, mp,
4869 caller, arg);
4870 } else if (IS_MULTICAST(ehp)) {
4871 D2(vswp, "%s: MULTICAST pkt", __func__);
4872 (void) vsw_forward_grp(vswp, mp,
4873 caller, arg);
4874 } else {
4875 /*
4876 * Unicast pkt from vnet that we don't have
4877 * an FDB entry for, so must be destined for
4878 * the outside world. Attempt to send up to the
4879 * IP layer to allow it to deal with it.
4880 */
4881 if (caller == VSW_VNETPORT) {
4882 READ_ENTER(&vswp->if_lockrw);
4883 if (vswp->if_state & VSW_IF_UP) {
4884 RW_EXIT(&vswp->if_lockrw);
4885 D2(vswp, "%s: sending up",
4886 __func__);
4887 mac_rx(vswp->if_mh, mrh, mp);
4888 } else {
4889 RW_EXIT(&vswp->if_lockrw);
4890 /* Interface down, drop pkt */
4891 D2(vswp, "%s I/F down",
4892 __func__);
4893 freemsg(mp);
4894 }
4895 } else {
 /*
 * No FDB entry and frame did not come
 * from a vnet; nowhere to send it in
 * layer 3 mode, so drop it.
 */
 freemsg(mp);
 }
4896 }
4897 }
4898 }
4899
4900 D1(vswp, "%s: exit", __func__);
4901 }
4902
4903 /*
4904 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
4905 * except the caller (port on which frame arrived).
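 * In outline: (1) when in a layer 2 mode a frame that originated
 * inside the domain is duplicated out of the physical device, (2) a
 * frame arriving from a vnet or the physical device is copied up the
 * stack if the interface is plumbed, and (3) the frame is duplicated
 * to every VNETPORT except, for VNETPORT callers, the originating
 * port.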
4906 */ 4907 static int 4908 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 4909 { 4910 vsw_port_list_t *plist = &vswp->plist; 4911 vsw_port_t *portp; 4912 mblk_t *nmp = NULL; 4913 mblk_t *ret_m = NULL; 4914 int skip_port = 0; 4915 4916 D1(vswp, "vsw_forward_all: enter\n"); 4917 4918 /* 4919 * Broadcast message from inside ldoms so send to outside 4920 * world if in either of layer 2 modes. 4921 */ 4922 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 4923 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 4924 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 4925 4926 nmp = dupmsg(mp); 4927 if (nmp) { 4928 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 4929 DERR(vswp, "%s: dropping pkt(s) " 4930 "consisting of %ld bytes of data for" 4931 " physical device", __func__, MBLKL(ret_m)); 4932 freemsg(ret_m); 4933 } 4934 } 4935 } 4936 4937 if (caller == VSW_VNETPORT) 4938 skip_port = 1; 4939 4940 /* 4941 * Broadcast message from other vnet (layer 2 or 3) or outside 4942 * world (layer 2 only), send up stack if plumbed. 4943 */ 4944 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 4945 READ_ENTER(&vswp->if_lockrw); 4946 if (vswp->if_state & VSW_IF_UP) { 4947 RW_EXIT(&vswp->if_lockrw); 4948 nmp = copymsg(mp); 4949 if (nmp) 4950 mac_rx(vswp->if_mh, NULL, nmp); 4951 } else { 4952 RW_EXIT(&vswp->if_lockrw); 4953 } 4954 } 4955 4956 /* send it to all VNETPORTs */ 4957 READ_ENTER(&plist->lockrw); 4958 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 4959 D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 4960 /* 4961 * Caution ! - don't reorder these two checks as arg 4962 * will be NULL if the caller is PHYSDEV. skip_port is 4963 * only set if caller is VNETPORT. 4964 */ 4965 if ((skip_port) && (portp == arg)) 4966 continue; 4967 else { 4968 nmp = dupmsg(mp); 4969 if (nmp) { 4970 (void) vsw_portsend(portp, nmp); 4971 } else { 4972 DERR(vswp, "vsw_forward_all: nmp NULL"); 4973 } 4974 } 4975 } 4976 RW_EXIT(&plist->lockrw); 4977 4978 freemsg(mp); 4979 4980 D1(vswp, "vsw_forward_all: exit\n"); 4981 return (0); 4982 } 4983 4984 /* 4985 * Forward pkts to any devices or interfaces which have registered 4986 * an interest in them (i.e. multicast groups). 4987 */ 4988 static int 4989 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 4990 { 4991 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 4992 mfdb_ent_t *entp = NULL; 4993 mfdb_ent_t *tpp = NULL; 4994 vsw_port_t *port; 4995 uint64_t key = 0; 4996 mblk_t *nmp = NULL; 4997 mblk_t *ret_m = NULL; 4998 boolean_t check_if = B_TRUE; 4999 5000 /* 5001 * Convert address to hash table key 5002 */ 5003 KEY_HASH(key, ehp->ether_dhost); 5004 5005 D1(vswp, "%s: key 0x%llx", __func__, key); 5006 5007 /* 5008 * If pkt came from either a vnet or down the stack (if we are 5009 * plumbed) and we are in layer 2 mode, then we send the pkt out 5010 * over the physical adapter, and then check to see if any other 5011 * vnets are interested in it. 
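 * Interested parties are found by hashing the destination address
 * into the multicast FDB (mfdb) and walking the resulting chain of
 * mfdb_ent_t entries under the mfdbrw read lock.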
5012 */ 5013 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 5014 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 5015 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 5016 nmp = dupmsg(mp); 5017 if (nmp) { 5018 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 5019 DERR(vswp, "%s: dropping pkt(s) " 5020 "consisting of %ld bytes of " 5021 "data for physical device", 5022 __func__, MBLKL(ret_m)); 5023 freemsg(ret_m); 5024 } 5025 } 5026 } 5027 5028 READ_ENTER(&vswp->mfdbrw); 5029 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 5030 (mod_hash_val_t *)&entp) != 0) { 5031 D3(vswp, "%s: no table entry found for addr 0x%llx", 5032 __func__, key); 5033 } else { 5034 /* 5035 * Send to list of devices associated with this address... 5036 */ 5037 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 5038 5039 /* dont send to ourselves */ 5040 if ((caller == VSW_VNETPORT) && 5041 (tpp->d_addr == (void *)arg)) { 5042 port = (vsw_port_t *)tpp->d_addr; 5043 D3(vswp, "%s: not sending to ourselves" 5044 " : port %d", __func__, 5045 port->p_instance); 5046 continue; 5047 5048 } else if ((caller == VSW_LOCALDEV) && 5049 (tpp->d_type == VSW_LOCALDEV)) { 5050 D3(vswp, "%s: not sending back up stack", 5051 __func__); 5052 continue; 5053 } 5054 5055 if (tpp->d_type == VSW_VNETPORT) { 5056 port = (vsw_port_t *)tpp->d_addr; 5057 D3(vswp, "%s: sending to port %ld for " 5058 " addr 0x%llx", __func__, 5059 port->p_instance, key); 5060 5061 nmp = dupmsg(mp); 5062 if (nmp) 5063 (void) vsw_portsend(port, nmp); 5064 } else { 5065 if (vswp->if_state & VSW_IF_UP) { 5066 nmp = copymsg(mp); 5067 if (nmp) 5068 mac_rx(vswp->if_mh, NULL, nmp); 5069 check_if = B_FALSE; 5070 D3(vswp, "%s: sending up stack" 5071 " for addr 0x%llx", __func__, 5072 key); 5073 } 5074 } 5075 } 5076 } 5077 5078 RW_EXIT(&vswp->mfdbrw); 5079 5080 /* 5081 * If the pkt came from either a vnet or from physical device, 5082 * and if we havent already sent the pkt up the stack then we 5083 * check now if we can/should (i.e. the interface is plumbed 5084 * and in promisc mode). 5085 */ 5086 if ((check_if) && 5087 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 5088 READ_ENTER(&vswp->if_lockrw); 5089 if (VSW_U_P(vswp->if_state)) { 5090 RW_EXIT(&vswp->if_lockrw); 5091 D3(vswp, "%s: (caller %d) finally sending up stack" 5092 " for addr 0x%llx", __func__, caller, key); 5093 nmp = copymsg(mp); 5094 if (nmp) 5095 mac_rx(vswp->if_mh, NULL, nmp); 5096 } else { 5097 RW_EXIT(&vswp->if_lockrw); 5098 } 5099 } 5100 5101 freemsg(mp); 5102 5103 D1(vswp, "%s: exit", __func__); 5104 5105 return (0); 5106 } 5107 5108 /* transmit the packet over the given port */ 5109 static int 5110 vsw_portsend(vsw_port_t *port, mblk_t *mp) 5111 { 5112 vsw_ldc_list_t *ldcl = &port->p_ldclist; 5113 vsw_ldc_t *ldcp; 5114 int status = 0; 5115 5116 5117 READ_ENTER(&ldcl->lockrw); 5118 /* 5119 * Note for now, we have a single channel. 5120 */ 5121 ldcp = ldcl->head; 5122 if (ldcp == NULL) { 5123 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 5124 freemsg(mp); 5125 RW_EXIT(&ldcl->lockrw); 5126 return (1); 5127 } 5128 5129 /* 5130 * Send the message out using the appropriate 5131 * transmit function which will free mblock when it 5132 * is finished with it. 
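 * (port->transmit is expected to have been pointed at either
 * vsw_dringsend() or vsw_descrsend() by the handshake code, depending
 * on the transfer mode negotiated with the peer.)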
5133 */ 5134 mutex_enter(&port->tx_lock); 5135 if (port->transmit != NULL) 5136 status = (*port->transmit)(ldcp, mp); 5137 else { 5138 freemsg(mp); 5139 } 5140 mutex_exit(&port->tx_lock); 5141 5142 RW_EXIT(&ldcl->lockrw); 5143 5144 return (status); 5145 } 5146 5147 /* 5148 * Send packet out via descriptor ring to a logical device. 5149 */ 5150 static int 5151 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 5152 { 5153 vio_dring_msg_t dring_pkt; 5154 dring_info_t *dp = NULL; 5155 vsw_private_desc_t *priv_desc = NULL; 5156 vsw_t *vswp = ldcp->ldc_vswp; 5157 mblk_t *bp; 5158 size_t n, size; 5159 caddr_t bufp; 5160 int idx; 5161 int status = LDC_TX_SUCCESS; 5162 5163 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 5164 5165 /* TODO: make test a macro */ 5166 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 5167 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 5168 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 5169 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 5170 ldcp->lane_out.lstate); 5171 freemsg(mp); 5172 return (LDC_TX_FAILURE); 5173 } 5174 5175 /* 5176 * Note - using first ring only, this may change 5177 * in the future. 5178 */ 5179 if ((dp = ldcp->lane_out.dringp) == NULL) { 5180 DERR(vswp, "%s(%lld): no dring for outbound lane on" 5181 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 5182 freemsg(mp); 5183 return (LDC_TX_FAILURE); 5184 } 5185 5186 mutex_enter(&dp->dlock); 5187 5188 size = msgsize(mp); 5189 if (size > (size_t)ETHERMAX) { 5190 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 5191 ldcp->ldc_id, size); 5192 status = LDC_TX_FAILURE; 5193 goto vsw_dringsend_free_exit; 5194 } 5195 5196 /* 5197 * Find a free descriptor 5198 * 5199 * Note: for the moment we are assuming that we will only 5200 * have one dring going from the switch to each of its 5201 * peers. This may change in the future. 5202 */ 5203 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 5204 DERR(vswp, "%s(%lld): no descriptor available for ring " 5205 "at 0x%llx", __func__, ldcp->ldc_id, dp); 5206 5207 /* nothing more we can do */ 5208 status = LDC_TX_NORESOURCES; 5209 goto vsw_dringsend_free_exit; 5210 } else { 5211 D2(vswp, "%s(%lld): free private descriptor found at pos " 5212 "%ld addr 0x%llx\n", __func__, ldcp->ldc_id, idx, 5213 priv_desc); 5214 } 5215 5216 /* copy data into the descriptor */ 5217 bufp = priv_desc->datap; 5218 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 5219 n = MBLKL(bp); 5220 bcopy(bp->b_rptr, bufp, n); 5221 bufp += n; 5222 } 5223 5224 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 5225 priv_desc->dstate = VIO_DESC_READY; 5226 5227 /* 5228 * Copy relevant sections of private descriptor 5229 * to public section 5230 */ 5231 vsw_dring_priv2pub(priv_desc); 5232 5233 /* 5234 * Send a vio_dring_msg to peer to prompt them to read 5235 * the updated descriptor ring. 5236 */ 5237 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 5238 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 5239 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 5240 dring_pkt.tag.vio_sid = ldcp->local_session; 5241 5242 /* Note - for now using first ring */ 5243 dring_pkt.dring_ident = dp->ident; 5244 5245 /* 5246 * Access to the seq_num is implicitly protected by the 5247 * fact that we have only one dring associated with the 5248 * lane currently and we hold the associated dring lock. 
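 * (If more than one dring per lane is ever supported, seq_num will
 * need its own explicit protection.)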
5249 */ 5250 dring_pkt.seq_num = ldcp->lane_out.seq_num++; 5251 5252 /* Note - only updating single descrip at time at the moment */ 5253 dring_pkt.start_idx = idx; 5254 dring_pkt.end_idx = idx; 5255 5256 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 5257 ldcp->ldc_id, dp, dring_pkt.dring_ident); 5258 D3(vswp, "%s(%lld): start %lld : end %lld : seq %lld\n", __func__, 5259 ldcp->ldc_id, dring_pkt.start_idx, dring_pkt.end_idx, 5260 dring_pkt.seq_num); 5261 5262 vsw_send_msg(ldcp, (void *)&dring_pkt, sizeof (vio_dring_msg_t)); 5263 5264 vsw_dringsend_free_exit: 5265 5266 mutex_exit(&dp->dlock); 5267 5268 /* free the message block */ 5269 freemsg(mp); 5270 5271 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 5272 return (status); 5273 } 5274 5275 /* 5276 * Send an in-band descriptor message over ldc. 5277 */ 5278 static int 5279 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 5280 { 5281 vsw_t *vswp = ldcp->ldc_vswp; 5282 vio_ibnd_desc_t ibnd_msg; 5283 vsw_private_desc_t *priv_desc = NULL; 5284 dring_info_t *dp = NULL; 5285 size_t n, size = 0; 5286 caddr_t bufp; 5287 mblk_t *bp; 5288 int idx, i; 5289 int status = LDC_TX_SUCCESS; 5290 static int warn_msg = 1; 5291 5292 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5293 5294 ASSERT(mp != NULL); 5295 5296 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 5297 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 5298 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 5299 __func__, ldcp->ldc_id, ldcp->ldc_status, 5300 ldcp->lane_out.lstate); 5301 freemsg(mp); 5302 return (LDC_TX_FAILURE); 5303 } 5304 5305 /* 5306 * only expect single dring to exist, which we use 5307 * as an internal buffer, rather than a transfer channel. 5308 */ 5309 if ((dp = ldcp->lane_out.dringp) == NULL) { 5310 DERR(vswp, "%s(%lld): no dring for outbound lane", 5311 __func__, ldcp->ldc_id); 5312 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", 5313 __func__, ldcp->ldc_id, ldcp->ldc_status, 5314 ldcp->lane_out.lstate); 5315 freemsg(mp); 5316 return (LDC_TX_FAILURE); 5317 } 5318 5319 mutex_enter(&dp->dlock); 5320 5321 size = msgsize(mp); 5322 if (size > (size_t)ETHERMAX) { 5323 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 5324 ldcp->ldc_id, size); 5325 status = LDC_TX_FAILURE; 5326 goto vsw_descrsend_free_exit; 5327 } 5328 5329 /* 5330 * Find a free descriptor in our buffer ring 5331 */ 5332 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 5333 if (warn_msg) { 5334 DERR(vswp, "%s(%lld): no descriptor available for ring " 5335 "at 0x%llx", __func__, ldcp->ldc_id, dp); 5336 warn_msg = 0; 5337 } 5338 5339 /* nothing more we can do */ 5340 status = LDC_TX_NORESOURCES; 5341 goto vsw_descrsend_free_exit; 5342 } else { 5343 D2(vswp, "%s(%lld): free private descriptor found at pos " 5344 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, 5345 priv_desc); 5346 warn_msg = 1; 5347 } 5348 5349 /* copy data into the descriptor */ 5350 bufp = priv_desc->datap; 5351 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 5352 n = MBLKL(bp); 5353 bcopy(bp->b_rptr, bufp, n); 5354 bufp += n; 5355 } 5356 5357 priv_desc->datalen = (size < (size_t)ETHERMIN) ? 
ETHERMIN : size;
5358 priv_desc->dstate = VIO_DESC_READY;
5359
5360 /* create and send the in-band descp msg */
5361 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA;
5362 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO;
5363 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA;
5364 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session;
5365
5366 /*
5367 * Access to the seq_num is implicitly protected by the
5368 * fact that we have only one dring associated with the
5369 * lane currently and we hold the associated dring lock.
5370 */
5371 ibnd_msg.hdr.seq_num = ldcp->lane_out.seq_num++;
5372
5373 /*
5374 * Copy the mem cookies describing the data from the
5375 * private region of the descriptor ring into the inband
5376 * descriptor.
5377 */
5378 for (i = 0; i < priv_desc->ncookies; i++) {
5379 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i],
5380 sizeof (ldc_mem_cookie_t));
5381 }
5382
5383 ibnd_msg.hdr.desc_handle = idx;
5384 ibnd_msg.ncookies = priv_desc->ncookies;
5385 ibnd_msg.nbytes = size;
5386
5387 vsw_send_msg(ldcp, (void *)&ibnd_msg, sizeof (vio_ibnd_desc_t));
5388
5389 vsw_descrsend_free_exit:
5390
5391 mutex_exit(&dp->dlock);
5392
5393 /* free the allocated message blocks */
5394 freemsg(mp);
5395
5396 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
5397 return (status);
5398 }
5399
5400 static void
5401 vsw_send_ver(vsw_ldc_t *ldcp)
5402 {
5403 vsw_t *vswp = ldcp->ldc_vswp;
5404 lane_t *lp = &ldcp->lane_out;
5405 vio_ver_msg_t ver_msg;
5406
5407 D1(vswp, "%s enter", __func__);
5408
5409 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
5410 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
5411 ver_msg.tag.vio_subtype_env = VIO_VER_INFO;
5412 ver_msg.tag.vio_sid = ldcp->local_session;
5413
5414 ver_msg.ver_major = vsw_versions[0].ver_major;
5415 ver_msg.ver_minor = vsw_versions[0].ver_minor;
5416 ver_msg.dev_class = VDEV_NETWORK_SWITCH;
5417
5418 lp->lstate |= VSW_VER_INFO_SENT;
5419 lp->ver_major = ver_msg.ver_major;
5420 lp->ver_minor = ver_msg.ver_minor;
5421
5422 DUMP_TAG(ver_msg.tag);
5423
5424 vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t));
5425
5426 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id);
5427 }
5428
5429 static void
5430 vsw_send_attr(vsw_ldc_t *ldcp)
5431 {
5432 vsw_t *vswp = ldcp->ldc_vswp;
5433 lane_t *lp = &ldcp->lane_out;
5434 vnet_attr_msg_t attr_msg;
5435
5436 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
5437
5438 /*
5439 * Subtype is set to INFO by default
5440 */
5441 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
5442 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
5443 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO;
5444 attr_msg.tag.vio_sid = ldcp->local_session;
5445
5446 /* payload copied from default settings for lane */
5447 attr_msg.mtu = lp->mtu;
5448 attr_msg.addr_type = lp->addr_type;
5449 attr_msg.xfer_mode = lp->xfer_mode;
5450 attr_msg.ack_freq = lp->ack_freq;
5451
5452 READ_ENTER(&vswp->if_lockrw);
5453 bcopy(&(vswp->if_addr), &(attr_msg.addr), ETHERADDRL);
5454 RW_EXIT(&vswp->if_lockrw);
5455
5456 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT;
5457
5458 DUMP_TAG(attr_msg.tag);
5459
5460 vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t));
5461
5462 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
5463 }
5464
5465 /*
5466 * Create dring info msg (which also results in the creation of
5467 * a dring).
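 * The message carries the ring geometry (number and size of the
 * descriptors) plus the LDC memory cookie the peer needs in order to
 * map the ring; dring_ident is sent as 0 here and is assumed to be
 * assigned by the peer when it ACKs the registration.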
5468 */
5469 static vio_dring_reg_msg_t *
5470 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp)
5471 {
5472 vio_dring_reg_msg_t *mp;
5473 dring_info_t *dp;
5474 vsw_t *vswp = ldcp->ldc_vswp;
5475
5476 D1(vswp, "vsw_create_dring_info_pkt enter\n");
5477
5478 /*
5479 * If we can't create a dring, obviously no point sending
5480 * a message.
5481 */
5482 if ((dp = vsw_create_dring(ldcp)) == NULL)
5483 return (NULL);
5484
5485 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP);
5486
5487 mp->tag.vio_msgtype = VIO_TYPE_CTRL;
5488 mp->tag.vio_subtype = VIO_SUBTYPE_INFO;
5489 mp->tag.vio_subtype_env = VIO_DRING_REG;
5490 mp->tag.vio_sid = ldcp->local_session;
5491
5492 /* payload */
5493 mp->num_descriptors = dp->num_descriptors;
5494 mp->descriptor_size = dp->descriptor_size;
5495 mp->options = dp->options;
5496 mp->ncookies = dp->ncookies;
5497 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t));
5498
5499 mp->dring_ident = 0;
5500
5501 D1(vswp, "vsw_create_dring_info_pkt exit\n");
5502
5503 return (mp);
5504 }
5505
5506 static void
5507 vsw_send_dring_info(vsw_ldc_t *ldcp)
5508 {
5509 vio_dring_reg_msg_t *dring_msg;
5510 vsw_t *vswp = ldcp->ldc_vswp;
5511
5512 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id);
5513
5514 dring_msg = vsw_create_dring_info_pkt(ldcp);
5515 if (dring_msg == NULL) {
5516 cmn_err(CE_WARN, "vsw_send_dring_info: error creating msg");
5517 return;
5518 }
5519
5520 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT;
5521
5522 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg);
5523
5524 vsw_send_msg(ldcp, dring_msg,
5525 sizeof (vio_dring_reg_msg_t));
5526
5527 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t));
5528
5529 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id);
5530 }
5531
5532 static void
5533 vsw_send_rdx(vsw_ldc_t *ldcp)
5534 {
5535 vsw_t *vswp = ldcp->ldc_vswp;
5536 vio_rdx_msg_t rdx_msg;
5537
5538 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
5539
5540 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
5541 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
5542 rdx_msg.tag.vio_subtype_env = VIO_RDX;
5543 rdx_msg.tag.vio_sid = ldcp->local_session;
5544
5545 ldcp->lane_out.lstate |= VSW_RDX_INFO_SENT;
5546
5547 DUMP_TAG(rdx_msg.tag);
5548
5549 vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t));
5550
5551 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
5552 }
5553
5554 /*
5555 * Generic routine to send message out over ldc channel.
5556 */
5557 static void
5558 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size)
5559 {
5560 int rv, retries = vsw_wretries; /* local copy; don't consume tunable */
5561 size_t msglen = size;
5562 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp;
5563 vsw_t *vswp = ldcp->ldc_vswp;
5564
5565 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes",
5566 ldcp->ldc_id, size);
5567
5568 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype);
5569 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype);
5570 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env);
5571
5572 mutex_enter(&ldcp->ldc_txlock);
5573 do {
5574 msglen = size;
5575 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen);
5576 } while (rv == EWOULDBLOCK && --retries > 0);
5577
5578 mutex_exit(&ldcp->ldc_txlock);
5579
5580 if ((rv != 0) || (msglen != size)) {
5581 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) "
5582 "rv(%d) size (%d) msglen(%d)\n", ldcp->ldc_id,
5583 rv, size, msglen);
5584 }
5585
5586 D1(vswp, "vsw_send_msg (%lld) exit : sent %d bytes",
5587 ldcp->ldc_id, msglen);
5588 }
5589
5590 /*
5591 * Add an entry into FDB, for the given mac address and port_id.
5592 * Returns 0 on success, 1 on failure.
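 * (KEY_HASH() is assumed to fold the six bytes of the MAC address
 * into a single 64-bit key suitable for use with mod_hash.)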
5593 * 5594 * Lock protecting FDB must be held by calling process. 5595 */ 5596 static int 5597 vsw_add_fdb(vsw_t *vswp, vsw_port_t *port) 5598 { 5599 uint64_t addr = 0; 5600 5601 D1(vswp, "%s: enter", __func__); 5602 5603 KEY_HASH(addr, port->p_macaddr); 5604 5605 D2(vswp, "%s: key = 0x%llx", __func__, addr); 5606 5607 /* 5608 * Note: duplicate keys will be rejected by mod_hash. 5609 */ 5610 if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)addr, 5611 (mod_hash_val_t)port) != 0) { 5612 DERR(vswp, "%s: unable to add entry into fdb.", __func__); 5613 return (1); 5614 } 5615 5616 D1(vswp, "%s: exit", __func__); 5617 return (0); 5618 } 5619 5620 /* 5621 * Remove an entry from FDB. 5622 * Returns 0 on success, 1 on failure. 5623 */ 5624 static int 5625 vsw_del_fdb(vsw_t *vswp, vsw_port_t *port) 5626 { 5627 uint64_t addr = 0; 5628 5629 D1(vswp, "%s: enter", __func__); 5630 5631 KEY_HASH(addr, port->p_macaddr); 5632 5633 D2(vswp, "%s: key = 0x%llx", __func__, addr); 5634 5635 (void) mod_hash_destroy(vswp->fdb, (mod_hash_val_t)addr); 5636 5637 D1(vswp, "%s: enter", __func__); 5638 5639 return (0); 5640 } 5641 5642 /* 5643 * Search fdb for a given mac address. 5644 * Returns pointer to the entry if found, else returns NULL. 5645 */ 5646 static vsw_port_t * 5647 vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp) 5648 { 5649 uint64_t key = 0; 5650 vsw_port_t *port = NULL; 5651 5652 D1(vswp, "%s: enter", __func__); 5653 5654 KEY_HASH(key, ehp->ether_dhost); 5655 5656 D2(vswp, "%s: key = 0x%llx", __func__, key); 5657 5658 if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key, 5659 (mod_hash_val_t *)&port) != 0) { 5660 return (NULL); 5661 } 5662 5663 D1(vswp, "%s: exit", __func__); 5664 5665 return (port); 5666 } 5667 5668 /* 5669 * Add or remove multicast address(es). 5670 * 5671 * Returns 0 on success, 1 on failure. 5672 */ 5673 static int 5674 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port) 5675 { 5676 mcst_addr_t *mcst_p = NULL; 5677 vsw_t *vswp = port->p_vswp; 5678 uint64_t addr = 0x0; 5679 int i; 5680 5681 D1(vswp, "%s: enter", __func__); 5682 5683 D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count); 5684 5685 for (i = 0; i < mcst_pkt->count; i++) { 5686 /* 5687 * Convert address into form that can be used 5688 * as hash table key. 5689 */ 5690 KEY_HASH(addr, mcst_pkt->mca[i]); 5691 5692 /* 5693 * Add or delete the specified address/port combination. 5694 */ 5695 if (mcst_pkt->set == 0x1) { 5696 D3(vswp, "%s: adding multicast address 0x%llx for " 5697 "port %ld", __func__, addr, port->p_instance); 5698 if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 5699 /* 5700 * Update the list of multicast 5701 * addresses contained within the 5702 * port structure to include this new 5703 * one. 5704 */ 5705 mcst_p = kmem_alloc(sizeof (mcst_addr_t), 5706 KM_NOSLEEP); 5707 if (mcst_p == NULL) { 5708 DERR(vswp, "%s: unable to alloc mem", 5709 __func__); 5710 return (1); 5711 } 5712 5713 mcst_p->nextp = NULL; 5714 mcst_p->addr = addr; 5715 5716 mutex_enter(&port->mca_lock); 5717 mcst_p->nextp = port->mcap; 5718 port->mcap = mcst_p; 5719 mutex_exit(&port->mca_lock); 5720 5721 /* 5722 * Program the address into HW. If the addr 5723 * has already been programmed then the MAC 5724 * just increments a ref counter (which is 5725 * used when the address is being deleted) 5726 * 5727 * Note: 5728 * For the moment we dont care if this 5729 * succeeds because the card must be in 5730 * promics mode. 
When we have the ability 5731 * to program multiple unicst address into 5732 * the card then we will need to check this 5733 * return value. 5734 */ 5735 if (vswp->mh != NULL) 5736 (void) mac_multicst_add(vswp->mh, 5737 (uchar_t *)&mcst_pkt->mca[i]); 5738 5739 } else { 5740 DERR(vswp, "%s: error adding multicast " 5741 "address 0x%llx for port %ld", 5742 __func__, addr, port->p_instance); 5743 return (1); 5744 } 5745 } else { 5746 /* 5747 * Delete an entry from the multicast hash 5748 * table and update the address list 5749 * appropriately. 5750 */ 5751 if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 5752 D3(vswp, "%s: deleting multicast address " 5753 "0x%llx for port %ld", __func__, addr, 5754 port->p_instance); 5755 5756 vsw_del_addr(VSW_VNETPORT, port, addr); 5757 5758 /* 5759 * Remove the address from HW. The address 5760 * will actually only be removed once the ref 5761 * count within the MAC layer has dropped to 5762 * zero. I.e. we can safely call this fn even 5763 * if other ports are interested in this 5764 * address. 5765 */ 5766 if (vswp->mh != NULL) 5767 (void) mac_multicst_remove(vswp->mh, 5768 (uchar_t *)&mcst_pkt->mca[i]); 5769 5770 } else { 5771 DERR(vswp, "%s: error deleting multicast " 5772 "addr 0x%llx for port %ld", 5773 __func__, addr, port->p_instance); 5774 return (1); 5775 } 5776 } 5777 } 5778 D1(vswp, "%s: exit", __func__); 5779 return (0); 5780 } 5781 5782 /* 5783 * Add a new multicast entry. 5784 * 5785 * Search hash table based on address. If match found then 5786 * update associated val (which is chain of ports), otherwise 5787 * create new key/val (addr/port) pair and insert into table. 5788 */ 5789 static int 5790 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 5791 { 5792 int dup = 0; 5793 int rv = 0; 5794 mfdb_ent_t *ment = NULL; 5795 mfdb_ent_t *tmp_ent = NULL; 5796 mfdb_ent_t *new_ent = NULL; 5797 void *tgt = NULL; 5798 5799 if (devtype == VSW_VNETPORT) { 5800 /* 5801 * Being invoked from a vnet. 5802 */ 5803 ASSERT(arg != NULL); 5804 tgt = arg; 5805 D2(NULL, "%s: port %d : address 0x%llx", __func__, 5806 ((vsw_port_t *)arg)->p_instance, addr); 5807 } else { 5808 /* 5809 * We are being invoked via the m_multicst mac entry 5810 * point. 5811 */ 5812 D2(NULL, "%s: address 0x%llx", __func__, addr); 5813 tgt = (void *)vswp; 5814 } 5815 5816 WRITE_ENTER(&vswp->mfdbrw); 5817 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 5818 (mod_hash_val_t *)&ment) != 0) { 5819 5820 /* address not currently in table */ 5821 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 5822 ment->d_addr = (void *)tgt; 5823 ment->d_type = devtype; 5824 ment->nextp = NULL; 5825 5826 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr, 5827 (mod_hash_val_t)ment) != 0) { 5828 DERR(vswp, "%s: hash table insertion failed", __func__); 5829 kmem_free(ment, sizeof (mfdb_ent_t)); 5830 rv = 1; 5831 } else { 5832 D2(vswp, "%s: added initial entry for 0x%llx to " 5833 "table", __func__, addr); 5834 } 5835 } else { 5836 /* 5837 * Address in table. Check to see if specified port 5838 * is already associated with the address. If not add 5839 * it now. 
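 * (The value stored in the mfdb hash is therefore the head of a
 * singly linked list of mfdb_ent_t, one entry per interested port or
 * instance.)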
5840 */ 5841 tmp_ent = ment; 5842 while (tmp_ent != NULL) { 5843 if (tmp_ent->d_addr == (void *)tgt) { 5844 if (devtype == VSW_VNETPORT) { 5845 DERR(vswp, "%s: duplicate port entry " 5846 "found for portid %ld and key " 5847 "0x%llx", __func__, 5848 ((vsw_port_t *)arg)->p_instance, 5849 addr); 5850 } else { 5851 DERR(vswp, "%s: duplicate entry found" 5852 "for key 0x%llx", 5853 __func__, addr); 5854 } 5855 rv = 1; 5856 dup = 1; 5857 break; 5858 } 5859 tmp_ent = tmp_ent->nextp; 5860 } 5861 5862 /* 5863 * Port not on list so add it to end now. 5864 */ 5865 if (0 == dup) { 5866 D2(vswp, "%s: added entry for 0x%llx to table", 5867 __func__, addr); 5868 new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 5869 new_ent->d_addr = (void *)tgt; 5870 new_ent->d_type = devtype; 5871 new_ent->nextp = NULL; 5872 5873 tmp_ent = ment; 5874 while (tmp_ent->nextp != NULL) 5875 tmp_ent = tmp_ent->nextp; 5876 5877 tmp_ent->nextp = new_ent; 5878 } 5879 } 5880 5881 RW_EXIT(&vswp->mfdbrw); 5882 return (rv); 5883 } 5884 5885 /* 5886 * Remove a multicast entry from the hashtable. 5887 * 5888 * Search hash table based on address. If match found, scan 5889 * list of ports associated with address. If specified port 5890 * found remove it from list. 5891 */ 5892 static int 5893 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 5894 { 5895 mfdb_ent_t *ment = NULL; 5896 mfdb_ent_t *curr_p, *prev_p; 5897 void *tgt = NULL; 5898 5899 D1(vswp, "%s: enter", __func__); 5900 5901 if (devtype == VSW_VNETPORT) { 5902 tgt = (vsw_port_t *)arg; 5903 D2(vswp, "%s: removing port %d from mFDB for address" 5904 " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, 5905 addr); 5906 } else { 5907 D2(vswp, "%s: removing entry", __func__); 5908 tgt = (void *)vswp; 5909 } 5910 5911 WRITE_ENTER(&vswp->mfdbrw); 5912 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 5913 (mod_hash_val_t *)&ment) != 0) { 5914 D2(vswp, "%s: address 0x%llx not in table", __func__, addr); 5915 RW_EXIT(&vswp->mfdbrw); 5916 return (1); 5917 } 5918 5919 prev_p = curr_p = ment; 5920 5921 while (curr_p != NULL) { 5922 if (curr_p->d_addr == (void *)tgt) { 5923 if (devtype == VSW_VNETPORT) { 5924 D2(vswp, "%s: port %d found", __func__, 5925 ((vsw_port_t *)tgt)->p_instance); 5926 } else { 5927 D2(vswp, "%s: instance found", __func__); 5928 } 5929 5930 if (prev_p == curr_p) { 5931 /* 5932 * head of list, if no other element is in 5933 * list then destroy this entry, otherwise 5934 * just replace it with updated value. 5935 */ 5936 ment = curr_p->nextp; 5937 kmem_free(curr_p, sizeof (mfdb_ent_t)); 5938 if (ment == NULL) { 5939 (void) mod_hash_destroy(vswp->mfdb, 5940 (mod_hash_val_t)addr); 5941 } else { 5942 (void) mod_hash_replace(vswp->mfdb, 5943 (mod_hash_key_t)addr, 5944 (mod_hash_val_t)ment); 5945 } 5946 } else { 5947 /* 5948 * Not head of list, no need to do 5949 * replacement, just adjust list pointers. 5950 */ 5951 prev_p->nextp = curr_p->nextp; 5952 kmem_free(curr_p, sizeof (mfdb_ent_t)); 5953 } 5954 break; 5955 } 5956 5957 prev_p = curr_p; 5958 curr_p = curr_p->nextp; 5959 } 5960 5961 RW_EXIT(&vswp->mfdbrw); 5962 5963 D1(vswp, "%s: exit", __func__); 5964 5965 return (0); 5966 } 5967 5968 /* 5969 * Port is being deleted, but has registered an interest in one 5970 * or more multicast groups. Using the list of addresses maintained 5971 * within the port structure find the appropriate entry in the hash 5972 * table and remove this port from the list of interested ports. 
5973 */ 5974 static void 5975 vsw_del_mcst_port(vsw_port_t *port) 5976 { 5977 mcst_addr_t *mcst_p = NULL; 5978 vsw_t *vswp = port->p_vswp; 5979 5980 D1(vswp, "%s: enter", __func__); 5981 5982 mutex_enter(&port->mca_lock); 5983 while (port->mcap != NULL) { 5984 (void) vsw_del_mcst(vswp, VSW_VNETPORT, 5985 port->mcap->addr, port); 5986 5987 mcst_p = port->mcap->nextp; 5988 kmem_free(port->mcap, sizeof (mcst_addr_t)); 5989 port->mcap = mcst_p; 5990 } 5991 mutex_exit(&port->mca_lock); 5992 5993 D1(vswp, "%s: exit", __func__); 5994 } 5995 5996 /* 5997 * This vsw instance is detaching, but has registered an interest in one 5998 * or more multicast groups. Using the list of addresses maintained 5999 * within the vsw structure find the appropriate entry in the hash 6000 * table and remove this instance from the list of interested ports. 6001 */ 6002 static void 6003 vsw_del_mcst_vsw(vsw_t *vswp) 6004 { 6005 mcst_addr_t *next_p = NULL; 6006 6007 D1(vswp, "%s: enter", __func__); 6008 6009 mutex_enter(&vswp->mca_lock); 6010 6011 while (vswp->mcap != NULL) { 6012 DERR(vswp, "%s: deleting addr 0x%llx", 6013 __func__, vswp->mcap->addr); 6014 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, 6015 vswp->mcap->addr, NULL); 6016 6017 next_p = vswp->mcap->nextp; 6018 kmem_free(vswp->mcap, sizeof (mcst_addr_t)); 6019 vswp->mcap = next_p; 6020 } 6021 6022 vswp->mcap = NULL; 6023 mutex_exit(&vswp->mca_lock); 6024 6025 D1(vswp, "%s: exit", __func__); 6026 } 6027 6028 6029 /* 6030 * Remove the specified address from the list of address maintained 6031 * in this port node. 6032 */ 6033 static void 6034 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 6035 { 6036 vsw_t *vswp = NULL; 6037 vsw_port_t *port = NULL; 6038 mcst_addr_t *prev_p = NULL; 6039 mcst_addr_t *curr_p = NULL; 6040 6041 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 6042 __func__, devtype, addr); 6043 6044 if (devtype == VSW_VNETPORT) { 6045 port = (vsw_port_t *)arg; 6046 mutex_enter(&port->mca_lock); 6047 prev_p = curr_p = port->mcap; 6048 } else { 6049 vswp = (vsw_t *)arg; 6050 mutex_enter(&vswp->mca_lock); 6051 prev_p = curr_p = vswp->mcap; 6052 } 6053 6054 while (curr_p != NULL) { 6055 if (curr_p->addr == addr) { 6056 D2(NULL, "%s: address found", __func__); 6057 /* match found */ 6058 if (prev_p == curr_p) { 6059 /* list head */ 6060 if (devtype == VSW_VNETPORT) 6061 port->mcap = curr_p->nextp; 6062 else 6063 vswp->mcap = curr_p->nextp; 6064 } else { 6065 prev_p->nextp = curr_p->nextp; 6066 } 6067 kmem_free(curr_p, sizeof (mcst_addr_t)); 6068 break; 6069 } else { 6070 prev_p = curr_p; 6071 curr_p = curr_p->nextp; 6072 } 6073 } 6074 6075 if (devtype == VSW_VNETPORT) 6076 mutex_exit(&port->mca_lock); 6077 else 6078 mutex_exit(&vswp->mca_lock); 6079 6080 D1(NULL, "%s: exit", __func__); 6081 } 6082 6083 /* 6084 * Creates a descriptor ring (dring) and links it into the 6085 * link of outbound drings for this channel. 6086 * 6087 * Returns NULL if creation failed. 
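 * In outline: the public ring is created with ldc_mem_dring_create(),
 * a private shadow ring and its data buffers are set up via
 * vsw_setup_ring(), and the ring is then exported to the peer with
 * ldc_mem_dring_bind().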
6088 */ 6089 static dring_info_t * 6090 vsw_create_dring(vsw_ldc_t *ldcp) 6091 { 6092 vsw_private_desc_t *priv_addr = NULL; 6093 vsw_t *vswp = ldcp->ldc_vswp; 6094 ldc_mem_info_t minfo; 6095 dring_info_t *dp, *tp; 6096 int i; 6097 6098 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 6099 6100 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 6101 6102 /* create public section of ring */ 6103 if ((ldc_mem_dring_create(VSW_RING_NUM_EL, 6104 VSW_PUB_SIZE, &dp->handle)) != 0) { 6105 6106 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 6107 "failed", ldcp->ldc_id); 6108 goto create_fail_exit; 6109 } 6110 6111 ASSERT(dp->handle != NULL); 6112 6113 /* 6114 * Get the base address of the public section of the ring. 6115 */ 6116 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 6117 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 6118 ldcp->ldc_id); 6119 goto dring_fail_exit; 6120 } else { 6121 ASSERT(minfo.vaddr != 0); 6122 dp->pub_addr = minfo.vaddr; 6123 } 6124 6125 dp->num_descriptors = VSW_RING_NUM_EL; 6126 dp->descriptor_size = VSW_PUB_SIZE; 6127 dp->options = VIO_TX_DRING; 6128 dp->ncookies = 1; /* guaranteed by ldc */ 6129 6130 /* 6131 * create private portion of ring 6132 */ 6133 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 6134 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP); 6135 6136 if (vsw_setup_ring(ldcp, dp)) { 6137 DERR(vswp, "%s: unable to setup ring", __func__); 6138 goto dring_fail_exit; 6139 } 6140 6141 /* haven't used any descriptors yet */ 6142 dp->end_idx = 0; 6143 6144 /* bind dring to the channel */ 6145 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 6146 LDC_SHADOW_MAP, LDC_MEM_RW, 6147 &dp->cookie[0], &dp->ncookies)) != 0) { 6148 DERR(vswp, "vsw_create_dring: unable to bind to channel " 6149 "%lld", ldcp->ldc_id); 6150 goto dring_fail_exit; 6151 } 6152 6153 /* 6154 * Only ever create rings for outgoing lane. Link it onto 6155 * end of list. 6156 */ 6157 if (ldcp->lane_out.dringp == NULL) { 6158 D2(vswp, "vsw_create_dring: adding first outbound ring"); 6159 ldcp->lane_out.dringp = dp; 6160 } else { 6161 tp = ldcp->lane_out.dringp; 6162 while (tp->next != NULL) 6163 tp = tp->next; 6164 6165 tp->next = dp; 6166 } 6167 6168 return (dp); 6169 6170 dring_fail_exit: 6171 (void) ldc_mem_dring_destroy(dp->handle); 6172 6173 create_fail_exit: 6174 if (dp->priv_addr != NULL) { 6175 priv_addr = dp->priv_addr; 6176 for (i = 0; i < VSW_RING_NUM_EL; i++) { 6177 if (priv_addr->memhandle != NULL) 6178 (void) ldc_mem_free_handle( 6179 priv_addr->memhandle); 6180 priv_addr++; 6181 } 6182 kmem_free(dp->priv_addr, 6183 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL)); 6184 } 6185 mutex_destroy(&dp->dlock); 6186 6187 kmem_free(dp, sizeof (dring_info_t)); 6188 return (NULL); 6189 } 6190 6191 /* 6192 * Create a ring consisting of just a private portion and link 6193 * it into the list of rings for the outbound lane. 6194 * 6195 * These type of rings are used primarily for temporary data 6196 * storage (i.e. as data buffers). 
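 * (Such private rings back the in-band descriptor mode, in which data
 * is described to the peer by memory cookies rather than through a
 * shared ring; see vsw_descrsend().)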
6197 */
6198 void
6199 vsw_create_privring(vsw_ldc_t *ldcp)
6200 {
6201 dring_info_t *dp, *tp;
6202 vsw_t *vswp = ldcp->ldc_vswp;
6203
6204 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
6205
6206 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
6207
6208 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
6209
6210 /* no public section */
6211 dp->pub_addr = NULL;
6212
6213 dp->priv_addr = kmem_zalloc((sizeof (vsw_private_desc_t) *
6214 VSW_RING_NUM_EL), KM_SLEEP);
6215
6216 if (vsw_setup_ring(ldcp, dp)) {
6217 DERR(vswp, "%s: setup of ring failed", __func__);
6218 kmem_free(dp->priv_addr,
6219 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
6220 mutex_destroy(&dp->dlock);
6221 kmem_free(dp, sizeof (dring_info_t));
6222 return;
6223 }
6224
6225 /* haven't used any descriptors yet */
6226 dp->end_idx = 0;
6227
6228 /*
6229 * Only ever create rings for outgoing lane. Link it onto
6230 * end of list.
6231 */
6232 if (ldcp->lane_out.dringp == NULL) {
6233 D2(vswp, "%s: adding first outbound privring", __func__);
6234 ldcp->lane_out.dringp = dp;
6235 } else {
6236 tp = ldcp->lane_out.dringp;
6237 while (tp->next != NULL)
6238 tp = tp->next;
6239
6240 tp->next = dp;
6241 }
6242
6243 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
6244 }
6245
6246 /*
6247 * Setup the descriptors in the dring. Returns 0 on success, 1 on
6248 * failure.
6249 */
6250 int
6251 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp)
6252 {
6253 vnet_public_desc_t *pub_addr = NULL;
6254 vsw_private_desc_t *priv_addr = NULL;
6255 vsw_t *vswp = ldcp->ldc_vswp;
6256 uint64_t *tmpp;
6257 uint64_t offset = 0;
6258 uint32_t ncookies = 0;
6259 static char *name = "vsw_setup_ring";
6260 int i, j, rv;
6261
6262 /* note - public section may be null */
6263 priv_addr = dp->priv_addr;
6264 pub_addr = dp->pub_addr;
6265
6266 /*
6267 * Allocate the region of memory which will be used to hold
6268 * the data the descriptors will refer to.
6269 */
6270 dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ);
6271 dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP);
6272
6273 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name,
6274 dp->data_sz, dp->data_addr);
6275
6276 tmpp = (uint64_t *)dp->data_addr;
6277 offset = VSW_RING_EL_DATA_SZ / sizeof (*tmpp);
6278
6279 /*
6280 * Initialise some of the private and public (if they exist)
6281 * descriptor fields.
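 * Each private descriptor gets its own memory handle, bound over its
 * VSW_RING_EL_DATA_SZ slice of the data buffer; the resulting cookies
 * (at most VSW_MAX_COOKIES) are what later get copied into outbound
 * descriptors.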
/*
 * Setup the descriptors in the dring. Returns 0 on success, 1 on
 * failure.
 */
int
vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp)
{
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;
	vsw_t			*vswp = ldcp->ldc_vswp;
	uint64_t		*tmpp;
	uint64_t		offset = 0;
	uint32_t		ncookies = 0;
	static char		*name = "vsw_setup_ring";
	int			i, j, rv;

	/* note - public section may be null */
	priv_addr = dp->priv_addr;
	pub_addr = dp->pub_addr;

	/*
	 * Allocate the region of memory which will be used to hold
	 * the data the descriptors will refer to.
	 */
	dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ);
	dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP);

	D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name,
	    dp->data_sz, dp->data_addr);

	tmpp = (uint64_t *)dp->data_addr;
	offset = VSW_RING_EL_DATA_SZ / sizeof (*tmpp);

	/*
	 * Initialise some of the private and public (if they exist)
	 * descriptor fields.
	 */
	for (i = 0; i < VSW_RING_NUM_EL; i++) {
		if ((ldc_mem_alloc_handle(ldcp->ldc_handle,
		    &priv_addr->memhandle)) != 0) {
			DERR(vswp, "%s: alloc mem handle failed", name);
			goto setup_ring_cleanup;
		}

		priv_addr->datap = (void *)tmpp;

		rv = ldc_mem_bind_handle(priv_addr->memhandle,
		    (caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ,
		    LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W,
		    &(priv_addr->memcookie[0]), &ncookies);
		if (rv != 0) {
			DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed "
			    "(rv %d)", name, ldcp->ldc_id, rv);
			goto setup_ring_cleanup;
		}
		priv_addr->bound = 1;

		D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx",
		    name, i, priv_addr->memcookie[0].addr,
		    priv_addr->memcookie[0].size);

		if (ncookies > (uint32_t)VSW_MAX_COOKIES) {
			DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned "
			    "invalid num of cookies (%d) for size 0x%llx",
			    name, ldcp->ldc_id, ncookies,
			    VSW_RING_EL_DATA_SZ);

			goto setup_ring_cleanup;
		} else {
			for (j = 1; j < ncookies; j++) {
				rv = ldc_mem_nextcookie(priv_addr->memhandle,
				    &(priv_addr->memcookie[j]));
				if (rv != 0) {
					DERR(vswp, "%s: ldc_mem_nextcookie "
					    "failed rv (%d)", name, rv);
					goto setup_ring_cleanup;
				}
				D3(vswp, "%s: memcookie %d : addr 0x%llx : "
				    "size 0x%llx", name, j,
				    priv_addr->memcookie[j].addr,
				    priv_addr->memcookie[j].size);
			}
		}
		priv_addr->ncookies = ncookies;
		priv_addr->dstate = VIO_DESC_FREE;

		if (pub_addr != NULL) {

			/* link pub and private sides */
			priv_addr->descp = pub_addr;

			pub_addr->hdr.dstate = VIO_DESC_FREE;
			pub_addr++;
		}

		/*
		 * move to next element in the dring and the next
		 * position in the data buffer.
		 */
		priv_addr++;
		tmpp += offset;
	}

	return (0);

setup_ring_cleanup:
	priv_addr = dp->priv_addr;

	for (i = 0; i < VSW_RING_NUM_EL; i++) {
		/* only release handles which were actually allocated */
		if (priv_addr->memhandle != NULL) {
			if (priv_addr->bound == 1) {
				(void) ldc_mem_unbind_handle(
				    priv_addr->memhandle);
				priv_addr->bound = 0;
			}
			(void) ldc_mem_free_handle(priv_addr->memhandle);
			priv_addr->memhandle = NULL;
		}
		priv_addr++;
	}
	kmem_free(dp->data_addr, dp->data_sz);
	dp->data_addr = NULL;

	return (1);
}
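
/*
 * Illustrative sketch (not in the original source): each private
 * descriptor's data pointer is carved out of the single data_addr
 * region at a fixed stride, so descriptor i's buffer can also be
 * computed directly rather than by the tmpp/offset stepping above.
 * The helper name is hypothetical.
 */
static void *
vsw_ring_el_data_sketch(dring_info_t *dp, int i)
{
	/* same address the tmpp/offset stepping arrives at */
	return ((caddr_t)dp->data_addr + (i * VSW_RING_EL_DATA_SZ));
}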
/*
 * Searches the private section of a ring for a free descriptor,
 * starting at the location of the last free descriptor found
 * previously.
 *
 * Returns 0 if a free descriptor is available, 1 otherwise.
 *
 * FUTURE: might need to return contiguous range of descriptors
 * as dring info msg assumes all will be contiguous.
 */
static int
vsw_dring_find_free_desc(dring_info_t *dringp,
    vsw_private_desc_t **priv_p, int *idx)
{
	vsw_private_desc_t	*addr;
	uint64_t		i;
	uint64_t		j = 0;
	uint64_t		start = dringp->end_idx;
	int			num = VSW_RING_NUM_EL;
	int			ret = 1;

	D1(NULL, "%s enter\n", __func__);

	D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld",
	    __func__, dringp, start);

	for (i = start; j < num; i = (i + 1) % num, j++) {
		addr = (vsw_private_desc_t *)dringp->priv_addr + i;
		D2(NULL, "%s: descriptor %lld : dstate 0x%llx\n",
		    __func__, i, addr->dstate);
		if (addr->dstate == VIO_DESC_FREE) {
			D2(NULL, "%s: descriptor %lld is available",
			    __func__, i);
			*priv_p = addr;
			*idx = i;
			dringp->end_idx = (i + 1) % num;
			ret = 0;
			break;
		}
	}

	/* ring full */
	if (ret == 1) {
		D2(NULL, "%s: no descriptors free: started at %lld",
		    __func__, start);
	}

	D1(NULL, "%s: exit\n", __func__);

	return (ret);
}

/*
 * Copy relevant fields from the private descriptor into the
 * associated public side.
 */
static void
vsw_dring_priv2pub(vsw_private_desc_t *priv)
{
	vnet_public_desc_t	*pub;
	int			i;

	D1(NULL, "vsw_dring_priv2pub enter\n");

	pub = priv->descp;

	pub->ncookies = priv->ncookies;
	pub->nbytes = priv->datalen;

	for (i = 0; i < pub->ncookies; i++) {
		bcopy(&priv->memcookie[i], &pub->memcookie[i],
		    sizeof (ldc_mem_cookie_t));
	}

	pub->hdr.ack = 1;
	pub->hdr.dstate = VIO_DESC_READY;

	D1(NULL, "vsw_dring_priv2pub exit");
}
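
/*
 * Illustrative sketch (not in the original source) of how the two
 * routines above combine on a transmit path: claim a free private
 * descriptor, copy the frame into its pre-bound buffer, then publish
 * it to the peer.  The helper name and the flat frame/len arguments
 * are hypothetical simplifications of the real transmit code.
 */
static int
vsw_dring_tx_sketch(dring_info_t *dringp, caddr_t frame, size_t len)
{
	vsw_private_desc_t	*priv;
	int			idx;

	if (len > VSW_RING_EL_DATA_SZ)
		return (1);			/* frame too large */

	if (vsw_dring_find_free_desc(dringp, &priv, &idx) != 0)
		return (1);			/* ring full */

	bcopy(frame, priv->datap, len);		/* fill the bound buffer */
	priv->datalen = len;

	vsw_dring_priv2pub(priv);		/* marks VIO_DESC_READY */
	return (0);
}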
/*
 * Map from a dring identifier to the ring itself. Returns
 * pointer to ring or NULL if no match found.
 */
static dring_info_t *
vsw_ident2dring(lane_t *lane, uint64_t ident)
{
	dring_info_t	*dp;

	for (dp = lane->dringp; dp != NULL; dp = dp->next) {
		if (dp->ident == ident)
			break;
	}

	return (dp);
}

/*
 * Set the default lane attributes. These are copied into
 * the attr msg we send to our peer. If they are not acceptable
 * then (currently) the handshake ends.
 */
static void
vsw_set_lane_attr(vsw_t *vswp, lane_t *lp)
{
	bzero(lp, sizeof (lane_t));

	READ_ENTER(&vswp->if_lockrw);
	ether_copy(&(vswp->if_addr), &(lp->addr));
	RW_EXIT(&vswp->if_lockrw);

	lp->mtu = VSW_MTU;
	lp->addr_type = ADDR_TYPE_MAC;
	lp->xfer_mode = VIO_DRING_MODE;
	lp->ack_freq = 0;	/* for shared mode */
	lp->seq_num = VNET_ISS;
}

/*
 * Verify that the attributes are acceptable.
 *
 * FUTURE: If some attributes are not acceptable, change them
 * to our desired values.
 */
static int
vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port)
{
	int	ret = 0;

	D1(NULL, "vsw_check_attr enter\n");

	/*
	 * Note we currently only support in-band descriptors
	 * and descriptor rings, not packet based transfer (VIO_PKT_MODE)
	 */
	if ((pkt->xfer_mode != VIO_DESC_MODE) &&
	    (pkt->xfer_mode != VIO_DRING_MODE)) {
		D2(NULL, "vsw_check_attr: unknown mode %x\n",
		    pkt->xfer_mode);
		ret = 1;
	}

	/* Only support MAC addresses at the moment. */
	if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) {
		D2(NULL, "vsw_check_attr: invalid addr_type %x, "
		    "or address 0x%llx\n", pkt->addr_type,
		    pkt->addr);
		ret = 1;
	}

	/*
	 * MAC address supplied by device should match that stored
	 * in the vsw-port OBP node. Need to decide what to do if they
	 * don't match, for the moment just warn but don't fail.
	 */
	if (bcmp(&pkt->addr, &port->p_macaddr, ETHERADDRL) != 0) {
		DERR(NULL, "vsw_check_attr: device supplied address "
		    "0x%llx doesn't match node address 0x%llx\n",
		    pkt->addr, port->p_macaddr);
	}

	/*
	 * Ack freq only makes sense in pkt mode, in shared
	 * mode the ring descriptors say whether or not to
	 * send back an ACK.
	 */
	if ((pkt->xfer_mode == VIO_DRING_MODE) &&
	    (pkt->ack_freq > 0)) {
		D2(NULL, "vsw_check_attr: non-zero ack freq "
		    "in SHM mode\n");
		ret = 1;
	}

	/*
	 * Note: for the moment we only support ETHER
	 * frames. This may change in the future.
	 */
	if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) {
		D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n",
		    pkt->mtu);
		ret = 1;
	}

	D1(NULL, "vsw_check_attr exit\n");

	return (ret);
}
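
/*
 * Illustrative sketch (not in the original source): a vnet_attr_msg_t
 * built with the same defaults as vsw_set_lane_attr() above should
 * pass vsw_check_attr(), provided the supplied MAC address is
 * non-zero.  The helper name is hypothetical and only the attribute
 * fields actually tested by vsw_check_attr() are filled in.
 */
static int
vsw_attr_accepted_sketch(vsw_port_t *port)
{
	vnet_attr_msg_t	attr;

	bzero(&attr, sizeof (attr));
	attr.xfer_mode = VIO_DRING_MODE;	/* dring transfer mode */
	attr.addr_type = ADDR_TYPE_MAC;
	attr.ack_freq = 0;			/* must be 0 in dring mode */
	attr.mtu = VSW_MTU;
	bcopy(&port->p_macaddr, &attr.addr, ETHERADDRL);

	/* returns 0 when the attributes are acceptable */
	return (vsw_check_attr(&attr, port));
}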
/*
 * Returns 1 if there is a problem, 0 otherwise.
 */
static int
vsw_check_dring_info(vio_dring_reg_msg_t *pkt)
{
	int	ret = 0;

	D1(NULL, "vsw_check_dring_info enter\n");

	if ((pkt->num_descriptors == 0) ||
	    (pkt->descriptor_size == 0) ||
	    (pkt->ncookies != 1)) {
		DERR(NULL, "vsw_check_dring_info: invalid dring msg");
		ret = 1;
	}

	D1(NULL, "vsw_check_dring_info exit\n");

	return (ret);
}

/*
 * Returns 1 if the two memory cookies match. Otherwise returns 0.
 */
static int
vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2)
{
	if ((m1->addr != m2->addr) ||
	    (m1->size != m2->size)) {
		return (0);
	} else {
		return (1);
	}
}

/*
 * Returns 1 if ring described in reg message matches that
 * described by dring_info structure. Otherwise returns 0.
 */
static int
vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg)
{
	if ((msg->descriptor_size != dp->descriptor_size) ||
	    (msg->num_descriptors != dp->num_descriptors) ||
	    (msg->ncookies != dp->ncookies) ||
	    !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) {
		return (0);
	} else {
		return (1);
	}
}

static caddr_t
vsw_print_ethaddr(uint8_t *a, char *ebuf)
{
	/* caller must supply a buffer of at least 18 bytes */
	(void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
	    a[0], a[1], a[2], a[3], a[4], a[5]);
	return (ebuf);
}

/*
 * Reset and free all the resources associated with
 * the channel.
 */
static void
vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir)
{
	dring_info_t	*dp, *dpp;
	lane_t		*lp = NULL;
	int		rv = 0;

	ASSERT(ldcp != NULL);

	D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id);

	if (dir == INBOUND) {
		D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane"
		    " of channel %lld", __func__, ldcp->ldc_id);
		lp = &ldcp->lane_in;
	} else {
		D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane"
		    " of channel %lld", __func__, ldcp->ldc_id);
		lp = &ldcp->lane_out;
	}

	lp->lstate = VSW_LANE_INACTIV;
	lp->seq_num = VNET_ISS;
	if (lp->dringp) {
		if (dir == INBOUND) {
			dp = lp->dringp;
			while (dp != NULL) {
				dpp = dp->next;
				if (dp->handle != NULL)
					(void) ldc_mem_dring_unmap(dp->handle);
				kmem_free(dp, sizeof (dring_info_t));
				dp = dpp;
			}
		} else {
			/*
			 * unbind, destroy exported dring, free dring struct
			 */
			dp = lp->dringp;
			rv = vsw_free_ring(dp);
		}
		if (rv == 0) {
			lp->dringp = NULL;
		}
	}

	D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
}
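
/*
 * Illustrative sketch (not in the original source): the per-descriptor
 * teardown order used by vsw_free_ring() below - unbind the memory
 * handle if it is still bound, then free the handle itself.  The
 * helper name is hypothetical; the shared data region backing the
 * descriptors is freed once per ring, not per descriptor.
 */
static int
vsw_free_desc_handle_sketch(vsw_private_desc_t *paddr)
{
	int	rv;

	if (paddr->memhandle == NULL)
		return (0);			/* nothing to release */

	if (paddr->bound == 1) {
		rv = ldc_mem_unbind_handle(paddr->memhandle);
		if (rv != 0)
			return (rv);		/* still bound; bail out */
		paddr->bound = 0;
	}

	rv = ldc_mem_free_handle(paddr->memhandle);
	if (rv == 0)
		paddr->memhandle = NULL;
	return (rv);
}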
/*
 * Free ring and all associated resources.
 */
static int
vsw_free_ring(dring_info_t *dp)
{
	vsw_private_desc_t	*paddr = NULL;
	dring_info_t		*dpp;
	int			i, rv = 1;

	while (dp != NULL) {
		mutex_enter(&dp->dlock);
		dpp = dp->next;
		if (dp->priv_addr != NULL) {
			/*
			 * First unbind and free the memory handles
			 * stored in each descriptor within the ring.
			 */
			for (i = 0; i < VSW_RING_NUM_EL; i++) {
				paddr = (vsw_private_desc_t *)
				    dp->priv_addr + i;
				if (paddr->memhandle != NULL) {
					if (paddr->bound == 1) {
						rv = ldc_mem_unbind_handle(
						    paddr->memhandle);

						if (rv != 0) {
							DERR(NULL, "error "
							    "unbinding handle "
							    "for ring 0x%llx "
							    "at pos %d",
							    dp, i);
							mutex_exit(&dp->dlock);
							return (rv);
						}
						paddr->bound = 0;
					}

					rv = ldc_mem_free_handle(
					    paddr->memhandle);
					if (rv != 0) {
						DERR(NULL, "error freeing "
						    "handle for ring "
						    "0x%llx at pos %d",
						    dp, i);
						mutex_exit(&dp->dlock);
						return (rv);
					}
					paddr->memhandle = NULL;
				}
			}
			kmem_free(dp->priv_addr, (sizeof (vsw_private_desc_t)
			    * VSW_RING_NUM_EL));
		}

		/*
		 * Now unbind and destroy the ring itself.
		 */
		if (dp->handle != NULL) {
			(void) ldc_mem_dring_unbind(dp->handle);
			(void) ldc_mem_dring_destroy(dp->handle);
		}

		if (dp->data_addr != NULL) {
			kmem_free(dp->data_addr, dp->data_sz);
		}

		mutex_exit(&dp->dlock);
		mutex_destroy(&dp->dlock);
		kmem_free(dp, sizeof (dring_info_t));

		dp = dpp;
	}
	return (0);
}

/*
 * Debugging routines
 */
static void
display_state(void)
{
	vsw_t		*vswp;
	vsw_port_list_t	*plist;
	vsw_port_t 	*port;
	vsw_ldc_list_t	*ldcl;
	vsw_ldc_t 	*ldcp;

	cmn_err(CE_NOTE, "***** system state *****");

	for (vswp = vsw_head; vswp; vswp = vswp->next) {
		plist = &vswp->plist;
		READ_ENTER(&plist->lockrw);
		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
		    vswp->instance, plist->num_ports);

		for (port = plist->head; port != NULL; port = port->p_next) {
			ldcl = &port->p_ldclist;
			cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
			    port->p_instance, ldcl->num_ldcs);
			READ_ENTER(&ldcl->lockrw);
			ldcp = ldcl->head;
			for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
				cmn_err(CE_CONT, "chan %lu : dev %d : "
				    "status %d : phase %u\n",
				    ldcp->ldc_id, ldcp->dev_class,
				    ldcp->ldc_status, ldcp->hphase);
				cmn_err(CE_CONT, "chan %lu : lsession %lu : "
				    "psession %lu\n",
				    ldcp->ldc_id,
				    ldcp->local_session,
				    ldcp->peer_session);

				cmn_err(CE_CONT, "Inbound lane:\n");
				display_lane(&ldcp->lane_in);
				cmn_err(CE_CONT, "Outbound lane:\n");
				display_lane(&ldcp->lane_out);
			}
			RW_EXIT(&ldcl->lockrw);
		}
		RW_EXIT(&plist->lockrw);
	}
	cmn_err(CE_NOTE, "***** system state *****");
}

static void
display_lane(lane_t *lp)
{
	dring_info_t	*drp;

	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
	    lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
	    lp->addr_type, lp->addr, lp->xfer_mode);
	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);

	cmn_err(CE_CONT, "Dring info:\n");
	for (drp = lp->dringp; drp != NULL; drp = drp->next) {
		cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
		    drp->num_descriptors, drp->descriptor_size);
		cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
		cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
		    (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
		cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
		    drp->ident, drp->end_idx);
		display_ring(drp);
	}
}
static void
display_ring(dring_info_t *dringp)
{
	uint64_t		i;
	uint64_t		priv_count = 0;
	uint64_t		pub_count = 0;
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;

	for (i = 0; i < VSW_RING_NUM_EL; i++) {
		if (dringp->pub_addr != NULL) {
			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;

			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
				pub_count++;
		}

		if (dringp->priv_addr != NULL) {
			priv_addr =
			    (vsw_private_desc_t *)dringp->priv_addr + i;

			if (priv_addr->dstate == VIO_DESC_FREE)
				priv_count++;
		}
	}
	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
	    i, priv_count, pub_count);
}

static void
dump_flags(uint64_t state)
{
	int	i;

	typedef struct flag_name {
		int		flag_val;
		char		*flag_name;
	} flag_name_t;

	flag_name_t	flags[] = {
		{ VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT" },
		{ VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV" },
		{ VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV" },
		{ VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT" },
		{ VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV" },
		{ VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT" },
		{ VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT" },
		{ VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV" },
		{ VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT" },
		{ VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV" },
		{ VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT" },
		{ VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV" },
		{ VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT" },
		{ VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV" },
		{ VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT" },
		{ VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV" },
		{ VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT" },
		{ VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV" },
		{ VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT" },
		{ VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV" },
		{ VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT" },
		{ VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV" },
		{ VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT" },
		{ VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV" },
		{ VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT" },
		{ VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV" },
		{ VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT" },
		{ VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV" },
		{ VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT" },
		{ VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV" },
		{ VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE" }
	};

	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
	for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
		if (state & flags[i].flag_val)
			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
	}
}