1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/taskq.h> 60 #include <sys/note.h> 61 #include <sys/mach_descrip.h> 62 #include <sys/mac.h> 63 #include <sys/mdeg.h> 64 #include <sys/ldc.h> 65 #include <sys/vsw_fdb.h> 66 #include <sys/vsw.h> 67 #include <sys/vio_mailbox.h> 68 #include <sys/vnet_mailbox.h> 69 #include <sys/vnet_common.h> 70 71 /* 72 * Function prototypes. 73 */ 74 static int vsw_attach(dev_info_t *, ddi_attach_cmd_t); 75 static int vsw_detach(dev_info_t *, ddi_detach_cmd_t); 76 static int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 77 static void vsw_get_md_properties(vsw_t *vswp); 78 static int vsw_setup_layer2(vsw_t *); 79 static int vsw_setup_layer3(vsw_t *); 80 81 /* MAC layer routines */ 82 static int vsw_mac_attach(vsw_t *vswp); 83 static void vsw_mac_detach(vsw_t *vswp); 84 static void vsw_notify_cb(void *, mac_notify_type_t); 85 static void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *); 86 static mblk_t *vsw_tx_msg(vsw_t *, mblk_t *); 87 static int vsw_mac_register(vsw_t *); 88 static int vsw_mac_unregister(vsw_t *); 89 static uint64_t vsw_m_stat(void *arg, enum mac_stat); 90 static void vsw_m_stop(void *arg); 91 static int vsw_m_start(void *arg); 92 static int vsw_m_unicst(void *arg, const uint8_t *); 93 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *); 94 static int vsw_m_promisc(void *arg, boolean_t); 95 static mblk_t *vsw_m_tx(void *arg, mblk_t *); 96 static void vsw_m_resources(void *arg); 97 static void vsw_m_ioctl(void *arg, queue_t *q, mblk_t *mp); 98 99 /* MDEG routines */ 100 static void vsw_mdeg_register(vsw_t *vswp); 101 static void vsw_mdeg_unregister(vsw_t *vswp); 102 static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *); 103 104 /* Port add/deletion routines */ 105 static int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 106 static int vsw_port_attach(vsw_t *vswp, int p_instance, 107 uint64_t *ldcids, int nids, struct ether_addr *macaddr); 108 static int vsw_detach_ports(vsw_t *vswp); 109 static int vsw_port_detach(vsw_t *vswp, int p_instance); 110 static int vsw_port_delete(vsw_port_t *port); 111 static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id); 112 static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id); 113 static int vsw_init_ldcs(vsw_port_t *port); 114 static int vsw_uninit_ldcs(vsw_port_t *port); 115 static int vsw_ldc_init(vsw_ldc_t *ldcp); 116 static int vsw_ldc_uninit(vsw_ldc_t *ldcp); 117 static int vsw_drain_ldcs(vsw_port_t *port); 118 static int vsw_drain_port_taskq(vsw_port_t *port); 119 static void vsw_marker_task(void *); 120 static vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 121 static int vsw_plist_del_node(vsw_t *, vsw_port_t *port); 122 123 /* Interrupt routines */ 124 static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg); 125 126 /* Handshake routines */ 127 static void vsw_restart_handshake(vsw_ldc_t *); 128 static int vsw_check_flag(vsw_ldc_t *, int, uint64_t); 129 static void vsw_next_milestone(vsw_ldc_t *); 130 static int vsw_supported_version(vio_ver_msg_t *); 131 132 /* Data processing routines */ 133 static void vsw_process_pkt(void *); 134 static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t); 135 static void vsw_process_ctrl_pkt(void *); 136 static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *); 137 static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *); 138 static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *); 139 static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *); 140 static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *); 141 static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *); 142 static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t); 143 static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *); 144 static void vsw_process_data_raw_pkt(vsw_ldc_t *, void *); 145 static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *); 146 static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t); 147 148 /* Switching/data transmit routines */ 149 static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 150 vsw_port_t *port, mac_resource_handle_t); 151 static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 152 vsw_port_t *port, mac_resource_handle_t); 153 static int vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, 154 vsw_port_t *port); 155 static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, 156 vsw_port_t *port); 157 static int vsw_portsend(vsw_port_t *, mblk_t *); 158 static int vsw_dringsend(vsw_ldc_t *, mblk_t *); 159 static int vsw_descrsend(vsw_ldc_t *, mblk_t *); 160 161 /* Packet creation routines */ 162 static void vsw_send_ver(vsw_ldc_t *); 163 static void vsw_send_attr(vsw_ldc_t *); 164 static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *); 165 static void vsw_send_dring_info(vsw_ldc_t *); 166 static void vsw_send_rdx(vsw_ldc_t *); 167 168 static void vsw_send_msg(vsw_ldc_t *, void *, int); 169 170 /* Forwarding database (FDB) routines */ 171 static int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port); 172 static int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port); 173 static vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *); 174 static int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *); 175 static int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 176 static int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 177 static void vsw_del_addr(uint8_t, void *, uint64_t); 178 static void vsw_del_mcst_port(vsw_port_t *); 179 static void vsw_del_mcst_vsw(vsw_t *); 180 181 /* Dring routines */ 182 static dring_info_t *vsw_create_dring(vsw_ldc_t *); 183 static void vsw_create_privring(vsw_ldc_t *); 184 static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp); 185 static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **, 186 int *); 187 static void vsw_dring_priv2pub(vsw_private_desc_t *); 188 static dring_info_t *vsw_ident2dring(lane_t *, uint64_t); 189 190 static void vsw_set_lane_attr(vsw_t *, lane_t *); 191 static int vsw_check_attr(vnet_attr_msg_t *, vsw_port_t *); 192 static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg); 193 static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *); 194 static int vsw_check_dring_info(vio_dring_reg_msg_t *); 195 196 /* Misc support routines */ 197 static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf); 198 199 static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t); 200 static int vsw_free_ring(dring_info_t *); 201 202 /* Debugging routines */ 203 static void dump_flags(uint64_t); 204 static void display_state(void); 205 static void display_lane(lane_t *); 206 static void display_ring(dring_info_t *); 207 208 int vsw_num_handshakes = 3; /* # of handshake attempts */ 209 int vsw_wretries = 100; /* # of write attempts */ 210 211 /* 212 * mode specific frame switching function 213 */ 214 void (*vsw_switch_frame)(vsw_t *, mblk_t *, int, vsw_port_t *, 215 mac_resource_handle_t); 216 217 static struct cb_ops vsw_cb_ops = { 218 nulldev, /* cb_open */ 219 nulldev, /* cb_close */ 220 nodev, /* cb_strategy */ 221 nodev, /* cb_print */ 222 nodev, /* cb_dump */ 223 nodev, /* cb_read */ 224 nodev, /* cb_write */ 225 nodev, /* cb_ioctl */ 226 nodev, /* cb_devmap */ 227 nodev, /* cb_mmap */ 228 nodev, /* cb_segmap */ 229 nochpoll, /* cb_chpoll */ 230 ddi_prop_op, /* cb_prop_op */ 231 NULL, /* cb_stream */ 232 D_MP, /* cb_flag */ 233 CB_REV, /* rev */ 234 nodev, /* int (*cb_aread)() */ 235 nodev /* int (*cb_awrite)() */ 236 }; 237 238 static struct dev_ops vsw_ops = { 239 DEVO_REV, /* devo_rev */ 240 0, /* devo_refcnt */ 241 vsw_getinfo, /* devo_getinfo */ 242 nulldev, /* devo_identify */ 243 nulldev, /* devo_probe */ 244 vsw_attach, /* devo_attach */ 245 vsw_detach, /* devo_detach */ 246 nodev, /* devo_reset */ 247 &vsw_cb_ops, /* devo_cb_ops */ 248 (struct bus_ops *)NULL, /* devo_bus_ops */ 249 ddi_power /* devo_power */ 250 }; 251 252 extern struct mod_ops mod_driverops; 253 static struct modldrv vswmodldrv = { 254 &mod_driverops, 255 "sun4v Virtual Switch Driver %I%", 256 &vsw_ops, 257 }; 258 259 #define LDC_ENTER_LOCK(ldcp) \ 260 mutex_enter(&((ldcp)->ldc_cblock));\ 261 mutex_enter(&((ldcp)->ldc_txlock)); 262 #define LDC_EXIT_LOCK(ldcp) \ 263 mutex_exit(&((ldcp)->ldc_txlock));\ 264 mutex_exit(&((ldcp)->ldc_cblock)); 265 266 /* Driver soft state ptr */ 267 static void *vsw_state; 268 269 /* 270 * Linked list of "vsw_t" structures - one per instance. 271 */ 272 vsw_t *vsw_head = NULL; 273 krwlock_t vsw_rw; 274 275 /* 276 * Property names 277 */ 278 static char vdev_propname[] = "virtual-device"; 279 static char vsw_propname[] = "virtual-network-switch"; 280 static char physdev_propname[] = "vsw-phys-dev"; 281 static char smode_propname[] = "vsw-switch-mode"; 282 static char macaddr_propname[] = "local-mac-address"; 283 static char remaddr_propname[] = "remote-mac-address"; 284 static char ldcids_propname[] = "ldc-ids"; 285 static char chan_propname[] = "channel-endpoint"; 286 static char id_propname[] = "id"; 287 static char reg_propname[] = "reg"; 288 289 /* supported versions */ 290 static ver_sup_t vsw_versions[] = { {1, 0} }; 291 292 /* 293 * Matching criteria passed to the MDEG to register interest 294 * in changes to 'virtual-device-port' nodes identified by their 295 * 'id' property. 296 */ 297 static md_prop_match_t vport_prop_match[] = { 298 { MDET_PROP_VAL, "id" }, 299 { MDET_LIST_END, NULL } 300 }; 301 302 static mdeg_node_match_t vport_match = { "virtual-device-port", 303 vport_prop_match }; 304 305 /* 306 * Specification of an MD node passed to the MDEG to filter any 307 * 'vport' nodes that do not belong to the specified node. This 308 * template is copied for each vsw instance and filled in with 309 * the appropriate 'cfg-handle' value before being passed to the MDEG. 310 */ 311 static mdeg_prop_spec_t vsw_prop_template[] = { 312 { MDET_PROP_STR, "name", vsw_propname }, 313 { MDET_PROP_VAL, "cfg-handle", NULL }, 314 { MDET_LIST_END, NULL, NULL } 315 }; 316 317 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 318 319 /* 320 * Print debug messages - set to 0x1f to enable all msgs 321 * or 0x0 to turn all off. 322 */ 323 int vswdbg = 0x0; 324 325 /* 326 * debug levels: 327 * 0x01: Function entry/exit tracing 328 * 0x02: Internal function messages 329 * 0x04: Verbose internal messages 330 * 0x08: Warning messages 331 * 0x10: Error messages 332 */ 333 334 static void 335 vswdebug(vsw_t *vswp, const char *fmt, ...) 336 { 337 char buf[512]; 338 va_list ap; 339 340 va_start(ap, fmt); 341 (void) vsprintf(buf, fmt, ap); 342 va_end(ap); 343 344 if (vswp == NULL) 345 cmn_err(CE_CONT, "%s\n", buf); 346 else 347 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 348 } 349 350 /* 351 * For the moment the state dump routines have their own 352 * private flag. 353 */ 354 #define DUMP_STATE 0 355 356 #if DUMP_STATE 357 358 #define DUMP_TAG(tag) \ 359 { \ 360 D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \ 361 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \ 362 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \ 363 } 364 365 #define DUMP_TAG_PTR(tag) \ 366 { \ 367 D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \ 368 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \ 369 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \ 370 } 371 372 #define DUMP_FLAGS(flags) dump_flags(flags); 373 #define DISPLAY_STATE() display_state() 374 375 #else 376 377 #define DUMP_TAG(tag) 378 #define DUMP_TAG_PTR(tag) 379 #define DUMP_FLAGS(state) 380 #define DISPLAY_STATE() 381 382 #endif /* DUMP_STATE */ 383 384 #ifdef DEBUG 385 386 #define D1 \ 387 if (vswdbg & 0x01) \ 388 vswdebug 389 390 #define D2 \ 391 if (vswdbg & 0x02) \ 392 vswdebug 393 394 #define D3 \ 395 if (vswdbg & 0x04) \ 396 vswdebug 397 398 #define DWARN \ 399 if (vswdbg & 0x08) \ 400 vswdebug 401 402 #define DERR \ 403 if (vswdbg & 0x10) \ 404 vswdebug 405 406 #else 407 408 #define DERR if (0) vswdebug 409 #define DWARN if (0) vswdebug 410 #define D1 if (0) vswdebug 411 #define D2 if (0) vswdebug 412 #define D3 if (0) vswdebug 413 414 #endif /* DEBUG */ 415 416 static struct modlinkage modlinkage = { 417 MODREV_1, 418 &vswmodldrv, 419 NULL 420 }; 421 422 int 423 _init(void) 424 { 425 int status; 426 427 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 428 429 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 430 if (status != 0) { 431 return (status); 432 } 433 434 mac_init_ops(&vsw_ops, "vsw"); 435 status = mod_install(&modlinkage); 436 if (status != 0) { 437 ddi_soft_state_fini(&vsw_state); 438 } 439 return (status); 440 } 441 442 int 443 _fini(void) 444 { 445 int status; 446 447 status = mod_remove(&modlinkage); 448 if (status != 0) 449 return (status); 450 mac_fini_ops(&vsw_ops); 451 ddi_soft_state_fini(&vsw_state); 452 453 rw_destroy(&vsw_rw); 454 455 return (status); 456 } 457 458 int 459 _info(struct modinfo *modinfop) 460 { 461 return (mod_info(&modlinkage, modinfop)); 462 } 463 464 static int 465 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 466 { 467 vsw_t *vswp; 468 int smode, instance, i; 469 char hashname[MAXNAMELEN]; 470 char qname[TASKQ_NAMELEN]; 471 int rv = 1; 472 enum { PROG_init = 0x0, PROG_if_lock = 0x1, 473 PROG_fdb = 0x2, PROG_mfdb = 0x4, 474 PROG_report_dev = 0x8, PROG_plist = 0x10, 475 PROG_taskq = 0x20} 476 progress; 477 478 progress = PROG_init; 479 480 switch (cmd) { 481 case DDI_ATTACH: 482 break; 483 case DDI_RESUME: 484 /* nothing to do for this non-device */ 485 return (DDI_SUCCESS); 486 case DDI_PM_RESUME: 487 default: 488 return (DDI_FAILURE); 489 } 490 491 instance = ddi_get_instance(dip); 492 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 493 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 494 return (DDI_FAILURE); 495 } 496 vswp = ddi_get_soft_state(vsw_state, instance); 497 498 if (vswp == NULL) { 499 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 500 goto vsw_attach_fail; 501 } 502 503 vswp->dip = dip; 504 vswp->instance = instance; 505 ddi_set_driver_private(dip, (caddr_t)vswp); 506 507 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 508 509 progress |= PROG_if_lock; 510 511 /* 512 * User specifies (via MD) an array of switching modes in 513 * decreasing order of preference. Default mode is always 514 * layer 2 (mac switching), so init array with that value. 515 */ 516 vswp->smode_idx = 0; 517 for (i = 0; i < NUM_SMODES; i++) 518 vswp->smode[i] = VSW_LAYER2; 519 520 /* 521 * Get the various properties such as physical device name 522 * (vsw-phys-dev), switch mode etc from the MD. 523 */ 524 vsw_get_md_properties(vswp); 525 526 /* setup the unicast forwarding database */ 527 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 528 vswp->instance); 529 D2(vswp, "creating unicast hash table (%s)...", hashname); 530 vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS, 531 mod_hash_null_valdtor, sizeof (void *)); 532 533 progress |= PROG_fdb; 534 535 /* setup the multicast fowarding database */ 536 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 537 vswp->instance); 538 D2(vswp, "creating multicast hash table %s)...", hashname); 539 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 540 vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS, 541 mod_hash_null_valdtor, sizeof (void *)); 542 543 progress |= PROG_mfdb; 544 545 /* 546 * create lock protecting list of multicast addresses 547 * which could come via m_multicst() entry point when plumbed. 548 */ 549 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 550 vswp->mcap = NULL; 551 552 ddi_report_dev(vswp->dip); 553 554 progress |= PROG_report_dev; 555 556 WRITE_ENTER(&vsw_rw); 557 vswp->next = vsw_head; 558 vsw_head = vswp; 559 RW_EXIT(&vsw_rw); 560 561 /* setup the port list */ 562 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 563 vswp->plist.head = NULL; 564 565 progress |= PROG_plist; 566 567 /* 568 * Create the taskq which will process all the VIO 569 * control messages. 570 */ 571 (void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance); 572 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 573 TASKQ_DEFAULTPRI, 0)) == NULL) { 574 cmn_err(CE_WARN, "Unable to create task queue"); 575 goto vsw_attach_fail; 576 } 577 578 progress |= PROG_taskq; 579 580 /* select best switching mode */ 581 for (i = 0; i < NUM_SMODES; i++) { 582 smode = vswp->smode[i]; 583 switch (smode) { 584 case VSW_LAYER2: 585 rv = vsw_setup_layer2(vswp); 586 break; 587 588 case VSW_LAYER2_PROMISC: 589 rv = vsw_setup_layer2(vswp); 590 break; 591 592 case VSW_LAYER3: 593 rv = vsw_setup_layer3(vswp); 594 break; 595 596 default: 597 DERR(vswp, "unknown switch mode"); 598 break; 599 } 600 601 if (rv == 0) { 602 vswp->smode_idx = i; 603 break; 604 } 605 } 606 607 if (rv == 1) { 608 cmn_err(CE_WARN, "Unable to setup switching mode"); 609 goto vsw_attach_fail; 610 } 611 612 D2(vswp, "Operating in mode %d", vswp->smode[vswp->smode_idx]); 613 614 /* 615 * Register with the MAC layer as a network device so 616 * we can be plumbed if desired. 617 * 618 * Do this in both layer 2 and layer 3 mode. 619 */ 620 vswp->if_state &= ~VSW_IF_UP; 621 vswp->if_macp = NULL; 622 vswp->if_mrh = NULL; 623 if (vswp->mdprops & VSW_MD_MACADDR) { 624 if (vsw_mac_register(vswp) != 0) { 625 cmn_err(CE_WARN, "Unable to register as provider " 626 " with MAC layer, continuing with attach"); 627 } 628 } 629 630 /* 631 * Now we have everything setup, register for MD change 632 * events. 633 */ 634 vsw_mdeg_register(vswp); 635 636 return (DDI_SUCCESS); 637 638 vsw_attach_fail: 639 DERR(NULL, "vsw_attach: failed"); 640 641 if (progress & PROG_taskq) 642 ddi_taskq_destroy(vswp->taskq_p); 643 644 if (progress & PROG_plist) 645 rw_destroy(&vswp->plist.lockrw); 646 647 if (progress & PROG_report_dev) { 648 ddi_remove_minor_node(dip, NULL); 649 mutex_destroy(&vswp->mca_lock); 650 } 651 652 if (progress & PROG_mfdb) { 653 mod_hash_destroy_hash(vswp->mfdb); 654 vswp->mfdb = NULL; 655 rw_destroy(&vswp->mfdbrw); 656 } 657 658 if (progress & PROG_fdb) { 659 mod_hash_destroy_hash(vswp->fdb); 660 vswp->fdb = NULL; 661 } 662 663 if (progress & PROG_if_lock) 664 rw_destroy(&vswp->if_lockrw); 665 666 ddi_soft_state_free(vsw_state, instance); 667 return (DDI_FAILURE); 668 } 669 670 static int 671 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 672 { 673 vsw_t **vswpp, *vswp; 674 int instance; 675 676 instance = ddi_get_instance(dip); 677 vswp = ddi_get_soft_state(vsw_state, instance); 678 679 if (vswp == NULL) { 680 return (DDI_FAILURE); 681 } 682 683 switch (cmd) { 684 case DDI_DETACH: 685 break; 686 case DDI_SUSPEND: 687 case DDI_PM_SUSPEND: 688 default: 689 return (DDI_FAILURE); 690 } 691 692 D2(vswp, "detaching instance %d", instance); 693 694 if (vswp->mdprops & VSW_MD_MACADDR) { 695 if (vsw_mac_unregister(vswp) != 0) { 696 cmn_err(CE_WARN, "Unable to detach from MAC layer"); 697 return (DDI_FAILURE); 698 } 699 } 700 rw_destroy(&vswp->if_lockrw); 701 702 vsw_mdeg_unregister(vswp); 703 704 if ((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 705 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) { 706 vsw_mac_detach(vswp); 707 } 708 709 if (vsw_detach_ports(vswp) != 0) { 710 cmn_err(CE_WARN, "Unable to detach ports"); 711 return (DDI_FAILURE); 712 } 713 714 /* 715 * Remove this instance from any entries it may be on in 716 * the hash table by using the list of addresses maintained 717 * in the vsw_t structure. 718 */ 719 vsw_del_mcst_vsw(vswp); 720 721 vswp->mcap = NULL; 722 mutex_destroy(&vswp->mca_lock); 723 724 /* 725 * By now any pending tasks have finished and the underlying 726 * ldc's have been destroyed, so its safe to delete the control 727 * message taskq. 728 */ 729 if (vswp->taskq_p != NULL) 730 ddi_taskq_destroy(vswp->taskq_p); 731 732 /* 733 * At this stage all the data pointers in the hash table 734 * should be NULL, as all the ports have been removed and will 735 * have deleted themselves from the port lists which the data 736 * pointers point to. Hence we can destroy the table using the 737 * default destructors. 738 */ 739 D2(vswp, "vsw_detach: destroying hash tables.."); 740 mod_hash_destroy_hash(vswp->fdb); 741 vswp->fdb = NULL; 742 743 WRITE_ENTER(&vswp->mfdbrw); 744 mod_hash_destroy_hash(vswp->mfdb); 745 vswp->mfdb = NULL; 746 RW_EXIT(&vswp->mfdbrw); 747 rw_destroy(&vswp->mfdbrw); 748 749 ddi_remove_minor_node(dip, NULL); 750 751 rw_destroy(&vswp->plist.lockrw); 752 WRITE_ENTER(&vsw_rw); 753 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { 754 if (*vswpp == vswp) { 755 *vswpp = vswp->next; 756 break; 757 } 758 } 759 RW_EXIT(&vsw_rw); 760 ddi_soft_state_free(vsw_state, instance); 761 762 return (DDI_SUCCESS); 763 } 764 765 static int 766 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 767 { 768 _NOTE(ARGUNUSED(dip)) 769 770 vsw_t *vswp = NULL; 771 dev_t dev = (dev_t)arg; 772 int instance; 773 774 instance = getminor(dev); 775 776 switch (infocmd) { 777 case DDI_INFO_DEVT2DEVINFO: 778 if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) { 779 *result = NULL; 780 return (DDI_FAILURE); 781 } 782 *result = vswp->dip; 783 return (DDI_SUCCESS); 784 785 case DDI_INFO_DEVT2INSTANCE: 786 *result = (void *)(uintptr_t)instance; 787 return (DDI_SUCCESS); 788 789 default: 790 *result = NULL; 791 return (DDI_FAILURE); 792 } 793 } 794 795 /* 796 * Get the properties from our MD node. 797 */ 798 static void 799 vsw_get_md_properties(vsw_t *vswp) 800 { 801 md_t *mdp = NULL; 802 int num_nodes = 0; 803 int len = 0, listsz = 0; 804 int num_vdev = 0; 805 int i, idx; 806 boolean_t found_node = B_FALSE; 807 char *smode = NULL; 808 char *curr_mode = NULL; 809 char *physname = NULL; 810 char *node_name = NULL; 811 char *dev; 812 uint64_t macaddr = 0; 813 uint64_t md_inst, obp_inst; 814 mde_cookie_t *listp = NULL; 815 mde_cookie_t rootnode; 816 817 D1(vswp, "%s: enter", __func__); 818 819 /* 820 * Further down we compare the obp 'reg' property to the 821 * 'cfg-handle' property in the vsw MD node to determine 822 * if the node refers to this particular instance. So if 823 * we can't read the obp value then there is no point 824 * in proceeding further. 825 */ 826 if (ddi_prop_exists(DDI_DEV_T_ANY, vswp->dip, 827 DDI_PROP_DONTPASS, reg_propname) != 1) { 828 cmn_err(CE_WARN, "Unable to read %s property " 829 "from OBP device node", reg_propname); 830 return; 831 } 832 833 obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 834 DDI_PROP_DONTPASS, reg_propname, 0); 835 836 D2(vswp, "%s: obp_inst 0x%llx", __func__, obp_inst); 837 838 if ((mdp = md_get_handle()) == NULL) { 839 DERR(vswp, "%s: unable to init MD", __func__); 840 return; 841 } 842 843 if ((num_nodes = md_node_count(mdp)) <= 0) { 844 DERR(vswp, "%s: invalid number of nodes found %d", 845 __func__, num_nodes); 846 (void) md_fini_handle(mdp); 847 return; 848 } 849 850 D2(vswp, "%s: %d nodes in total in MD", __func__, num_nodes); 851 852 /* allocate enough space for node list */ 853 listsz = num_nodes * sizeof (mde_cookie_t); 854 listp = kmem_zalloc(listsz, KM_SLEEP); 855 856 rootnode = md_root_node(mdp); 857 858 /* Get the list of virtual devices */ 859 num_vdev = md_scan_dag(mdp, rootnode, 860 md_find_name(mdp, vdev_propname), 861 md_find_name(mdp, "fwd"), listp); 862 863 if (num_vdev <= 0) { 864 DERR(vswp, "%s: didn't find any virtual-device nodes in MD", 865 __func__); 866 goto md_prop_exit; 867 } 868 869 D2(vswp, "%s: %d virtual-device nodes found", __func__, num_vdev); 870 871 /* Look for the virtual switch nodes in the list */ 872 for (idx = 0; idx < num_vdev; idx++) { 873 if (md_get_prop_str(mdp, listp[idx], 874 "name", &node_name) != 0) { 875 DERR(vswp, "%s: unable to get node name", __func__); 876 continue; 877 878 } 879 880 if (strcmp(node_name, vsw_propname) == 0) { 881 /* Virtual switch node */ 882 if (md_get_prop_val(mdp, listp[idx], 883 "cfg-handle", &md_inst) != 0) { 884 DERR(vswp, "%s: unable to get cfg-handle from" 885 " node %d", __func__, idx); 886 goto md_prop_exit; 887 } else if (md_inst == obp_inst) { 888 D2(vswp, "%s: found matching node (%d)" 889 " 0x%llx == 0x%llx", __func__, idx, 890 md_inst, obp_inst); 891 found_node = B_TRUE; 892 break; 893 } 894 } 895 } 896 897 if (!found_node) { 898 DWARN(vswp, "%s: couldn't find correct vsw node", __func__); 899 goto md_prop_exit; 900 } 901 902 /* 903 * Now, having found the correct node, get the various properties. 904 */ 905 906 if (md_get_prop_data(mdp, listp[idx], physdev_propname, 907 (uint8_t **)(&physname), &len) != 0) { 908 cmn_err(CE_WARN, "%s: unable to get name(s) of physical " 909 "device(s) from MD", __func__); 910 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 911 cmn_err(CE_WARN, "%s is too long a device name", physname); 912 } else { 913 (void) strncpy(vswp->physname, physname, strlen(physname) + 1); 914 vswp->mdprops |= VSW_MD_PHYSNAME; 915 D2(vswp, "%s: using first device specified (%s)", 916 __func__, vswp->physname); 917 } 918 919 920 #ifdef DEBUG 921 /* 922 * As a temporary measure to aid testing we check to see if there 923 * is a vsw.conf file present. If there is we use the value of the 924 * vsw_physname property in the file as the name of the physical 925 * device, overriding the value from the MD. 926 * 927 * There may be multiple devices listed, but for the moment 928 * we just use the first one. 929 */ 930 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 931 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 932 if ((strlen(dev) + 1) > LIFNAMSIZ) { 933 cmn_err(CE_WARN, "%s is too long a device name", dev); 934 } else { 935 cmn_err(CE_NOTE, "%s: using device name (%s) from " 936 "config file", __func__, dev); 937 938 (void) strncpy(vswp->physname, dev, strlen(dev) + 1); 939 vswp->mdprops |= VSW_MD_PHYSNAME; 940 } 941 942 ddi_prop_free(dev); 943 944 } 945 #endif 946 947 /* local mac address */ 948 if (md_get_prop_val(mdp, listp[idx], 949 macaddr_propname, &macaddr) != 0) { 950 cmn_err(CE_WARN, "%s: unable to get local MAC address", 951 __func__); 952 } else { 953 READ_ENTER(&vswp->if_lockrw); 954 for (i = ETHERADDRL - 1; i >= 0; i--) { 955 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 956 macaddr >>= 8; 957 } 958 RW_EXIT(&vswp->if_lockrw); 959 vswp->mdprops |= VSW_MD_MACADDR; 960 } 961 962 /* 963 * Get the switch-mode property. The modes are listed in 964 * decreasing order of preference, i.e. prefered mode is 965 * first item in list. 966 */ 967 len = 0; 968 if (md_get_prop_data(mdp, listp[idx], smode_propname, 969 (uint8_t **)(&smode), &len) != 0) { 970 /* 971 * Unable to get switch-mode property, so just use 972 * default values which vswp->smode[] array has already 973 * been pre-populated with, namely layer2. 974 */ 975 cmn_err(CE_WARN, "%s: unable to get switch mode property, " 976 "defaulting to layer 2 mode", __func__); 977 } else { 978 i = 0; 979 curr_mode = smode; 980 /* 981 * Modes of operation: 982 * 'switched' - layer 2 switching, underlying HW in 983 * non-promiscuous mode. 984 * 'promiscuous' - layer 2 switching, underlying HW in 985 * promiscuous mode. 986 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 987 * in non-promiscuous mode. 988 */ 989 while ((curr_mode < (smode + len)) && (i < NUM_SMODES)) { 990 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 991 if (strcmp(curr_mode, "switched") == 0) 992 vswp->smode[i] = VSW_LAYER2; 993 else if (strcmp(curr_mode, "promiscuous") == 0) 994 vswp->smode[i] = VSW_LAYER2_PROMISC; 995 else if (strcmp(curr_mode, "routed") == 0) 996 vswp->smode[i] = VSW_LAYER3; 997 else { 998 DERR(vswp, "%s: unknown mode %s", 999 __func__, curr_mode); 1000 /* default to layer 2 */ 1001 vswp->smode[i] = VSW_LAYER2; 1002 } 1003 curr_mode += strlen(curr_mode) + 1; 1004 i++; 1005 } 1006 1007 vswp->mdprops |= VSW_MD_SMODE; 1008 } 1009 1010 md_prop_exit: 1011 (void) md_fini_handle(mdp); 1012 1013 kmem_free(listp, listsz); 1014 1015 D1(vswp, "%s: exit", __func__); 1016 } 1017 1018 static int 1019 vsw_setup_layer2(vsw_t *vswp) 1020 { 1021 int rv = 0; 1022 1023 D1(vswp, "%s: enter", __func__); 1024 1025 vsw_switch_frame = vsw_switch_l2_frame; 1026 1027 /* 1028 * Attempt to link into the MAC layer so we can get 1029 * and send packets out over the physical adapter. 1030 */ 1031 if (vswp->mdprops & VSW_MD_PHYSNAME) { 1032 if (vsw_mac_attach(vswp) != 0) { 1033 /* 1034 * Registration with the MAC layer has failed, 1035 * so return 1 so that can fall back to next 1036 * prefered switching method. 1037 */ 1038 cmn_err(CE_WARN, "!unable to join as MAC layer " 1039 "client, continuing with attach"); 1040 rv = 1; 1041 } 1042 } else { 1043 /* No physical device name found in MD */ 1044 DERR(vswp, "%s: no physical device name specified", __func__); 1045 rv = 1; 1046 } 1047 1048 D1(vswp, "%s: exit", __func__); 1049 1050 return (rv); 1051 } 1052 1053 static int 1054 vsw_setup_layer3(vsw_t *vswp) 1055 { 1056 D1(vswp, "%s: enter", __func__); 1057 1058 D2(vswp, "%s: operating in layer 3 mode", __func__); 1059 vsw_switch_frame = vsw_switch_l3_frame; 1060 1061 D1(vswp, "%s: exit", __func__); 1062 1063 return (0); 1064 } 1065 1066 /* 1067 * Link into the MAC layer to gain access to the services provided by 1068 * the underlying physical device driver (which should also have 1069 * registered with the MAC layer). 1070 * 1071 * Only when in layer 2 mode. 1072 */ 1073 static int 1074 vsw_mac_attach(vsw_t *vswp) 1075 { 1076 D1(vswp, "vsw_mac_attach: enter"); 1077 1078 vswp->mh = NULL; 1079 vswp->mrh = NULL; 1080 vswp->mnh = NULL; 1081 1082 ASSERT(vswp->mdprops & VSW_MD_PHYSNAME); 1083 1084 if ((mac_open(vswp->physname, 0, &vswp->mh)) != 0) { 1085 cmn_err(CE_WARN, "mac_open %s failed", vswp->physname); 1086 goto mac_fail_exit; 1087 } 1088 1089 D2(vswp, "vsw_mac_attach: using device %s", vswp->physname); 1090 1091 /* register for changes in the interface */ 1092 vswp->mnh = mac_notify_add(vswp->mh, vsw_notify_cb, (void *)vswp); 1093 1094 /* register our rx callback function */ 1095 vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp); 1096 1097 /* get the MAC tx fn */ 1098 vswp->txinfo = mac_tx_get(vswp->mh); 1099 1100 /* start the interface */ 1101 if (mac_start(vswp->mh) != 0) { 1102 cmn_err(CE_WARN, "could not start mac interface"); 1103 goto mac_fail_exit; 1104 } 1105 1106 /* get and store original promisc setting */ 1107 vswp->init_promisc = mac_promisc_get(vswp->mh, MAC_DEVPROMISC); 1108 1109 /* 1110 * FUTURE: When we have the ability to set multiple unicast 1111 * mac address then we won't have to set the device into 1112 * promisc mode, but for the moment its the only way we. 1113 * can see pkts that logical domains we are serving are 1114 * interested in. 1115 */ 1116 if ((vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) && 1117 (vswp->init_promisc == B_FALSE)) { 1118 DERR(vswp, "vsw_mac_attach: enabling promisc mode.."); 1119 1120 if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) { 1121 DERR(vswp, "vsw_mac_attach: unable to set device" 1122 " into promiscuous mode"); 1123 goto mac_fail_exit; 1124 } 1125 } 1126 1127 D1(vswp, "vsw_mac_attach: exit"); 1128 return (0); 1129 1130 mac_fail_exit: 1131 if (vswp->mh != NULL) { 1132 mac_promisc_set(vswp->mh, vswp->init_promisc, MAC_DEVPROMISC); 1133 if (vswp->mrh != NULL) 1134 mac_rx_remove(vswp->mh, vswp->mrh); 1135 1136 if (vswp->mnh != NULL) 1137 mac_notify_remove(vswp->mh, vswp->mnh); 1138 1139 mac_close(vswp->mh); 1140 } 1141 1142 vswp->mrh = NULL; 1143 vswp->mnh = NULL; 1144 vswp->mh = NULL; 1145 vswp->txinfo = NULL; 1146 1147 D1(vswp, "vsw_mac_attach: fail exit"); 1148 return (1); 1149 } 1150 1151 static void 1152 vsw_mac_detach(vsw_t *vswp) 1153 { 1154 D1(vswp, "vsw_mac_detach: enter"); 1155 1156 if (vswp->mh != NULL) { 1157 /* restore promisc to original setting */ 1158 mac_promisc_set(vswp->mh, vswp->init_promisc, MAC_DEVPROMISC); 1159 if (vswp->mrh != NULL) 1160 mac_rx_remove(vswp->mh, vswp->mrh); 1161 1162 if (vswp->mnh != NULL) 1163 mac_notify_remove(vswp->mh, vswp->mnh); 1164 1165 mac_close(vswp->mh); 1166 } 1167 1168 vswp->mrh = NULL; 1169 vswp->mnh = NULL; 1170 vswp->mh = NULL; 1171 vswp->txinfo = NULL; 1172 1173 D1(vswp, "vsw_mac_detach: exit"); 1174 } 1175 1176 /* 1177 * Get notified of changes to the interface. 1178 * 1179 * For the moment we brute force the interface back 1180 * into promisc mode if it is unset (e.g. by snoop). 1181 * When we have the ability to set multiple mac addresses, 1182 * we will need to see if this is necessary. 1183 */ 1184 static void 1185 vsw_notify_cb(void *arg, mac_notify_type_t type) 1186 { 1187 vsw_t *vswp = (vsw_t *)arg; 1188 1189 switch (type) { 1190 case MAC_NOTE_PROMISC: 1191 vswp->txinfo = mac_tx_get(vswp->mh); 1192 if (mac_promisc_get(vswp->mh, MAC_DEVPROMISC) == B_TRUE) { 1193 D2(vswp, "%s: still in PROMISC mode", __func__); 1194 } else { 1195 D2(vswp, "%s: now in NON-PROMISC mode", __func__); 1196 D2(vswp, "...re-enabling"); 1197 mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC); 1198 } 1199 break; 1200 default: 1201 break; 1202 } 1203 } 1204 1205 /* 1206 * receive callback routine. Invoked by MAC layer when there 1207 * are pkts being passed up from physical device. 1208 * 1209 * PERF: It may be more efficient when the card is in promisc 1210 * mode to check the dest address of the pkts here (against 1211 * the FDB) rather than checking later. Needs to be investigated. 1212 */ 1213 static void 1214 vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp) 1215 { 1216 _NOTE(ARGUNUSED(mrh)) 1217 1218 vsw_t *vswp = (vsw_t *)arg; 1219 1220 ASSERT(vswp != NULL); 1221 1222 D1(vswp, "vsw_rx_cb: enter"); 1223 1224 /* switch the chain of packets received */ 1225 vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL); 1226 1227 D1(vswp, "vsw_rx_cb: exit"); 1228 } 1229 1230 /* 1231 * Send a message out over the physical device via the MAC layer. 1232 * 1233 * Returns any mblks that it was unable to transmit. 1234 */ 1235 static mblk_t * 1236 vsw_tx_msg(vsw_t *vswp, mblk_t *mp) 1237 { 1238 const mac_txinfo_t *mtp; 1239 mblk_t *nextp; 1240 1241 if (vswp->mh == NULL) { 1242 DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail"); 1243 return (mp); 1244 } else { 1245 for (;;) { 1246 nextp = mp->b_next; 1247 mp->b_next = NULL; 1248 1249 mtp = vswp->txinfo; 1250 if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) { 1251 mp->b_next = nextp; 1252 break; 1253 } 1254 1255 if ((mp = nextp) == NULL) 1256 break; 1257 1258 } 1259 1260 } 1261 1262 return (mp); 1263 } 1264 1265 /* 1266 * Register with the MAC layer as a network device, so we 1267 * can be plumbed if necessary. 1268 */ 1269 static int 1270 vsw_mac_register(vsw_t *vswp) 1271 { 1272 mac_t *macp = NULL; 1273 mac_info_t *mip = NULL; 1274 int rv = 0; 1275 1276 D1(vswp, "%s: enter", __func__); 1277 1278 macp = kmem_zalloc(sizeof (mac_t), KM_SLEEP); 1279 1280 /* 1281 * Setup the m_info fields. 1282 */ 1283 mip = &(macp->m_info); 1284 mip->mi_media = DL_ETHER; 1285 mip->mi_sdu_min = 0; 1286 mip->mi_sdu_max = ETHERMTU; 1287 mip->mi_cksum = 0; 1288 mip->mi_poll = DL_CAPAB_POLL; 1289 1290 mip->mi_addr_length = ETHERADDRL; 1291 bcopy(ðerbroadcastaddr, mip->mi_brdcst_addr, ETHERADDRL); 1292 1293 READ_ENTER(&vswp->if_lockrw); 1294 bcopy(&vswp->if_addr, mip->mi_unicst_addr, ETHERADDRL); 1295 RW_EXIT(&vswp->if_lockrw); 1296 1297 MAC_STAT_MIB(mip->mi_stat); 1298 MAC_STAT_ETHER(mip->mi_stat); 1299 1300 /* entry points */ 1301 macp->m_stat = vsw_m_stat; 1302 macp->m_stop = vsw_m_stop; 1303 macp->m_start = vsw_m_start; 1304 macp->m_unicst = vsw_m_unicst; 1305 macp->m_multicst = vsw_m_multicst; 1306 macp->m_promisc = vsw_m_promisc; 1307 macp->m_tx = vsw_m_tx; 1308 macp->m_resources = vsw_m_resources; 1309 macp->m_ioctl = vsw_m_ioctl; 1310 1311 macp->m_port = 0; 1312 macp->m_dip = vswp->dip; 1313 macp->m_ident = MAC_IDENT; 1314 macp->m_driver = vswp; 1315 1316 vswp->if_macp = macp; 1317 1318 /* register */ 1319 rv = mac_register(macp); 1320 1321 D1(vswp, "%s: exit", __func__); 1322 1323 return (rv); 1324 } 1325 1326 static int 1327 vsw_mac_unregister(vsw_t *vswp) 1328 { 1329 int rv = 0; 1330 1331 D1(vswp, "%s: enter", __func__); 1332 1333 WRITE_ENTER(&vswp->if_lockrw); 1334 1335 if (vswp->if_macp != NULL) { 1336 rv = mac_unregister(vswp->if_macp); 1337 if (rv != 0) { 1338 DWARN(vswp, "%s: unable to unregister from MAC " 1339 "framework", __func__); 1340 1341 RW_EXIT(&vswp->if_lockrw); 1342 D1(vswp, "%s: fail exit", __func__); 1343 return (rv); 1344 } 1345 1346 /* mark i/f as down and promisc off */ 1347 vswp->if_state &= ~VSW_IF_UP; 1348 1349 kmem_free(vswp->if_macp, sizeof (mac_t)); 1350 vswp->if_macp = NULL; 1351 } 1352 RW_EXIT(&vswp->if_lockrw); 1353 1354 D1(vswp, "%s: exit", __func__); 1355 1356 return (rv); 1357 } 1358 1359 static uint64_t 1360 vsw_m_stat(void *arg, enum mac_stat stat) 1361 { 1362 vsw_t *vswp = (vsw_t *)arg; 1363 const mac_info_t *mip; 1364 1365 D1(vswp, "%s: enter", __func__); 1366 1367 if (vswp->mh != NULL) 1368 mip = mac_info(vswp->mh); 1369 else 1370 return (0); 1371 1372 if (!mip->mi_stat[stat]) 1373 return (0); 1374 1375 /* return stats from underlying device */ 1376 return (mac_stat_get(vswp->mh, stat)); 1377 1378 } 1379 1380 static void 1381 vsw_m_stop(void *arg) 1382 { 1383 vsw_t *vswp = (vsw_t *)arg; 1384 1385 D1(vswp, "%s: enter", __func__); 1386 1387 WRITE_ENTER(&vswp->if_lockrw); 1388 vswp->if_state &= ~VSW_IF_UP; 1389 RW_EXIT(&vswp->if_lockrw); 1390 1391 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1392 } 1393 1394 static int 1395 vsw_m_start(void *arg) 1396 { 1397 vsw_t *vswp = (vsw_t *)arg; 1398 1399 D1(vswp, "%s: enter", __func__); 1400 1401 WRITE_ENTER(&vswp->if_lockrw); 1402 vswp->if_state |= VSW_IF_UP; 1403 RW_EXIT(&vswp->if_lockrw); 1404 1405 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1406 return (0); 1407 } 1408 1409 /* 1410 * Change the local interface address. 1411 */ 1412 static int 1413 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1414 { 1415 vsw_t *vswp = (vsw_t *)arg; 1416 1417 D1(vswp, "%s: enter", __func__); 1418 1419 WRITE_ENTER(&vswp->if_lockrw); 1420 ether_copy(macaddr, &vswp->if_addr); 1421 RW_EXIT(&vswp->if_lockrw); 1422 1423 D1(vswp, "%s: exit", __func__); 1424 1425 return (0); 1426 } 1427 1428 static int 1429 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1430 { 1431 vsw_t *vswp = (vsw_t *)arg; 1432 mcst_addr_t *mcst_p = NULL; 1433 uint64_t addr = 0x0; 1434 int i; 1435 1436 D1(vswp, "%s: enter", __func__); 1437 1438 /* 1439 * Convert address into form that can be used 1440 * as hash table key. 1441 */ 1442 for (i = 0; i < ETHERADDRL; i++) { 1443 addr = (addr << 8) | mca[i]; 1444 } 1445 1446 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1447 1448 if (add) { 1449 D2(vswp, "%s: adding multicast", __func__); 1450 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1451 /* 1452 * Update the list of multicast addresses 1453 * contained within the vsw_t structure to 1454 * include this new one. 1455 */ 1456 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1457 if (mcst_p == NULL) { 1458 DERR(vswp, "%s unable to alloc mem", __func__); 1459 return (1); 1460 } 1461 mcst_p->addr = addr; 1462 1463 mutex_enter(&vswp->mca_lock); 1464 mcst_p->nextp = vswp->mcap; 1465 vswp->mcap = mcst_p; 1466 mutex_exit(&vswp->mca_lock); 1467 1468 /* 1469 * Call into the underlying driver to program the 1470 * address into HW. 1471 * 1472 * Note: 1473 * Can safely ignore the return value as the card 1474 * will for the moment always be in promisc mode. 1475 * When we can program multiple MAC addresses into the 1476 * HW then we will need to care about the return 1477 * value here. 1478 */ 1479 if (vswp->mh != NULL) 1480 (void) mac_multicst_add(vswp->mh, mca); 1481 } 1482 } else { 1483 D2(vswp, "%s: removing multicast", __func__); 1484 /* 1485 * Remove the address from the hash table.. 1486 */ 1487 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1488 1489 /* 1490 * ..and then from the list maintained in the 1491 * vsw_t structure. 1492 */ 1493 vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1494 1495 if (vswp->mh != NULL) 1496 (void) mac_multicst_remove(vswp->mh, mca); 1497 } 1498 } 1499 1500 D1(vswp, "%s: exit", __func__); 1501 1502 return (0); 1503 } 1504 1505 static int 1506 vsw_m_promisc(void *arg, boolean_t on) 1507 { 1508 vsw_t *vswp = (vsw_t *)arg; 1509 1510 D1(vswp, "%s: enter", __func__); 1511 1512 WRITE_ENTER(&vswp->if_lockrw); 1513 if (on) 1514 vswp->if_state |= VSW_IF_PROMISC; 1515 else 1516 vswp->if_state &= ~VSW_IF_PROMISC; 1517 RW_EXIT(&vswp->if_lockrw); 1518 1519 D1(vswp, "%s: exit", __func__); 1520 1521 return (0); 1522 } 1523 1524 static mblk_t * 1525 vsw_m_tx(void *arg, mblk_t *mp) 1526 { 1527 vsw_t *vswp = (vsw_t *)arg; 1528 1529 D1(vswp, "%s: enter", __func__); 1530 1531 vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1532 1533 D1(vswp, "%s: exit", __func__); 1534 1535 return (NULL); 1536 } 1537 1538 static void 1539 vsw_m_resources(void *arg) 1540 { 1541 vsw_t *vswp = (vsw_t *)arg; 1542 mac_rx_fifo_t mrf; 1543 1544 D1(vswp, "%s: enter", __func__); 1545 1546 mrf.mrf_type = MAC_RX_FIFO; 1547 mrf.mrf_blank = NULL; 1548 mrf.mrf_arg = (void *)vswp; 1549 mrf.mrf_normal_blank_time = 0; 1550 mrf.mrf_normal_pkt_count = 0; 1551 1552 WRITE_ENTER(&vswp->if_lockrw); 1553 vswp->if_mrh = mac_resource_add(vswp->if_macp, (mac_resource_t *)&mrf); 1554 RW_EXIT(&vswp->if_lockrw); 1555 1556 D1(vswp, "%s: exit", __func__); 1557 } 1558 1559 static void 1560 vsw_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 1561 { 1562 vsw_t *vswp = (vsw_t *)arg; 1563 1564 D1(vswp, "%s: enter", __func__); 1565 1566 miocnak(q, mp, 0, ENOTSUP); 1567 1568 D1(vswp, "%s: exit", __func__); 1569 } 1570 1571 /* 1572 * Register for machine description (MD) updates. 1573 */ 1574 static void 1575 vsw_mdeg_register(vsw_t *vswp) 1576 { 1577 mdeg_prop_spec_t *pspecp; 1578 mdeg_node_spec_t *inst_specp; 1579 mdeg_handle_t mdeg_hdl; 1580 size_t templatesz; 1581 int inst, rv; 1582 1583 D1(vswp, "%s: enter", __func__); 1584 1585 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1586 DDI_PROP_DONTPASS, reg_propname, -1); 1587 if (inst == -1) { 1588 DERR(vswp, "%s: unable to get %s property", 1589 __func__, reg_propname); 1590 return; 1591 } 1592 1593 D2(vswp, "%s: instance %d registering with mdeg", __func__, inst); 1594 1595 /* 1596 * Allocate and initialize a per-instance copy 1597 * of the global property spec array that will 1598 * uniquely identify this vsw instance. 1599 */ 1600 templatesz = sizeof (vsw_prop_template); 1601 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1602 1603 bcopy(vsw_prop_template, pspecp, templatesz); 1604 1605 VSW_SET_MDEG_PROP_INST(pspecp, inst); 1606 1607 /* initialize the complete prop spec structure */ 1608 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1609 inst_specp->namep = "virtual-device"; 1610 inst_specp->specp = pspecp; 1611 1612 /* perform the registration */ 1613 rv = mdeg_register(inst_specp, &vport_match, vsw_mdeg_cb, 1614 (void *)vswp, &mdeg_hdl); 1615 1616 if (rv != MDEG_SUCCESS) { 1617 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1618 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1619 kmem_free(pspecp, templatesz); 1620 return; 1621 } 1622 1623 /* save off data that will be needed later */ 1624 vswp->inst_spec = inst_specp; 1625 vswp->mdeg_hdl = mdeg_hdl; 1626 1627 D1(vswp, "%s: exit", __func__); 1628 } 1629 1630 static void 1631 vsw_mdeg_unregister(vsw_t *vswp) 1632 { 1633 D1(vswp, "vsw_mdeg_unregister: enter"); 1634 1635 (void) mdeg_unregister(vswp->mdeg_hdl); 1636 1637 if (vswp->inst_spec->specp != NULL) { 1638 (void) kmem_free(vswp->inst_spec->specp, 1639 sizeof (vsw_prop_template)); 1640 vswp->inst_spec->specp = NULL; 1641 } 1642 1643 if (vswp->inst_spec != NULL) { 1644 (void) kmem_free(vswp->inst_spec, 1645 sizeof (mdeg_node_spec_t)); 1646 vswp->inst_spec = NULL; 1647 } 1648 1649 D1(vswp, "vsw_mdeg_unregister: exit"); 1650 } 1651 1652 static int 1653 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1654 { 1655 vsw_t *vswp; 1656 int idx; 1657 md_t *mdp; 1658 mde_cookie_t node; 1659 uint64_t inst; 1660 1661 if (resp == NULL) 1662 return (MDEG_FAILURE); 1663 1664 vswp = (vsw_t *)cb_argp; 1665 1666 D1(vswp, "%s: added %d : removed %d : matched %d", 1667 __func__, resp->added.nelem, resp->removed.nelem, 1668 resp->match_prev.nelem); 1669 1670 /* process added ports */ 1671 for (idx = 0; idx < resp->added.nelem; idx++) { 1672 mdp = resp->added.mdp; 1673 node = resp->added.mdep[idx]; 1674 1675 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1676 1677 if (vsw_port_add(vswp, mdp, &node) != 0) { 1678 cmn_err(CE_WARN, "Unable to add new port (0x%lx)", 1679 node); 1680 } 1681 } 1682 1683 /* process removed ports */ 1684 for (idx = 0; idx < resp->removed.nelem; idx++) { 1685 mdp = resp->removed.mdp; 1686 node = resp->removed.mdep[idx]; 1687 1688 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1689 DERR(vswp, "%s: prop(%s) not found port(%d)", 1690 __func__, id_propname, idx); 1691 continue; 1692 } 1693 1694 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1695 1696 if (vsw_port_detach(vswp, inst) != 0) { 1697 cmn_err(CE_WARN, "Unable to remove port %ld", inst); 1698 } 1699 } 1700 1701 /* 1702 * Currently no support for updating already active ports. 1703 * So, ignore the match_curr and match_priv arrays for now. 1704 */ 1705 1706 D1(vswp, "%s: exit", __func__); 1707 1708 return (MDEG_SUCCESS); 1709 } 1710 1711 /* 1712 * Add a new port to the system. 1713 * 1714 * Returns 0 on success, 1 on failure. 1715 */ 1716 int 1717 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 1718 { 1719 uint64_t ldc_id; 1720 uint8_t *addrp; 1721 int i, addrsz; 1722 int num_nodes = 0, nchan = 0; 1723 int listsz = 0; 1724 mde_cookie_t *listp = NULL; 1725 struct ether_addr ea; 1726 uint64_t macaddr; 1727 uint64_t inst = 0; 1728 vsw_port_t *port; 1729 1730 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 1731 DWARN(vswp, "%s: prop(%s) not found", __func__, 1732 id_propname); 1733 return (1); 1734 } 1735 1736 /* 1737 * Find the channel endpoint node(s) (which should be under this 1738 * port node) which contain the channel id(s). 1739 */ 1740 if ((num_nodes = md_node_count(mdp)) <= 0) { 1741 DERR(vswp, "%s: invalid number of nodes found (%d)", 1742 __func__, num_nodes); 1743 return (1); 1744 } 1745 1746 /* allocate enough space for node list */ 1747 listsz = num_nodes * sizeof (mde_cookie_t); 1748 listp = kmem_zalloc(listsz, KM_SLEEP); 1749 1750 nchan = md_scan_dag(mdp, *node, 1751 md_find_name(mdp, chan_propname), 1752 md_find_name(mdp, "fwd"), listp); 1753 1754 if (nchan <= 0) { 1755 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 1756 kmem_free(listp, listsz); 1757 return (1); 1758 } 1759 1760 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 1761 1762 /* use property from first node found */ 1763 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 1764 DWARN(vswp, "%s: prop(%s) not found\n", __func__, 1765 id_propname); 1766 kmem_free(listp, listsz); 1767 return (1); 1768 } 1769 1770 /* don't need list any more */ 1771 kmem_free(listp, listsz); 1772 1773 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 1774 1775 /* read mac-address property */ 1776 if (md_get_prop_data(mdp, *node, remaddr_propname, 1777 &addrp, &addrsz)) { 1778 DWARN(vswp, "%s: prop(%s) not found", 1779 __func__, remaddr_propname); 1780 return (1); 1781 } 1782 1783 if (addrsz < ETHERADDRL) { 1784 DWARN(vswp, "%s: invalid address size", __func__); 1785 return (1); 1786 } 1787 1788 macaddr = *((uint64_t *)addrp); 1789 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 1790 1791 for (i = ETHERADDRL - 1; i >= 0; i--) { 1792 ea.ether_addr_octet[i] = macaddr & 0xFF; 1793 macaddr >>= 8; 1794 } 1795 1796 if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) { 1797 DERR(vswp, "%s: failed to attach port", __func__); 1798 return (1); 1799 } 1800 1801 port = vsw_lookup_port(vswp, (int)inst); 1802 1803 /* just successfuly created the port, so it should exist */ 1804 ASSERT(port != NULL); 1805 1806 return (0); 1807 } 1808 1809 /* 1810 * Attach the specified port. 1811 * 1812 * Returns 0 on success, 1 on failure. 1813 */ 1814 static int 1815 vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids, 1816 struct ether_addr *macaddr) 1817 { 1818 vsw_port_list_t *plist = &vswp->plist; 1819 vsw_port_t *port, **prev_port; 1820 int i; 1821 1822 D1(vswp, "%s: enter : port %d", __func__, p_instance); 1823 1824 /* port already exists? */ 1825 READ_ENTER(&plist->lockrw); 1826 for (port = plist->head; port != NULL; port = port->p_next) { 1827 if (port->p_instance == p_instance) { 1828 DWARN(vswp, "%s: port instance %d already attached", 1829 __func__, p_instance); 1830 RW_EXIT(&plist->lockrw); 1831 return (1); 1832 } 1833 } 1834 RW_EXIT(&plist->lockrw); 1835 1836 port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 1837 port->p_vswp = vswp; 1838 port->p_instance = p_instance; 1839 port->p_ldclist.num_ldcs = 0; 1840 port->p_ldclist.head = NULL; 1841 1842 rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL); 1843 1844 mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL); 1845 mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL); 1846 1847 mutex_init(&port->ref_lock, NULL, MUTEX_DRIVER, NULL); 1848 cv_init(&port->ref_cv, NULL, CV_DRIVER, NULL); 1849 1850 mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL); 1851 cv_init(&port->state_cv, NULL, CV_DRIVER, NULL); 1852 port->state = VSW_PORT_INIT; 1853 1854 if (nids > VSW_PORT_MAX_LDCS) { 1855 D2(vswp, "%s: using first of %d ldc ids", 1856 __func__, nids); 1857 nids = VSW_PORT_MAX_LDCS; 1858 } 1859 1860 D2(vswp, "%s: %d nids", __func__, nids); 1861 for (i = 0; i < nids; i++) { 1862 D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]); 1863 if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) { 1864 DERR(vswp, "%s: ldc_attach failed", __func__); 1865 1866 rw_destroy(&port->p_ldclist.lockrw); 1867 1868 cv_destroy(&port->ref_cv); 1869 mutex_destroy(&port->ref_lock); 1870 1871 cv_destroy(&port->state_cv); 1872 mutex_destroy(&port->state_lock); 1873 1874 mutex_destroy(&port->tx_lock); 1875 mutex_destroy(&port->mca_lock); 1876 kmem_free(port, sizeof (vsw_port_t)); 1877 return (1); 1878 } 1879 } 1880 1881 ether_copy(macaddr, &port->p_macaddr); 1882 1883 WRITE_ENTER(&plist->lockrw); 1884 1885 /* create the fdb entry for this port/mac address */ 1886 (void) vsw_add_fdb(vswp, port); 1887 1888 /* link it into the list of ports for this vsw instance */ 1889 prev_port = (vsw_port_t **)(&plist->head); 1890 port->p_next = *prev_port; 1891 *prev_port = port; 1892 plist->num_ports++; 1893 RW_EXIT(&plist->lockrw); 1894 1895 /* 1896 * Initialise the port and any ldc's under it. 1897 */ 1898 (void) vsw_init_ldcs(port); 1899 1900 D1(vswp, "%s: exit", __func__); 1901 return (0); 1902 } 1903 1904 /* 1905 * Detach the specified port. 1906 * 1907 * Returns 0 on success, 1 on failure. 1908 */ 1909 static int 1910 vsw_port_detach(vsw_t *vswp, int p_instance) 1911 { 1912 vsw_port_t *port = NULL; 1913 vsw_port_list_t *plist = &vswp->plist; 1914 1915 D1(vswp, "%s: enter: port id %d", __func__, p_instance); 1916 1917 WRITE_ENTER(&plist->lockrw); 1918 1919 if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) { 1920 RW_EXIT(&plist->lockrw); 1921 return (1); 1922 } 1923 1924 if (vsw_plist_del_node(vswp, port)) { 1925 RW_EXIT(&plist->lockrw); 1926 return (1); 1927 } 1928 1929 /* Remove the fdb entry for this port/mac address */ 1930 (void) vsw_del_fdb(vswp, port); 1931 1932 /* Remove any multicast addresses.. */ 1933 vsw_del_mcst_port(port); 1934 1935 /* 1936 * No longer need to hold lock on port list now that we 1937 * have unlinked the target port from the list. 1938 */ 1939 RW_EXIT(&plist->lockrw); 1940 1941 if (vsw_port_delete(port)) { 1942 return (1); 1943 } 1944 1945 D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance); 1946 return (0); 1947 } 1948 1949 /* 1950 * Detach all active ports. 1951 * 1952 * Returns 0 on success, 1 on failure. 1953 */ 1954 static int 1955 vsw_detach_ports(vsw_t *vswp) 1956 { 1957 vsw_port_list_t *plist = &vswp->plist; 1958 vsw_port_t *port = NULL; 1959 1960 D1(vswp, "%s: enter", __func__); 1961 1962 WRITE_ENTER(&plist->lockrw); 1963 1964 while ((port = plist->head) != NULL) { 1965 if (vsw_plist_del_node(vswp, port)) { 1966 DERR(vswp, "%s: Error deleting port %d" 1967 " from port list", __func__, 1968 port->p_instance); 1969 RW_EXIT(&plist->lockrw); 1970 return (1); 1971 } 1972 1973 /* Remove the fdb entry for this port/mac address */ 1974 (void) vsw_del_fdb(vswp, port); 1975 1976 /* Remove any multicast addresses.. */ 1977 vsw_del_mcst_port(port); 1978 1979 /* 1980 * No longer need to hold the lock on the port list 1981 * now that we have unlinked the target port from the 1982 * list. 1983 */ 1984 RW_EXIT(&plist->lockrw); 1985 if (vsw_port_delete(port)) { 1986 DERR(vswp, "%s: Error deleting port %d", 1987 __func__, port->p_instance); 1988 return (1); 1989 } 1990 WRITE_ENTER(&plist->lockrw); 1991 } 1992 RW_EXIT(&plist->lockrw); 1993 1994 D1(vswp, "%s: exit", __func__); 1995 1996 return (0); 1997 } 1998 1999 /* 2000 * Delete the specified port. 2001 * 2002 * Returns 0 on success, 1 on failure. 2003 */ 2004 static int 2005 vsw_port_delete(vsw_port_t *port) 2006 { 2007 vsw_ldc_list_t *ldcl; 2008 vsw_t *vswp = port->p_vswp; 2009 2010 D1(vswp, "%s: enter : port id %d", __func__, port->p_instance); 2011 2012 (void) vsw_uninit_ldcs(port); 2013 2014 /* 2015 * Wait for any pending ctrl msg tasks which reference this 2016 * port to finish. 2017 */ 2018 if (vsw_drain_port_taskq(port)) 2019 return (1); 2020 2021 /* 2022 * Wait for port reference count to hit zero. 2023 */ 2024 mutex_enter(&port->ref_lock); 2025 while (port->ref_cnt != 0) 2026 cv_wait(&port->ref_cv, &port->ref_lock); 2027 mutex_exit(&port->ref_lock); 2028 2029 /* 2030 * Wait for any active callbacks to finish 2031 */ 2032 if (vsw_drain_ldcs(port)) 2033 return (1); 2034 2035 ldcl = &port->p_ldclist; 2036 WRITE_ENTER(&ldcl->lockrw); 2037 while (ldcl->num_ldcs > 0) { 2038 if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {; 2039 cmn_err(CE_WARN, "unable to detach ldc %ld", 2040 ldcl->head->ldc_id); 2041 RW_EXIT(&ldcl->lockrw); 2042 return (1); 2043 } 2044 } 2045 RW_EXIT(&ldcl->lockrw); 2046 2047 rw_destroy(&port->p_ldclist.lockrw); 2048 2049 mutex_destroy(&port->mca_lock); 2050 mutex_destroy(&port->tx_lock); 2051 cv_destroy(&port->ref_cv); 2052 mutex_destroy(&port->ref_lock); 2053 2054 cv_destroy(&port->state_cv); 2055 mutex_destroy(&port->state_lock); 2056 2057 kmem_free(port, sizeof (vsw_port_t)); 2058 2059 D1(vswp, "%s: exit", __func__); 2060 2061 return (0); 2062 } 2063 2064 /* 2065 * Attach a logical domain channel (ldc) under a specified port. 2066 * 2067 * Returns 0 on success, 1 on failure. 2068 */ 2069 static int 2070 vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id) 2071 { 2072 vsw_t *vswp = port->p_vswp; 2073 vsw_ldc_list_t *ldcl = &port->p_ldclist; 2074 vsw_ldc_t *ldcp = NULL; 2075 ldc_attr_t attr; 2076 ldc_status_t istatus; 2077 int status = DDI_FAILURE; 2078 2079 D1(vswp, "%s: enter", __func__); 2080 2081 ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP); 2082 if (ldcp == NULL) { 2083 DERR(vswp, "%s: kmem_zalloc failed", __func__); 2084 return (1); 2085 } 2086 ldcp->ldc_id = ldc_id; 2087 2088 mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL); 2089 mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL); 2090 mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL); 2091 cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL); 2092 2093 /* required for handshake with peer */ 2094 ldcp->local_session = (uint64_t)ddi_get_lbolt(); 2095 ldcp->peer_session = 0; 2096 ldcp->session_status = 0; 2097 2098 mutex_init(&ldcp->hss_lock, NULL, MUTEX_DRIVER, NULL); 2099 ldcp->hss_id = 1; /* Initial handshake session id */ 2100 2101 /* only set for outbound lane, inbound set by peer */ 2102 vsw_set_lane_attr(vswp, &ldcp->lane_out); 2103 2104 attr.devclass = LDC_DEV_NT_SVC; 2105 attr.instance = ddi_get_instance(vswp->dip); 2106 attr.mode = LDC_MODE_UNRELIABLE; 2107 attr.qlen = VSW_LDC_QLEN; 2108 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); 2109 if (status != 0) { 2110 DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)", 2111 __func__, ldc_id, status); 2112 mutex_destroy(&ldcp->ldc_txlock); 2113 mutex_destroy(&ldcp->ldc_cblock); 2114 cv_destroy(&ldcp->drain_cv); 2115 mutex_destroy(&ldcp->drain_cv_lock); 2116 mutex_destroy(&ldcp->hss_lock); 2117 kmem_free(ldcp, sizeof (vsw_ldc_t)); 2118 return (1); 2119 } 2120 2121 status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp); 2122 if (status != 0) { 2123 DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)", 2124 __func__, ldc_id, status); 2125 mutex_destroy(&ldcp->ldc_txlock); 2126 mutex_destroy(&ldcp->ldc_cblock); 2127 cv_destroy(&ldcp->drain_cv); 2128 mutex_destroy(&ldcp->drain_cv_lock); 2129 mutex_destroy(&ldcp->hss_lock); 2130 (void) ldc_fini(ldcp->ldc_handle); 2131 kmem_free(ldcp, sizeof (vsw_ldc_t)); 2132 return (1); 2133 } 2134 2135 2136 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 2137 DERR(vswp, "%s: ldc_status failed", __func__); 2138 return (1); 2139 } 2140 2141 ldcp->ldc_status = istatus; 2142 ldcp->ldc_port = port; 2143 ldcp->ldc_vswp = vswp; 2144 2145 /* link it into the list of channels for this port */ 2146 WRITE_ENTER(&ldcl->lockrw); 2147 ldcp->ldc_next = ldcl->head; 2148 ldcl->head = ldcp; 2149 ldcl->num_ldcs++; 2150 RW_EXIT(&ldcl->lockrw); 2151 2152 D1(vswp, "%s: exit", __func__); 2153 return (0); 2154 } 2155 2156 /* 2157 * Detach a logical domain channel (ldc) belonging to a 2158 * particular port. 2159 * 2160 * Returns 0 on success, 1 on failure. 2161 */ 2162 static int 2163 vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id) 2164 { 2165 vsw_t *vswp = port->p_vswp; 2166 vsw_ldc_t *ldcp, *prev_ldcp; 2167 vsw_ldc_list_t *ldcl = &port->p_ldclist; 2168 int rv; 2169 2170 prev_ldcp = ldcl->head; 2171 for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) { 2172 if (ldcp->ldc_id == ldc_id) { 2173 break; 2174 } 2175 } 2176 2177 /* specified ldc id not found */ 2178 if (ldcp == NULL) { 2179 DERR(vswp, "%s: ldcp = NULL", __func__); 2180 return (1); 2181 } 2182 2183 D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id); 2184 2185 /* 2186 * Before we can close the channel we must release any mapped 2187 * resources (e.g. drings). 2188 */ 2189 vsw_free_lane_resources(ldcp, INBOUND); 2190 vsw_free_lane_resources(ldcp, OUTBOUND); 2191 2192 /* 2193 * If the close fails we are in serious trouble, as won't 2194 * be able to delete the parent port. 2195 */ 2196 if ((rv = ldc_close(ldcp->ldc_handle)) != 0) { 2197 DERR(vswp, "%s: error %d closing channel %lld", 2198 __func__, rv, ldcp->ldc_id); 2199 return (1); 2200 } 2201 2202 (void) ldc_fini(ldcp->ldc_handle); 2203 2204 ldcp->ldc_status = LDC_INIT; 2205 ldcp->ldc_handle = NULL; 2206 ldcp->ldc_vswp = NULL; 2207 mutex_destroy(&ldcp->ldc_txlock); 2208 mutex_destroy(&ldcp->ldc_cblock); 2209 cv_destroy(&ldcp->drain_cv); 2210 mutex_destroy(&ldcp->drain_cv_lock); 2211 mutex_destroy(&ldcp->hss_lock); 2212 2213 /* unlink it from the list */ 2214 prev_ldcp = ldcp->ldc_next; 2215 ldcl->num_ldcs--; 2216 kmem_free(ldcp, sizeof (vsw_ldc_t)); 2217 2218 return (0); 2219 } 2220 2221 /* 2222 * Open and attempt to bring up the channel. Note that channel 2223 * can only be brought up if peer has also opened channel. 2224 * 2225 * Returns 0 if can open and bring up channel, otherwise 2226 * returns 1. 2227 */ 2228 static int 2229 vsw_ldc_init(vsw_ldc_t *ldcp) 2230 { 2231 vsw_t *vswp = ldcp->ldc_vswp; 2232 ldc_status_t istatus = 0; 2233 int rv; 2234 2235 D1(vswp, "%s: enter", __func__); 2236 2237 LDC_ENTER_LOCK(ldcp); 2238 2239 /* don't start at 0 in case clients don't like that */ 2240 ldcp->next_ident = 1; 2241 2242 rv = ldc_open(ldcp->ldc_handle); 2243 if (rv != 0) { 2244 DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)", 2245 __func__, ldcp->ldc_id, rv); 2246 LDC_EXIT_LOCK(ldcp); 2247 return (1); 2248 } 2249 2250 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 2251 DERR(vswp, "%s: unable to get status", __func__); 2252 LDC_EXIT_LOCK(ldcp); 2253 return (1); 2254 2255 } else if (istatus != LDC_OPEN && istatus != LDC_READY) { 2256 DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY", 2257 __func__, ldcp->ldc_id, istatus); 2258 LDC_EXIT_LOCK(ldcp); 2259 return (1); 2260 } 2261 2262 ldcp->ldc_status = istatus; 2263 rv = ldc_up(ldcp->ldc_handle); 2264 if (rv != 0) { 2265 /* 2266 * Not a fatal error for ldc_up() to fail, as peer 2267 * end point may simply not be ready yet. 2268 */ 2269 D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__, 2270 ldcp->ldc_id, rv); 2271 LDC_EXIT_LOCK(ldcp); 2272 return (1); 2273 } 2274 2275 /* 2276 * ldc_up() call is non-blocking so need to explicitly 2277 * check channel status to see if in fact the channel 2278 * is UP. 2279 */ 2280 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 2281 DERR(vswp, "%s: unable to get status", __func__); 2282 LDC_EXIT_LOCK(ldcp); 2283 return (1); 2284 2285 } else if (istatus != LDC_UP) { 2286 DERR(vswp, "%s: id(%lld) status(%d) is not UP", 2287 __func__, ldcp->ldc_id, istatus); 2288 } else { 2289 ldcp->ldc_status = istatus; 2290 } 2291 2292 LDC_EXIT_LOCK(ldcp); 2293 2294 D1(vswp, "%s: exit", __func__); 2295 return (0); 2296 } 2297 2298 /* disable callbacks on the channel */ 2299 static int 2300 vsw_ldc_uninit(vsw_ldc_t *ldcp) 2301 { 2302 vsw_t *vswp = ldcp->ldc_vswp; 2303 int rv; 2304 2305 D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id); 2306 2307 LDC_ENTER_LOCK(ldcp); 2308 2309 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 2310 if (rv != 0) { 2311 DERR(vswp, "vsw_ldc_uninit(%lld): error disabling " 2312 "interrupts (rv = %d)\n", ldcp->ldc_id, rv); 2313 LDC_EXIT_LOCK(ldcp); 2314 return (1); 2315 } 2316 2317 ldcp->ldc_status = LDC_INIT; 2318 2319 LDC_EXIT_LOCK(ldcp); 2320 2321 D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id); 2322 2323 return (0); 2324 } 2325 2326 static int 2327 vsw_init_ldcs(vsw_port_t *port) 2328 { 2329 vsw_ldc_list_t *ldcl = &port->p_ldclist; 2330 vsw_ldc_t *ldcp; 2331 2332 READ_ENTER(&ldcl->lockrw); 2333 ldcp = ldcl->head; 2334 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 2335 (void) vsw_ldc_init(ldcp); 2336 } 2337 RW_EXIT(&ldcl->lockrw); 2338 2339 return (0); 2340 } 2341 2342 static int 2343 vsw_uninit_ldcs(vsw_port_t *port) 2344 { 2345 vsw_ldc_list_t *ldcl = &port->p_ldclist; 2346 vsw_ldc_t *ldcp; 2347 2348 D1(NULL, "vsw_uninit_ldcs: enter\n"); 2349 2350 READ_ENTER(&ldcl->lockrw); 2351 ldcp = ldcl->head; 2352 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 2353 (void) vsw_ldc_uninit(ldcp); 2354 } 2355 RW_EXIT(&ldcl->lockrw); 2356 2357 D1(NULL, "vsw_uninit_ldcs: exit\n"); 2358 2359 return (0); 2360 } 2361 2362 /* 2363 * Wait until the callback(s) associated with the ldcs under the specified 2364 * port have completed. 2365 * 2366 * Prior to this function being invoked each channel under this port 2367 * should have been quiesced via ldc_set_cb_mode(DISABLE). 2368 * 2369 * A short explaination of what we are doing below.. 2370 * 2371 * The simplest approach would be to have a reference counter in 2372 * the ldc structure which is increment/decremented by the callbacks as 2373 * they use the channel. The drain function could then simply disable any 2374 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately 2375 * there is a tiny window here - before the callback is able to get the lock 2376 * on the channel it is interrupted and this function gets to execute. It 2377 * sees that the ref count is zero and believes its free to delete the 2378 * associated data structures. 2379 * 2380 * We get around this by taking advantage of the fact that before the ldc 2381 * framework invokes a callback it sets a flag to indicate that there is a 2382 * callback active (or about to become active). If when we attempt to 2383 * unregister a callback when this active flag is set then the unregister 2384 * will fail with EWOULDBLOCK. 2385 * 2386 * If the unregister fails we do a cv_timedwait. We will either be signaled 2387 * by the callback as it is exiting (note we have to wait a short period to 2388 * allow the callback to return fully to the ldc framework and it to clear 2389 * the active flag), or by the timer expiring. In either case we again attempt 2390 * the unregister. We repeat this until we can succesfully unregister the 2391 * callback. 2392 * 2393 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch 2394 * the case where the callback has finished but the ldc framework has not yet 2395 * cleared the active flag. In this case we would never get a cv_signal. 2396 */ 2397 static int 2398 vsw_drain_ldcs(vsw_port_t *port) 2399 { 2400 vsw_ldc_list_t *ldcl = &port->p_ldclist; 2401 vsw_ldc_t *ldcp; 2402 vsw_t *vswp = port->p_vswp; 2403 2404 D1(vswp, "%s: enter", __func__); 2405 2406 READ_ENTER(&ldcl->lockrw); 2407 2408 ldcp = ldcl->head; 2409 2410 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 2411 /* 2412 * If we can unregister the channel callback then we 2413 * know that there is no callback either running or 2414 * scheduled to run for this channel so move on to next 2415 * channel in the list. 2416 */ 2417 mutex_enter(&ldcp->drain_cv_lock); 2418 2419 /* prompt active callbacks to quit */ 2420 ldcp->drain_state = VSW_LDC_DRAINING; 2421 2422 if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) { 2423 D2(vswp, "%s: unreg callback for chan %ld", __func__, 2424 ldcp->ldc_id); 2425 mutex_exit(&ldcp->drain_cv_lock); 2426 continue; 2427 } else { 2428 /* 2429 * If we end up here we know that either 1) a callback 2430 * is currently executing, 2) is about to start (i.e. 2431 * the ldc framework has set the active flag but 2432 * has not actually invoked the callback yet, or 3) 2433 * has finished and has returned to the ldc framework 2434 * but the ldc framework has not yet cleared the 2435 * active bit. 2436 * 2437 * Wait for it to finish. 2438 */ 2439 while (ldc_unreg_callback(ldcp->ldc_handle) 2440 == EWOULDBLOCK) 2441 (void) cv_timedwait(&ldcp->drain_cv, 2442 &ldcp->drain_cv_lock, lbolt + hz); 2443 2444 mutex_exit(&ldcp->drain_cv_lock); 2445 D2(vswp, "%s: unreg callback for chan %ld after " 2446 "timeout", __func__, ldcp->ldc_id); 2447 } 2448 } 2449 RW_EXIT(&ldcl->lockrw); 2450 2451 D1(vswp, "%s: exit", __func__); 2452 return (0); 2453 } 2454 2455 /* 2456 * Wait until all tasks which reference this port have completed. 2457 * 2458 * Prior to this function being invoked each channel under this port 2459 * should have been quiesced via ldc_set_cb_mode(DISABLE). 2460 */ 2461 static int 2462 vsw_drain_port_taskq(vsw_port_t *port) 2463 { 2464 vsw_t *vswp = port->p_vswp; 2465 2466 D1(vswp, "%s: enter", __func__); 2467 2468 /* 2469 * Mark the port as in the process of being detached, and 2470 * dispatch a marker task to the queue so we know when all 2471 * relevant tasks have completed. 2472 */ 2473 mutex_enter(&port->state_lock); 2474 port->state = VSW_PORT_DETACHING; 2475 2476 if ((vswp->taskq_p == NULL) || 2477 (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task, 2478 port, DDI_NOSLEEP) != DDI_SUCCESS)) { 2479 DERR(vswp, "%s: unable to dispatch marker task", 2480 __func__); 2481 mutex_exit(&port->state_lock); 2482 return (1); 2483 } 2484 2485 /* 2486 * Wait for the marker task to finish. 2487 */ 2488 while (port->state != VSW_PORT_DETACHABLE) 2489 cv_wait(&port->state_cv, &port->state_lock); 2490 2491 mutex_exit(&port->state_lock); 2492 2493 D1(vswp, "%s: exit", __func__); 2494 2495 return (0); 2496 } 2497 2498 static void 2499 vsw_marker_task(void *arg) 2500 { 2501 vsw_port_t *port = arg; 2502 vsw_t *vswp = port->p_vswp; 2503 2504 D1(vswp, "%s: enter", __func__); 2505 2506 mutex_enter(&port->state_lock); 2507 2508 /* 2509 * No further tasks should be dispatched which reference 2510 * this port so ok to mark it as safe to detach. 2511 */ 2512 port->state = VSW_PORT_DETACHABLE; 2513 2514 cv_signal(&port->state_cv); 2515 2516 mutex_exit(&port->state_lock); 2517 2518 D1(vswp, "%s: exit", __func__); 2519 } 2520 2521 static vsw_port_t * 2522 vsw_lookup_port(vsw_t *vswp, int p_instance) 2523 { 2524 vsw_port_list_t *plist = &vswp->plist; 2525 vsw_port_t *port; 2526 2527 for (port = plist->head; port != NULL; port = port->p_next) { 2528 if (port->p_instance == p_instance) { 2529 D2(vswp, "vsw_lookup_port: found p_instance\n"); 2530 return (port); 2531 } 2532 } 2533 2534 return (NULL); 2535 } 2536 2537 /* 2538 * Search for and remove the specified port from the port 2539 * list. Returns 0 if able to locate and remove port, otherwise 2540 * returns 1. 2541 */ 2542 static int 2543 vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port) 2544 { 2545 vsw_port_list_t *plist = &vswp->plist; 2546 vsw_port_t *curr_p, *prev_p; 2547 2548 if (plist->head == NULL) 2549 return (1); 2550 2551 curr_p = prev_p = plist->head; 2552 2553 while (curr_p != NULL) { 2554 if (curr_p == port) { 2555 if (prev_p == curr_p) { 2556 plist->head = curr_p->p_next; 2557 } else { 2558 prev_p->p_next = curr_p->p_next; 2559 } 2560 plist->num_ports--; 2561 break; 2562 } else { 2563 prev_p = curr_p; 2564 curr_p = curr_p->p_next; 2565 } 2566 } 2567 return (0); 2568 } 2569 2570 /* 2571 * Interrupt handler for ldc messages. 2572 */ 2573 static uint_t 2574 vsw_ldc_cb(uint64_t event, caddr_t arg) 2575 { 2576 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2577 vsw_t *vswp = ldcp->ldc_vswp; 2578 ldc_status_t lstatus; 2579 int rv; 2580 2581 D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2582 2583 mutex_enter(&ldcp->ldc_cblock); 2584 2585 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 2586 mutex_exit(&ldcp->ldc_cblock); 2587 return (LDC_SUCCESS); 2588 } 2589 2590 if (event & LDC_EVT_UP) { 2591 /* 2592 * Channel has come up, get the state and then start 2593 * the handshake. 2594 */ 2595 rv = ldc_status(ldcp->ldc_handle, &lstatus); 2596 if (rv != 0) { 2597 cmn_err(CE_WARN, "Unable to read channel state"); 2598 } 2599 ldcp->ldc_status = lstatus; 2600 2601 D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)", 2602 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 2603 2604 vsw_restart_handshake(ldcp); 2605 2606 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2607 } 2608 2609 if (event & LDC_EVT_READ) { 2610 /* 2611 * Data available for reading. 2612 */ 2613 D2(vswp, "%s: id(ld) event(%llx) data READ", 2614 __func__, ldcp->ldc_id, event); 2615 2616 vsw_process_pkt(ldcp); 2617 2618 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 2619 2620 goto vsw_cb_exit; 2621 } 2622 2623 if (event & LDC_EVT_RESET) { 2624 rv = ldc_status(ldcp->ldc_handle, &lstatus); 2625 if (rv != 0) { 2626 cmn_err(CE_WARN, "Unable to read channel state"); 2627 } else { 2628 ldcp->ldc_status = lstatus; 2629 } 2630 D2(vswp, "%s: id(%ld) event(%llx) RESET: status (%ld)", 2631 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 2632 } 2633 2634 if (event & LDC_EVT_DOWN) { 2635 rv = ldc_status(ldcp->ldc_handle, &lstatus); 2636 if (rv != 0) { 2637 cmn_err(CE_WARN, "Unable to read channel state"); 2638 } else { 2639 ldcp->ldc_status = lstatus; 2640 } 2641 2642 D2(vswp, "%s: id(%ld) event(%llx) DOWN: status (%ld)", 2643 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 2644 2645 } 2646 2647 /* 2648 * Catch either LDC_EVT_WRITE which we don't support or any 2649 * unknown event. 2650 */ 2651 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET 2652 | LDC_EVT_DOWN | LDC_EVT_READ)) { 2653 2654 DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 2655 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 2656 } 2657 2658 vsw_cb_exit: 2659 mutex_exit(&ldcp->ldc_cblock); 2660 2661 /* 2662 * Let the drain function know we are finishing if it 2663 * is waiting. 2664 */ 2665 mutex_enter(&ldcp->drain_cv_lock); 2666 if (ldcp->drain_state == VSW_LDC_DRAINING) 2667 cv_signal(&ldcp->drain_cv); 2668 mutex_exit(&ldcp->drain_cv_lock); 2669 2670 return (LDC_SUCCESS); 2671 } 2672 2673 /* 2674 * (Re)start a handshake with our peer by sending them 2675 * our version info. 2676 */ 2677 static void 2678 vsw_restart_handshake(vsw_ldc_t *ldcp) 2679 { 2680 vsw_t *vswp = ldcp->ldc_vswp; 2681 vsw_port_t *port; 2682 vsw_ldc_list_t *ldcl; 2683 2684 D1(vswp, "vsw_restart_handshake: enter"); 2685 2686 port = ldcp->ldc_port; 2687 ldcl = &port->p_ldclist; 2688 2689 WRITE_ENTER(&ldcl->lockrw); 2690 2691 D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 2692 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 2693 2694 vsw_free_lane_resources(ldcp, INBOUND); 2695 vsw_free_lane_resources(ldcp, OUTBOUND); 2696 RW_EXIT(&ldcl->lockrw); 2697 2698 ldcp->lane_in.lstate = 0; 2699 ldcp->lane_out.lstate = 0; 2700 2701 /* 2702 * Remove parent port from any multicast groups 2703 * it may have registered with. Client must resend 2704 * multicast add command after handshake completes. 2705 */ 2706 (void) vsw_del_fdb(vswp, port); 2707 2708 vsw_del_mcst_port(port); 2709 2710 ldcp->hphase = VSW_MILESTONE0; 2711 2712 ldcp->peer_session = 0; 2713 ldcp->session_status = 0; 2714 2715 /* 2716 * We now increment the transaction group id. This allows 2717 * us to identify and disard any tasks which are still pending 2718 * on the taskq and refer to the handshake session we are about 2719 * to restart. These stale messages no longer have any real 2720 * meaning. 2721 */ 2722 mutex_enter(&ldcp->hss_lock); 2723 ldcp->hss_id++; 2724 mutex_exit(&ldcp->hss_lock); 2725 2726 if (ldcp->hcnt++ > vsw_num_handshakes) { 2727 cmn_err(CE_WARN, "exceeded number of permitted " 2728 "handshake attempts (%d) on channel %ld", 2729 ldcp->hcnt, ldcp->ldc_id); 2730 return; 2731 } 2732 2733 vsw_send_ver(ldcp); 2734 2735 D1(vswp, "vsw_restart_handshake: exit"); 2736 } 2737 2738 /* 2739 * returns 0 if legal for event signified by flag to have 2740 * occured at the time it did. Otherwise returns 1. 2741 */ 2742 int 2743 vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag) 2744 { 2745 vsw_t *vswp = ldcp->ldc_vswp; 2746 uint64_t state; 2747 uint64_t phase; 2748 2749 if (dir == INBOUND) 2750 state = ldcp->lane_in.lstate; 2751 else 2752 state = ldcp->lane_out.lstate; 2753 2754 phase = ldcp->hphase; 2755 2756 switch (flag) { 2757 case VSW_VER_INFO_RECV: 2758 if (phase > VSW_MILESTONE0) { 2759 DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV" 2760 " when in state %d\n", ldcp->ldc_id, phase); 2761 vsw_restart_handshake(ldcp); 2762 return (1); 2763 } 2764 break; 2765 2766 case VSW_VER_ACK_RECV: 2767 case VSW_VER_NACK_RECV: 2768 if (!(state & VSW_VER_INFO_SENT)) { 2769 DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK" 2770 " or VER_NACK when in state %d\n", 2771 ldcp->ldc_id, phase); 2772 vsw_restart_handshake(ldcp); 2773 return (1); 2774 } else 2775 state &= ~VSW_VER_INFO_SENT; 2776 break; 2777 2778 case VSW_ATTR_INFO_RECV: 2779 if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) { 2780 DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV" 2781 " when in state %d\n", ldcp->ldc_id, phase); 2782 vsw_restart_handshake(ldcp); 2783 return (1); 2784 } 2785 break; 2786 2787 case VSW_ATTR_ACK_RECV: 2788 case VSW_ATTR_NACK_RECV: 2789 if (!(state & VSW_ATTR_INFO_SENT)) { 2790 DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK" 2791 " or ATTR_NACK when in state %d\n", 2792 ldcp->ldc_id, phase); 2793 vsw_restart_handshake(ldcp); 2794 return (1); 2795 } else 2796 state &= ~VSW_ATTR_INFO_SENT; 2797 break; 2798 2799 case VSW_DRING_INFO_RECV: 2800 if (phase < VSW_MILESTONE1) { 2801 DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV" 2802 " when in state %d\n", ldcp->ldc_id, phase); 2803 vsw_restart_handshake(ldcp); 2804 return (1); 2805 } 2806 break; 2807 2808 case VSW_DRING_ACK_RECV: 2809 case VSW_DRING_NACK_RECV: 2810 if (!(state & VSW_DRING_INFO_SENT)) { 2811 DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK" 2812 " or DRING_NACK when in state %d\n", 2813 ldcp->ldc_id, phase); 2814 vsw_restart_handshake(ldcp); 2815 return (1); 2816 } else 2817 state &= ~VSW_DRING_INFO_SENT; 2818 break; 2819 2820 case VSW_RDX_INFO_RECV: 2821 if (phase < VSW_MILESTONE3) { 2822 DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV" 2823 " when in state %d\n", ldcp->ldc_id, phase); 2824 vsw_restart_handshake(ldcp); 2825 return (1); 2826 } 2827 break; 2828 2829 case VSW_RDX_ACK_RECV: 2830 case VSW_RDX_NACK_RECV: 2831 if (!(state & VSW_RDX_INFO_SENT)) { 2832 DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK" 2833 " or RDX_NACK when in state %d\n", 2834 ldcp->ldc_id, phase); 2835 vsw_restart_handshake(ldcp); 2836 return (1); 2837 } else 2838 state &= ~VSW_RDX_INFO_SENT; 2839 break; 2840 2841 case VSW_MCST_INFO_RECV: 2842 if (phase < VSW_MILESTONE3) { 2843 DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV" 2844 " when in state %d\n", ldcp->ldc_id, phase); 2845 vsw_restart_handshake(ldcp); 2846 return (1); 2847 } 2848 break; 2849 2850 default: 2851 DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)", 2852 ldcp->ldc_id, flag); 2853 return (1); 2854 } 2855 2856 if (dir == INBOUND) 2857 ldcp->lane_in.lstate = state; 2858 else 2859 ldcp->lane_out.lstate = state; 2860 2861 D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id); 2862 2863 return (0); 2864 } 2865 2866 void 2867 vsw_next_milestone(vsw_ldc_t *ldcp) 2868 { 2869 vsw_t *vswp = ldcp->ldc_vswp; 2870 2871 D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__, 2872 ldcp->ldc_id, ldcp->hphase); 2873 2874 DUMP_FLAGS(ldcp->lane_in.lstate); 2875 DUMP_FLAGS(ldcp->lane_out.lstate); 2876 2877 switch (ldcp->hphase) { 2878 2879 case VSW_MILESTONE0: 2880 /* 2881 * If we haven't started to handshake with our peer, 2882 * start to do so now. 2883 */ 2884 if (ldcp->lane_out.lstate == 0) { 2885 D2(vswp, "%s: (chan %lld) starting handshake " 2886 "with peer", __func__, ldcp->ldc_id); 2887 vsw_restart_handshake(ldcp); 2888 } 2889 2890 /* 2891 * Only way to pass this milestone is to have successfully 2892 * negotiated version info. 2893 */ 2894 if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) && 2895 (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) { 2896 2897 D2(vswp, "%s: (chan %lld) leaving milestone 0", 2898 __func__, ldcp->ldc_id); 2899 2900 /* 2901 * Next milestone is passed when attribute 2902 * information has been successfully exchanged. 2903 */ 2904 ldcp->hphase = VSW_MILESTONE1; 2905 vsw_send_attr(ldcp); 2906 2907 } 2908 break; 2909 2910 case VSW_MILESTONE1: 2911 /* 2912 * Only way to pass this milestone is to have successfully 2913 * negotiated attribute information. 2914 */ 2915 if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) { 2916 2917 ldcp->hphase = VSW_MILESTONE2; 2918 2919 /* 2920 * If the peer device has said it wishes to 2921 * use descriptor rings then we send it our ring 2922 * info, otherwise we just set up a private ring 2923 * which we use an internal buffer 2924 */ 2925 if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) 2926 vsw_send_dring_info(ldcp); 2927 } 2928 break; 2929 2930 2931 case VSW_MILESTONE2: 2932 /* 2933 * If peer has indicated in its attribute message that 2934 * it wishes to use descriptor rings then the only way 2935 * to pass this milestone is for us to have received 2936 * valid dring info. 2937 * 2938 * If peer is not using descriptor rings then just fall 2939 * through. 2940 */ 2941 if ((ldcp->lane_in.xfer_mode == VIO_DRING_MODE) && 2942 (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))) 2943 break; 2944 2945 D2(vswp, "%s: (chan %lld) leaving milestone 2", 2946 __func__, ldcp->ldc_id); 2947 2948 ldcp->hphase = VSW_MILESTONE3; 2949 vsw_send_rdx(ldcp); 2950 break; 2951 2952 case VSW_MILESTONE3: 2953 /* 2954 * Pass this milestone when all paramaters have been 2955 * successfully exchanged and RDX sent in both directions. 2956 * 2957 * Mark outbound lane as available to transmit data. 2958 */ 2959 if ((ldcp->lane_in.lstate & VSW_RDX_ACK_SENT) && 2960 (ldcp->lane_out.lstate & VSW_RDX_ACK_RECV)) { 2961 2962 D2(vswp, "%s: (chan %lld) leaving milestone 3", 2963 __func__, ldcp->ldc_id); 2964 D2(vswp, "%s: ** handshake complete **", __func__); 2965 ldcp->lane_out.lstate |= VSW_LANE_ACTIVE; 2966 ldcp->hphase = VSW_MILESTONE4; 2967 ldcp->hcnt = 0; 2968 DISPLAY_STATE(); 2969 } 2970 break; 2971 2972 case VSW_MILESTONE4: 2973 D2(vswp, "%s: (chan %lld) in milestone 4", __func__, 2974 ldcp->ldc_id); 2975 break; 2976 2977 default: 2978 DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__, 2979 ldcp->ldc_id, ldcp->hphase); 2980 } 2981 2982 D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id, 2983 ldcp->hphase); 2984 } 2985 2986 /* 2987 * Check if major version is supported. 2988 * 2989 * Returns 0 if finds supported major number, and if necessary 2990 * adjusts the minor field. 2991 * 2992 * Returns 1 if can't match major number exactly. Sets mjor/minor 2993 * to next lowest support values, or to zero if no other values possible. 2994 */ 2995 static int 2996 vsw_supported_version(vio_ver_msg_t *vp) 2997 { 2998 int i; 2999 3000 D1(NULL, "vsw_supported_version: enter"); 3001 3002 for (i = 0; i < VSW_NUM_VER; i++) { 3003 if (vsw_versions[i].ver_major == vp->ver_major) { 3004 /* 3005 * Matching or lower major version found. Update 3006 * minor number if necessary. 3007 */ 3008 if (vp->ver_minor > vsw_versions[i].ver_minor) { 3009 D2(NULL, "%s: adjusting minor value" 3010 " from %d to %d", __func__, 3011 vp->ver_minor, 3012 vsw_versions[i].ver_minor); 3013 vp->ver_minor = vsw_versions[i].ver_minor; 3014 } 3015 3016 return (0); 3017 } 3018 3019 if (vsw_versions[i].ver_major < vp->ver_major) { 3020 if (vp->ver_minor > vsw_versions[i].ver_minor) { 3021 D2(NULL, "%s: adjusting minor value" 3022 " from %d to %d", __func__, 3023 vp->ver_minor, 3024 vsw_versions[i].ver_minor); 3025 vp->ver_minor = vsw_versions[i].ver_minor; 3026 } 3027 return (1); 3028 } 3029 } 3030 3031 /* No match was possible, zero out fields */ 3032 vp->ver_major = 0; 3033 vp->ver_minor = 0; 3034 3035 D1(NULL, "vsw_supported_version: exit"); 3036 3037 return (1); 3038 } 3039 3040 /* 3041 * Main routine for processing messages received over LDC. 3042 */ 3043 static void 3044 vsw_process_pkt(void *arg) 3045 { 3046 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 3047 vsw_t *vswp = ldcp->ldc_vswp; 3048 size_t msglen; 3049 vio_msg_tag_t tag; 3050 def_msg_t dmsg; 3051 int rv = 0; 3052 3053 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 3054 3055 /* 3056 * If channel is up read messages until channel is empty. 3057 */ 3058 do { 3059 msglen = sizeof (dmsg); 3060 rv = ldc_read(ldcp->ldc_handle, (caddr_t)&dmsg, &msglen); 3061 3062 if (rv != 0) { 3063 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) " 3064 "len(%d)\n", __func__, ldcp->ldc_id, 3065 rv, msglen); 3066 break; 3067 } 3068 3069 if (msglen == 0) { 3070 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 3071 ldcp->ldc_id); 3072 break; 3073 } 3074 3075 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 3076 ldcp->ldc_id, msglen); 3077 3078 /* 3079 * Figure out what sort of packet we have gotten by 3080 * examining the msg tag, and then switch it appropriately. 3081 */ 3082 bcopy(&dmsg, &tag, sizeof (vio_msg_tag_t)); 3083 3084 switch (tag.vio_msgtype) { 3085 case VIO_TYPE_CTRL: 3086 vsw_dispatch_ctrl_task(ldcp, &dmsg, tag); 3087 break; 3088 case VIO_TYPE_DATA: 3089 vsw_process_data_pkt(ldcp, &dmsg, tag); 3090 break; 3091 case VIO_TYPE_ERR: 3092 vsw_process_err_pkt(ldcp, &dmsg, tag); 3093 break; 3094 default: 3095 DERR(vswp, "%s: Unknown tag(%lx) ", __func__, 3096 "id(%lx)\n", tag.vio_msgtype, ldcp->ldc_id); 3097 break; 3098 } 3099 } while (msglen); 3100 3101 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 3102 } 3103 3104 /* 3105 * Dispatch a task to process a VIO control message. 3106 */ 3107 static void 3108 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t tag) 3109 { 3110 vsw_ctrl_task_t *ctaskp = NULL; 3111 vsw_port_t *port = ldcp->ldc_port; 3112 vsw_t *vswp = port->p_vswp; 3113 3114 D1(vswp, "%s: enter", __func__); 3115 3116 /* 3117 * We need to handle RDX ACK messages in-band as once they 3118 * are exchanged it is possible that we will get an 3119 * immediate (legitimate) data packet. 3120 */ 3121 if ((tag.vio_subtype_env == VIO_RDX) && 3122 (tag.vio_subtype == VIO_SUBTYPE_ACK)) { 3123 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_ACK_RECV)) 3124 return; 3125 3126 ldcp->lane_out.lstate |= VSW_RDX_ACK_RECV; 3127 vsw_next_milestone(ldcp); 3128 D2(vswp, "%s (%ld) handling RDX_ACK in place", __func__, 3129 ldcp->ldc_id); 3130 return; 3131 } 3132 3133 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 3134 3135 if (ctaskp == NULL) { 3136 DERR(vswp, "%s: unable to alloc space for ctrl" 3137 " msg", __func__); 3138 vsw_restart_handshake(ldcp); 3139 return; 3140 } 3141 3142 ctaskp->ldcp = ldcp; 3143 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 3144 mutex_enter(&ldcp->hss_lock); 3145 ctaskp->hss_id = ldcp->hss_id; 3146 mutex_exit(&ldcp->hss_lock); 3147 3148 /* 3149 * Dispatch task to processing taskq if port is not in 3150 * the process of being detached. 3151 */ 3152 mutex_enter(&port->state_lock); 3153 if (port->state == VSW_PORT_INIT) { 3154 if ((vswp->taskq_p == NULL) || 3155 (ddi_taskq_dispatch(vswp->taskq_p, 3156 vsw_process_ctrl_pkt, ctaskp, DDI_NOSLEEP) 3157 != DDI_SUCCESS)) { 3158 DERR(vswp, "%s: unable to dispatch task to taskq", 3159 __func__); 3160 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 3161 mutex_exit(&port->state_lock); 3162 vsw_restart_handshake(ldcp); 3163 return; 3164 } 3165 } else { 3166 DWARN(vswp, "%s: port %d detaching, not dispatching " 3167 "task", __func__, port->p_instance); 3168 } 3169 3170 mutex_exit(&port->state_lock); 3171 3172 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 3173 ldcp->ldc_id); 3174 D1(vswp, "%s: exit", __func__); 3175 } 3176 3177 /* 3178 * Process a VIO ctrl message. Invoked from taskq. 3179 */ 3180 static void 3181 vsw_process_ctrl_pkt(void *arg) 3182 { 3183 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 3184 vsw_ldc_t *ldcp = ctaskp->ldcp; 3185 vsw_t *vswp = ldcp->ldc_vswp; 3186 vio_msg_tag_t tag; 3187 uint16_t env; 3188 3189 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3190 3191 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 3192 env = tag.vio_subtype_env; 3193 3194 /* stale pkt check */ 3195 mutex_enter(&ldcp->hss_lock); 3196 if (ctaskp->hss_id < ldcp->hss_id) { 3197 DWARN(vswp, "%s: discarding stale packet belonging to" 3198 " earlier (%ld) handshake session", __func__, 3199 ctaskp->hss_id); 3200 mutex_exit(&ldcp->hss_lock); 3201 return; 3202 } 3203 mutex_exit(&ldcp->hss_lock); 3204 3205 /* session id check */ 3206 if (ldcp->session_status & VSW_PEER_SESSION) { 3207 if (ldcp->peer_session != tag.vio_sid) { 3208 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3209 __func__, ldcp->ldc_id, tag.vio_sid); 3210 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 3211 vsw_restart_handshake(ldcp); 3212 return; 3213 } 3214 } 3215 3216 /* 3217 * Switch on vio_subtype envelope, then let lower routines 3218 * decide if its an INFO, ACK or NACK packet. 3219 */ 3220 switch (env) { 3221 case VIO_VER_INFO: 3222 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 3223 break; 3224 case VIO_DRING_REG: 3225 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 3226 break; 3227 case VIO_DRING_UNREG: 3228 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 3229 break; 3230 case VIO_ATTR_INFO: 3231 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 3232 break; 3233 case VNET_MCAST_INFO: 3234 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 3235 break; 3236 case VIO_RDX: 3237 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 3238 break; 3239 default: 3240 DERR(vswp, "%s : unknown vio_subtype_env (%x)\n", 3241 __func__, env); 3242 } 3243 3244 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 3245 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3246 } 3247 3248 /* 3249 * Version negotiation. We can end up here either because our peer 3250 * has responded to a handshake message we have sent it, or our peer 3251 * has initiated a handshake with us. If its the former then can only 3252 * be ACK or NACK, if its the later can only be INFO. 3253 * 3254 * If its an ACK we move to the next stage of the handshake, namely 3255 * attribute exchange. If its a NACK we see if we can specify another 3256 * version, if we can't we stop. 3257 * 3258 * If it is an INFO we reset all params associated with communication 3259 * in that direction over this channel (remember connection is 3260 * essentially 2 independent simplex channels). 3261 */ 3262 void 3263 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 3264 { 3265 vio_ver_msg_t *ver_pkt; 3266 vsw_t *vswp = ldcp->ldc_vswp; 3267 3268 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3269 3270 /* 3271 * We know this is a ctrl/version packet so 3272 * cast it into the correct structure. 3273 */ 3274 ver_pkt = (vio_ver_msg_t *)pkt; 3275 3276 switch (ver_pkt->tag.vio_subtype) { 3277 case VIO_SUBTYPE_INFO: 3278 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 3279 3280 /* 3281 * Record the session id, which we will use from now 3282 * until we see another VER_INFO msg. Even then the 3283 * session id in most cases will be unchanged, execpt 3284 * if channel was reset. 3285 */ 3286 if ((ldcp->session_status & VSW_PEER_SESSION) && 3287 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 3288 DERR(vswp, "%s: updating session id for chan %lld " 3289 "from %llx to %llx", __func__, ldcp->ldc_id, 3290 ldcp->peer_session, ver_pkt->tag.vio_sid); 3291 } 3292 3293 ldcp->peer_session = ver_pkt->tag.vio_sid; 3294 ldcp->session_status |= VSW_PEER_SESSION; 3295 3296 /* Legal message at this time ? */ 3297 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 3298 return; 3299 3300 /* 3301 * First check the device class. Currently only expect 3302 * to be talking to a network device. In the future may 3303 * also talk to another switch. 3304 */ 3305 if (ver_pkt->dev_class != VDEV_NETWORK) { 3306 DERR(vswp, "%s: illegal device class %d", __func__, 3307 ver_pkt->dev_class); 3308 3309 ver_pkt->tag.vio_sid = ldcp->local_session; 3310 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3311 3312 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 3313 3314 vsw_send_msg(ldcp, (void *)ver_pkt, 3315 sizeof (vio_ver_msg_t)); 3316 3317 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 3318 vsw_next_milestone(ldcp); 3319 return; 3320 } else { 3321 ldcp->dev_class = ver_pkt->dev_class; 3322 } 3323 3324 /* 3325 * Now check the version. 3326 */ 3327 if (vsw_supported_version(ver_pkt) == 0) { 3328 /* 3329 * Support this major version and possibly 3330 * adjusted minor version. 3331 */ 3332 3333 D2(vswp, "%s: accepted ver %d:%d", __func__, 3334 ver_pkt->ver_major, ver_pkt->ver_minor); 3335 3336 /* Store accepted values */ 3337 ldcp->lane_in.ver_major = ver_pkt->ver_major; 3338 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 3339 3340 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3341 3342 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 3343 } else { 3344 /* 3345 * NACK back with the next lower major/minor 3346 * pairing we support (if don't suuport any more 3347 * versions then they will be set to zero. 3348 */ 3349 3350 D2(vswp, "%s: replying with ver %d:%d", __func__, 3351 ver_pkt->ver_major, ver_pkt->ver_minor); 3352 3353 /* Store updated values */ 3354 ldcp->lane_in.ver_major = ver_pkt->ver_major; 3355 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 3356 3357 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3358 3359 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 3360 } 3361 3362 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 3363 ver_pkt->tag.vio_sid = ldcp->local_session; 3364 vsw_send_msg(ldcp, (void *)ver_pkt, sizeof (vio_ver_msg_t)); 3365 3366 vsw_next_milestone(ldcp); 3367 break; 3368 3369 case VIO_SUBTYPE_ACK: 3370 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 3371 3372 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 3373 return; 3374 3375 /* Store updated values */ 3376 ldcp->lane_in.ver_major = ver_pkt->ver_major; 3377 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 3378 3379 3380 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 3381 vsw_next_milestone(ldcp); 3382 3383 break; 3384 3385 case VIO_SUBTYPE_NACK: 3386 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 3387 3388 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 3389 return; 3390 3391 /* 3392 * If our peer sent us a NACK with the ver fields set to 3393 * zero then there is nothing more we can do. Otherwise see 3394 * if we support either the version suggested, or a lesser 3395 * one. 3396 */ 3397 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 3398 DERR(vswp, "%s: peer unable to negotiate any " 3399 "further.", __func__); 3400 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 3401 vsw_next_milestone(ldcp); 3402 return; 3403 } 3404 3405 /* 3406 * Check to see if we support this major version or 3407 * a lower one. If we don't then maj/min will be set 3408 * to zero. 3409 */ 3410 (void) vsw_supported_version(ver_pkt); 3411 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 3412 /* Nothing more we can do */ 3413 DERR(vswp, "%s: version negotiation failed.\n", 3414 __func__); 3415 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 3416 vsw_next_milestone(ldcp); 3417 } else { 3418 /* found a supported major version */ 3419 ldcp->lane_out.ver_major = ver_pkt->ver_major; 3420 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 3421 3422 D2(vswp, "%s: resending with updated values (%x, %x)", 3423 __func__, ver_pkt->ver_major, 3424 ver_pkt->ver_minor); 3425 3426 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 3427 ver_pkt->tag.vio_sid = ldcp->local_session; 3428 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3429 3430 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 3431 3432 vsw_send_msg(ldcp, (void *)ver_pkt, 3433 sizeof (vio_ver_msg_t)); 3434 3435 vsw_next_milestone(ldcp); 3436 3437 } 3438 break; 3439 3440 default: 3441 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3442 ver_pkt->tag.vio_subtype); 3443 } 3444 3445 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 3446 } 3447 3448 /* 3449 * Process an attribute packet. We can end up here either because our peer 3450 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 3451 * peer has sent us an attribute INFO message 3452 * 3453 * If its an ACK we then move to the next stage of the handshake which 3454 * is to send our descriptor ring info to our peer. If its a NACK then 3455 * there is nothing more we can (currently) do. 3456 * 3457 * If we get a valid/acceptable INFO packet (and we have already negotiated 3458 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 3459 * NACK back and reset channel state to INACTIV. 3460 * 3461 * FUTURE: in time we will probably negotiate over attributes, but for 3462 * the moment unacceptable attributes are regarded as a fatal error. 3463 * 3464 */ 3465 void 3466 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 3467 { 3468 vnet_attr_msg_t *attr_pkt; 3469 vsw_t *vswp = ldcp->ldc_vswp; 3470 vsw_port_t *port = ldcp->ldc_port; 3471 uint64_t macaddr = 0; 3472 int i; 3473 3474 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3475 3476 /* 3477 * We know this is a ctrl/attr packet so 3478 * cast it into the correct structure. 3479 */ 3480 attr_pkt = (vnet_attr_msg_t *)pkt; 3481 3482 switch (attr_pkt->tag.vio_subtype) { 3483 case VIO_SUBTYPE_INFO: 3484 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3485 3486 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 3487 return; 3488 3489 /* 3490 * If the attributes are unacceptable then we NACK back. 3491 */ 3492 if (vsw_check_attr(attr_pkt, ldcp->ldc_port)) { 3493 3494 DERR(vswp, "%s (chan %d): invalid attributes", 3495 __func__, ldcp->ldc_id); 3496 3497 vsw_free_lane_resources(ldcp, INBOUND); 3498 3499 attr_pkt->tag.vio_sid = ldcp->local_session; 3500 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3501 3502 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 3503 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 3504 vsw_send_msg(ldcp, (void *)attr_pkt, 3505 sizeof (vnet_attr_msg_t)); 3506 3507 vsw_next_milestone(ldcp); 3508 return; 3509 } 3510 3511 /* 3512 * Otherwise store attributes for this lane and update 3513 * lane state. 3514 */ 3515 ldcp->lane_in.mtu = attr_pkt->mtu; 3516 ldcp->lane_in.addr = attr_pkt->addr; 3517 ldcp->lane_in.addr_type = attr_pkt->addr_type; 3518 ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode; 3519 ldcp->lane_in.ack_freq = attr_pkt->ack_freq; 3520 3521 macaddr = ldcp->lane_in.addr; 3522 for (i = ETHERADDRL - 1; i >= 0; i--) { 3523 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 3524 macaddr >>= 8; 3525 } 3526 3527 /* create the fdb entry for this port/mac address */ 3528 (void) vsw_add_fdb(vswp, port); 3529 3530 /* setup device specifc xmit routines */ 3531 mutex_enter(&port->tx_lock); 3532 if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) { 3533 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 3534 port->transmit = vsw_dringsend; 3535 } else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) { 3536 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 3537 vsw_create_privring(ldcp); 3538 port->transmit = vsw_descrsend; 3539 } 3540 mutex_exit(&port->tx_lock); 3541 3542 attr_pkt->tag.vio_sid = ldcp->local_session; 3543 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3544 3545 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 3546 3547 ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT; 3548 3549 vsw_send_msg(ldcp, (void *)attr_pkt, 3550 sizeof (vnet_attr_msg_t)); 3551 3552 vsw_next_milestone(ldcp); 3553 break; 3554 3555 case VIO_SUBTYPE_ACK: 3556 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3557 3558 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 3559 return; 3560 3561 ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV; 3562 vsw_next_milestone(ldcp); 3563 break; 3564 3565 case VIO_SUBTYPE_NACK: 3566 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3567 3568 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 3569 return; 3570 3571 ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV; 3572 vsw_next_milestone(ldcp); 3573 break; 3574 3575 default: 3576 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3577 attr_pkt->tag.vio_subtype); 3578 } 3579 3580 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3581 } 3582 3583 /* 3584 * Process a dring info packet. We can end up here either because our peer 3585 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 3586 * peer has sent us a dring INFO message. 3587 * 3588 * If we get a valid/acceptable INFO packet (and we have already negotiated 3589 * a version) we ACK back and update the lane state, otherwise we NACK back. 3590 * 3591 * FUTURE: nothing to stop client from sending us info on multiple dring's 3592 * but for the moment we will just use the first one we are given. 3593 * 3594 */ 3595 void 3596 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 3597 { 3598 vio_dring_reg_msg_t *dring_pkt; 3599 vsw_t *vswp = ldcp->ldc_vswp; 3600 ldc_mem_info_t minfo; 3601 dring_info_t *dp, *dbp; 3602 int dring_found = 0; 3603 3604 /* 3605 * We know this is a ctrl/dring packet so 3606 * cast it into the correct structure. 3607 */ 3608 dring_pkt = (vio_dring_reg_msg_t *)pkt; 3609 3610 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3611 3612 switch (dring_pkt->tag.vio_subtype) { 3613 case VIO_SUBTYPE_INFO: 3614 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3615 3616 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3617 return; 3618 3619 /* 3620 * If the dring params are unacceptable then we NACK back. 3621 */ 3622 if (vsw_check_dring_info(dring_pkt)) { 3623 3624 DERR(vswp, "%s (%lld): invalid dring info", 3625 __func__, ldcp->ldc_id); 3626 3627 vsw_free_lane_resources(ldcp, INBOUND); 3628 3629 dring_pkt->tag.vio_sid = ldcp->local_session; 3630 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3631 3632 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 3633 3634 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 3635 3636 vsw_send_msg(ldcp, (void *)dring_pkt, 3637 sizeof (vio_dring_reg_msg_t)); 3638 3639 vsw_next_milestone(ldcp); 3640 return; 3641 } 3642 3643 /* 3644 * Otherwise, attempt to map in the dring using the 3645 * cookie. If that succeeds we send back a unique dring 3646 * identifier that the sending side will use in future 3647 * to refer to this descriptor ring. 3648 */ 3649 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 3650 3651 dp->num_descriptors = dring_pkt->num_descriptors; 3652 dp->descriptor_size = dring_pkt->descriptor_size; 3653 dp->options = dring_pkt->options; 3654 dp->ncookies = dring_pkt->ncookies; 3655 3656 /* 3657 * Note: should only get one cookie. Enforced in 3658 * the ldc layer. 3659 */ 3660 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 3661 sizeof (ldc_mem_cookie_t)); 3662 3663 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 3664 dp->num_descriptors, dp->descriptor_size); 3665 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 3666 dp->options, dp->ncookies); 3667 3668 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 3669 dp->ncookies, dp->num_descriptors, 3670 dp->descriptor_size, LDC_SHADOW_MAP, 3671 &(dp->handle))) != 0) { 3672 3673 DERR(vswp, "%s: dring_map failed\n", __func__); 3674 3675 kmem_free(dp, sizeof (dring_info_t)); 3676 vsw_free_lane_resources(ldcp, INBOUND); 3677 3678 dring_pkt->tag.vio_sid = ldcp->local_session; 3679 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3680 3681 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 3682 3683 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 3684 vsw_send_msg(ldcp, (void *)dring_pkt, 3685 sizeof (vio_dring_reg_msg_t)); 3686 3687 vsw_next_milestone(ldcp); 3688 return; 3689 } 3690 3691 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 3692 3693 DERR(vswp, "%s: dring_addr failed\n", __func__); 3694 3695 kmem_free(dp, sizeof (dring_info_t)); 3696 vsw_free_lane_resources(ldcp, INBOUND); 3697 3698 dring_pkt->tag.vio_sid = ldcp->local_session; 3699 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3700 3701 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 3702 3703 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 3704 vsw_send_msg(ldcp, (void *)dring_pkt, 3705 sizeof (vio_dring_reg_msg_t)); 3706 3707 vsw_next_milestone(ldcp); 3708 return; 3709 } else { 3710 /* store the address of the pub part of ring */ 3711 dp->pub_addr = minfo.vaddr; 3712 } 3713 3714 /* no private section as we are importing */ 3715 dp->priv_addr = NULL; 3716 3717 /* 3718 * Using simple mono increasing int for ident at 3719 * the moment. 3720 */ 3721 dp->ident = ldcp->next_ident; 3722 ldcp->next_ident++; 3723 3724 dp->end_idx = 0; 3725 dp->next = NULL; 3726 3727 /* 3728 * Link it onto the end of the list of drings 3729 * for this lane. 3730 */ 3731 if (ldcp->lane_in.dringp == NULL) { 3732 D2(vswp, "%s: adding first INBOUND dring", __func__); 3733 ldcp->lane_in.dringp = dp; 3734 } else { 3735 dbp = ldcp->lane_in.dringp; 3736 3737 while (dbp->next != NULL) 3738 dbp = dbp->next; 3739 3740 dbp->next = dp; 3741 } 3742 3743 /* acknowledge it */ 3744 dring_pkt->tag.vio_sid = ldcp->local_session; 3745 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3746 dring_pkt->dring_ident = dp->ident; 3747 3748 vsw_send_msg(ldcp, (void *)dring_pkt, 3749 sizeof (vio_dring_reg_msg_t)); 3750 3751 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 3752 vsw_next_milestone(ldcp); 3753 break; 3754 3755 case VIO_SUBTYPE_ACK: 3756 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3757 3758 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 3759 return; 3760 3761 /* 3762 * Peer is acknowledging our dring info and will have 3763 * sent us a dring identifier which we will use to 3764 * refer to this ring w.r.t. our peer. 3765 */ 3766 dp = ldcp->lane_out.dringp; 3767 if (dp != NULL) { 3768 /* 3769 * Find the ring this ident should be associated 3770 * with. 3771 */ 3772 if (vsw_dring_match(dp, dring_pkt)) { 3773 dring_found = 1; 3774 3775 } else while (dp != NULL) { 3776 if (vsw_dring_match(dp, dring_pkt)) { 3777 dring_found = 1; 3778 break; 3779 } 3780 dp = dp->next; 3781 } 3782 3783 if (dring_found == 0) { 3784 DERR(NULL, "%s: unrecognised ring cookie", 3785 __func__); 3786 vsw_restart_handshake(ldcp); 3787 return; 3788 } 3789 3790 } else { 3791 DERR(vswp, "%s: DRING ACK received but no drings " 3792 "allocated", __func__); 3793 vsw_restart_handshake(ldcp); 3794 return; 3795 } 3796 3797 /* store ident */ 3798 dp->ident = dring_pkt->dring_ident; 3799 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 3800 vsw_next_milestone(ldcp); 3801 break; 3802 3803 case VIO_SUBTYPE_NACK: 3804 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3805 3806 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 3807 return; 3808 3809 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 3810 vsw_next_milestone(ldcp); 3811 break; 3812 3813 default: 3814 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3815 dring_pkt->tag.vio_subtype); 3816 } 3817 3818 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3819 } 3820 3821 /* 3822 * Process a request from peer to unregister a dring. 3823 * 3824 * For the moment we just restart the handshake if our 3825 * peer endpoint attempts to unregister a dring. 3826 */ 3827 void 3828 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 3829 { 3830 vsw_t *vswp = ldcp->ldc_vswp; 3831 vio_dring_unreg_msg_t *dring_pkt; 3832 3833 /* 3834 * We know this is a ctrl/dring packet so 3835 * cast it into the correct structure. 3836 */ 3837 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 3838 3839 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3840 3841 switch (dring_pkt->tag.vio_subtype) { 3842 case VIO_SUBTYPE_INFO: 3843 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3844 3845 DWARN(vswp, "%s: restarting handshake..", __func__); 3846 vsw_restart_handshake(ldcp); 3847 break; 3848 3849 case VIO_SUBTYPE_ACK: 3850 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3851 3852 DWARN(vswp, "%s: restarting handshake..", __func__); 3853 vsw_restart_handshake(ldcp); 3854 break; 3855 3856 case VIO_SUBTYPE_NACK: 3857 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3858 3859 DWARN(vswp, "%s: restarting handshake..", __func__); 3860 vsw_restart_handshake(ldcp); 3861 break; 3862 3863 default: 3864 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3865 dring_pkt->tag.vio_subtype); 3866 vsw_restart_handshake(ldcp); 3867 } 3868 3869 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3870 } 3871 3872 #define SND_MCST_NACK(ldcp, pkt) \ 3873 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3874 pkt->tag.vio_sid = ldcp->local_session; \ 3875 vsw_send_msg(ldcp, (void *)pkt, sizeof (vnet_mcast_msg_t)); 3876 3877 /* 3878 * Process a multicast request from a vnet. 3879 * 3880 * Vnet's specify a multicast address that they are interested in. This 3881 * address is used as a key into the hash table which forms the multicast 3882 * forwarding database (mFDB). 3883 * 3884 * The table keys are the multicast addresses, while the table entries 3885 * are pointers to lists of ports which wish to receive packets for the 3886 * specified multicast address. 3887 * 3888 * When a multicast packet is being switched we use the address as a key 3889 * into the hash table, and then walk the appropriate port list forwarding 3890 * the pkt to each port in turn. 3891 * 3892 * If a vnet is no longer interested in a particular multicast grouping 3893 * we simply find the correct location in the hash table and then delete 3894 * the relevant port from the port list. 3895 * 3896 * To deal with the case whereby a port is being deleted without first 3897 * removing itself from the lists in the hash table, we maintain a list 3898 * of multicast addresses the port has registered an interest in, within 3899 * the port structure itself. We then simply walk that list of addresses 3900 * using them as keys into the hash table and remove the port from the 3901 * appropriate lists. 3902 */ 3903 static void 3904 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 3905 { 3906 vnet_mcast_msg_t *mcst_pkt; 3907 vsw_port_t *port = ldcp->ldc_port; 3908 vsw_t *vswp = ldcp->ldc_vswp; 3909 int i; 3910 3911 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3912 3913 /* 3914 * We know this is a ctrl/mcast packet so 3915 * cast it into the correct structure. 3916 */ 3917 mcst_pkt = (vnet_mcast_msg_t *)pkt; 3918 3919 switch (mcst_pkt->tag.vio_subtype) { 3920 case VIO_SUBTYPE_INFO: 3921 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3922 3923 /* 3924 * Check if in correct state to receive a multicast 3925 * message (i.e. handshake complete). If not reset 3926 * the handshake. 3927 */ 3928 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 3929 return; 3930 3931 /* 3932 * Before attempting to add or remove address check 3933 * that they are valid multicast addresses. 3934 * If not, then NACK back. 3935 */ 3936 for (i = 0; i < mcst_pkt->count; i++) { 3937 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3938 DERR(vswp, "%s: invalid multicast address", 3939 __func__); 3940 SND_MCST_NACK(ldcp, mcst_pkt); 3941 return; 3942 } 3943 } 3944 3945 /* 3946 * Now add/remove the addresses. If this fails we 3947 * NACK back. 3948 */ 3949 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3950 SND_MCST_NACK(ldcp, mcst_pkt); 3951 return; 3952 } 3953 3954 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3955 mcst_pkt->tag.vio_sid = ldcp->local_session; 3956 3957 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3958 3959 vsw_send_msg(ldcp, (void *)mcst_pkt, 3960 sizeof (vnet_mcast_msg_t)); 3961 break; 3962 3963 case VIO_SUBTYPE_ACK: 3964 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3965 3966 /* 3967 * We shouldn't ever get a multicast ACK message as 3968 * at the moment we never request multicast addresses 3969 * to be set on some other device. This may change in 3970 * the future if we have cascading switches. 3971 */ 3972 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3973 return; 3974 3975 /* Do nothing */ 3976 break; 3977 3978 case VIO_SUBTYPE_NACK: 3979 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3980 3981 /* 3982 * We shouldn't get a multicast NACK packet for the 3983 * same reasons as we shouldn't get a ACK packet. 3984 */ 3985 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3986 return; 3987 3988 /* Do nothing */ 3989 break; 3990 3991 default: 3992 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3993 mcst_pkt->tag.vio_subtype); 3994 } 3995 3996 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3997 } 3998 3999 static void 4000 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 4001 { 4002 vio_rdx_msg_t *rdx_pkt; 4003 vsw_t *vswp = ldcp->ldc_vswp; 4004 4005 /* 4006 * We know this is a ctrl/rdx packet so 4007 * cast it into the correct structure. 4008 */ 4009 rdx_pkt = (vio_rdx_msg_t *)pkt; 4010 4011 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 4012 4013 switch (rdx_pkt->tag.vio_subtype) { 4014 case VIO_SUBTYPE_INFO: 4015 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 4016 4017 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_INFO_RECV)) 4018 return; 4019 4020 rdx_pkt->tag.vio_sid = ldcp->local_session; 4021 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 4022 4023 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 4024 4025 ldcp->lane_in.lstate |= VSW_RDX_ACK_SENT; 4026 4027 vsw_send_msg(ldcp, (void *)rdx_pkt, 4028 sizeof (vio_rdx_msg_t)); 4029 4030 vsw_next_milestone(ldcp); 4031 break; 4032 4033 case VIO_SUBTYPE_ACK: 4034 /* 4035 * Should be handled in-band by callback handler. 4036 */ 4037 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 4038 vsw_restart_handshake(ldcp); 4039 break; 4040 4041 case VIO_SUBTYPE_NACK: 4042 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4043 4044 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_NACK_RECV)) 4045 return; 4046 4047 ldcp->lane_out.lstate |= VSW_RDX_NACK_RECV; 4048 vsw_next_milestone(ldcp); 4049 break; 4050 4051 default: 4052 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 4053 rdx_pkt->tag.vio_subtype); 4054 } 4055 4056 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4057 } 4058 4059 static void 4060 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t tag) 4061 { 4062 uint16_t env = tag.vio_subtype_env; 4063 vsw_t *vswp = ldcp->ldc_vswp; 4064 4065 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4066 4067 /* session id check */ 4068 if (ldcp->session_status & VSW_PEER_SESSION) { 4069 if (ldcp->peer_session != tag.vio_sid) { 4070 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 4071 __func__, ldcp->ldc_id, tag.vio_sid); 4072 vsw_restart_handshake(ldcp); 4073 return; 4074 } 4075 } 4076 4077 /* 4078 * It is an error for us to be getting data packets 4079 * before the handshake has completed. 4080 */ 4081 if (ldcp->hphase != VSW_MILESTONE4) { 4082 DERR(vswp, "%s: got data packet before handshake complete " 4083 "hphase %d (%x: %x)", __func__, ldcp->hphase, 4084 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 4085 DUMP_FLAGS(ldcp->lane_in.lstate); 4086 DUMP_FLAGS(ldcp->lane_out.lstate); 4087 vsw_restart_handshake(ldcp); 4088 return; 4089 } 4090 4091 /* 4092 * Switch on vio_subtype envelope, then let lower routines 4093 * decide if its an INFO, ACK or NACK packet. 4094 */ 4095 if (env == VIO_DRING_DATA) { 4096 vsw_process_data_dring_pkt(ldcp, dpkt); 4097 } else if (env == VIO_PKT_DATA) { 4098 vsw_process_data_raw_pkt(ldcp, dpkt); 4099 } else if (env == VIO_DESC_DATA) { 4100 vsw_process_data_ibnd_pkt(ldcp, dpkt); 4101 } else { 4102 DERR(vswp, "%s : unknown vio_subtype_env (%x)\n", 4103 __func__, env); 4104 } 4105 4106 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4107 } 4108 4109 #define SND_DRING_NACK(ldcp, pkt) \ 4110 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 4111 pkt->tag.vio_sid = ldcp->local_session; \ 4112 vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_dring_msg_t)); 4113 4114 static void 4115 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 4116 { 4117 vio_dring_msg_t *dring_pkt; 4118 vnet_public_desc_t *pub_addr = NULL; 4119 vsw_private_desc_t *priv_addr = NULL; 4120 dring_info_t *dp = NULL; 4121 vsw_t *vswp = ldcp->ldc_vswp; 4122 mblk_t *mp = NULL; 4123 mblk_t *bp = NULL; 4124 mblk_t *bpt = NULL; 4125 size_t nbytes = 0; 4126 size_t off = 0; 4127 uint64_t ncookies = 0; 4128 uint64_t chain = 0; 4129 uint64_t j, len, num; 4130 uint32_t start, end, datalen; 4131 int i, last_sync, rv; 4132 boolean_t ack_needed = B_FALSE; 4133 boolean_t sync_needed = B_TRUE; 4134 4135 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4136 4137 /* 4138 * We know this is a data/dring packet so 4139 * cast it into the correct structure. 4140 */ 4141 dring_pkt = (vio_dring_msg_t *)dpkt; 4142 4143 /* 4144 * Switch on the vio_subtype. If its INFO then we need to 4145 * process the data. If its an ACK we need to make sure 4146 * it makes sense (i.e did we send an earlier data/info), 4147 * and if its a NACK then we maybe attempt a retry. 4148 */ 4149 switch (dring_pkt->tag.vio_subtype) { 4150 case VIO_SUBTYPE_INFO: 4151 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 4152 4153 if ((dp = vsw_ident2dring(&ldcp->lane_in, 4154 dring_pkt->dring_ident)) == NULL) { 4155 4156 DERR(vswp, "%s(%lld): unable to find dring from " 4157 "ident 0x%llx", __func__, ldcp->ldc_id, 4158 dring_pkt->dring_ident); 4159 4160 SND_DRING_NACK(ldcp, dring_pkt); 4161 return; 4162 } 4163 4164 start = end = 0; 4165 start = dring_pkt->start_idx; 4166 end = dring_pkt->end_idx; 4167 4168 D3(vswp, "%s(%lld): start index %ld : end %ld\n", 4169 __func__, ldcp->ldc_id, start, end); 4170 4171 /* basic sanity check */ 4172 len = dp->num_descriptors; 4173 if (end > len) { 4174 DERR(vswp, "%s(%lld): endpoint %lld outside ring" 4175 " length %lld", __func__, ldcp->ldc_id, 4176 end, len); 4177 4178 SND_DRING_NACK(ldcp, dring_pkt); 4179 return; 4180 } 4181 4182 /* sync data */ 4183 if ((rv = ldc_mem_dring_acquire(dp->handle, 4184 start, end)) != 0) { 4185 DERR(vswp, "%s(%lld): unable to acquire dring : err %d", 4186 __func__, ldcp->ldc_id, rv); 4187 return; 4188 } 4189 4190 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 4191 4192 j = num = 0; 4193 4194 /* calculate # descriptors taking into a/c wrap around */ 4195 num = end >= start ? end - start + 1: (len - start + 1) + end; 4196 4197 last_sync = start; 4198 4199 for (i = start; j < num; i = (i + 1) % len, j++) { 4200 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 4201 4202 /* 4203 * Data is padded to align on 8 byte boundary, 4204 * datalen is actual data length, i.e. minus that 4205 * padding. 4206 */ 4207 datalen = pub_addr->nbytes; 4208 4209 /* 4210 * Does peer wish us to ACK when we have finished 4211 * with this descriptor ? 4212 */ 4213 if (pub_addr->hdr.ack) 4214 ack_needed = B_TRUE; 4215 4216 D2(vswp, "%s(%lld): processing desc %lld at pos" 4217 " 0x%llx : dstate 0x%lx : datalen 0x%lx", 4218 __func__, ldcp->ldc_id, i, pub_addr, 4219 pub_addr->hdr.dstate, datalen); 4220 4221 /* 4222 * XXXX : Is it a fatal error to be told to 4223 * process a packet when the READY bit is not 4224 * set ? 4225 */ 4226 if (pub_addr->hdr.dstate != VIO_DESC_READY) { 4227 DERR(vswp, "%s(%d): descriptor %lld at pos " 4228 " 0x%llx not READY (0x%lx)", __func__, 4229 ldcp->ldc_id, i, pub_addr, 4230 pub_addr->hdr.dstate); 4231 4232 SND_DRING_NACK(ldcp, dring_pkt); 4233 (void) ldc_mem_dring_release(dp->handle, 4234 start, end); 4235 return; 4236 } 4237 4238 /* 4239 * Mark that we are starting to process descriptor. 4240 */ 4241 pub_addr->hdr.dstate = VIO_DESC_ACCEPTED; 4242 4243 /* 4244 * allocb(9F) returns an aligned data block. We 4245 * need to ensure that we ask ldc for an aligned 4246 * number of bytes also. 4247 */ 4248 nbytes = datalen; 4249 if (nbytes & 0x7) { 4250 off = 8 - (nbytes & 0x7); 4251 nbytes += off; 4252 } 4253 mp = allocb(datalen, BPRI_MED); 4254 if (mp == NULL) { 4255 DERR(vswp, "%s(%lld): allocb failed", 4256 __func__, ldcp->ldc_id); 4257 (void) ldc_mem_dring_release(dp->handle, 4258 start, end); 4259 return; 4260 } 4261 4262 ncookies = pub_addr->ncookies; 4263 rv = ldc_mem_copy(ldcp->ldc_handle, 4264 (caddr_t)mp->b_rptr, 0, &nbytes, 4265 pub_addr->memcookie, ncookies, 4266 LDC_COPY_IN); 4267 4268 if (rv != 0) { 4269 DERR(vswp, "%s(%d): unable to copy in " 4270 "data from %d cookies", __func__, 4271 ldcp->ldc_id, ncookies); 4272 freemsg(mp); 4273 (void) ldc_mem_dring_release(dp->handle, 4274 start, end); 4275 return; 4276 } else { 4277 D2(vswp, "%s(%d): copied in %ld bytes" 4278 " using %d cookies", __func__, 4279 ldcp->ldc_id, nbytes, ncookies); 4280 } 4281 4282 /* point to the actual end of data */ 4283 mp->b_wptr = mp->b_rptr + datalen; 4284 4285 /* build a chain of received packets */ 4286 if (bp == NULL) { 4287 /* first pkt */ 4288 bp = mp; 4289 bp->b_next = bp->b_prev = NULL; 4290 bpt = bp; 4291 chain = 1; 4292 } else { 4293 mp->b_next = NULL; 4294 mp->b_prev = bpt; 4295 bpt->b_next = mp; 4296 bpt = mp; 4297 chain++; 4298 } 4299 4300 /* mark we are finished with this descriptor */ 4301 pub_addr->hdr.dstate = VIO_DESC_DONE; 4302 4303 /* 4304 * Send an ACK back to peer if requested, and sync 4305 * the rings up to this point so the remote side sees 4306 * the descriptor flag in a consistent state. 4307 */ 4308 if (ack_needed) { 4309 if ((rv = ldc_mem_dring_release( 4310 dp->handle, last_sync, i)) != 0) { 4311 DERR(vswp, "%s(%lld): unable to sync" 4312 " from %d to %d", __func__, 4313 ldcp->ldc_id, last_sync, i); 4314 } 4315 4316 ack_needed = B_FALSE; 4317 4318 if (i == end) 4319 sync_needed = B_FALSE; 4320 else 4321 sync_needed = B_TRUE; 4322 4323 last_sync = (i + 1) % len; 4324 4325 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 4326 dring_pkt->tag.vio_sid = ldcp->local_session; 4327 vsw_send_msg(ldcp, (void *)dring_pkt, 4328 sizeof (vio_dring_msg_t)); 4329 } 4330 } 4331 4332 if (sync_needed) { 4333 if ((rv = ldc_mem_dring_release(dp->handle, 4334 last_sync, end)) != 0) { 4335 DERR(vswp, "%s(%lld): unable to sync" 4336 " from %d to %d", __func__, 4337 ldcp->ldc_id, last_sync, end); 4338 } 4339 } 4340 4341 /* send the chain of packets to be switched */ 4342 D3(vswp, "%s(%lld): switching chain of %d msgs", __func__, 4343 ldcp->ldc_id, chain); 4344 vsw_switch_frame(vswp, bp, VSW_VNETPORT, 4345 ldcp->ldc_port, NULL); 4346 4347 break; 4348 4349 case VIO_SUBTYPE_ACK: 4350 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 4351 /* 4352 * Verify that the relevant descriptors are all 4353 * marked as DONE 4354 */ 4355 if ((dp = vsw_ident2dring(&ldcp->lane_out, 4356 dring_pkt->dring_ident)) == NULL) { 4357 DERR(vswp, "%s: unknown ident in ACK", __func__); 4358 return; 4359 } 4360 4361 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 4362 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 4363 4364 start = end = 0; 4365 start = dring_pkt->start_idx; 4366 end = dring_pkt->end_idx; 4367 len = dp->num_descriptors; 4368 4369 4370 j = num = 0; 4371 /* calculate # descriptors taking into a/c wrap around */ 4372 num = end >= start ? end - start + 1: (len - start + 1) + end; 4373 4374 D2(vswp, "%s(%lld): start index %ld : end %ld : num %ld\n", 4375 __func__, ldcp->ldc_id, start, end, num); 4376 4377 for (i = start; j < num; i = (i + 1) % len, j++) { 4378 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 4379 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 4380 4381 if (pub_addr->hdr.dstate != VIO_DESC_DONE) { 4382 DERR(vswp, "%s: descriptor %lld at pos " 4383 " 0x%llx not DONE (0x%lx)\n", __func__, 4384 i, pub_addr, pub_addr->hdr.dstate); 4385 return; 4386 } else { 4387 /* clear all the fields */ 4388 bzero(priv_addr->datap, priv_addr->datalen); 4389 priv_addr->datalen = 0; 4390 4391 pub_addr->hdr.dstate = VIO_DESC_FREE; 4392 pub_addr->hdr.ack = 0; 4393 priv_addr->dstate = VIO_DESC_FREE; 4394 4395 D3(vswp, "clearing descp %d : pub state " 4396 "0x%llx : priv state 0x%llx", i, 4397 pub_addr->hdr.dstate, 4398 priv_addr->dstate); 4399 } 4400 } 4401 4402 break; 4403 4404 case VIO_SUBTYPE_NACK: 4405 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 4406 __func__, ldcp->ldc_id); 4407 /* 4408 * Something is badly wrong if we are getting NACK's 4409 * for our data pkts. So reset the channel. 4410 */ 4411 vsw_restart_handshake(ldcp); 4412 4413 break; 4414 4415 default: 4416 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 4417 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 4418 } 4419 4420 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4421 } 4422 4423 /* 4424 * VIO_PKT_DATA (a.k.a raw data mode ) 4425 * 4426 * Note - currently not supported. Do nothing. 4427 */ 4428 static void 4429 vsw_process_data_raw_pkt(vsw_ldc_t *ldcp, void *dpkt) 4430 { 4431 _NOTE(ARGUNUSED(dpkt)) 4432 4433 D1(NULL, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 4434 4435 DERR(NULL, "%s (%lld): currently not supported", 4436 __func__, ldcp->ldc_id); 4437 4438 D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 4439 } 4440 4441 #define SND_IBND_DESC_NACK(ldcp, pkt) \ 4442 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 4443 pkt->tag.vio_sid = ldcp->local_session; \ 4444 vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_ibnd_desc_t)); 4445 4446 /* 4447 * Process an in-band descriptor message (most likely from 4448 * OBP). 4449 */ 4450 static void 4451 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 4452 { 4453 vio_ibnd_desc_t *ibnd_desc; 4454 dring_info_t *dp = NULL; 4455 vsw_private_desc_t *priv_addr = NULL; 4456 vsw_t *vswp = ldcp->ldc_vswp; 4457 mblk_t *mp = NULL; 4458 size_t nbytes = 0; 4459 size_t off = 0; 4460 uint64_t idx = 0; 4461 uint32_t datalen = 0; 4462 uint64_t ncookies = 0; 4463 int rv; 4464 4465 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4466 4467 ibnd_desc = (vio_ibnd_desc_t *)pkt; 4468 4469 switch (ibnd_desc->hdr.tag.vio_subtype) { 4470 case VIO_SUBTYPE_INFO: 4471 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 4472 4473 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 4474 return; 4475 4476 /* 4477 * Data is padded to align on a 8 byte boundary, 4478 * nbytes is actual data length, i.e. minus that 4479 * padding. 4480 */ 4481 datalen = ibnd_desc->nbytes; 4482 4483 D2(vswp, "%s(%lld): processing inband desc : " 4484 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 4485 4486 ncookies = ibnd_desc->ncookies; 4487 4488 /* 4489 * allocb(9F) returns an aligned data block. We 4490 * need to ensure that we ask ldc for an aligned 4491 * number of bytes also. 4492 */ 4493 nbytes = datalen; 4494 if (nbytes & 0x7) { 4495 off = 8 - (nbytes & 0x7); 4496 nbytes += off; 4497 } 4498 4499 mp = allocb(datalen, BPRI_MED); 4500 if (mp == NULL) { 4501 DERR(vswp, "%s(%lld): allocb failed", 4502 __func__, ldcp->ldc_id); 4503 return; 4504 } 4505 4506 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 4507 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 4508 LDC_COPY_IN); 4509 4510 if (rv != 0) { 4511 DERR(vswp, "%s(%d): unable to copy in data from " 4512 "%d cookie(s)", __func__, 4513 ldcp->ldc_id, ncookies); 4514 freemsg(mp); 4515 return; 4516 } else { 4517 D2(vswp, "%s(%d): copied in %ld bytes using %d " 4518 "cookies", __func__, ldcp->ldc_id, nbytes, 4519 ncookies); 4520 } 4521 4522 /* point to the actual end of data */ 4523 mp->b_wptr = mp->b_rptr + datalen; 4524 4525 /* 4526 * We ACK back every in-band descriptor message we process 4527 */ 4528 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 4529 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 4530 vsw_send_msg(ldcp, (void *)ibnd_desc, 4531 sizeof (vio_ibnd_desc_t)); 4532 4533 /* send the packet to be switched */ 4534 vsw_switch_frame(vswp, mp, VSW_VNETPORT, 4535 ldcp->ldc_port, NULL); 4536 4537 break; 4538 4539 case VIO_SUBTYPE_ACK: 4540 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 4541 4542 /* Verify the ACK is valid */ 4543 idx = ibnd_desc->hdr.desc_handle; 4544 4545 if (idx >= VSW_RING_NUM_EL) { 4546 cmn_err(CE_WARN, "%s: corrupted ACK received " 4547 "(idx %ld)", __func__, idx); 4548 return; 4549 } 4550 4551 if ((dp = ldcp->lane_out.dringp) == NULL) { 4552 DERR(vswp, "%s: no dring found", __func__); 4553 return; 4554 } 4555 4556 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 4557 4558 /* move to correct location in ring */ 4559 priv_addr += idx; 4560 4561 /* 4562 * When we sent the in-band message to our peer we 4563 * marked the copy in our private ring as READY. We now 4564 * check that the descriptor we are being ACK'ed for is in 4565 * fact READY, i.e. it is one we have shared with our peer. 4566 */ 4567 if (priv_addr->dstate != VIO_DESC_READY) { 4568 cmn_err(CE_WARN, "%s: (%ld) desc at index %ld not " 4569 "READY (0x%lx)", __func__, ldcp->ldc_id, idx, 4570 priv_addr->dstate); 4571 cmn_err(CE_CONT, "%s: bound %d: ncookies %ld\n", 4572 __func__, priv_addr->bound, 4573 priv_addr->ncookies); 4574 cmn_err(CE_CONT, "datalen %ld\n", priv_addr->datalen); 4575 return; 4576 } else { 4577 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 4578 ldcp->ldc_id, idx); 4579 4580 /* release resources associated with sent msg */ 4581 bzero(priv_addr->datap, priv_addr->datalen); 4582 priv_addr->datalen = 0; 4583 priv_addr->dstate = VIO_DESC_FREE; 4584 } 4585 break; 4586 4587 case VIO_SUBTYPE_NACK: 4588 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4589 4590 /* 4591 * We should only get a NACK if our peer doesn't like 4592 * something about a message we have sent it. If this 4593 * happens we just release the resources associated with 4594 * the message. (We are relying on higher layers to decide 4595 * whether or not to resend. 4596 */ 4597 4598 /* limit check */ 4599 idx = ibnd_desc->hdr.desc_handle; 4600 4601 if (idx >= VSW_RING_NUM_EL) { 4602 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 4603 __func__, idx); 4604 return; 4605 } 4606 4607 if ((dp = ldcp->lane_out.dringp) == NULL) { 4608 DERR(vswp, "%s: no dring found", __func__); 4609 return; 4610 } 4611 4612 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 4613 4614 /* move to correct location in ring */ 4615 priv_addr += idx; 4616 4617 /* release resources associated with sent msg */ 4618 bzero(priv_addr->datap, priv_addr->datalen); 4619 priv_addr->datalen = 0; 4620 priv_addr->dstate = VIO_DESC_FREE; 4621 4622 break; 4623 4624 default: 4625 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 4626 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 4627 } 4628 4629 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4630 } 4631 4632 static void 4633 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t tag) 4634 { 4635 _NOTE(ARGUNUSED(epkt)) 4636 4637 vsw_t *vswp = ldcp->ldc_vswp; 4638 uint16_t env = tag.vio_subtype_env; 4639 4640 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 4641 4642 /* 4643 * Error vio_subtypes have yet to be defined. So for 4644 * the moment we can't do anything. 4645 */ 4646 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 4647 4648 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 4649 } 4650 4651 /* 4652 * Switch the given ethernet frame when operating in layer 2 mode. 4653 * 4654 * vswp: pointer to the vsw instance 4655 * mp: pointer to chain of ethernet frame(s) to be switched 4656 * caller: identifies the source of this frame as: 4657 * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 4658 * 2. VSW_PHYSDEV - the physical ethernet device 4659 * 3. VSW_LOCALDEV - vsw configured as a virtual interface 4660 * arg: argument provided by the caller. 4661 * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 4662 * 2. for PHYSDEV - NULL 4663 * 3. for LOCALDEV - pointer to to this vsw_t(self) 4664 */ 4665 void 4666 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 4667 vsw_port_t *arg, mac_resource_handle_t mrh) 4668 { 4669 struct ether_header *ehp; 4670 vsw_port_t *port = NULL; 4671 mblk_t *bp, *ret_m; 4672 mblk_t *nmp = NULL; 4673 vsw_port_list_t *plist = &vswp->plist; 4674 4675 D1(vswp, "%s: enter (caller %d)", __func__, caller); 4676 4677 /* 4678 * PERF: rather than breaking up the chain here, scan it 4679 * to find all mblks heading to same destination and then 4680 * pass that sub-chain to the lower transmit functions. 4681 */ 4682 4683 /* process the chain of packets */ 4684 bp = mp; 4685 while (bp) { 4686 mp = bp; 4687 bp = bp->b_next; 4688 mp->b_next = mp->b_prev = NULL; 4689 ehp = (struct ether_header *)mp->b_rptr; 4690 4691 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 4692 __func__, MBLKSIZE(mp), MBLKL(mp)); 4693 4694 READ_ENTER(&vswp->if_lockrw); 4695 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 4696 /* 4697 * If destination is VSW_LOCALDEV (vsw as an eth 4698 * interface) and if the device is up & running, 4699 * send the packet up the stack on this host. 4700 * If the virtual interface is down, drop the packet. 4701 */ 4702 if (caller != VSW_LOCALDEV) { 4703 if (vswp->if_state & VSW_IF_UP) { 4704 RW_EXIT(&vswp->if_lockrw); 4705 mac_rx(vswp->if_macp, mrh, mp); 4706 } else { 4707 RW_EXIT(&vswp->if_lockrw); 4708 /* Interface down, drop pkt */ 4709 freemsg(mp); 4710 } 4711 } else { 4712 RW_EXIT(&vswp->if_lockrw); 4713 freemsg(mp); 4714 } 4715 continue; 4716 } 4717 RW_EXIT(&vswp->if_lockrw); 4718 4719 READ_ENTER(&plist->lockrw); 4720 port = vsw_lookup_fdb(vswp, ehp); 4721 if (port) { 4722 /* 4723 * Mark the port as in-use. 4724 */ 4725 mutex_enter(&port->ref_lock); 4726 port->ref_cnt++; 4727 mutex_exit(&port->ref_lock); 4728 RW_EXIT(&plist->lockrw); 4729 4730 /* 4731 * If plumbed and in promisc mode then copy msg 4732 * and send up the stack. 4733 */ 4734 READ_ENTER(&vswp->if_lockrw); 4735 if (VSW_U_P(vswp->if_state)) { 4736 RW_EXIT(&vswp->if_lockrw); 4737 nmp = copymsg(mp); 4738 if (nmp) 4739 mac_rx(vswp->if_macp, mrh, nmp); 4740 } else { 4741 RW_EXIT(&vswp->if_lockrw); 4742 } 4743 4744 /* 4745 * If the destination is in FDB, the packet 4746 * should be forwarded to the correponding 4747 * vsw_port (connected to a vnet device - 4748 * VSW_VNETPORT) 4749 */ 4750 (void) vsw_portsend(port, mp); 4751 4752 /* 4753 * Decrement use count in port and check if 4754 * should wake delete thread. 4755 */ 4756 mutex_enter(&port->ref_lock); 4757 port->ref_cnt--; 4758 if (port->ref_cnt == 0) 4759 cv_signal(&port->ref_cv); 4760 mutex_exit(&port->ref_lock); 4761 } else { 4762 RW_EXIT(&plist->lockrw); 4763 /* 4764 * Destination not in FDB. 4765 * 4766 * If the destination is broadcast or 4767 * multicast forward the packet to all 4768 * (VNETPORTs, PHYSDEV, LOCALDEV), 4769 * except the caller. 4770 */ 4771 if (IS_BROADCAST(ehp)) { 4772 D3(vswp, "%s: BROADCAST pkt", __func__); 4773 (void) vsw_forward_all(vswp, mp, 4774 caller, arg); 4775 } else if (IS_MULTICAST(ehp)) { 4776 D3(vswp, "%s: MULTICAST pkt", __func__); 4777 (void) vsw_forward_grp(vswp, mp, 4778 caller, arg); 4779 } else { 4780 /* 4781 * If the destination is unicast, and came 4782 * from either a logical network device or 4783 * the switch itself when it is plumbed, then 4784 * send it out on the physical device and also 4785 * up the stack if the logical interface is 4786 * in promiscious mode. 4787 * 4788 * NOTE: The assumption here is that if we 4789 * cannot find the destination in our fdb, its 4790 * a unicast address, and came from either a 4791 * vnet or down the stack (when plumbed) it 4792 * must be destinded for an ethernet device 4793 * outside our ldoms. 4794 */ 4795 if (caller == VSW_VNETPORT) { 4796 READ_ENTER(&vswp->if_lockrw); 4797 if (VSW_U_P(vswp->if_state)) { 4798 RW_EXIT(&vswp->if_lockrw); 4799 nmp = copymsg(mp); 4800 if (nmp) 4801 mac_rx(vswp->if_macp, 4802 mrh, nmp); 4803 } else { 4804 RW_EXIT(&vswp->if_lockrw); 4805 } 4806 if ((ret_m = vsw_tx_msg(vswp, mp)) 4807 != NULL) { 4808 DERR(vswp, "%s: drop mblks to " 4809 "phys dev", __func__); 4810 freemsg(ret_m); 4811 } 4812 4813 } else if (caller == VSW_PHYSDEV) { 4814 /* 4815 * Pkt seen because card in promisc 4816 * mode. Send up stack if plumbed in 4817 * promisc mode, else drop it. 4818 */ 4819 READ_ENTER(&vswp->if_lockrw); 4820 if (VSW_U_P(vswp->if_state)) { 4821 RW_EXIT(&vswp->if_lockrw); 4822 mac_rx(vswp->if_macp, mrh, mp); 4823 } else { 4824 RW_EXIT(&vswp->if_lockrw); 4825 freemsg(mp); 4826 } 4827 4828 } else if (caller == VSW_LOCALDEV) { 4829 /* 4830 * Pkt came down the stack, send out 4831 * over physical device. 4832 */ 4833 if ((ret_m = vsw_tx_msg(vswp, mp)) 4834 != NULL) { 4835 DERR(vswp, "%s: drop mblks to " 4836 "phys dev", __func__); 4837 freemsg(ret_m); 4838 } 4839 } 4840 } 4841 } 4842 } 4843 D1(vswp, "%s: exit\n", __func__); 4844 } 4845 4846 /* 4847 * Switch ethernet frame when in layer 3 mode (i.e. using IP 4848 * layer to do the routing). 4849 * 4850 * There is a large amount of overlap between this function and 4851 * vsw_switch_l2_frame. At some stage we need to revisit and refactor 4852 * both these functions. 4853 */ 4854 void 4855 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 4856 vsw_port_t *arg, mac_resource_handle_t mrh) 4857 { 4858 struct ether_header *ehp; 4859 vsw_port_t *port = NULL; 4860 mblk_t *bp = NULL; 4861 vsw_port_list_t *plist = &vswp->plist; 4862 4863 D1(vswp, "%s: enter (caller %d)", __func__, caller); 4864 4865 /* 4866 * In layer 3 mode should only ever be switching packets 4867 * between IP layer and vnet devices. So make sure thats 4868 * who is invoking us. 4869 */ 4870 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) { 4871 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller); 4872 freemsgchain(mp); 4873 return; 4874 } 4875 4876 /* process the chain of packets */ 4877 bp = mp; 4878 while (bp) { 4879 mp = bp; 4880 bp = bp->b_next; 4881 mp->b_next = mp->b_prev = NULL; 4882 ehp = (struct ether_header *)mp->b_rptr; 4883 4884 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 4885 __func__, MBLKSIZE(mp), MBLKL(mp)); 4886 4887 READ_ENTER(&plist->lockrw); 4888 port = vsw_lookup_fdb(vswp, ehp); 4889 if (port) { 4890 /* 4891 * Mark port as in-use. 4892 */ 4893 mutex_enter(&port->ref_lock); 4894 port->ref_cnt++; 4895 mutex_exit(&port->ref_lock); 4896 RW_EXIT(&plist->lockrw); 4897 4898 D2(vswp, "%s: sending to target port", __func__); 4899 (void) vsw_portsend(port, mp); 4900 4901 /* 4902 * Finished with port so decrement ref count and 4903 * check if should wake delete thread. 4904 */ 4905 mutex_enter(&port->ref_lock); 4906 port->ref_cnt--; 4907 if (port->ref_cnt == 0) 4908 cv_signal(&port->ref_cv); 4909 mutex_exit(&port->ref_lock); 4910 } else { 4911 RW_EXIT(&plist->lockrw); 4912 /* 4913 * Destination not in FDB 4914 * 4915 * If the destination is broadcast or 4916 * multicast forward the packet to all 4917 * (VNETPORTs, PHYSDEV, LOCALDEV), 4918 * except the caller. 4919 */ 4920 if (IS_BROADCAST(ehp)) { 4921 D2(vswp, "%s: BROADCAST pkt", __func__); 4922 (void) vsw_forward_all(vswp, mp, 4923 caller, arg); 4924 } else if (IS_MULTICAST(ehp)) { 4925 D2(vswp, "%s: MULTICAST pkt", __func__); 4926 (void) vsw_forward_grp(vswp, mp, 4927 caller, arg); 4928 } else { 4929 /* 4930 * Unicast pkt from vnet that we don't have 4931 * an FDB entry for, so must be destinded for 4932 * the outside world. Attempt to send up to the 4933 * IP layer to allow it to deal with it. 4934 */ 4935 if (caller == VSW_VNETPORT) { 4936 READ_ENTER(&vswp->if_lockrw); 4937 if (vswp->if_state & VSW_IF_UP) { 4938 RW_EXIT(&vswp->if_lockrw); 4939 D2(vswp, "%s: sending up", 4940 __func__); 4941 mac_rx(vswp->if_macp, mrh, mp); 4942 } else { 4943 RW_EXIT(&vswp->if_lockrw); 4944 /* Interface down, drop pkt */ 4945 D2(vswp, "%s I/F down", 4946 __func__); 4947 freemsg(mp); 4948 } 4949 } 4950 } 4951 } 4952 } 4953 4954 D1(vswp, "%s: exit", __func__); 4955 } 4956 4957 /* 4958 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV), 4959 * except the caller (port on which frame arrived). 4960 */ 4961 static int 4962 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 4963 { 4964 vsw_port_list_t *plist = &vswp->plist; 4965 vsw_port_t *portp; 4966 mblk_t *nmp = NULL; 4967 mblk_t *ret_m = NULL; 4968 int skip_port = 0; 4969 4970 D1(vswp, "vsw_forward_all: enter\n"); 4971 4972 /* 4973 * Broadcast message from inside ldoms so send to outside 4974 * world if in either of layer 2 modes. 4975 */ 4976 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 4977 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 4978 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 4979 4980 nmp = dupmsg(mp); 4981 if (nmp) { 4982 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 4983 DERR(vswp, "%s: dropping pkt(s) " 4984 "consisting of %ld bytes of data for" 4985 " physical device", __func__, MBLKL(ret_m)); 4986 freemsg(ret_m); 4987 } 4988 } 4989 } 4990 4991 if (caller == VSW_VNETPORT) 4992 skip_port = 1; 4993 4994 /* 4995 * Broadcast message from other vnet (layer 2 or 3) or outside 4996 * world (layer 2 only), send up stack if plumbed. 4997 */ 4998 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 4999 READ_ENTER(&vswp->if_lockrw); 5000 if (vswp->if_state & VSW_IF_UP) { 5001 RW_EXIT(&vswp->if_lockrw); 5002 nmp = copymsg(mp); 5003 if (nmp) 5004 mac_rx(vswp->if_macp, vswp->if_mrh, nmp); 5005 } else { 5006 RW_EXIT(&vswp->if_lockrw); 5007 } 5008 } 5009 5010 /* send it to all VNETPORTs */ 5011 READ_ENTER(&plist->lockrw); 5012 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 5013 D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 5014 /* 5015 * Caution ! - don't reorder these two checks as arg 5016 * will be NULL if the caller is PHYSDEV. skip_port is 5017 * only set if caller is VNETPORT. 5018 */ 5019 if ((skip_port) && (portp == arg)) 5020 continue; 5021 else { 5022 nmp = dupmsg(mp); 5023 if (nmp) { 5024 (void) vsw_portsend(portp, nmp); 5025 } else { 5026 DERR(vswp, "vsw_forward_all: nmp NULL"); 5027 } 5028 } 5029 } 5030 RW_EXIT(&plist->lockrw); 5031 5032 freemsg(mp); 5033 5034 D1(vswp, "vsw_forward_all: exit\n"); 5035 return (0); 5036 } 5037 5038 /* 5039 * Forward pkts to any devices or interfaces which have registered 5040 * an interest in them (i.e. multicast groups). 5041 */ 5042 static int 5043 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 5044 { 5045 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 5046 mfdb_ent_t *entp = NULL; 5047 mfdb_ent_t *tpp = NULL; 5048 vsw_port_t *port; 5049 uint64_t key = 0; 5050 mblk_t *nmp = NULL; 5051 mblk_t *ret_m = NULL; 5052 boolean_t check_if = B_TRUE; 5053 5054 /* 5055 * Convert address to hash table key 5056 */ 5057 KEY_HASH(key, ehp->ether_dhost); 5058 5059 D1(vswp, "%s: key 0x%llx", __func__, key); 5060 5061 /* 5062 * If pkt came from either a vnet or down the stack (if we are 5063 * plumbed) and we are in layer 2 mode, then we send the pkt out 5064 * over the physical adapter, and then check to see if any other 5065 * vnets are interested in it. 5066 */ 5067 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 5068 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 5069 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 5070 nmp = dupmsg(mp); 5071 if (nmp) { 5072 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 5073 DERR(vswp, "%s: dropping pkt(s) " 5074 "consisting of %ld bytes of " 5075 "data for physical device", 5076 __func__, MBLKL(ret_m)); 5077 freemsg(ret_m); 5078 } 5079 } 5080 } 5081 5082 READ_ENTER(&vswp->mfdbrw); 5083 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 5084 (mod_hash_val_t *)&entp) != 0) { 5085 D3(vswp, "%s: no table entry found for addr 0x%llx", 5086 __func__, key); 5087 } else { 5088 /* 5089 * Send to list of devices associated with this address... 5090 */ 5091 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 5092 5093 /* dont send to ourselves */ 5094 if ((caller == VSW_VNETPORT) && 5095 (tpp->d_addr == (void *)arg)) { 5096 port = (vsw_port_t *)tpp->d_addr; 5097 D3(vswp, "%s: not sending to ourselves" 5098 " : port %d", __func__, 5099 port->p_instance); 5100 continue; 5101 5102 } else if ((caller == VSW_LOCALDEV) && 5103 (tpp->d_type == VSW_LOCALDEV)) { 5104 D3(vswp, "%s: not sending back up stack", 5105 __func__); 5106 continue; 5107 } 5108 5109 if (tpp->d_type == VSW_VNETPORT) { 5110 port = (vsw_port_t *)tpp->d_addr; 5111 D3(vswp, "%s: sending to port %ld for " 5112 " addr 0x%llx", __func__, 5113 port->p_instance, key); 5114 5115 nmp = dupmsg(mp); 5116 if (nmp) 5117 (void) vsw_portsend(port, nmp); 5118 } else { 5119 if (vswp->if_state & VSW_IF_UP) { 5120 nmp = copymsg(mp); 5121 if (nmp) 5122 mac_rx(vswp->if_macp, 5123 vswp->if_mrh, nmp); 5124 check_if = B_FALSE; 5125 D3(vswp, "%s: sending up stack" 5126 " for addr 0x%llx", __func__, 5127 key); 5128 } 5129 } 5130 } 5131 } 5132 5133 RW_EXIT(&vswp->mfdbrw); 5134 5135 /* 5136 * If the pkt came from either a vnet or from physical device, 5137 * and if we havent already sent the pkt up the stack then we 5138 * check now if we can/should (i.e. the interface is plumbed 5139 * and in promisc mode). 5140 */ 5141 if ((check_if) && 5142 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 5143 READ_ENTER(&vswp->if_lockrw); 5144 if (VSW_U_P(vswp->if_state)) { 5145 RW_EXIT(&vswp->if_lockrw); 5146 D3(vswp, "%s: (caller %d) finally sending up stack" 5147 " for addr 0x%llx", __func__, caller, key); 5148 nmp = copymsg(mp); 5149 if (nmp) 5150 mac_rx(vswp->if_macp, vswp->if_mrh, nmp); 5151 } else { 5152 RW_EXIT(&vswp->if_lockrw); 5153 } 5154 } 5155 5156 freemsg(mp); 5157 5158 D1(vswp, "%s: exit", __func__); 5159 5160 return (0); 5161 } 5162 5163 /* transmit the packet over the given port */ 5164 static int 5165 vsw_portsend(vsw_port_t *port, mblk_t *mp) 5166 { 5167 vsw_ldc_list_t *ldcl = &port->p_ldclist; 5168 vsw_ldc_t *ldcp; 5169 int status = 0; 5170 5171 5172 READ_ENTER(&ldcl->lockrw); 5173 /* 5174 * Note for now, we have a single channel. 5175 */ 5176 ldcp = ldcl->head; 5177 if (ldcp == NULL) { 5178 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 5179 freemsg(mp); 5180 RW_EXIT(&ldcl->lockrw); 5181 return (1); 5182 } 5183 5184 /* 5185 * Send the message out using the appropriate 5186 * transmit function which will free mblock when it 5187 * is finished with it. 5188 */ 5189 mutex_enter(&port->tx_lock); 5190 if (port->transmit != NULL) 5191 status = (*port->transmit)(ldcp, mp); 5192 else { 5193 freemsg(mp); 5194 } 5195 mutex_exit(&port->tx_lock); 5196 5197 RW_EXIT(&ldcl->lockrw); 5198 5199 return (status); 5200 } 5201 5202 /* 5203 * Send packet out via descriptor ring to a logical device. 5204 */ 5205 static int 5206 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 5207 { 5208 vio_dring_msg_t dring_pkt; 5209 dring_info_t *dp = NULL; 5210 vsw_private_desc_t *priv_desc = NULL; 5211 vsw_t *vswp = ldcp->ldc_vswp; 5212 mblk_t *bp; 5213 size_t n, size; 5214 caddr_t bufp; 5215 int idx; 5216 int status = LDC_TX_SUCCESS; 5217 5218 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 5219 5220 /* TODO: make test a macro */ 5221 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 5222 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 5223 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 5224 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 5225 ldcp->lane_out.lstate); 5226 freemsg(mp); 5227 return (LDC_TX_FAILURE); 5228 } 5229 5230 /* 5231 * Note - using first ring only, this may change 5232 * in the future. 5233 */ 5234 if ((dp = ldcp->lane_out.dringp) == NULL) { 5235 DERR(vswp, "%s(%lld): no dring for outbound lane on" 5236 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 5237 freemsg(mp); 5238 return (LDC_TX_FAILURE); 5239 } 5240 5241 mutex_enter(&dp->dlock); 5242 5243 size = msgsize(mp); 5244 if (size > (size_t)ETHERMAX) { 5245 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 5246 ldcp->ldc_id, size); 5247 status = LDC_TX_FAILURE; 5248 goto vsw_dringsend_free_exit; 5249 } 5250 5251 /* 5252 * Find a free descriptor 5253 * 5254 * Note: for the moment we are assuming that we will only 5255 * have one dring going from the switch to each of its 5256 * peers. This may change in the future. 5257 */ 5258 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 5259 DERR(vswp, "%s(%lld): no descriptor available for ring " 5260 "at 0x%llx", __func__, ldcp->ldc_id, dp); 5261 5262 /* nothing more we can do */ 5263 status = LDC_TX_NORESOURCES; 5264 goto vsw_dringsend_free_exit; 5265 } else { 5266 D2(vswp, "%s(%lld): free private descriptor found at pos " 5267 "%ld addr 0x%llx\n", __func__, ldcp->ldc_id, idx, 5268 priv_desc); 5269 } 5270 5271 /* copy data into the descriptor */ 5272 bufp = priv_desc->datap; 5273 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 5274 n = MBLKL(bp); 5275 bcopy(bp->b_rptr, bufp, n); 5276 bufp += n; 5277 } 5278 5279 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 5280 priv_desc->dstate = VIO_DESC_READY; 5281 5282 /* 5283 * Copy relevant sections of private descriptor 5284 * to public section 5285 */ 5286 vsw_dring_priv2pub(priv_desc); 5287 5288 /* 5289 * Send a vio_dring_msg to peer to prompt them to read 5290 * the updated descriptor ring. 5291 */ 5292 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 5293 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 5294 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 5295 dring_pkt.tag.vio_sid = ldcp->local_session; 5296 5297 /* Note - for now using first ring */ 5298 dring_pkt.dring_ident = dp->ident; 5299 5300 /* 5301 * Access to the seq_num is implicitly protected by the 5302 * fact that we have only one dring associated with the 5303 * lane currently and we hold the associated dring lock. 5304 */ 5305 dring_pkt.seq_num = ldcp->lane_out.seq_num++; 5306 5307 /* Note - only updating single descrip at time at the moment */ 5308 dring_pkt.start_idx = idx; 5309 dring_pkt.end_idx = idx; 5310 5311 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 5312 ldcp->ldc_id, dp, dring_pkt.dring_ident); 5313 D3(vswp, "%s(%lld): start %lld : end %lld : seq %lld\n", __func__, 5314 ldcp->ldc_id, dring_pkt.start_idx, dring_pkt.end_idx, 5315 dring_pkt.seq_num); 5316 5317 vsw_send_msg(ldcp, (void *)&dring_pkt, sizeof (vio_dring_msg_t)); 5318 5319 vsw_dringsend_free_exit: 5320 5321 mutex_exit(&dp->dlock); 5322 5323 /* free the message block */ 5324 freemsg(mp); 5325 5326 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 5327 return (status); 5328 } 5329 5330 /* 5331 * Send an in-band descriptor message over ldc. 5332 */ 5333 static int 5334 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 5335 { 5336 vsw_t *vswp = ldcp->ldc_vswp; 5337 vio_ibnd_desc_t ibnd_msg; 5338 vsw_private_desc_t *priv_desc = NULL; 5339 dring_info_t *dp = NULL; 5340 size_t n, size = 0; 5341 caddr_t bufp; 5342 mblk_t *bp; 5343 int idx, i; 5344 int status = LDC_TX_SUCCESS; 5345 static int warn_msg = 1; 5346 5347 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5348 5349 ASSERT(mp != NULL); 5350 5351 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 5352 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 5353 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 5354 __func__, ldcp->ldc_id, ldcp->ldc_status, 5355 ldcp->lane_out.lstate); 5356 freemsg(mp); 5357 return (LDC_TX_FAILURE); 5358 } 5359 5360 /* 5361 * only expect single dring to exist, which we use 5362 * as an internal buffer, rather than a transfer channel. 5363 */ 5364 if ((dp = ldcp->lane_out.dringp) == NULL) { 5365 DERR(vswp, "%s(%lld): no dring for outbound lane", 5366 __func__, ldcp->ldc_id); 5367 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", 5368 __func__, ldcp->ldc_id, ldcp->ldc_status, 5369 ldcp->lane_out.lstate); 5370 freemsg(mp); 5371 return (LDC_TX_FAILURE); 5372 } 5373 5374 mutex_enter(&dp->dlock); 5375 5376 size = msgsize(mp); 5377 if (size > (size_t)ETHERMAX) { 5378 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 5379 ldcp->ldc_id, size); 5380 status = LDC_TX_FAILURE; 5381 goto vsw_descrsend_free_exit; 5382 } 5383 5384 /* 5385 * Find a free descriptor in our buffer ring 5386 */ 5387 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 5388 if (warn_msg) { 5389 DERR(vswp, "%s(%lld): no descriptor available for ring " 5390 "at 0x%llx", __func__, ldcp->ldc_id, dp); 5391 warn_msg = 0; 5392 } 5393 5394 /* nothing more we can do */ 5395 status = LDC_TX_NORESOURCES; 5396 goto vsw_descrsend_free_exit; 5397 } else { 5398 D2(vswp, "%s(%lld): free private descriptor found at pos " 5399 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, 5400 priv_desc); 5401 warn_msg = 1; 5402 } 5403 5404 /* copy data into the descriptor */ 5405 bufp = priv_desc->datap; 5406 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 5407 n = MBLKL(bp); 5408 bcopy(bp->b_rptr, bufp, n); 5409 bufp += n; 5410 } 5411 5412 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 5413 priv_desc->dstate = VIO_DESC_READY; 5414 5415 /* create and send the in-band descp msg */ 5416 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 5417 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 5418 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 5419 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 5420 5421 /* 5422 * Access to the seq_num is implicitly protected by the 5423 * fact that we have only one dring associated with the 5424 * lane currently and we hold the associated dring lock. 5425 */ 5426 ibnd_msg.hdr.seq_num = ldcp->lane_out.seq_num++; 5427 5428 /* 5429 * Copy the mem cookies describing the data from the 5430 * private region of the descriptor ring into the inband 5431 * descriptor. 5432 */ 5433 for (i = 0; i < priv_desc->ncookies; i++) { 5434 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 5435 sizeof (ldc_mem_cookie_t)); 5436 } 5437 5438 ibnd_msg.hdr.desc_handle = idx; 5439 ibnd_msg.ncookies = priv_desc->ncookies; 5440 ibnd_msg.nbytes = size; 5441 5442 vsw_send_msg(ldcp, (void *)&ibnd_msg, sizeof (vio_ibnd_desc_t)); 5443 5444 vsw_descrsend_free_exit: 5445 5446 mutex_exit(&dp->dlock); 5447 5448 /* free the allocated message blocks */ 5449 freemsg(mp); 5450 5451 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 5452 return (status); 5453 } 5454 5455 static void 5456 vsw_send_ver(vsw_ldc_t *ldcp) 5457 { 5458 vsw_t *vswp = ldcp->ldc_vswp; 5459 lane_t *lp = &ldcp->lane_out; 5460 vio_ver_msg_t ver_msg; 5461 5462 D1(vswp, "%s enter", __func__); 5463 5464 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 5465 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 5466 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 5467 ver_msg.tag.vio_sid = ldcp->local_session; 5468 5469 ver_msg.ver_major = vsw_versions[0].ver_major; 5470 ver_msg.ver_minor = vsw_versions[0].ver_minor; 5471 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 5472 5473 lp->lstate |= VSW_VER_INFO_SENT; 5474 lp->ver_major = ver_msg.ver_major; 5475 lp->ver_minor = ver_msg.ver_minor; 5476 5477 DUMP_TAG(ver_msg.tag); 5478 5479 vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t)); 5480 5481 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 5482 } 5483 5484 static void 5485 vsw_send_attr(vsw_ldc_t *ldcp) 5486 { 5487 vsw_t *vswp = ldcp->ldc_vswp; 5488 lane_t *lp = &ldcp->lane_out; 5489 vnet_attr_msg_t attr_msg; 5490 5491 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 5492 5493 /* 5494 * Subtype is set to INFO by default 5495 */ 5496 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 5497 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 5498 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 5499 attr_msg.tag.vio_sid = ldcp->local_session; 5500 5501 /* payload copied from default settings for lane */ 5502 attr_msg.mtu = lp->mtu; 5503 attr_msg.addr_type = lp->addr_type; 5504 attr_msg.xfer_mode = lp->xfer_mode; 5505 attr_msg.ack_freq = lp->xfer_mode; 5506 5507 READ_ENTER(&vswp->if_lockrw); 5508 bcopy(&(vswp->if_addr), &(attr_msg.addr), ETHERADDRL); 5509 RW_EXIT(&vswp->if_lockrw); 5510 5511 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 5512 5513 DUMP_TAG(attr_msg.tag); 5514 5515 vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t)); 5516 5517 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 5518 } 5519 5520 /* 5521 * Create dring info msg (which also results in the creation of 5522 * a dring). 5523 */ 5524 static vio_dring_reg_msg_t * 5525 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 5526 { 5527 vio_dring_reg_msg_t *mp; 5528 dring_info_t *dp; 5529 vsw_t *vswp = ldcp->ldc_vswp; 5530 5531 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 5532 5533 /* 5534 * If we can't create a dring, obviously no point sending 5535 * a message. 5536 */ 5537 if ((dp = vsw_create_dring(ldcp)) == NULL) 5538 return (NULL); 5539 5540 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 5541 5542 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 5543 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 5544 mp->tag.vio_subtype_env = VIO_DRING_REG; 5545 mp->tag.vio_sid = ldcp->local_session; 5546 5547 /* payload */ 5548 mp->num_descriptors = dp->num_descriptors; 5549 mp->descriptor_size = dp->descriptor_size; 5550 mp->options = dp->options; 5551 mp->ncookies = dp->ncookies; 5552 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 5553 5554 mp->dring_ident = 0; 5555 5556 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 5557 5558 return (mp); 5559 } 5560 5561 static void 5562 vsw_send_dring_info(vsw_ldc_t *ldcp) 5563 { 5564 vio_dring_reg_msg_t *dring_msg; 5565 vsw_t *vswp = ldcp->ldc_vswp; 5566 5567 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 5568 5569 dring_msg = vsw_create_dring_info_pkt(ldcp); 5570 if (dring_msg == NULL) { 5571 cmn_err(CE_WARN, "vsw_send_dring_info: error creating msg"); 5572 return; 5573 } 5574 5575 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 5576 5577 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 5578 5579 vsw_send_msg(ldcp, dring_msg, 5580 sizeof (vio_dring_reg_msg_t)); 5581 5582 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 5583 5584 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 5585 } 5586 5587 static void 5588 vsw_send_rdx(vsw_ldc_t *ldcp) 5589 { 5590 vsw_t *vswp = ldcp->ldc_vswp; 5591 vio_rdx_msg_t rdx_msg; 5592 5593 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 5594 5595 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 5596 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 5597 rdx_msg.tag.vio_subtype_env = VIO_RDX; 5598 rdx_msg.tag.vio_sid = ldcp->local_session; 5599 5600 ldcp->lane_out.lstate |= VSW_RDX_INFO_SENT; 5601 5602 DUMP_TAG(rdx_msg.tag); 5603 5604 vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t)); 5605 5606 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 5607 } 5608 5609 /* 5610 * Generic routine to send message out over ldc channel. 5611 */ 5612 static void 5613 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size) 5614 { 5615 int rv; 5616 size_t msglen = size; 5617 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 5618 vsw_t *vswp = ldcp->ldc_vswp; 5619 5620 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 5621 ldcp->ldc_id, size); 5622 5623 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 5624 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 5625 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 5626 5627 mutex_enter(&ldcp->ldc_txlock); 5628 do { 5629 msglen = size; 5630 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 5631 } while (rv == EWOULDBLOCK && --vsw_wretries > 0); 5632 5633 mutex_exit(&ldcp->ldc_txlock); 5634 5635 if ((rv != 0) || (msglen != size)) { 5636 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) " 5637 "rv(%d) size (%d) msglen(%d)\n", ldcp->ldc_id, 5638 rv, size, msglen); 5639 } 5640 5641 D1(vswp, "vsw_send_msg (%lld) exit : sent %d bytes", 5642 ldcp->ldc_id, msglen); 5643 } 5644 5645 /* 5646 * Add an entry into FDB, for the given mac address and port_id. 5647 * Returns 0 on success, 1 on failure. 5648 * 5649 * Lock protecting FDB must be held by calling process. 5650 */ 5651 static int 5652 vsw_add_fdb(vsw_t *vswp, vsw_port_t *port) 5653 { 5654 uint64_t addr = 0; 5655 5656 D1(vswp, "%s: enter", __func__); 5657 5658 KEY_HASH(addr, port->p_macaddr); 5659 5660 D2(vswp, "%s: key = 0x%llx", __func__, addr); 5661 5662 /* 5663 * Note: duplicate keys will be rejected by mod_hash. 5664 */ 5665 if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)addr, 5666 (mod_hash_val_t)port) != 0) { 5667 DERR(vswp, "%s: unable to add entry into fdb.", __func__); 5668 return (1); 5669 } 5670 5671 D1(vswp, "%s: exit", __func__); 5672 return (0); 5673 } 5674 5675 /* 5676 * Remove an entry from FDB. 5677 * Returns 0 on success, 1 on failure. 5678 */ 5679 static int 5680 vsw_del_fdb(vsw_t *vswp, vsw_port_t *port) 5681 { 5682 uint64_t addr = 0; 5683 5684 D1(vswp, "%s: enter", __func__); 5685 5686 KEY_HASH(addr, port->p_macaddr); 5687 5688 D2(vswp, "%s: key = 0x%llx", __func__, addr); 5689 5690 (void) mod_hash_destroy(vswp->fdb, (mod_hash_val_t)addr); 5691 5692 D1(vswp, "%s: enter", __func__); 5693 5694 return (0); 5695 } 5696 5697 /* 5698 * Search fdb for a given mac address. 5699 * Returns pointer to the entry if found, else returns NULL. 5700 */ 5701 static vsw_port_t * 5702 vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp) 5703 { 5704 uint64_t key = 0; 5705 vsw_port_t *port = NULL; 5706 5707 D1(vswp, "%s: enter", __func__); 5708 5709 KEY_HASH(key, ehp->ether_dhost); 5710 5711 D2(vswp, "%s: key = 0x%llx", __func__, key); 5712 5713 if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key, 5714 (mod_hash_val_t *)&port) != 0) { 5715 return (NULL); 5716 } 5717 5718 D1(vswp, "%s: exit", __func__); 5719 5720 return (port); 5721 } 5722 5723 /* 5724 * Add or remove multicast address(es). 5725 * 5726 * Returns 0 on success, 1 on failure. 5727 */ 5728 static int 5729 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port) 5730 { 5731 mcst_addr_t *mcst_p = NULL; 5732 vsw_t *vswp = port->p_vswp; 5733 uint64_t addr = 0x0; 5734 int i; 5735 5736 D1(vswp, "%s: enter", __func__); 5737 5738 D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count); 5739 5740 for (i = 0; i < mcst_pkt->count; i++) { 5741 /* 5742 * Convert address into form that can be used 5743 * as hash table key. 5744 */ 5745 KEY_HASH(addr, mcst_pkt->mca[i]); 5746 5747 /* 5748 * Add or delete the specified address/port combination. 5749 */ 5750 if (mcst_pkt->set == 0x1) { 5751 D3(vswp, "%s: adding multicast address 0x%llx for " 5752 "port %ld", __func__, addr, port->p_instance); 5753 if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 5754 /* 5755 * Update the list of multicast 5756 * addresses contained within the 5757 * port structure to include this new 5758 * one. 5759 */ 5760 mcst_p = kmem_alloc(sizeof (mcst_addr_t), 5761 KM_NOSLEEP); 5762 if (mcst_p == NULL) { 5763 DERR(vswp, "%s: unable to alloc mem", 5764 __func__); 5765 return (1); 5766 } 5767 5768 mcst_p->nextp = NULL; 5769 mcst_p->addr = addr; 5770 5771 mutex_enter(&port->mca_lock); 5772 mcst_p->nextp = port->mcap; 5773 port->mcap = mcst_p; 5774 mutex_exit(&port->mca_lock); 5775 5776 /* 5777 * Program the address into HW. If the addr 5778 * has already been programmed then the MAC 5779 * just increments a ref counter (which is 5780 * used when the address is being deleted) 5781 * 5782 * Note: 5783 * For the moment we dont care if this 5784 * succeeds because the card must be in 5785 * promics mode. When we have the ability 5786 * to program multiple unicst address into 5787 * the card then we will need to check this 5788 * return value. 5789 */ 5790 if (vswp->mh != NULL) 5791 (void) mac_multicst_add(vswp->mh, 5792 (uchar_t *)&mcst_pkt->mca[i]); 5793 5794 } else { 5795 DERR(vswp, "%s: error adding multicast " 5796 "address 0x%llx for port %ld", 5797 __func__, addr, port->p_instance); 5798 return (1); 5799 } 5800 } else { 5801 /* 5802 * Delete an entry from the multicast hash 5803 * table and update the address list 5804 * appropriately. 5805 */ 5806 if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 5807 D3(vswp, "%s: deleting multicast address " 5808 "0x%llx for port %ld", __func__, addr, 5809 port->p_instance); 5810 5811 vsw_del_addr(VSW_VNETPORT, port, addr); 5812 5813 /* 5814 * Remove the address from HW. The address 5815 * will actually only be removed once the ref 5816 * count within the MAC layer has dropped to 5817 * zero. I.e. we can safely call this fn even 5818 * if other ports are interested in this 5819 * address. 5820 */ 5821 if (vswp->mh != NULL) 5822 (void) mac_multicst_remove(vswp->mh, 5823 (uchar_t *)&mcst_pkt->mca[i]); 5824 5825 } else { 5826 DERR(vswp, "%s: error deleting multicast " 5827 "addr 0x%llx for port %ld", 5828 __func__, addr, port->p_instance); 5829 return (1); 5830 } 5831 } 5832 } 5833 D1(vswp, "%s: exit", __func__); 5834 return (0); 5835 } 5836 5837 /* 5838 * Add a new multicast entry. 5839 * 5840 * Search hash table based on address. If match found then 5841 * update associated val (which is chain of ports), otherwise 5842 * create new key/val (addr/port) pair and insert into table. 5843 */ 5844 static int 5845 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 5846 { 5847 int dup = 0; 5848 int rv = 0; 5849 mfdb_ent_t *ment = NULL; 5850 mfdb_ent_t *tmp_ent = NULL; 5851 mfdb_ent_t *new_ent = NULL; 5852 void *tgt = NULL; 5853 5854 if (devtype == VSW_VNETPORT) { 5855 /* 5856 * Being invoked from a vnet. 5857 */ 5858 ASSERT(arg != NULL); 5859 tgt = arg; 5860 D2(NULL, "%s: port %d : address 0x%llx", __func__, 5861 ((vsw_port_t *)arg)->p_instance, addr); 5862 } else { 5863 /* 5864 * We are being invoked via the m_multicst mac entry 5865 * point. 5866 */ 5867 D2(NULL, "%s: address 0x%llx", __func__, addr); 5868 tgt = (void *)vswp; 5869 } 5870 5871 WRITE_ENTER(&vswp->mfdbrw); 5872 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 5873 (mod_hash_val_t *)&ment) != 0) { 5874 5875 /* address not currently in table */ 5876 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 5877 ment->d_addr = (void *)tgt; 5878 ment->d_type = devtype; 5879 ment->nextp = NULL; 5880 5881 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr, 5882 (mod_hash_val_t)ment) != 0) { 5883 DERR(vswp, "%s: hash table insertion failed", __func__); 5884 kmem_free(ment, sizeof (mfdb_ent_t)); 5885 rv = 1; 5886 } else { 5887 D2(vswp, "%s: added initial entry for 0x%llx to " 5888 "table", __func__, addr); 5889 } 5890 } else { 5891 /* 5892 * Address in table. Check to see if specified port 5893 * is already associated with the address. If not add 5894 * it now. 5895 */ 5896 tmp_ent = ment; 5897 while (tmp_ent != NULL) { 5898 if (tmp_ent->d_addr == (void *)tgt) { 5899 if (devtype == VSW_VNETPORT) { 5900 DERR(vswp, "%s: duplicate port entry " 5901 "found for portid %ld and key " 5902 "0x%llx", __func__, 5903 ((vsw_port_t *)arg)->p_instance, 5904 addr); 5905 } else { 5906 DERR(vswp, "%s: duplicate entry found" 5907 "for key 0x%llx", 5908 __func__, addr); 5909 } 5910 rv = 1; 5911 dup = 1; 5912 break; 5913 } 5914 tmp_ent = tmp_ent->nextp; 5915 } 5916 5917 /* 5918 * Port not on list so add it to end now. 5919 */ 5920 if (0 == dup) { 5921 D2(vswp, "%s: added entry for 0x%llx to table", 5922 __func__, addr); 5923 new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 5924 new_ent->d_addr = (void *)tgt; 5925 new_ent->d_type = devtype; 5926 new_ent->nextp = NULL; 5927 5928 tmp_ent = ment; 5929 while (tmp_ent->nextp != NULL) 5930 tmp_ent = tmp_ent->nextp; 5931 5932 tmp_ent->nextp = new_ent; 5933 } 5934 } 5935 5936 RW_EXIT(&vswp->mfdbrw); 5937 return (rv); 5938 } 5939 5940 /* 5941 * Remove a multicast entry from the hashtable. 5942 * 5943 * Search hash table based on address. If match found, scan 5944 * list of ports associated with address. If specified port 5945 * found remove it from list. 5946 */ 5947 static int 5948 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 5949 { 5950 mfdb_ent_t *ment = NULL; 5951 mfdb_ent_t *curr_p, *prev_p; 5952 void *tgt = NULL; 5953 5954 D1(vswp, "%s: enter", __func__); 5955 5956 if (devtype == VSW_VNETPORT) { 5957 tgt = (vsw_port_t *)arg; 5958 D2(vswp, "%s: removing port %d from mFDB for address" 5959 " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, 5960 addr); 5961 } else { 5962 D2(vswp, "%s: removing entry", __func__); 5963 tgt = (void *)vswp; 5964 } 5965 5966 WRITE_ENTER(&vswp->mfdbrw); 5967 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 5968 (mod_hash_val_t *)&ment) != 0) { 5969 D2(vswp, "%s: address 0x%llx not in table", __func__, addr); 5970 RW_EXIT(&vswp->mfdbrw); 5971 return (1); 5972 } 5973 5974 prev_p = curr_p = ment; 5975 5976 while (curr_p != NULL) { 5977 if (curr_p->d_addr == (void *)tgt) { 5978 if (devtype == VSW_VNETPORT) { 5979 D2(vswp, "%s: port %d found", __func__, 5980 ((vsw_port_t *)tgt)->p_instance); 5981 } else { 5982 D2(vswp, "%s: instance found", __func__); 5983 } 5984 5985 if (prev_p == curr_p) { 5986 /* 5987 * head of list, if no other element is in 5988 * list then destroy this entry, otherwise 5989 * just replace it with updated value. 5990 */ 5991 ment = curr_p->nextp; 5992 kmem_free(curr_p, sizeof (mfdb_ent_t)); 5993 if (ment == NULL) { 5994 (void) mod_hash_destroy(vswp->mfdb, 5995 (mod_hash_val_t)addr); 5996 } else { 5997 (void) mod_hash_replace(vswp->mfdb, 5998 (mod_hash_key_t)addr, 5999 (mod_hash_val_t)ment); 6000 } 6001 } else { 6002 /* 6003 * Not head of list, no need to do 6004 * replacement, just adjust list pointers. 6005 */ 6006 prev_p->nextp = curr_p->nextp; 6007 kmem_free(curr_p, sizeof (mfdb_ent_t)); 6008 } 6009 break; 6010 } 6011 6012 prev_p = curr_p; 6013 curr_p = curr_p->nextp; 6014 } 6015 6016 RW_EXIT(&vswp->mfdbrw); 6017 6018 D1(vswp, "%s: exit", __func__); 6019 6020 return (0); 6021 } 6022 6023 /* 6024 * Port is being deleted, but has registered an interest in one 6025 * or more multicast groups. Using the list of addresses maintained 6026 * within the port structure find the appropriate entry in the hash 6027 * table and remove this port from the list of interested ports. 6028 */ 6029 static void 6030 vsw_del_mcst_port(vsw_port_t *port) 6031 { 6032 mcst_addr_t *mcst_p = NULL; 6033 vsw_t *vswp = port->p_vswp; 6034 6035 D1(vswp, "%s: enter", __func__); 6036 6037 mutex_enter(&port->mca_lock); 6038 while (port->mcap != NULL) { 6039 (void) vsw_del_mcst(vswp, VSW_VNETPORT, 6040 port->mcap->addr, port); 6041 6042 mcst_p = port->mcap->nextp; 6043 kmem_free(port->mcap, sizeof (mcst_addr_t)); 6044 port->mcap = mcst_p; 6045 } 6046 mutex_exit(&port->mca_lock); 6047 6048 D1(vswp, "%s: exit", __func__); 6049 } 6050 6051 /* 6052 * This vsw instance is detaching, but has registered an interest in one 6053 * or more multicast groups. Using the list of addresses maintained 6054 * within the vsw structure find the appropriate entry in the hash 6055 * table and remove this instance from the list of interested ports. 6056 */ 6057 static void 6058 vsw_del_mcst_vsw(vsw_t *vswp) 6059 { 6060 mcst_addr_t *next_p = NULL; 6061 6062 D1(vswp, "%s: enter", __func__); 6063 6064 mutex_enter(&vswp->mca_lock); 6065 6066 while (vswp->mcap != NULL) { 6067 DERR(vswp, "%s: deleting addr 0x%llx", 6068 __func__, vswp->mcap->addr); 6069 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, 6070 vswp->mcap->addr, NULL); 6071 6072 next_p = vswp->mcap->nextp; 6073 kmem_free(vswp->mcap, sizeof (mcst_addr_t)); 6074 vswp->mcap = next_p; 6075 } 6076 6077 vswp->mcap = NULL; 6078 mutex_exit(&vswp->mca_lock); 6079 6080 D1(vswp, "%s: exit", __func__); 6081 } 6082 6083 6084 /* 6085 * Remove the specified address from the list of address maintained 6086 * in this port node. 6087 */ 6088 static void 6089 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 6090 { 6091 vsw_t *vswp = NULL; 6092 vsw_port_t *port = NULL; 6093 mcst_addr_t *prev_p = NULL; 6094 mcst_addr_t *curr_p = NULL; 6095 6096 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 6097 __func__, devtype, addr); 6098 6099 if (devtype == VSW_VNETPORT) { 6100 port = (vsw_port_t *)arg; 6101 mutex_enter(&port->mca_lock); 6102 prev_p = curr_p = port->mcap; 6103 } else { 6104 vswp = (vsw_t *)arg; 6105 mutex_enter(&vswp->mca_lock); 6106 prev_p = curr_p = vswp->mcap; 6107 } 6108 6109 while (curr_p != NULL) { 6110 if (curr_p->addr == addr) { 6111 D2(NULL, "%s: address found", __func__); 6112 /* match found */ 6113 if (prev_p == curr_p) { 6114 /* list head */ 6115 if (devtype == VSW_VNETPORT) 6116 port->mcap = curr_p->nextp; 6117 else 6118 vswp->mcap = curr_p->nextp; 6119 } else { 6120 prev_p->nextp = curr_p->nextp; 6121 } 6122 kmem_free(curr_p, sizeof (mcst_addr_t)); 6123 break; 6124 } else { 6125 prev_p = curr_p; 6126 curr_p = curr_p->nextp; 6127 } 6128 } 6129 6130 if (devtype == VSW_VNETPORT) 6131 mutex_exit(&port->mca_lock); 6132 else 6133 mutex_exit(&vswp->mca_lock); 6134 6135 D1(NULL, "%s: exit", __func__); 6136 } 6137 6138 /* 6139 * Creates a descriptor ring (dring) and links it into the 6140 * link of outbound drings for this channel. 6141 * 6142 * Returns NULL if creation failed. 6143 */ 6144 static dring_info_t * 6145 vsw_create_dring(vsw_ldc_t *ldcp) 6146 { 6147 vsw_private_desc_t *priv_addr = NULL; 6148 vsw_t *vswp = ldcp->ldc_vswp; 6149 ldc_mem_info_t minfo; 6150 dring_info_t *dp, *tp; 6151 int i; 6152 6153 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 6154 6155 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 6156 6157 /* create public section of ring */ 6158 if ((ldc_mem_dring_create(VSW_RING_NUM_EL, 6159 VSW_PUB_SIZE, &dp->handle)) != 0) { 6160 6161 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 6162 "failed", ldcp->ldc_id); 6163 goto create_fail_exit; 6164 } 6165 6166 ASSERT(dp->handle != NULL); 6167 6168 /* 6169 * Get the base address of the public section of the ring. 6170 */ 6171 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 6172 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 6173 ldcp->ldc_id); 6174 goto dring_fail_exit; 6175 } else { 6176 ASSERT(minfo.vaddr != 0); 6177 dp->pub_addr = minfo.vaddr; 6178 } 6179 6180 dp->num_descriptors = VSW_RING_NUM_EL; 6181 dp->descriptor_size = VSW_PUB_SIZE; 6182 dp->options = VIO_TX_DRING; 6183 dp->ncookies = 1; /* guaranteed by ldc */ 6184 6185 /* 6186 * create private portion of ring 6187 */ 6188 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 6189 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP); 6190 6191 if (vsw_setup_ring(ldcp, dp)) { 6192 DERR(vswp, "%s: unable to setup ring", __func__); 6193 goto dring_fail_exit; 6194 } 6195 6196 /* haven't used any descriptors yet */ 6197 dp->end_idx = 0; 6198 6199 /* bind dring to the channel */ 6200 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 6201 LDC_SHADOW_MAP, LDC_MEM_RW, 6202 &dp->cookie[0], &dp->ncookies)) != 0) { 6203 DERR(vswp, "vsw_create_dring: unable to bind to channel " 6204 "%lld", ldcp->ldc_id); 6205 goto dring_fail_exit; 6206 } 6207 6208 /* 6209 * Only ever create rings for outgoing lane. Link it onto 6210 * end of list. 6211 */ 6212 if (ldcp->lane_out.dringp == NULL) { 6213 D2(vswp, "vsw_create_dring: adding first outbound ring"); 6214 ldcp->lane_out.dringp = dp; 6215 } else { 6216 tp = ldcp->lane_out.dringp; 6217 while (tp->next != NULL) 6218 tp = tp->next; 6219 6220 tp->next = dp; 6221 } 6222 6223 return (dp); 6224 6225 dring_fail_exit: 6226 (void) ldc_mem_dring_destroy(dp->handle); 6227 6228 create_fail_exit: 6229 if (dp->priv_addr != NULL) { 6230 priv_addr = dp->priv_addr; 6231 for (i = 0; i < VSW_RING_NUM_EL; i++) { 6232 if (priv_addr->memhandle != NULL) 6233 (void) ldc_mem_free_handle( 6234 priv_addr->memhandle); 6235 priv_addr++; 6236 } 6237 kmem_free(dp->priv_addr, 6238 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL)); 6239 } 6240 mutex_destroy(&dp->dlock); 6241 6242 kmem_free(dp, sizeof (dring_info_t)); 6243 return (NULL); 6244 } 6245 6246 /* 6247 * Create a ring consisting of just a private portion and link 6248 * it into the list of rings for the outbound lane. 6249 * 6250 * These type of rings are used primarily for temporary data 6251 * storage (i.e. as data buffers). 6252 */ 6253 void 6254 vsw_create_privring(vsw_ldc_t *ldcp) 6255 { 6256 dring_info_t *dp, *tp; 6257 vsw_t *vswp = ldcp->ldc_vswp; 6258 6259 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 6260 6261 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 6262 6263 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 6264 6265 /* no public section */ 6266 dp->pub_addr = NULL; 6267 6268 dp->priv_addr = kmem_zalloc((sizeof (vsw_private_desc_t) * 6269 VSW_RING_NUM_EL), KM_SLEEP); 6270 6271 if (vsw_setup_ring(ldcp, dp)) { 6272 DERR(vswp, "%s: setup of ring failed", __func__); 6273 kmem_free(dp->priv_addr, 6274 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL)); 6275 mutex_destroy(&dp->dlock); 6276 kmem_free(dp, sizeof (dring_info_t)); 6277 return; 6278 } 6279 6280 /* haven't used any descriptors yet */ 6281 dp->end_idx = 0; 6282 6283 /* 6284 * Only ever create rings for outgoing lane. Link it onto 6285 * end of list. 6286 */ 6287 if (ldcp->lane_out.dringp == NULL) { 6288 D2(vswp, "%s: adding first outbound privring", __func__); 6289 ldcp->lane_out.dringp = dp; 6290 } else { 6291 tp = ldcp->lane_out.dringp; 6292 while (tp->next != NULL) 6293 tp = tp->next; 6294 6295 tp->next = dp; 6296 } 6297 6298 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 6299 } 6300 6301 /* 6302 * Setup the descriptors in the dring. Returns 0 on success, 1 on 6303 * failure. 6304 */ 6305 int 6306 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 6307 { 6308 vnet_public_desc_t *pub_addr = NULL; 6309 vsw_private_desc_t *priv_addr = NULL; 6310 vsw_t *vswp = ldcp->ldc_vswp; 6311 uint64_t *tmpp; 6312 uint64_t offset = 0; 6313 uint32_t ncookies = 0; 6314 static char *name = "vsw_setup_ring"; 6315 int i, j, rv; 6316 6317 /* note - public section may be null */ 6318 priv_addr = dp->priv_addr; 6319 pub_addr = dp->pub_addr; 6320 6321 /* 6322 * Allocate the region of memory which will be used to hold 6323 * the data the descriptors will refer to. 6324 */ 6325 dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ); 6326 dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 6327 6328 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 6329 dp->data_sz, dp->data_addr); 6330 6331 tmpp = (uint64_t *)dp->data_addr; 6332 offset = VSW_RING_EL_DATA_SZ / sizeof (tmpp); 6333 6334 /* 6335 * Initialise some of the private and public (if they exist) 6336 * descriptor fields. 6337 */ 6338 for (i = 0; i < VSW_RING_NUM_EL; i++) { 6339 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 6340 &priv_addr->memhandle)) != 0) { 6341 DERR(vswp, "%s: alloc mem handle failed", name); 6342 goto setup_ring_cleanup; 6343 } 6344 6345 priv_addr->datap = (void *)tmpp; 6346 6347 rv = ldc_mem_bind_handle(priv_addr->memhandle, 6348 (caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ, 6349 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 6350 &(priv_addr->memcookie[0]), &ncookies); 6351 if (rv != 0) { 6352 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 6353 "(rv %d)", name, ldcp->ldc_id, rv); 6354 goto setup_ring_cleanup; 6355 } 6356 priv_addr->bound = 1; 6357 6358 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 6359 name, i, priv_addr->memcookie[0].addr, 6360 priv_addr->memcookie[0].size); 6361 6362 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 6363 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 6364 "invalid num of cookies (%d) for size 0x%llx", 6365 name, ldcp->ldc_id, ncookies, 6366 VSW_RING_EL_DATA_SZ); 6367 6368 goto setup_ring_cleanup; 6369 } else { 6370 for (j = 1; j < ncookies; j++) { 6371 rv = ldc_mem_nextcookie(priv_addr->memhandle, 6372 &(priv_addr->memcookie[j])); 6373 if (rv != 0) { 6374 DERR(vswp, "%s: ldc_mem_nextcookie " 6375 "failed rv (%d)", name, rv); 6376 goto setup_ring_cleanup; 6377 } 6378 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 6379 "size 0x%llx", name, j, 6380 priv_addr->memcookie[j].addr, 6381 priv_addr->memcookie[j].size); 6382 } 6383 6384 } 6385 priv_addr->ncookies = ncookies; 6386 priv_addr->dstate = VIO_DESC_FREE; 6387 6388 if (pub_addr != NULL) { 6389 6390 /* link pub and private sides */ 6391 priv_addr->descp = pub_addr; 6392 6393 pub_addr->hdr.dstate = VIO_DESC_FREE; 6394 pub_addr++; 6395 } 6396 6397 /* 6398 * move to next element in the dring and the next 6399 * position in the data buffer. 6400 */ 6401 priv_addr++; 6402 tmpp += offset; 6403 } 6404 6405 return (0); 6406 6407 setup_ring_cleanup: 6408 priv_addr = dp->priv_addr; 6409 6410 for (i = 0; i < VSW_RING_NUM_EL; i++) { 6411 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 6412 (void) ldc_mem_free_handle(priv_addr->memhandle); 6413 6414 priv_addr++; 6415 } 6416 kmem_free(dp->data_addr, dp->data_sz); 6417 6418 return (1); 6419 } 6420 6421 /* 6422 * Searches the private section of a ring for a free descriptor, 6423 * starting at the location of the last free descriptor found 6424 * previously. 6425 * 6426 * Returns 0 if free descriptor is available, 1 otherwise. 6427 * 6428 * FUTURE: might need to return contiguous range of descriptors 6429 * as dring info msg assumes all will be contiguous. 6430 */ 6431 static int 6432 vsw_dring_find_free_desc(dring_info_t *dringp, 6433 vsw_private_desc_t **priv_p, int *idx) 6434 { 6435 vsw_private_desc_t *addr; 6436 uint64_t i; 6437 uint64_t j = 0; 6438 uint64_t start = dringp->end_idx; 6439 int num = VSW_RING_NUM_EL; 6440 int ret = 1; 6441 6442 D1(NULL, "%s enter\n", __func__); 6443 6444 addr = dringp->priv_addr; 6445 6446 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 6447 __func__, dringp, start); 6448 6449 for (i = start; j < num; i = (i + 1) % num, j++) { 6450 addr = (vsw_private_desc_t *)dringp->priv_addr + i; 6451 D2(NULL, "%s: descriptor %lld : dstate 0x%llx\n", 6452 __func__, i, addr->dstate); 6453 if (addr->dstate == VIO_DESC_FREE) { 6454 D2(NULL, "%s: descriptor %lld is available", 6455 __func__, i); 6456 *priv_p = addr; 6457 *idx = i; 6458 dringp->end_idx = (i + 1) % num; 6459 ret = 0; 6460 break; 6461 } 6462 } 6463 6464 /* ring full */ 6465 if (ret == 1) { 6466 D2(NULL, "%s: no desp free: started at %d", __func__, start); 6467 } 6468 6469 D1(NULL, "%s: exit\n", __func__); 6470 6471 return (ret); 6472 } 6473 6474 /* 6475 * Copy relevant fields from the private descriptor into the 6476 * associated public side. 6477 */ 6478 static void 6479 vsw_dring_priv2pub(vsw_private_desc_t *priv) 6480 { 6481 vnet_public_desc_t *pub; 6482 int i; 6483 6484 D1(NULL, "vsw_dring_priv2pub enter\n"); 6485 6486 pub = priv->descp; 6487 6488 pub->ncookies = priv->ncookies; 6489 pub->nbytes = priv->datalen; 6490 6491 for (i = 0; i < pub->ncookies; i++) { 6492 bcopy(&priv->memcookie[i], &pub->memcookie[i], 6493 sizeof (ldc_mem_cookie_t)); 6494 } 6495 6496 pub->hdr.ack = 1; 6497 pub->hdr.dstate = VIO_DESC_READY; 6498 6499 D1(NULL, "vsw_dring_priv2pub exit"); 6500 } 6501 6502 /* 6503 * Map from a dring identifier to the ring itself. Returns 6504 * pointer to ring or NULL if no match found. 6505 */ 6506 static dring_info_t * 6507 vsw_ident2dring(lane_t *lane, uint64_t ident) 6508 { 6509 dring_info_t *dp = NULL; 6510 6511 if ((dp = lane->dringp) == NULL) { 6512 return (NULL); 6513 } else { 6514 if (dp->ident == ident) 6515 return (dp); 6516 6517 while (dp != NULL) { 6518 if (dp->ident == ident) 6519 break; 6520 dp = dp->next; 6521 } 6522 } 6523 6524 return (dp); 6525 } 6526 6527 /* 6528 * Set the default lane attributes. These are copied into 6529 * the attr msg we send to our peer. If they are not acceptable 6530 * then (currently) the handshake ends. 6531 */ 6532 static void 6533 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 6534 { 6535 bzero(lp, sizeof (lane_t)); 6536 6537 READ_ENTER(&vswp->if_lockrw); 6538 ether_copy(&(vswp->if_addr), &(lp->addr)); 6539 RW_EXIT(&vswp->if_lockrw); 6540 6541 lp->mtu = VSW_MTU; 6542 lp->addr_type = ADDR_TYPE_MAC; 6543 lp->xfer_mode = VIO_DRING_MODE; 6544 lp->ack_freq = 0; /* for shared mode */ 6545 lp->seq_num = VNET_ISS; 6546 } 6547 6548 /* 6549 * Verify that the attributes are acceptable. 6550 * 6551 * FUTURE: If some attributes are not acceptable, change them 6552 * our desired values. 6553 */ 6554 static int 6555 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port) 6556 { 6557 int ret = 0; 6558 6559 D1(NULL, "vsw_check_attr enter\n"); 6560 6561 /* 6562 * Note we currently only support in-band descriptors 6563 * and descriptor rings, not packet based transfer (VIO_PKT_MODE) 6564 */ 6565 if ((pkt->xfer_mode != VIO_DESC_MODE) && 6566 (pkt->xfer_mode != VIO_DRING_MODE)) { 6567 D2(NULL, "vsw_check_attr: unknown mode %x\n", 6568 pkt->xfer_mode); 6569 ret = 1; 6570 } 6571 6572 /* Only support MAC addresses at moment. */ 6573 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 6574 D2(NULL, "vsw_check_attr: invalid addr_type %x, " 6575 "or address 0x%llx\n", pkt->addr_type, 6576 pkt->addr); 6577 ret = 1; 6578 } 6579 6580 /* 6581 * MAC address supplied by device should match that stored 6582 * in the vsw-port OBP node. Need to decide what to do if they 6583 * don't match, for the moment just warn but don't fail. 6584 */ 6585 if (bcmp(&pkt->addr, &port->p_macaddr, ETHERADDRL) != 0) { 6586 DERR(NULL, "vsw_check_attr: device supplied address " 6587 "0x%llx doesn't match node address 0x%llx\n", 6588 pkt->addr, port->p_macaddr); 6589 } 6590 6591 /* 6592 * Ack freq only makes sense in pkt mode, in shared 6593 * mode the ring descriptors say whether or not to 6594 * send back an ACK. 6595 */ 6596 if ((pkt->xfer_mode == VIO_DRING_MODE) && 6597 (pkt->ack_freq > 0)) { 6598 D2(NULL, "vsw_check_attr: non zero ack freq " 6599 " in SHM mode\n"); 6600 ret = 1; 6601 } 6602 6603 /* 6604 * Note: for the moment we only support ETHER 6605 * frames. This may change in the future. 6606 */ 6607 if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) { 6608 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n", 6609 pkt->mtu); 6610 ret = 1; 6611 } 6612 6613 D1(NULL, "vsw_check_attr exit\n"); 6614 6615 return (ret); 6616 } 6617 6618 /* 6619 * Returns 1 if there is a problem, 0 otherwise. 6620 */ 6621 static int 6622 vsw_check_dring_info(vio_dring_reg_msg_t *pkt) 6623 { 6624 _NOTE(ARGUNUSED(pkt)) 6625 6626 int ret = 0; 6627 6628 D1(NULL, "vsw_check_dring_info enter\n"); 6629 6630 if ((pkt->num_descriptors == 0) || 6631 (pkt->descriptor_size == 0) || 6632 (pkt->ncookies != 1)) { 6633 DERR(NULL, "vsw_check_dring_info: invalid dring msg"); 6634 ret = 1; 6635 } 6636 6637 D1(NULL, "vsw_check_dring_info exit\n"); 6638 6639 return (ret); 6640 } 6641 6642 /* 6643 * Returns 1 if two memory cookies match. Otherwise returns 0. 6644 */ 6645 static int 6646 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2) 6647 { 6648 if ((m1->addr != m2->addr) || 6649 (m2->size != m2->size)) { 6650 return (0); 6651 } else { 6652 return (1); 6653 } 6654 } 6655 6656 /* 6657 * Returns 1 if ring described in reg message matches that 6658 * described by dring_info structure. Otherwise returns 0. 6659 */ 6660 static int 6661 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 6662 { 6663 if ((msg->descriptor_size != dp->descriptor_size) || 6664 (msg->num_descriptors != dp->num_descriptors) || 6665 (msg->ncookies != dp->ncookies) || 6666 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 6667 return (0); 6668 } else { 6669 return (1); 6670 } 6671 6672 } 6673 6674 static caddr_t 6675 vsw_print_ethaddr(uint8_t *a, char *ebuf) 6676 { 6677 (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x", 6678 a[0], a[1], a[2], a[3], a[4], a[5]); 6679 return (ebuf); 6680 } 6681 6682 /* 6683 * Reset and free all the resources associated with 6684 * the channel. 6685 */ 6686 static void 6687 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 6688 { 6689 dring_info_t *dp, *dpp; 6690 lane_t *lp = NULL; 6691 int rv = 0; 6692 6693 ASSERT(ldcp != NULL); 6694 6695 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 6696 6697 if (dir == INBOUND) { 6698 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 6699 " of channel %lld", __func__, ldcp->ldc_id); 6700 lp = &ldcp->lane_in; 6701 } else { 6702 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 6703 " of channel %lld", __func__, ldcp->ldc_id); 6704 lp = &ldcp->lane_out; 6705 } 6706 6707 lp->lstate = VSW_LANE_INACTIV; 6708 lp->seq_num = VNET_ISS; 6709 if (lp->dringp) { 6710 if (dir == INBOUND) { 6711 dp = lp->dringp; 6712 while (dp != NULL) { 6713 dpp = dp->next; 6714 if (dp->handle != NULL) 6715 (void) ldc_mem_dring_unmap(dp->handle); 6716 kmem_free(dp, sizeof (dring_info_t)); 6717 dp = dpp; 6718 } 6719 } else { 6720 /* 6721 * unbind, destroy exported dring, free dring struct 6722 */ 6723 dp = lp->dringp; 6724 rv = vsw_free_ring(dp); 6725 } 6726 if (rv == 0) { 6727 lp->dringp = NULL; 6728 } 6729 } 6730 6731 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 6732 } 6733 6734 /* 6735 * Free ring and all associated resources. 6736 */ 6737 static int 6738 vsw_free_ring(dring_info_t *dp) 6739 { 6740 vsw_private_desc_t *paddr = NULL; 6741 dring_info_t *dpp; 6742 int i, rv = 1; 6743 6744 while (dp != NULL) { 6745 mutex_enter(&dp->dlock); 6746 dpp = dp->next; 6747 if (dp->priv_addr != NULL) { 6748 /* 6749 * First unbind and free the memory handles 6750 * stored in each descriptor within the ring. 6751 */ 6752 for (i = 0; i < VSW_RING_NUM_EL; i++) { 6753 paddr = (vsw_private_desc_t *) 6754 dp->priv_addr + i; 6755 if (paddr->memhandle != NULL) { 6756 if (paddr->bound == 1) { 6757 rv = ldc_mem_unbind_handle( 6758 paddr->memhandle); 6759 6760 if (rv != 0) { 6761 DERR(NULL, "error " 6762 "unbinding handle for " 6763 "ring 0x%llx at pos %d", 6764 dp, i); 6765 mutex_exit(&dp->dlock); 6766 return (rv); 6767 } 6768 paddr->bound = 0; 6769 } 6770 6771 rv = ldc_mem_free_handle( 6772 paddr->memhandle); 6773 if (rv != 0) { 6774 DERR(NULL, "error freeing " 6775 "handle for ring " 6776 "0x%llx at pos %d", 6777 dp, i); 6778 mutex_exit(&dp->dlock); 6779 return (rv); 6780 } 6781 paddr->memhandle = NULL; 6782 } 6783 } 6784 kmem_free(dp->priv_addr, (sizeof (vsw_private_desc_t) 6785 * VSW_RING_NUM_EL)); 6786 } 6787 6788 /* 6789 * Now unbind and destroy the ring itself. 6790 */ 6791 if (dp->handle != NULL) { 6792 (void) ldc_mem_dring_unbind(dp->handle); 6793 (void) ldc_mem_dring_destroy(dp->handle); 6794 } 6795 6796 if (dp->data_addr != NULL) { 6797 kmem_free(dp->data_addr, dp->data_sz); 6798 } 6799 6800 mutex_exit(&dp->dlock); 6801 mutex_destroy(&dp->dlock); 6802 kmem_free(dp, sizeof (dring_info_t)); 6803 6804 dp = dpp; 6805 } 6806 return (0); 6807 } 6808 6809 /* 6810 * Debugging routines 6811 */ 6812 static void 6813 display_state(void) 6814 { 6815 vsw_t *vswp; 6816 vsw_port_list_t *plist; 6817 vsw_port_t *port; 6818 vsw_ldc_list_t *ldcl; 6819 vsw_ldc_t *ldcp; 6820 6821 cmn_err(CE_NOTE, "***** system state *****"); 6822 6823 for (vswp = vsw_head; vswp; vswp = vswp->next) { 6824 plist = &vswp->plist; 6825 READ_ENTER(&plist->lockrw); 6826 cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n", 6827 vswp->instance, plist->num_ports); 6828 6829 for (port = plist->head; port != NULL; port = port->p_next) { 6830 ldcl = &port->p_ldclist; 6831 cmn_err(CE_CONT, "port %d : %d ldcs attached\n", 6832 port->p_instance, ldcl->num_ldcs); 6833 READ_ENTER(&ldcl->lockrw); 6834 ldcp = ldcl->head; 6835 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 6836 cmn_err(CE_CONT, "chan %lu : dev %d : " 6837 "status %d : phase %u\n", 6838 ldcp->ldc_id, ldcp->dev_class, 6839 ldcp->ldc_status, ldcp->hphase); 6840 cmn_err(CE_CONT, "chan %lu : lsession %lu : " 6841 "psession %lu\n", 6842 ldcp->ldc_id, 6843 ldcp->local_session, 6844 ldcp->peer_session); 6845 6846 cmn_err(CE_CONT, "Inbound lane:\n"); 6847 display_lane(&ldcp->lane_in); 6848 cmn_err(CE_CONT, "Outbound lane:\n"); 6849 display_lane(&ldcp->lane_out); 6850 } 6851 RW_EXIT(&ldcl->lockrw); 6852 } 6853 RW_EXIT(&plist->lockrw); 6854 } 6855 cmn_err(CE_NOTE, "***** system state *****"); 6856 } 6857 6858 static void 6859 display_lane(lane_t *lp) 6860 { 6861 dring_info_t *drp; 6862 6863 cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n", 6864 lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu); 6865 cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n", 6866 lp->addr_type, lp->addr, lp->xfer_mode); 6867 cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp); 6868 6869 cmn_err(CE_CONT, "Dring info:\n"); 6870 for (drp = lp->dringp; drp != NULL; drp = drp->next) { 6871 cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n", 6872 drp->num_descriptors, drp->descriptor_size); 6873 cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle); 6874 cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n", 6875 (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr); 6876 cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n", 6877 drp->ident, drp->end_idx); 6878 display_ring(drp); 6879 } 6880 } 6881 6882 static void 6883 display_ring(dring_info_t *dringp) 6884 { 6885 uint64_t i; 6886 uint64_t priv_count = 0; 6887 uint64_t pub_count = 0; 6888 vnet_public_desc_t *pub_addr = NULL; 6889 vsw_private_desc_t *priv_addr = NULL; 6890 6891 for (i = 0; i < VSW_RING_NUM_EL; i++) { 6892 if (dringp->pub_addr != NULL) { 6893 pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i; 6894 6895 if (pub_addr->hdr.dstate == VIO_DESC_FREE) 6896 pub_count++; 6897 } 6898 6899 if (dringp->priv_addr != NULL) { 6900 priv_addr = 6901 (vsw_private_desc_t *)dringp->priv_addr + i; 6902 6903 if (priv_addr->dstate == VIO_DESC_FREE) 6904 priv_count++; 6905 } 6906 } 6907 cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n", 6908 i, priv_count, pub_count); 6909 } 6910 6911 static void 6912 dump_flags(uint64_t state) 6913 { 6914 int i; 6915 6916 typedef struct flag_name { 6917 int flag_val; 6918 char *flag_name; 6919 } flag_name_t; 6920 6921 flag_name_t flags[] = { 6922 VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT", 6923 VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV", 6924 VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV", 6925 VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT", 6926 VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV", 6927 VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT", 6928 VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT", 6929 VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV", 6930 VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT", 6931 VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV", 6932 VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT", 6933 VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV", 6934 VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT", 6935 VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV", 6936 VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT", 6937 VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV", 6938 VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT", 6939 VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV", 6940 VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT", 6941 VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV", 6942 VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT", 6943 VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV", 6944 VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT", 6945 VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV", 6946 VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT", 6947 VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV", 6948 VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT", 6949 VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV", 6950 VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT", 6951 VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV", 6952 VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"}; 6953 6954 DERR(NULL, "DUMP_FLAGS: %llx\n", state); 6955 for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) { 6956 if (state & flags[i].flag_val) 6957 DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name); 6958 } 6959 } 6960