/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>

/*
 * Function prototypes.
 */
static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
static	int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *);
static	int vsw_get_physaddr(vsw_t *);
static	int vsw_setup_switching(vsw_t *);
static	int vsw_setup_layer2(vsw_t *);
static	int vsw_setup_layer3(vsw_t *);

/* MAC Ring table functions. */
static void vsw_mac_ring_tbl_init(vsw_t *vswp);
static void vsw_mac_ring_tbl_destroy(vsw_t *vswp);
static void vsw_queue_worker(vsw_mac_ring_t *rrp);
static void vsw_queue_stop(vsw_queue_t *vqp);
static vsw_queue_t *vsw_queue_create();
static void vsw_queue_destroy(vsw_queue_t *vqp);

/* MAC layer routines */
static mac_resource_handle_t vsw_mac_ring_add_cb(void *arg,
    mac_resource_t *mrp);
static	int vsw_get_hw_maddr(vsw_t *);
static	int vsw_set_hw(vsw_t *, vsw_port_t *);
static	int vsw_set_hw_promisc(vsw_t *, vsw_port_t *);
static	int vsw_unset_hw(vsw_t *, vsw_port_t *);
static	int vsw_unset_hw_promisc(vsw_t *, vsw_port_t *);
static	int vsw_reconfig_hw(vsw_t *);
static int vsw_mac_attach(vsw_t *vswp);
static void vsw_mac_detach(vsw_t *vswp);

static void vsw_rx_queue_cb(void *, mac_resource_handle_t, mblk_t *);
static void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *);
static mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
static int vsw_mac_register(vsw_t *);
static int vsw_mac_unregister(vsw_t *);
static int vsw_m_stat(void *, uint_t, uint64_t *);
static void vsw_m_stop(void *arg);
static int vsw_m_start(void *arg);
static int vsw_m_unicst(void *arg, const uint8_t *);
static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
static int vsw_m_promisc(void *arg, boolean_t);
static mblk_t *vsw_m_tx(void *arg, mblk_t *);

/* MDEG routines */
static	int vsw_mdeg_register(vsw_t *vswp);
static	void vsw_mdeg_unregister(vsw_t *vswp);
static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
static	void vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);

/* Port add/deletion routines */
static	int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
static	int vsw_port_attach(vsw_t *vswp, int p_instance,
    uint64_t *ldcids, int nids, struct ether_addr *macaddr);
static	int vsw_detach_ports(vsw_t *vswp);
static	int vsw_port_detach(vsw_t *vswp, int p_instance);
static	int vsw_port_delete(vsw_port_t *port);
static	int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static	int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
static	int vsw_init_ldcs(vsw_port_t *port);
static	int vsw_uninit_ldcs(vsw_port_t *port);
static	int vsw_ldc_init(vsw_ldc_t *ldcp);
static	int vsw_ldc_uninit(vsw_ldc_t *ldcp);
static	int vsw_drain_ldcs(vsw_port_t *port);
static	int vsw_drain_port_taskq(vsw_port_t *port);
static	void vsw_marker_task(void *);
static	vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
static	int vsw_plist_del_node(vsw_t *, vsw_port_t *port);

/* Interrupt routines */
static	uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);

/* Handshake routines */
static	void vsw_restart_ldc(vsw_ldc_t *);
static	void vsw_restart_handshake(vsw_ldc_t *);
static	void vsw_handle_reset(vsw_ldc_t *);
static	int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static	void vsw_next_milestone(vsw_ldc_t *);
static	int vsw_supported_version(vio_ver_msg_t *);

/* Data processing routines */
static void vsw_process_pkt(void *);
static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t);
static void vsw_process_ctrl_pkt(void *);
static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_raw_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);

/* Switching/data transmit routines */
static	void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port, mac_resource_handle_t);
static	void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port, mac_resource_handle_t);
static	int vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port);
static	int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port);
static	int vsw_portsend(vsw_port_t *, mblk_t *);
static	int vsw_dringsend(vsw_ldc_t *, mblk_t *);
static	int vsw_descrsend(vsw_ldc_t *, mblk_t *);

/* Packet creation routines */
static void vsw_send_ver(void *);
static void vsw_send_attr(vsw_ldc_t *);
static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);

static void vsw_send_msg(vsw_ldc_t *, void *, int);

/* Forwarding database (FDB) routines */
static	int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
static	int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
static	vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *);
static	int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
static	int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
static	int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
static	void vsw_del_addr(uint8_t, void *, uint64_t);
static	void vsw_del_mcst_port(vsw_port_t *);
static	void vsw_del_mcst_vsw(vsw_t *);

/* Dring routines */
static dring_info_t *vsw_create_dring(vsw_ldc_t *);
static void vsw_create_privring(vsw_ldc_t *);
static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
    int *);
static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);

static void vsw_set_lane_attr(vsw_t *, lane_t *);
static int vsw_check_attr(vnet_attr_msg_t *, vsw_port_t *);
static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
static int vsw_check_dring_info(vio_dring_reg_msg_t *);

/* Misc support routines */
static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static int vsw_free_ring(dring_info_t *);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);

int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
int	vsw_wretries = 100;		/* # of write attempts */
int	vsw_chain_len = 150;		/* max # of mblks in msg chain */
int	vsw_desc_delay = 0;		/* delay in us */
int	vsw_read_attempts = 5;		/* # of reads of descriptor */

uint32_t	vsw_mblk_size = VSW_MBLK_SIZE;
uint32_t	vsw_num_mblks = VSW_NUM_MBLKS;
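
/*
 * The tunables above are module globals and so, as is conventional
 * for Solaris drivers, can also be overridden from /etc/system using
 * the "set driver:variable" syntax, e.g. (values purely illustrative):
 *
 *	set vsw:vsw_chain_len = 300
 *	set vsw:vsw_wretries = 50
 *
 * A reboot is required for /etc/system settings to take effect.
 */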
static	mac_callbacks_t	vsw_m_callbacks = {
	0,
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx,
	NULL,
	NULL,
	NULL
};

static	struct	cb_ops	vsw_cb_ops = {
	nulldev,			/* cb_open */
	nulldev,			/* cb_close */
	nodev,				/* cb_strategy */
	nodev,				/* cb_print */
	nodev,				/* cb_dump */
	nodev,				/* cb_read */
	nodev,				/* cb_write */
	nodev,				/* cb_ioctl */
	nodev,				/* cb_devmap */
	nodev,				/* cb_mmap */
	nodev,				/* cb_segmap */
	nochpoll,			/* cb_chpoll */
	ddi_prop_op,			/* cb_prop_op */
	NULL,				/* cb_stream */
	D_MP,				/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};

static	struct	dev_ops	vsw_ops = {
	DEVO_REV,			/* devo_rev */
	0,				/* devo_refcnt */
	vsw_getinfo,			/* devo_getinfo */
	nulldev,			/* devo_identify */
	nulldev,			/* devo_probe */
	vsw_attach,			/* devo_attach */
	vsw_detach,			/* devo_detach */
	nodev,				/* devo_reset */
	&vsw_cb_ops,			/* devo_cb_ops */
	(struct bus_ops *)NULL,		/* devo_bus_ops */
	ddi_power			/* devo_power */
};

extern	struct	mod_ops	mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch Driver %I%",
	&vsw_ops,
};

#define	LDC_ENTER_LOCK(ldcp)	\
	mutex_enter(&((ldcp)->ldc_cblock));\
	mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
	mutex_exit(&((ldcp)->ldc_txlock));\
	mutex_exit(&((ldcp)->ldc_cblock));
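
/*
 * Note on the macros above: the callback lock (ldc_cblock) is always
 * acquired before the transmit lock (ldc_txlock), and the two are
 * released in the reverse order, giving a single consistent lock
 * ordering for all callers that use these macros.
 */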
/* Driver soft state ptr */
static void	*vsw_state;

/*
 * Linked list of "vsw_t" structures - one per instance.
 */
vsw_t		*vsw_head = NULL;
krwlock_t	vsw_rw;

/*
 * Property names
 */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";

/* supported versions */
static	ver_sup_t	vsw_versions[] = { {1, 0} };

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device-port' nodes identified by their
 * 'id' property.
 */
static md_prop_match_t vport_prop_match[] = {
	{ MDET_PROP_VAL,	"id" },
	{ MDET_LIST_END,	NULL }
};

static mdeg_node_match_t vport_match = { "virtual-device-port",
					vport_prop_match };

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
 * by their 'name' and 'cfg-handle' properties.
 */
static md_prop_match_t vdev_prop_match[] = {
	{ MDET_PROP_STR,	"name" },
	{ MDET_PROP_VAL,	"cfg-handle" },
	{ MDET_LIST_END,	NULL }
};

static mdeg_node_match_t vdev_match = { "virtual-device",
					vdev_prop_match };


/*
 * Specification of an MD node passed to the MDEG to filter any
 * 'vport' nodes that do not belong to the specified node. This
 * template is copied for each vsw instance and filled in with
 * the appropriate 'cfg-handle' value before being passed to the MDEG.
 */
static mdeg_prop_spec_t vsw_prop_template[] = {
	{ MDET_PROP_STR,	"name",		vsw_propname },
	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
	{ MDET_LIST_END,	NULL,		NULL }
};

#define	VSW_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val);

/*
 * From /etc/system enable/disable a thread per ring. This is a mode
 * selection that is done at vsw driver attach time.
 */
boolean_t vsw_multi_ring_enable = B_FALSE;
int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;

/*
 * Print debug messages - set to 0x1f to enable all msgs
 * or 0x0 to turn all off.
 */
int vswdbg = 0x0;

/*
 * debug levels:
 * 0x01:	Function entry/exit tracing
 * 0x02:	Internal function messages
 * 0x04:	Verbose internal messages
 * 0x08:	Warning messages
 * 0x10:	Error messages
 */
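
/*
 * For example (illustrative only), all debug output can be enabled
 * persistently via /etc/system:
 *
 *	set vsw:vswdbg = 0x1f
 *
 * or on a live system by patching the vswdbg variable with mdb -kw.
 */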
static void
vswdebug(vsw_t *vswp, const char *fmt, ...)
{
	char buf[512];
	va_list ap;

	va_start(ap, fmt);
	(void) vsprintf(buf, fmt, ap);
	va_end(ap);

	if (vswp == NULL)
		cmn_err(CE_CONT, "%s\n", buf);
	else
		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
}

/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

#define	DUMP_TAG(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \
}

#define	DUMP_TAG_PTR(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \
}

#define	DUMP_FLAGS(flags) dump_flags(flags);
#define	DISPLAY_STATE()	display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */

#ifdef DEBUG

#define	D1		\
if (vswdbg & 0x01)	\
	vswdebug

#define	D2		\
if (vswdbg & 0x02)	\
	vswdebug

#define	D3		\
if (vswdbg & 0x04)	\
	vswdebug

#define	DWARN		\
if (vswdbg & 0x08)	\
	vswdebug

#define	DERR		\
if (vswdbg & 0x10)	\
	vswdebug

#else

#define	DERR	if (0)	vswdebug
#define	DWARN	if (0)	vswdebug
#define	D1	if (0)	vswdebug
#define	D2	if (0)	vswdebug
#define	D3	if (0)	vswdebug

#endif	/* DEBUG */

static struct modlinkage modlinkage = {
	MODREV_1,
	&vswmodldrv,
	NULL
};

int
_init(void)
{
	int status;

	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);

	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
	if (status != 0) {
		return (status);
	}

	mac_init_ops(&vsw_ops, "vsw");
	status = mod_install(&modlinkage);
	if (status != 0) {
		ddi_soft_state_fini(&vsw_state);
	}
	return (status);
}

int
_fini(void)
{
	int status;

	status = mod_remove(&modlinkage);
	if (status != 0)
		return (status);
	mac_fini_ops(&vsw_ops);
	ddi_soft_state_fini(&vsw_state);

	rw_destroy(&vsw_rw);

	return (status);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
static int
vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	vsw_t		*vswp;
	int		instance;
	char		hashname[MAXNAMELEN];
	char		qname[TASKQ_NAMELEN];
	enum		{ PROG_init = 0x00,
				PROG_if_lock = 0x01,
				PROG_fdb = 0x02,
				PROG_mfdb = 0x04,
				PROG_report_dev = 0x08,
				PROG_plist = 0x10,
				PROG_taskq = 0x20}
			progress;

	progress = PROG_init;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	case DDI_PM_RESUME:
	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
		return (DDI_FAILURE);
	}
	vswp = ddi_get_soft_state(vsw_state, instance);

	if (vswp == NULL) {
		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
		goto vsw_attach_fail;
	}

	vswp->dip = dip;
	vswp->instance = instance;
	ddi_set_driver_private(dip, (caddr_t)vswp);

	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
	progress |= PROG_if_lock;

	/* setup the unicast forwarding database */
	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
	    vswp->instance);
	D2(vswp, "creating unicast hash table (%s)...", hashname);
	vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
	    mod_hash_null_valdtor, sizeof (void *));

	progress |= PROG_fdb;

	/* setup the multicast forwarding database */
	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
	    vswp->instance);
	D2(vswp, "creating multicast hash table (%s)...", hashname);
	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
	vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
	    mod_hash_null_valdtor, sizeof (void *));

	progress |= PROG_mfdb;

	/*
	 * create lock protecting list of multicast addresses
	 * which could come via m_multicst() entry point when plumbed.
	 */
	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
	vswp->mcap = NULL;

	ddi_report_dev(vswp->dip);

	progress |= PROG_report_dev;

	WRITE_ENTER(&vsw_rw);
	vswp->next = vsw_head;
	vsw_head = vswp;
	RW_EXIT(&vsw_rw);

	/* setup the port list */
	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
	vswp->plist.head = NULL;

	progress |= PROG_plist;

	/*
	 * Create the taskq which will process all the VIO
	 * control messages.
	 */
	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
	    TASKQ_DEFAULTPRI, 0)) == NULL) {
		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
		    vswp->instance);
		goto vsw_attach_fail;
	}

	progress |= PROG_taskq;

	/* prevent auto-detaching */
	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
		    "instance %u", DDI_NO_AUTODETACH, instance);
	}

	/*
	 * Now we have everything setup, register an interest in
	 * specific MD nodes.
	 *
	 * The callback is invoked in 2 cases: firstly, if upon mdeg
	 * registration there are existing nodes which match our specified
	 * criteria, and secondly, if the MD is changed (and again, there
	 * are nodes present within it which we are interested in). Note
	 * that our callback will be invoked even if our specified nodes
	 * have not actually changed.
	 *
	 * Until the callback is invoked we cannot switch any pkts as
	 * we don't know basic information such as what mode we are
	 * operating in. However we expect the callback to be invoked
	 * immediately upon registration as this driver should only
	 * be attaching if there are vsw nodes in the MD.
	 */
	if (vsw_mdeg_register(vswp))
		goto vsw_attach_fail;

	return (DDI_SUCCESS);

vsw_attach_fail:
	DERR(NULL, "vsw_attach: failed");

	if (progress & PROG_taskq)
		ddi_taskq_destroy(vswp->taskq_p);

	if (progress & PROG_plist)
		rw_destroy(&vswp->plist.lockrw);

	if (progress & PROG_report_dev) {
		ddi_remove_minor_node(dip, NULL);
		mutex_destroy(&vswp->mca_lock);
	}

	if (progress & PROG_mfdb) {
		mod_hash_destroy_hash(vswp->mfdb);
		vswp->mfdb = NULL;
		rw_destroy(&vswp->mfdbrw);
	}

	if (progress & PROG_fdb) {
		mod_hash_destroy_hash(vswp->fdb);
		vswp->fdb = NULL;
	}

	if (progress & PROG_if_lock) {
		rw_destroy(&vswp->if_lockrw);
		mutex_destroy(&vswp->mac_lock);
	}

	ddi_soft_state_free(vsw_state, instance);
	return (DDI_FAILURE);
}
static int
vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vio_mblk_pool_t		*poolp, *npoolp;
	vsw_t			**vswpp, *vswp;
	int			instance;

	instance = ddi_get_instance(dip);
	vswp = ddi_get_soft_state(vsw_state, instance);

	if (vswp == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
	case DDI_PM_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	D2(vswp, "detaching instance %d", instance);

	if (vswp->if_state & VSW_IF_REG) {
		if (vsw_mac_unregister(vswp) != 0) {
			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
			    "MAC layer", vswp->instance);
			return (DDI_FAILURE);
		}
	}

	vsw_mdeg_unregister(vswp);

	/* remove mac layer callback */
	mutex_enter(&vswp->mac_lock);
	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
		mac_rx_remove(vswp->mh, vswp->mrh);
		vswp->mrh = NULL;
	}
	mutex_exit(&vswp->mac_lock);

	if (vsw_detach_ports(vswp) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to detach ports",
		    vswp->instance);
		return (DDI_FAILURE);
	}

	rw_destroy(&vswp->if_lockrw);

	/*
	 * Now that the ports have been deleted, stop and close
	 * the physical device.
	 */
	mutex_enter(&vswp->mac_lock);
	if (vswp->mh != NULL) {
		if (vswp->mstarted)
			mac_stop(vswp->mh);
		if (vswp->mresources)
			mac_resource_set(vswp->mh, NULL, NULL);
		mac_close(vswp->mh);

		vswp->mh = NULL;
		vswp->txinfo = NULL;
	}
	mutex_exit(&vswp->mac_lock);
	mutex_destroy(&vswp->mac_lock);

	/*
	 * Destroy any free pools that may still exist.
	 */
	poolp = vswp->rxh;
	while (poolp != NULL) {
		npoolp = vswp->rxh = poolp->nextp;
		if (vio_destroy_mblks(poolp) != 0) {
			vswp->rxh = poolp;
			return (DDI_FAILURE);
		}
		poolp = npoolp;
	}

	/*
	 * Remove this instance from any entries it may be on in
	 * the hash table by using the list of addresses maintained
	 * in the vsw_t structure.
	 */
	vsw_del_mcst_vsw(vswp);

	vswp->mcap = NULL;
	mutex_destroy(&vswp->mca_lock);

	/*
	 * By now any pending tasks have finished and the underlying
	 * ldc's have been destroyed, so it's safe to delete the control
	 * message taskq.
	 */
	if (vswp->taskq_p != NULL)
		ddi_taskq_destroy(vswp->taskq_p);

	/*
	 * At this stage all the data pointers in the hash table
	 * should be NULL, as all the ports have been removed and will
	 * have deleted themselves from the port lists which the data
	 * pointers point to. Hence we can destroy the table using the
	 * default destructors.
	 */
	D2(vswp, "vsw_detach: destroying hash tables..");
	mod_hash_destroy_hash(vswp->fdb);
	vswp->fdb = NULL;

	WRITE_ENTER(&vswp->mfdbrw);
	mod_hash_destroy_hash(vswp->mfdb);
	vswp->mfdb = NULL;
	RW_EXIT(&vswp->mfdbrw);
	rw_destroy(&vswp->mfdbrw);

	ddi_remove_minor_node(dip, NULL);

	rw_destroy(&vswp->plist.lockrw);
	WRITE_ENTER(&vsw_rw);
	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
		if (*vswpp == vswp) {
			*vswpp = vswp->next;
			break;
		}
	}
	RW_EXIT(&vsw_rw);
	ddi_soft_state_free(vsw_state, instance);

	return (DDI_SUCCESS);
}
static int
vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	_NOTE(ARGUNUSED(dip))

	vsw_t	*vswp = NULL;
	dev_t	dev = (dev_t)arg;
	int	instance;

	instance = getminor(dev);

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
			*result = NULL;
			return (DDI_FAILURE);
		}
		*result = vswp->dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);

	default:
		*result = NULL;
		return (DDI_FAILURE);
	}
}

/*
 * Get the value of the "vsw-phys-dev" property in the specified
 * node. This property is the name of the physical device that
 * the virtual switch will use to talk to the outside world.
 *
 * Note it is valid for this property to be NULL (but the property
 * itself must exist). Callers of this routine should verify that
 * the value returned is what they expected (i.e. either NULL or non NULL).
 *
 * On success returns value of the property in region pointed to by
 * the 'name' argument, and with return value of 0. Otherwise returns 1.
 */
static int
vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
{
	int	len = 0;
	char	*physname = NULL;
	char	*dev;

	if (md_get_prop_data(mdp, node, physdev_propname,
	    (uint8_t **)(&physname), &len) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
		    "device(s) from MD", vswp->instance);
		return (1);
	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
		    vswp->instance, physname);
		return (1);
	} else {
		(void) strncpy(name, physname, strlen(physname) + 1);
		D2(vswp, "%s: using first device specified (%s)",
		    __func__, physname);
	}

#ifdef DEBUG
	/*
	 * As a temporary measure to aid testing we check to see if there
	 * is a vsw.conf file present. If there is we use the value of the
	 * vsw_physname property in the file as the name of the physical
	 * device, overriding the value from the MD.
	 *
	 * There may be multiple devices listed, but for the moment
	 * we just use the first one.
	 */
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
		if ((strlen(dev) + 1) > LIFNAMSIZ) {
			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
			    vswp->instance, dev);
			ddi_prop_free(dev);
			return (1);
		} else {
			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
			    "config file", vswp->instance, dev);

			(void) strncpy(name, dev, strlen(dev) + 1);
		}

		ddi_prop_free(dev);
	}
#endif

	return (0);
}
/*
 * Read the 'vsw-switch-mode' property from the specified MD node.
 *
 * Returns 0 on success and the number of modes found in 'found',
 * otherwise returns 1.
 */
static int
vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
    uint8_t *modes, int *found)
{
	int		len = 0;
	int		smode_num = 0;
	char		*smode = NULL;
	char		*curr_mode = NULL;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Get the switch-mode property. The modes are listed in
	 * decreasing order of preference, i.e. preferred mode is
	 * first item in list.
	 */
	len = 0;
	smode_num = 0;
	if (md_get_prop_data(mdp, node, smode_propname,
	    (uint8_t **)(&smode), &len) != 0) {
		/*
		 * Unable to get switch-mode property from MD, nothing
		 * more we can do.
		 */
		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
		    " from the MD", vswp->instance);
		*found = 0;
		return (1);
	}

	curr_mode = smode;
	/*
	 * Modes of operation:
	 * 'switched'	 - layer 2 switching, underlying HW in
	 *			programmed mode.
	 * 'promiscuous' - layer 2 switching, underlying HW in
	 *			promiscuous mode.
	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
	 *			in non-promiscuous mode.
	 */
	while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) {
		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
		if (strcmp(curr_mode, "switched") == 0) {
			modes[smode_num++] = VSW_LAYER2;
		} else if (strcmp(curr_mode, "promiscuous") == 0) {
			modes[smode_num++] = VSW_LAYER2_PROMISC;
		} else if (strcmp(curr_mode, "routed") == 0) {
			modes[smode_num++] = VSW_LAYER3;
		} else {
			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
			    "setting to default switched mode",
			    vswp->instance, curr_mode);
			modes[smode_num++] = VSW_LAYER2;
		}
		curr_mode += strlen(curr_mode) + 1;
	}
	*found = smode_num;

	D2(vswp, "%s: %d modes found", __func__, smode_num);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Get the mac address of the physical device.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_get_physaddr(vsw_t *vswp)
{
	mac_handle_t	mh;
	char		drv[LIFNAMSIZ];
	uint_t		ddi_instance;

	D1(vswp, "%s: enter", __func__);

	if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS)
		return (1);

	if (mac_open(vswp->physname, ddi_instance, &mh) != 0) {
		cmn_err(CE_WARN, "!vsw%d: mac_open %s failed",
		    vswp->instance, vswp->physname);
		return (1);
	}

	READ_ENTER(&vswp->if_lockrw);
	mac_unicst_get(mh, vswp->if_addr.ether_addr_octet);
	RW_EXIT(&vswp->if_lockrw);

	mac_close(mh);

	vswp->mdprops |= VSW_DEV_MACADDR;

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Check to see if the card supports the setting of multiple unicast
 * addresses.
 *
 * Returns 0 if card supports the programming of multiple unicast addresses
 * and there are free address slots available, otherwise returns 1.
 */
static int
vsw_get_hw_maddr(vsw_t *vswp)
{
	D1(vswp, "%s: enter", __func__);

	mutex_enter(&vswp->mac_lock);
	if (vswp->mh == NULL) {
		mutex_exit(&vswp->mac_lock);
		return (1);
	}

	if (!mac_capab_get(vswp->mh, MAC_CAPAB_MULTIADDRESS, &vswp->maddr)) {
		DWARN(vswp, "Unable to get capabilities of"
		    " underlying device (%s)", vswp->physname);
		mutex_exit(&vswp->mac_lock);
		return (1);
	}
	mutex_exit(&vswp->mac_lock);

	if (vswp->maddr.maddr_naddrfree == 0) {
		cmn_err(CE_WARN,
		    "!vsw%d: device %s has no free unicast address slots",
		    vswp->instance, vswp->physname);
		return (1);
	}

	D2(vswp, "%s: %d addrs : %d free", __func__,
	    vswp->maddr.maddr_naddr, vswp->maddr.maddr_naddrfree);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Setup the required switching mode.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_setup_switching(vsw_t *vswp)
{
	int	i, rv = 1;

	D1(vswp, "%s: enter", __func__);

	/* select best switching mode */
	for (i = 0; i < vswp->smode_num; i++) {
		vswp->smode_idx = i;
		switch (vswp->smode[i]) {
		case VSW_LAYER2:
		case VSW_LAYER2_PROMISC:
			rv = vsw_setup_layer2(vswp);
			break;

		case VSW_LAYER3:
			rv = vsw_setup_layer3(vswp);
			break;

		default:
			DERR(vswp, "unknown switch mode");
			rv = 1;
			break;
		}

		if (rv == 0)
			break;
	}

	if (rv == 1) {
		cmn_err(CE_WARN, "!vsw%d: Unable to setup specified "
		    "switching mode", vswp->instance);
		return (rv);
	}

	D2(vswp, "%s: Operating in mode %d", __func__,
	    vswp->smode[vswp->smode_idx]);

	D1(vswp, "%s: exit", __func__);

	return (0);
}
/*
 * Setup for layer 2 switching.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_setup_layer2(vsw_t *vswp)
{
	D1(vswp, "%s: enter", __func__);

	vswp->vsw_switch_frame = vsw_switch_l2_frame;

	/*
	 * Attempt to link into the MAC layer so we can get
	 * and send packets out over the physical adapter.
	 */
	if (vswp->mdprops & VSW_MD_PHYSNAME) {
		if (vsw_mac_attach(vswp) != 0) {
			/*
			 * Registration with the MAC layer has failed,
			 * so return 1 so that we can fall back to the
			 * next preferred switching method.
			 */
			cmn_err(CE_WARN, "!vsw%d: Unable to join as MAC layer "
			    "client", vswp->instance);
			return (1);
		}

		if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) {
			/*
			 * Verify that underlying device can support multiple
			 * unicast mac addresses, and has free capacity.
			 */
			if (vsw_get_hw_maddr(vswp) != 0) {
				cmn_err(CE_WARN, "!vsw%d: Unable to setup "
				    "switching", vswp->instance);
				vsw_mac_detach(vswp);
				return (1);
			}
		}

	} else {
		/*
		 * No physical device name found in MD which is
		 * required for layer 2.
		 */
		cmn_err(CE_WARN, "!vsw%d: no physical device name specified",
		    vswp->instance);
		return (1);
	}

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static int
vsw_setup_layer3(vsw_t *vswp)
{
	D1(vswp, "%s: enter", __func__);

	D2(vswp, "%s: operating in layer 3 mode", __func__);
	vswp->vsw_switch_frame = vsw_switch_l3_frame;

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Link into the MAC layer to gain access to the services provided by
 * the underlying physical device driver (which should also have
 * registered with the MAC layer).
 *
 * Only when in layer 2 mode.
 */
static int
vsw_mac_attach(vsw_t *vswp)
{
	char	drv[LIFNAMSIZ];
	uint_t	ddi_instance;

	D1(vswp, "%s: enter", __func__);

	ASSERT(vswp->mh == NULL);
	ASSERT(vswp->mrh == NULL);
	ASSERT(vswp->mstarted == B_FALSE);
	ASSERT(vswp->mresources == B_FALSE);

	ASSERT(vswp->mdprops & VSW_MD_PHYSNAME);

	mutex_enter(&vswp->mac_lock);
	if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "!vsw%d: invalid device name: %s",
		    vswp->instance, vswp->physname);
		goto mac_fail_exit;
	}

	if ((mac_open(vswp->physname, ddi_instance, &vswp->mh)) != 0) {
		cmn_err(CE_WARN, "!vsw%d: mac_open %s failed",
		    vswp->instance, vswp->physname);
		goto mac_fail_exit;
	}

	ASSERT(vswp->mh != NULL);

	D2(vswp, "vsw_mac_attach: using device %s", vswp->physname);

	if (vsw_multi_ring_enable) {
		/*
		 * Initialize the ring table.
		 */
		vsw_mac_ring_tbl_init(vswp);

		/*
		 * Register our rx callback function.
		 */
		vswp->mrh = mac_rx_add(vswp->mh,
		    vsw_rx_queue_cb, (void *)vswp);
		ASSERT(vswp->mrh != NULL);

		/*
		 * Register our mac resource callback.
		 */
		mac_resource_set(vswp->mh, vsw_mac_ring_add_cb, (void *)vswp);
		vswp->mresources = B_TRUE;

		/*
		 * Get the ring resources available to us from
		 * the mac below us.
		 */
		mac_resources(vswp->mh);
	} else {
		/*
		 * Just register our rx callback function
		 */
		vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp);
		ASSERT(vswp->mrh != NULL);
	}

	/* Get the MAC tx fn */
	vswp->txinfo = mac_tx_get(vswp->mh);

	/* start the interface */
	if (mac_start(vswp->mh) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Could not start mac interface",
		    vswp->instance);
		goto mac_fail_exit;
	}

	mutex_exit(&vswp->mac_lock);

	vswp->mstarted = B_TRUE;

	D1(vswp, "%s: exit", __func__);
	return (0);

mac_fail_exit:
	mutex_exit(&vswp->mac_lock);
	vsw_mac_detach(vswp);

	D1(vswp, "%s: exit", __func__);
	return (1);
}

static void
vsw_mac_detach(vsw_t *vswp)
{
	D1(vswp, "vsw_mac_detach: enter");

	ASSERT(vswp != NULL);

	if (vsw_multi_ring_enable) {
		vsw_mac_ring_tbl_destroy(vswp);
	}

	mutex_enter(&vswp->mac_lock);

	if (vswp->mh != NULL) {
		if (vswp->mstarted)
			mac_stop(vswp->mh);
		if (vswp->mrh != NULL)
			mac_rx_remove(vswp->mh, vswp->mrh);
		if (vswp->mresources)
			mac_resource_set(vswp->mh, NULL, NULL);
		mac_close(vswp->mh);
	}

	vswp->mrh = NULL;
	vswp->mh = NULL;
	vswp->txinfo = NULL;
	vswp->mstarted = B_FALSE;

	mutex_exit(&vswp->mac_lock);

	D1(vswp, "vsw_mac_detach: exit");
}

/*
 * Depending on the mode specified and on the capabilities and capacity
 * of the underlying device, set up the physical device.
 *
 * If in layer 3 mode, then do nothing.
 *
 * If in layer 2 programmed mode attempt to program the unicast address
 * associated with the port into the physical device. If this is not
 * possible due to resource exhaustion or simply because the device does
 * not support multiple unicast addresses then, if required, fall back
 * onto putting the card into promisc mode.
 *
 * If in promisc mode then simply set the card into promisc mode.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_set_hw(vsw_t *vswp, vsw_port_t *port)
{
	mac_multi_addr_t	mac_addr;
	void			*mah;
	int			err;

	D1(vswp, "%s: enter", __func__);

	if (vswp->smode[vswp->smode_idx] == VSW_LAYER3)
		return (0);

	if (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) {
		return (vsw_set_hw_promisc(vswp, port));
	}

	if (vswp->maddr.maddr_handle == NULL)
		return (1);

	mah = vswp->maddr.maddr_handle;

	/*
	 * Attempt to program the unicast address into the HW.
	 */
	mac_addr.mma_addrlen = ETHERADDRL;
	ether_copy(&port->p_macaddr, &mac_addr.mma_addr);

	err = vswp->maddr.maddr_add(mah, &mac_addr);
	if (err != 0) {
		cmn_err(CE_WARN, "!vsw%d: failed to program addr "
		    "%x:%x:%x:%x:%x:%x for port %d into device %s "
		    ": err %d", vswp->instance,
		    port->p_macaddr.ether_addr_octet[0],
		    port->p_macaddr.ether_addr_octet[1],
		    port->p_macaddr.ether_addr_octet[2],
		    port->p_macaddr.ether_addr_octet[3],
		    port->p_macaddr.ether_addr_octet[4],
		    port->p_macaddr.ether_addr_octet[5],
		    port->p_instance, vswp->physname, err);

		/*
		 * Mark that attempt should be made to re-config sometime
		 * in future if a port is deleted.
		 */
		vswp->recfg_reqd = B_TRUE;

		/*
		 * Only 1 mode specified, nothing more to do.
		 */
		if (vswp->smode_num == 1)
			return (err);

		/*
		 * If promiscuous was next mode specified try to
		 * set the card into that mode.
		 */
		if ((vswp->smode_idx <= (vswp->smode_num - 2)) &&
		    (vswp->smode[vswp->smode_idx + 1]
		    == VSW_LAYER2_PROMISC)) {
			vswp->smode_idx += 1;
			return (vsw_set_hw_promisc(vswp, port));
		}
		return (err);
	}

	port->addr_slot = mac_addr.mma_slot;
	port->addr_set = VSW_ADDR_HW;

	D2(vswp, "programmed addr %x:%x:%x:%x:%x:%x for port %d "
	    "into slot %d of device %s",
	    port->p_macaddr.ether_addr_octet[0],
	    port->p_macaddr.ether_addr_octet[1],
	    port->p_macaddr.ether_addr_octet[2],
	    port->p_macaddr.ether_addr_octet[3],
	    port->p_macaddr.ether_addr_octet[4],
	    port->p_macaddr.ether_addr_octet[5],
	    port->p_instance, port->addr_slot, vswp->physname);

	D1(vswp, "%s: exit", __func__);

	return (0);
}
/*
 * If in layer 3 mode do nothing.
 *
 * If in layer 2 switched mode remove the address from the physical
 * device.
 *
 * If in layer 2 promiscuous mode disable promisc mode.
 *
 * Returns 0 on success.
 */
static int
vsw_unset_hw(vsw_t *vswp, vsw_port_t *port)
{
	int		err;
	void		*mah;

	D1(vswp, "%s: enter", __func__);

	if (vswp->smode[vswp->smode_idx] == VSW_LAYER3)
		return (0);

	if (port->addr_set == VSW_ADDR_PROMISC) {
		return (vsw_unset_hw_promisc(vswp, port));
	}

	if (port->addr_set == VSW_ADDR_HW) {
		if (vswp->maddr.maddr_handle == NULL)
			return (1);

		mah = vswp->maddr.maddr_handle;

		err = vswp->maddr.maddr_remove(mah, port->addr_slot);
		if (err != 0) {
			cmn_err(CE_WARN, "!vsw%d: Unable to remove addr "
			    "%x:%x:%x:%x:%x:%x for port %d from device %s"
			    " : (err %d)", vswp->instance,
			    port->p_macaddr.ether_addr_octet[0],
			    port->p_macaddr.ether_addr_octet[1],
			    port->p_macaddr.ether_addr_octet[2],
			    port->p_macaddr.ether_addr_octet[3],
			    port->p_macaddr.ether_addr_octet[4],
			    port->p_macaddr.ether_addr_octet[5],
			    port->p_instance, vswp->physname, err);
			return (err);
		}

		port->addr_set = VSW_ADDR_UNSET;

		D2(vswp, "removed addr %x:%x:%x:%x:%x:%x for "
		    "port %d from device %s",
		    port->p_macaddr.ether_addr_octet[0],
		    port->p_macaddr.ether_addr_octet[1],
		    port->p_macaddr.ether_addr_octet[2],
		    port->p_macaddr.ether_addr_octet[3],
		    port->p_macaddr.ether_addr_octet[4],
		    port->p_macaddr.ether_addr_octet[5],
		    port->p_instance, vswp->physname);
	}

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Set network card into promisc mode.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_set_hw_promisc(vsw_t *vswp, vsw_port_t *port)
{
	D1(vswp, "%s: enter", __func__);

	mutex_enter(&vswp->mac_lock);
	if (vswp->mh == NULL) {
		mutex_exit(&vswp->mac_lock);
		return (1);
	}

	if (vswp->promisc_cnt++ == 0) {
		if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) {
			vswp->promisc_cnt--;
			mutex_exit(&vswp->mac_lock);
			return (1);
		}
		cmn_err(CE_NOTE, "!vsw%d: switching device %s into "
		    "promiscuous mode", vswp->instance, vswp->physname);
	}
	mutex_exit(&vswp->mac_lock);
	port->addr_set = VSW_ADDR_PROMISC;

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Turn off promiscuous mode on network card.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_unset_hw_promisc(vsw_t *vswp, vsw_port_t *port)
{
	vsw_port_list_t	*plist = &vswp->plist;

	D2(vswp, "%s: enter", __func__);

	mutex_enter(&vswp->mac_lock);
	if (vswp->mh == NULL) {
		mutex_exit(&vswp->mac_lock);
		return (1);
	}

	ASSERT(port->addr_set == VSW_ADDR_PROMISC);

	if (--vswp->promisc_cnt == 0) {
		if (mac_promisc_set(vswp->mh, B_FALSE, MAC_DEVPROMISC) != 0) {
			vswp->promisc_cnt++;
			mutex_exit(&vswp->mac_lock);
			return (1);
		}

		/*
		 * We are exiting promisc mode either because we were
		 * only in promisc mode because we had failed over from
		 * switched mode due to HW resource issues, or the user
		 * wanted the card in promisc mode for all the ports and
		 * the last port is now being deleted. Tweak the message
		 * accordingly.
		 */
		if (plist->num_ports != 0) {
			cmn_err(CE_NOTE, "!vsw%d: switching device %s back to "
			    "programmed mode", vswp->instance,
			    vswp->physname);
		} else {
			cmn_err(CE_NOTE, "!vsw%d: switching device %s out of "
			    "promiscuous mode", vswp->instance,
			    vswp->physname);
		}
	}
	mutex_exit(&vswp->mac_lock);
	port->addr_set = VSW_ADDR_UNSET;

	D1(vswp, "%s: exit", __func__);
	return (0);
}
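
/*
 * Note that in the two routines above promisc_cnt acts as a simple
 * reference count: the underlying device is switched into promiscuous
 * mode only on the 0 -> 1 transition and out of it only on the
 * 1 -> 0 transition, so multiple ports can safely share the
 * promiscuous setting. Both transitions happen under mac_lock.
 */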
/*
 * Determine whether or not we are operating in our preferred
 * mode and, if not, whether the physical resources now allow us
 * to operate in it.
 *
 * Should only be invoked after port which is being deleted has been
 * removed from the port list.
 */
static int
vsw_reconfig_hw(vsw_t *vswp)
{
	vsw_port_list_t		*plist = &vswp->plist;
	mac_multi_addr_t	mac_addr;
	vsw_port_t		*tp;
	void			*mah;
	int			rv = 0;
	int			s_idx;

	D1(vswp, "%s: enter", __func__);

	if (vswp->maddr.maddr_handle == NULL)
		return (1);

	/*
	 * Check if there are now sufficient HW resources to
	 * attempt a re-config.
	 */
	if (plist->num_ports > vswp->maddr.maddr_naddrfree)
		return (1);

	/*
	 * If we are in layer 2 (i.e. switched) or would like to be
	 * in layer 2 then check if any ports need to be programmed
	 * into the HW.
	 *
	 * This can happen in two cases - switched was specified as
	 * the preferred mode of operation but we exhausted the HW
	 * resources and so failed over to the next specified mode,
	 * or switched was the only mode specified so after HW
	 * resources were exhausted there was nothing more we
	 * could do.
	 */
	if (vswp->smode_idx > 0)
		s_idx = vswp->smode_idx - 1;
	else
		s_idx = vswp->smode_idx;

	if (vswp->smode[s_idx] == VSW_LAYER2) {
		mah = vswp->maddr.maddr_handle;

		D2(vswp, "%s: attempting reconfig..", __func__);

		/*
		 * Scan the port list for any port whose address has not
		 * been programmed in HW - there should be a max of one.
		 */
		for (tp = plist->head; tp != NULL; tp = tp->p_next) {
			if (tp->addr_set != VSW_ADDR_HW) {
				mac_addr.mma_addrlen = ETHERADDRL;
				ether_copy(&tp->p_macaddr, &mac_addr.mma_addr);

				rv = vswp->maddr.maddr_add(mah, &mac_addr);
				if (rv != 0) {
					DWARN(vswp, "Error setting addr in "
					    "HW for port %d err %d",
					    tp->p_instance, rv);
					goto reconfig_err_exit;
				}
				tp->addr_slot = mac_addr.mma_slot;

				D2(vswp, "re-programmed port %d "
				    "addr %x:%x:%x:%x:%x:%x into slot %d"
				    " of device %s", tp->p_instance,
				    tp->p_macaddr.ether_addr_octet[0],
				    tp->p_macaddr.ether_addr_octet[1],
				    tp->p_macaddr.ether_addr_octet[2],
				    tp->p_macaddr.ether_addr_octet[3],
				    tp->p_macaddr.ether_addr_octet[4],
				    tp->p_macaddr.ether_addr_octet[5],
				    tp->addr_slot, vswp->physname);

				/*
				 * If up to now we had to put the card into
				 * promisc mode to see this address, we
				 * can now safely disable promisc mode.
				 */
				if (tp->addr_set == VSW_ADDR_PROMISC)
					(void) vsw_unset_hw_promisc(vswp, tp);

				tp->addr_set = VSW_ADDR_HW;
			}
		}

		/* no further re-config needed */
		vswp->recfg_reqd = B_FALSE;

		vswp->smode_idx = s_idx;

		return (0);
	}

reconfig_err_exit:
	return (rv);
}

static void
vsw_mac_ring_tbl_entry_init(vsw_t *vswp, vsw_mac_ring_t *ringp)
{
	ringp->ring_state = VSW_MAC_RING_FREE;
	ringp->ring_arg = NULL;
	ringp->ring_blank = NULL;
	ringp->ring_vqp = NULL;
	ringp->ring_vswp = vswp;
}

static void
vsw_mac_ring_tbl_init(vsw_t *vswp)
{
	int		i;

	mutex_init(&vswp->mac_ring_lock, NULL, MUTEX_DRIVER, NULL);

	vswp->mac_ring_tbl_sz = vsw_mac_rx_rings;
	vswp->mac_ring_tbl =
	    kmem_alloc(vsw_mac_rx_rings * sizeof (vsw_mac_ring_t),
	    KM_SLEEP);

	for (i = 0; i < vswp->mac_ring_tbl_sz; i++)
		vsw_mac_ring_tbl_entry_init(vswp, &vswp->mac_ring_tbl[i]);
}

static void
vsw_mac_ring_tbl_destroy(vsw_t *vswp)
{
	int		i;
	vsw_mac_ring_t	*ringp;

	mutex_enter(&vswp->mac_ring_lock);
	for (i = 0; i < vswp->mac_ring_tbl_sz; i++) {
		ringp = &vswp->mac_ring_tbl[i];

		if (ringp->ring_state != VSW_MAC_RING_FREE) {
			/*
			 * Destroy the queue.
			 */
			vsw_queue_stop(ringp->ring_vqp);
			vsw_queue_destroy(ringp->ring_vqp);

			/*
			 * Re-initialize the structure.
			 */
			vsw_mac_ring_tbl_entry_init(vswp, ringp);
		}
	}
	mutex_exit(&vswp->mac_ring_lock);

	mutex_destroy(&vswp->mac_ring_lock);
	kmem_free(vswp->mac_ring_tbl,
	    vswp->mac_ring_tbl_sz * sizeof (vsw_mac_ring_t));
	vswp->mac_ring_tbl_sz = 0;
}

/*
 * Handle resource add callbacks from the driver below.
 */
static mac_resource_handle_t
vsw_mac_ring_add_cb(void *arg, mac_resource_t *mrp)
{
	vsw_t		*vswp = (vsw_t *)arg;
	mac_rx_fifo_t	*mrfp = (mac_rx_fifo_t *)mrp;
	vsw_mac_ring_t	*ringp;
	vsw_queue_t	*vqp;
	int		i;

	ASSERT(vswp != NULL);
	ASSERT(mrp != NULL);
	ASSERT(vswp->mac_ring_tbl != NULL);

	D1(vswp, "%s: enter", __func__);

	/*
	 * Check to make sure we have the correct resource type.
	 */
	if (mrp->mr_type != MAC_RX_FIFO)
		return (NULL);

	/*
	 * Find an open entry in the ring table.
	 */
	mutex_enter(&vswp->mac_ring_lock);
	for (i = 0; i < vswp->mac_ring_tbl_sz; i++) {
		ringp = &vswp->mac_ring_tbl[i];

		/*
		 * Check for an empty slot, if found, then setup queue
		 * and thread.
		 */
		if (ringp->ring_state == VSW_MAC_RING_FREE) {
			/*
			 * Create the queue for this ring.
			 */
			vqp = vsw_queue_create();

			/*
			 * Initialize the ring data structure.
			 */
			ringp->ring_vqp = vqp;
			ringp->ring_arg = mrfp->mrf_arg;
			ringp->ring_blank = mrfp->mrf_blank;
			ringp->ring_state = VSW_MAC_RING_INUSE;

			/*
			 * Create the worker thread.
			 */
			vqp->vq_worker = thread_create(NULL, 0,
			    vsw_queue_worker, ringp, 0, &p0,
			    TS_RUN, minclsyspri);
			if (vqp->vq_worker == NULL) {
				vsw_queue_destroy(vqp);
				vsw_mac_ring_tbl_entry_init(vswp, ringp);
				ringp = NULL;
			}

			if (ringp != NULL) {
				/*
				 * Make sure the thread gets to the
				 * running state for this ring.
				 */
				mutex_enter(&vqp->vq_lock);
				while ((vqp->vq_state != VSW_QUEUE_RUNNING) &&
				    (vqp->vq_state != VSW_QUEUE_DRAINED)) {
					cv_wait(&vqp->vq_cv, &vqp->vq_lock);
				}

				/*
				 * If the thread is not running, cleanup.
				 */
				if (vqp->vq_state == VSW_QUEUE_DRAINED) {
					vsw_queue_destroy(vqp);
					vsw_mac_ring_tbl_entry_init(vswp,
					    ringp);
					ringp = NULL;
				}
				mutex_exit(&vqp->vq_lock);
			}

			mutex_exit(&vswp->mac_ring_lock);
			D1(vswp, "%s: exit", __func__);
			return ((mac_resource_handle_t)ringp);
		}
	}
	mutex_exit(&vswp->mac_ring_lock);

	/*
	 * No slots in the ring table available.
	 */
	D1(vswp, "%s: exit", __func__);
	return (NULL);
}
static void
vsw_queue_stop(vsw_queue_t *vqp)
{
	mutex_enter(&vqp->vq_lock);

	if (vqp->vq_state == VSW_QUEUE_RUNNING) {
		vqp->vq_state = VSW_QUEUE_STOP;
		cv_signal(&vqp->vq_cv);

		while (vqp->vq_state != VSW_QUEUE_DRAINED)
			cv_wait(&vqp->vq_cv, &vqp->vq_lock);
	}

	vqp->vq_state = VSW_QUEUE_STOPPED;

	mutex_exit(&vqp->vq_lock);
}

static vsw_queue_t *
vsw_queue_create()
{
	vsw_queue_t *vqp;

	vqp = kmem_zalloc(sizeof (vsw_queue_t), KM_SLEEP);

	mutex_init(&vqp->vq_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vqp->vq_cv, NULL, CV_DRIVER, NULL);
	vqp->vq_first = NULL;
	vqp->vq_last = NULL;
	vqp->vq_state = VSW_QUEUE_STOPPED;

	return (vqp);
}

static void
vsw_queue_destroy(vsw_queue_t *vqp)
{
	cv_destroy(&vqp->vq_cv);
	mutex_destroy(&vqp->vq_lock);
	kmem_free(vqp, sizeof (vsw_queue_t));
}

static void
vsw_queue_worker(vsw_mac_ring_t *rrp)
{
	mblk_t		*mp;
	vsw_queue_t	*vqp = rrp->ring_vqp;
	vsw_t		*vswp = rrp->ring_vswp;

	mutex_enter(&vqp->vq_lock);

	ASSERT(vqp->vq_state == VSW_QUEUE_STOPPED);

	/*
	 * Set the state to running, since the thread is now active.
	 */
	vqp->vq_state = VSW_QUEUE_RUNNING;
	cv_signal(&vqp->vq_cv);

	while (vqp->vq_state == VSW_QUEUE_RUNNING) {
		/*
		 * Wait for work to do, or for the state to change
		 * to not running.
		 */
		while ((vqp->vq_state == VSW_QUEUE_RUNNING) &&
		    (vqp->vq_first == NULL)) {
			cv_wait(&vqp->vq_cv, &vqp->vq_lock);
		}

		/*
		 * Process packets that we received from the interface.
		 */
		if (vqp->vq_first != NULL) {
			mp = vqp->vq_first;

			vqp->vq_first = NULL;
			vqp->vq_last = NULL;

			mutex_exit(&vqp->vq_lock);

			/* switch the chain of packets received */
			vswp->vsw_switch_frame(vswp, mp,
			    VSW_PHYSDEV, NULL, NULL);

			mutex_enter(&vqp->vq_lock);
		}
	}

	/*
	 * We are drained and signal we are done.
	 */
	vqp->vq_state = VSW_QUEUE_DRAINED;
	cv_signal(&vqp->vq_cv);

	/*
	 * Exit lock and drain the remaining packets.
	 */
	mutex_exit(&vqp->vq_lock);

	/*
	 * Exit the thread
	 */
	thread_exit();
}
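
/*
 * The routine below is the producer half of the worker (consumer)
 * thread above: vsw_rx_queue_cb() appends an incoming mblk chain to
 * the vq_first/vq_last list under vq_lock and signals vq_cv, while
 * vsw_queue_worker() detaches the whole list and hands it to
 * vsw_switch_frame() for switching.
 */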
/*
 * static void
 * vsw_rx_queue_cb() - Receive callback routine when
 * vsw_multi_ring_enable is non-zero. Queue the packets
 * to a packet queue for a worker thread to process.
 */
static void
vsw_rx_queue_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
{
	vsw_mac_ring_t	*ringp = (vsw_mac_ring_t *)mrh;
	vsw_t		*vswp = (vsw_t *)arg;
	vsw_queue_t	*vqp;
	mblk_t		*bp, *last;

	ASSERT(mrh != NULL);
	ASSERT(vswp != NULL);
	ASSERT(mp != NULL);

	D1(vswp, "%s: enter", __func__);

	/*
	 * Find the last element in the mblk chain.
	 */
	bp = mp;
	do {
		last = bp;
		bp = bp->b_next;
	} while (bp != NULL);

	/* Get the queue for the packets */
	vqp = ringp->ring_vqp;

	/*
	 * Grab the lock so we can queue the packets.
	 */
	mutex_enter(&vqp->vq_lock);

	if (vqp->vq_state != VSW_QUEUE_RUNNING) {
		freemsg(mp);
		mutex_exit(&vqp->vq_lock);
		goto vsw_rx_queue_cb_exit;
	}

	/*
	 * Add the mblk chain to the queue. If there
	 * are some mblks in the queue, then add the new
	 * chain to the end.
	 */
	if (vqp->vq_first == NULL)
		vqp->vq_first = mp;
	else
		vqp->vq_last->b_next = mp;

	vqp->vq_last = last;

	/*
	 * Signal the worker thread that there is work to
	 * do.
	 */
	cv_signal(&vqp->vq_cv);

	/*
	 * Let go of the lock and exit.
	 */
	mutex_exit(&vqp->vq_lock);

vsw_rx_queue_cb_exit:
	D1(vswp, "%s: exit", __func__);
}

/*
 * receive callback routine. Invoked by MAC layer when there
 * are pkts being passed up from physical device.
 *
 * PERF: It may be more efficient when the card is in promisc
 * mode to check the dest address of the pkts here (against
 * the FDB) rather than checking later. Needs to be investigated.
 */
static void
vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
{
	_NOTE(ARGUNUSED(mrh))

	vsw_t		*vswp = (vsw_t *)arg;

	ASSERT(vswp != NULL);

	D1(vswp, "vsw_rx_cb: enter");

	/* switch the chain of packets received */
	vswp->vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL);

	D1(vswp, "vsw_rx_cb: exit");
}

/*
 * Send a message out over the physical device via the MAC layer.
 *
 * Returns any mblks that it was unable to transmit.
 */
static mblk_t *
vsw_tx_msg(vsw_t *vswp, mblk_t *mp)
{
	const mac_txinfo_t	*mtp;
	mblk_t			*nextp;

	mutex_enter(&vswp->mac_lock);
	if (vswp->mh == NULL) {
		DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail");
		mutex_exit(&vswp->mac_lock);
		return (mp);
	} else {
		for (;;) {
			nextp = mp->b_next;
			mp->b_next = NULL;

			mtp = vswp->txinfo;

			if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) {
				mp->b_next = nextp;
				break;
			}

			if ((mp = nextp) == NULL)
				break;
		}
	}
	mutex_exit(&vswp->mac_lock);

	return (mp);
}

/*
 * Register with the MAC layer as a network device, so we
 * can be plumbed if necessary.
 */
2061 */ 2062 static int 2063 vsw_mac_register(vsw_t *vswp) 2064 { 2065 mac_register_t *macp; 2066 int rv; 2067 2068 D1(vswp, "%s: enter", __func__); 2069 2070 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 2071 return (EINVAL); 2072 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 2073 macp->m_driver = vswp; 2074 macp->m_dip = vswp->dip; 2075 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 2076 macp->m_callbacks = &vsw_m_callbacks; 2077 macp->m_min_sdu = 0; 2078 macp->m_max_sdu = ETHERMTU; 2079 rv = mac_register(macp, &vswp->if_mh); 2080 mac_free(macp); 2081 if (rv == 0) 2082 vswp->if_state |= VSW_IF_REG; 2083 2084 D1(vswp, "%s: exit", __func__); 2085 2086 return (rv); 2087 } 2088 2089 static int 2090 vsw_mac_unregister(vsw_t *vswp) 2091 { 2092 int rv = 0; 2093 2094 D1(vswp, "%s: enter", __func__); 2095 2096 WRITE_ENTER(&vswp->if_lockrw); 2097 2098 if (vswp->if_state & VSW_IF_REG) { 2099 rv = mac_unregister(vswp->if_mh); 2100 if (rv != 0) { 2101 DWARN(vswp, "%s: unable to unregister from MAC " 2102 "framework", __func__); 2103 2104 RW_EXIT(&vswp->if_lockrw); 2105 D1(vswp, "%s: fail exit", __func__); 2106 return (rv); 2107 } 2108 2109 /* mark i/f as down and unregistered */ 2110 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 2111 } 2112 RW_EXIT(&vswp->if_lockrw); 2113 2114 D1(vswp, "%s: exit", __func__); 2115 2116 return (rv); 2117 } 2118 2119 static int 2120 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 2121 { 2122 vsw_t *vswp = (vsw_t *)arg; 2123 2124 D1(vswp, "%s: enter", __func__); 2125 2126 mutex_enter(&vswp->mac_lock); 2127 if (vswp->mh == NULL) { 2128 mutex_exit(&vswp->mac_lock); 2129 return (EINVAL); 2130 } 2131 2132 /* return stats from underlying device */ 2133 *val = mac_stat_get(vswp->mh, stat); 2134 2135 mutex_exit(&vswp->mac_lock); 2136 2137 return (0); 2138 } 2139 2140 static void 2141 vsw_m_stop(void *arg) 2142 { 2143 vsw_t *vswp = (vsw_t *)arg; 2144 2145 D1(vswp, "%s: enter", __func__); 2146 2147 WRITE_ENTER(&vswp->if_lockrw); 2148 vswp->if_state &= ~VSW_IF_UP; 2149 RW_EXIT(&vswp->if_lockrw); 2150 2151 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 2152 } 2153 2154 static int 2155 vsw_m_start(void *arg) 2156 { 2157 vsw_t *vswp = (vsw_t *)arg; 2158 2159 D1(vswp, "%s: enter", __func__); 2160 2161 WRITE_ENTER(&vswp->if_lockrw); 2162 vswp->if_state |= VSW_IF_UP; 2163 RW_EXIT(&vswp->if_lockrw); 2164 2165 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 2166 return (0); 2167 } 2168 2169 /* 2170 * Change the local interface address. 2171 */ 2172 static int 2173 vsw_m_unicst(void *arg, const uint8_t *macaddr) 2174 { 2175 vsw_t *vswp = (vsw_t *)arg; 2176 2177 D1(vswp, "%s: enter", __func__); 2178 2179 WRITE_ENTER(&vswp->if_lockrw); 2180 ether_copy(macaddr, &vswp->if_addr); 2181 RW_EXIT(&vswp->if_lockrw); 2182 2183 D1(vswp, "%s: exit", __func__); 2184 2185 return (0); 2186 } 2187 2188 static int 2189 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 2190 { 2191 vsw_t *vswp = (vsw_t *)arg; 2192 mcst_addr_t *mcst_p = NULL; 2193 uint64_t addr = 0x0; 2194 int i, ret = 0; 2195 2196 D1(vswp, "%s: enter", __func__); 2197 2198 /* 2199 * Convert address into form that can be used 2200 * as hash table key. 
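 * For example, the multicast address 01:00:5e:00:00:01 folds,
 * one byte at a time, into the 64-bit key 0x01005e000001.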
2201 */ 2202 for (i = 0; i < ETHERADDRL; i++) { 2203 addr = (addr << 8) | mca[i]; 2204 } 2205 2206 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 2207 2208 if (add) { 2209 D2(vswp, "%s: adding multicast", __func__); 2210 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 2211 /* 2212 * Update the list of multicast addresses 2213 * contained within the vsw_t structure to 2214 * include this new one. 2215 */ 2216 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 2217 if (mcst_p == NULL) { 2218 DERR(vswp, "%s unable to alloc mem", __func__); 2219 return (1); 2220 } 2221 mcst_p->addr = addr; 2222 2223 mutex_enter(&vswp->mca_lock); 2224 mcst_p->nextp = vswp->mcap; 2225 vswp->mcap = mcst_p; 2226 mutex_exit(&vswp->mca_lock); 2227 2228 /* 2229 * Call into the underlying driver to program the 2230 * address into HW. 2231 */ 2232 mutex_enter(&vswp->mac_lock); 2233 if (vswp->mh != NULL) { 2234 ret = mac_multicst_add(vswp->mh, mca); 2235 if (ret != 0) { 2236 cmn_err(CE_WARN, "!vsw%d: unable to " 2237 "add multicast address", 2238 vswp->instance); 2239 mutex_exit(&vswp->mac_lock); 2240 goto vsw_remove_addr; 2241 } 2242 } 2243 mutex_exit(&vswp->mac_lock); 2244 } else { 2245 cmn_err(CE_WARN, "!vsw%d: unable to add multicast " 2246 "address", vswp->instance); 2247 } 2248 return (ret); 2249 } 2250 2251 vsw_remove_addr: 2252 2253 D2(vswp, "%s: removing multicast", __func__); 2254 /* 2255 * Remove the address from the hash table.. 2256 */ 2257 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 2258 2259 /* 2260 * ..and then from the list maintained in the 2261 * vsw_t structure. 2262 */ 2263 vsw_del_addr(VSW_LOCALDEV, vswp, addr); 2264 2265 mutex_enter(&vswp->mac_lock); 2266 if (vswp->mh != NULL) 2267 (void) mac_multicst_remove(vswp->mh, mca); 2268 mutex_exit(&vswp->mac_lock); 2269 } 2270 2271 D1(vswp, "%s: exit", __func__); 2272 2273 return (0); 2274 } 2275 2276 static int 2277 vsw_m_promisc(void *arg, boolean_t on) 2278 { 2279 vsw_t *vswp = (vsw_t *)arg; 2280 2281 D1(vswp, "%s: enter", __func__); 2282 2283 WRITE_ENTER(&vswp->if_lockrw); 2284 if (on) 2285 vswp->if_state |= VSW_IF_PROMISC; 2286 else 2287 vswp->if_state &= ~VSW_IF_PROMISC; 2288 RW_EXIT(&vswp->if_lockrw); 2289 2290 D1(vswp, "%s: exit", __func__); 2291 2292 return (0); 2293 } 2294 2295 static mblk_t * 2296 vsw_m_tx(void *arg, mblk_t *mp) 2297 { 2298 vsw_t *vswp = (vsw_t *)arg; 2299 2300 D1(vswp, "%s: enter", __func__); 2301 2302 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 2303 2304 D1(vswp, "%s: exit", __func__); 2305 2306 return (NULL); 2307 } 2308 2309 /* 2310 * Register for machine description (MD) updates. 2311 * 2312 * Returns 0 on success, 1 on failure. 2313 */ 2314 static int 2315 vsw_mdeg_register(vsw_t *vswp) 2316 { 2317 mdeg_prop_spec_t *pspecp; 2318 mdeg_node_spec_t *inst_specp; 2319 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 2320 size_t templatesz; 2321 int inst, rv; 2322 2323 D1(vswp, "%s: enter", __func__); 2324 2325 /* 2326 * In each 'virtual-device' node in the MD there is a 2327 * 'cfg-handle' property which is the MD's concept of 2328 * an instance number (this may be completely different from 2329 * the device drivers instance #). OBP reads that value and 2330 * stores it in the 'reg' property of the appropriate node in 2331 * the device tree. So we use the 'reg' value when registering 2332 * with the mdeg framework, to ensure we get events for the 2333 * correct nodes. 
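 *
 * For example (a hypothetical value, for illustration): a vsw MD node
 * carrying 'cfg-handle' = 3 is mirrored by OBP as 'reg' = 3 on the
 * corresponding device tree node, and 3 is then the instance value we
 * hand to the mdeg framework below.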
2334 */ 2335 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 2336 DDI_PROP_DONTPASS, reg_propname, -1); 2337 if (inst == -1) { 2338 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from " 2339 "OBP device tree", vswp->instance, reg_propname); 2340 return (1); 2341 } 2342 2343 D2(vswp, "%s: instance %d registering with mdeg", __func__, inst); 2344 2345 /* 2346 * Allocate and initialize a per-instance copy 2347 * of the global property spec array that will 2348 * uniquely identify this vsw instance. 2349 */ 2350 templatesz = sizeof (vsw_prop_template); 2351 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 2352 2353 bcopy(vsw_prop_template, pspecp, templatesz); 2354 2355 VSW_SET_MDEG_PROP_INST(pspecp, inst); 2356 2357 /* initialize the complete prop spec structure */ 2358 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 2359 inst_specp->namep = "virtual-device"; 2360 inst_specp->specp = pspecp; 2361 2362 /* 2363 * Register an interest in 'virtual-device' nodes with a 2364 * 'name' property of 'virtual-network-switch' 2365 */ 2366 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 2367 (void *)vswp, &mdeg_hdl); 2368 if (rv != MDEG_SUCCESS) { 2369 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 2370 __func__, rv); 2371 goto mdeg_reg_fail; 2372 } 2373 2374 /* 2375 * Register an interest in 'vsw-port' nodes. 2376 */ 2377 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 2378 (void *)vswp, &mdeg_port_hdl); 2379 if (rv != MDEG_SUCCESS) { 2380 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 2381 (void) mdeg_unregister(mdeg_hdl); 2382 goto mdeg_reg_fail; 2383 } 2384 2385 /* save off data that will be needed later */ 2386 vswp->inst_spec = inst_specp; 2387 vswp->mdeg_hdl = mdeg_hdl; 2388 vswp->mdeg_port_hdl = mdeg_port_hdl; 2389 2390 D1(vswp, "%s: exit", __func__); 2391 return (0); 2392 2393 mdeg_reg_fail: 2394 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 2395 vswp->instance); 2396 kmem_free(pspecp, templatesz); 2397 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 2398 2399 vswp->mdeg_hdl = NULL; 2400 vswp->mdeg_port_hdl = NULL; 2401 2402 return (1); 2403 } 2404 2405 static void 2406 vsw_mdeg_unregister(vsw_t *vswp) 2407 { 2408 D1(vswp, "vsw_mdeg_unregister: enter"); 2409 2410 if (vswp->mdeg_hdl != NULL) 2411 (void) mdeg_unregister(vswp->mdeg_hdl); 2412 2413 if (vswp->mdeg_port_hdl != NULL) 2414 (void) mdeg_unregister(vswp->mdeg_port_hdl); 2415 2416 if (vswp->inst_spec != NULL) { 2417 if (vswp->inst_spec->specp != NULL) { 2418 (void) kmem_free(vswp->inst_spec->specp, 2419 sizeof (vsw_prop_template)); 2420 vswp->inst_spec->specp = NULL; 2421 } 2422 2423 (void) kmem_free(vswp->inst_spec, 2424 sizeof (mdeg_node_spec_t)); 2425 vswp->inst_spec = NULL; 2426 } 2427 2428 D1(vswp, "vsw_mdeg_unregister: exit"); 2429 } 2430 2431 /* 2432 * Mdeg callback invoked for the vsw node itself. 2433 */ 2434 static int 2435 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 2436 { 2437 vsw_t *vswp; 2438 int idx; 2439 md_t *mdp; 2440 mde_cookie_t node; 2441 uint64_t inst; 2442 char *node_name = NULL; 2443 2444 if (resp == NULL) 2445 return (MDEG_FAILURE); 2446 2447 vswp = (vsw_t *)cb_argp; 2448 2449 D1(vswp, "%s: added %d : removed %d : curr matched %d" 2450 " : prev matched %d", __func__, resp->added.nelem, 2451 resp->removed.nelem, resp->match_curr.nelem, 2452 resp->match_prev.nelem); 2453 2454 /* 2455 * Expect 'added' to be non-zero if virtual-network-switch 2456 * nodes exist in the MD when the driver attaches. 
 */
	for (idx = 0; idx < resp->added.nelem; idx++) {
		mdp = resp->added.mdp;
		node = resp->added.mdep[idx];

		if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
			DERR(vswp, "%s: unable to get node name for "
			    "node(%d) 0x%lx", __func__, idx, node);
			continue;
		}

		if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
			DERR(vswp, "%s: prop(cfg-handle) not found port(%d)",
			    __func__, idx);
			continue;
		}

		D2(vswp, "%s: added node(%d) 0x%lx with name %s "
		    "and inst %d", __func__, idx, node, node_name, inst);

		vsw_get_initial_md_properties(vswp, mdp, node);
	}

	/*
	 * A non-zero 'match' value indicates that the MD has been
	 * updated and that a virtual-network-switch node is present
	 * which may or may not have been updated. It is up to the clients
	 * to examine their own nodes and determine if they have changed.
	 */
	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
		mdp = resp->match_curr.mdp;
		node = resp->match_curr.mdep[idx];

		if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
			DERR(vswp, "%s: unable to get node name for "
			    "node(%d) 0x%lx", __func__, idx, node);
			continue;
		}

		if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
			DERR(vswp, "%s: prop(cfg-handle) not found port(%d)",
			    __func__, idx);
			continue;
		}

		D2(vswp, "%s: changed node(%d) 0x%lx with name %s "
		    "and inst %d", __func__, idx, node, node_name, inst);

		vsw_update_md_prop(vswp, mdp, node);
	}

	return (MDEG_SUCCESS);
}

/*
 * Mdeg callback invoked for changes to the vsw-port nodes
 * under the vsw node.
 */
static int
vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
{
	vsw_t		*vswp;
	int		idx;
	md_t		*mdp;
	mde_cookie_t	node;
	uint64_t	inst;

	if ((resp == NULL) || (cb_argp == NULL))
		return (MDEG_FAILURE);

	vswp = (vsw_t *)cb_argp;

	D2(vswp, "%s: added %d : removed %d : curr matched %d"
	    " : prev matched %d", __func__, resp->added.nelem,
	    resp->removed.nelem, resp->match_curr.nelem,
	    resp->match_prev.nelem);

	/* process added ports */
	for (idx = 0; idx < resp->added.nelem; idx++) {
		mdp = resp->added.mdp;
		node = resp->added.mdep[idx];

		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);

		if (vsw_port_add(vswp, mdp, &node) != 0) {
			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
			    "(0x%lx)", vswp->instance, node);
		}
	}

	/* process removed ports */
	for (idx = 0; idx < resp->removed.nelem; idx++) {
		mdp = resp->removed.mdp;
		node = resp->removed.mdep[idx];

		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
			DERR(vswp, "%s: prop(%s) not found in port(%d)",
			    __func__, id_propname, idx);
			continue;
		}

		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);

		if (vsw_port_detach(vswp, inst) != 0) {
			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
			    vswp->instance, inst);
		}
	}

	/*
	 * Currently no support for updating already active ports.
	 * So, ignore the match_curr and match_prev arrays for now.
	 */

	D1(vswp, "%s: exit", __func__);

	return (MDEG_SUCCESS);
}

/*
 * Read the initial start-of-day values from the specified MD node.
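 *
 * The values read below are the name of the physical device, the MAC
 * address for the vswitch device itself (macaddr_propname) and the
 * list of switching modes (smode_propname); each one that is found
 * simply sets the matching VSW_MD_* flag in vswp->mdprops.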
 */
static void
vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
{
	int		i;
	uint64_t	macaddr = 0;

	D1(vswp, "%s: enter", __func__);

	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) == 0) {
		/*
		 * Note it is valid for the physname property to
		 * be NULL, so check the actual name length to determine
		 * if we have an actual device name.
		 */
		if (strlen(vswp->physname) > 0)
			vswp->mdprops |= VSW_MD_PHYSNAME;
	} else {
		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
		    "device from MD", vswp->instance);
		return;
	}

	/* mac address for vswitch device itself */
	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
		    vswp->instance);

		/*
		 * Fall back to using the mac address of the physical
		 * device.
		 */
		if (vsw_get_physaddr(vswp) == 0) {
			cmn_err(CE_NOTE, "!vsw%d: Using MAC address from "
			    "physical device (%s)", vswp->instance,
			    vswp->physname);
		} else {
			cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address "
			    "from device %s", vswp->instance,
			    vswp->physname);
		}
	} else {
		WRITE_ENTER(&vswp->if_lockrw);
		for (i = ETHERADDRL - 1; i >= 0; i--) {
			vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
			macaddr >>= 8;
		}
		RW_EXIT(&vswp->if_lockrw);
		vswp->mdprops |= VSW_MD_MACADDR;
	}

	if (vsw_get_md_smodes(vswp, mdp, node,
	    vswp->smode, &vswp->smode_num)) {
		cmn_err(CE_WARN, "vsw%d: Unable to read %s property from "
		    "MD, defaulting to programmed mode", vswp->instance,
		    smode_propname);

		for (i = 0; i < NUM_SMODES; i++)
			vswp->smode[i] = VSW_LAYER2;

		vswp->smode_num = NUM_SMODES;
	} else {
		ASSERT(vswp->smode_num != 0);
		vswp->mdprops |= VSW_MD_SMODE;
	}

	/*
	 * If we are unable to set up any switching mode there is
	 * nothing more we can do.
	 */
	if (vsw_setup_switching(vswp))
		return;

	WRITE_ENTER(&vswp->if_lockrw);
	vswp->if_state &= ~VSW_IF_UP;
	RW_EXIT(&vswp->if_lockrw);
	if (vswp->mdprops & (VSW_MD_MACADDR | VSW_DEV_MACADDR)) {
		if (vsw_mac_register(vswp) != 0) {
			/*
			 * Treat this as a non-fatal error as we may be
			 * able to operate in some other mode.
			 */
			cmn_err(CE_WARN, "vsw%d: Unable to register as "
			    "provider with MAC layer", vswp->instance);
		}
	}

	D1(vswp, "%s: exit", __func__);
}

/*
 * Check to see if the relevant properties in the specified node have
 * changed, and if so take the appropriate action.
 *
 * If any of the properties are missing or invalid we don't take
 * any action, as this function should only be invoked when modifications
 * have been made to what we assume is a working configuration, which
 * we leave active.
 *
 * Note it is legal for this routine to be invoked even if none of the
 * properties in the port node within the MD have actually changed.
2679 */ 2680 static void 2681 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 2682 { 2683 char physname[LIFNAMSIZ]; 2684 char drv[LIFNAMSIZ]; 2685 uint_t ddi_instance; 2686 uint8_t new_smode[NUM_SMODES]; 2687 int i, smode_num = 0; 2688 uint64_t macaddr = 0; 2689 vsw_port_list_t *plist = &vswp->plist; 2690 vsw_port_t *port = NULL; 2691 enum {MD_init = 0x1, 2692 MD_physname = 0x2, 2693 MD_macaddr = 0x4, 2694 MD_smode = 0x8} updated; 2695 2696 updated = MD_init; 2697 2698 D1(vswp, "%s: enter", __func__); 2699 2700 /* 2701 * Check if name of physical device in MD has changed. 2702 */ 2703 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 2704 /* 2705 * Do basic sanity check on new device name/instance, 2706 * if its non NULL. It is valid for the device name to 2707 * have changed from a non NULL to a NULL value, i.e. 2708 * the vsw is being changed to 'routed' mode. 2709 */ 2710 if ((strlen(physname) != 0) && 2711 (ddi_parse(physname, drv, 2712 &ddi_instance) != DDI_SUCCESS)) { 2713 cmn_err(CE_WARN, "!vsw%d: new device name %s is not" 2714 " a valid device name/instance", 2715 vswp->instance, physname); 2716 goto fail_reconf; 2717 } 2718 2719 if (strcmp(physname, vswp->physname)) { 2720 D2(vswp, "%s: device name changed from %s to %s", 2721 __func__, vswp->physname, physname); 2722 2723 updated |= MD_physname; 2724 } else { 2725 D2(vswp, "%s: device name unchanged at %s", 2726 __func__, vswp->physname); 2727 } 2728 } else { 2729 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 2730 "device from updated MD.", vswp->instance); 2731 goto fail_reconf; 2732 } 2733 2734 /* 2735 * Check if MAC address has changed. 2736 */ 2737 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 2738 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 2739 vswp->instance); 2740 goto fail_reconf; 2741 } else { 2742 READ_ENTER(&vswp->if_lockrw); 2743 for (i = ETHERADDRL - 1; i >= 0; i--) { 2744 if (vswp->if_addr.ether_addr_octet[i] 2745 != (macaddr & 0xFF)) { 2746 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 2747 __func__, i, 2748 vswp->if_addr.ether_addr_octet[i], 2749 (macaddr & 0xFF)); 2750 updated |= MD_macaddr; 2751 break; 2752 } 2753 macaddr >>= 8; 2754 } 2755 RW_EXIT(&vswp->if_lockrw); 2756 } 2757 2758 /* 2759 * Check if switching modes have changed. 2760 */ 2761 if (vsw_get_md_smodes(vswp, mdp, node, 2762 new_smode, &smode_num)) { 2763 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 2764 vswp->instance, smode_propname); 2765 goto fail_reconf; 2766 } else { 2767 ASSERT(smode_num != 0); 2768 if (smode_num != vswp->smode_num) { 2769 D2(vswp, "%s: number of modes changed from %d to %d", 2770 __func__, vswp->smode_num, smode_num); 2771 } 2772 2773 for (i = 0; i < smode_num; i++) { 2774 if (new_smode[i] != vswp->smode[i]) { 2775 D2(vswp, "%s: mode changed from %d to %d", 2776 __func__, vswp->smode[i], new_smode[i]); 2777 updated |= MD_smode; 2778 break; 2779 } 2780 } 2781 } 2782 2783 /* 2784 * Now make any changes which are needed... 2785 */ 2786 2787 if (updated & (MD_physname | MD_smode)) { 2788 /* 2789 * Disconnect all ports from the current card 2790 */ 2791 WRITE_ENTER(&plist->lockrw); 2792 for (port = plist->head; port != NULL; port = port->p_next) { 2793 /* Remove address if was programmed into HW. */ 2794 if (vsw_unset_hw(vswp, port)) { 2795 RW_EXIT(&plist->lockrw); 2796 goto fail_update; 2797 } 2798 } 2799 RW_EXIT(&plist->lockrw); 2800 2801 /* 2802 * Stop, detach the old device.. 
2803 */ 2804 vsw_mac_detach(vswp); 2805 2806 /* 2807 * Update phys name. 2808 */ 2809 if (updated & MD_physname) { 2810 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 2811 vswp->instance, vswp->physname, physname); 2812 (void) strncpy(vswp->physname, 2813 physname, strlen(physname) + 1); 2814 2815 if (strlen(vswp->physname) > 0) 2816 vswp->mdprops |= VSW_MD_PHYSNAME; 2817 } 2818 2819 /* 2820 * Update array with the new switch mode values. 2821 */ 2822 if (updated & MD_smode) { 2823 for (i = 0; i < smode_num; i++) 2824 vswp->smode[i] = new_smode[i]; 2825 2826 vswp->smode_num = smode_num; 2827 vswp->smode_idx = 0; 2828 } 2829 2830 /* 2831 * ..and attach, start the new device. 2832 */ 2833 if (vsw_setup_switching(vswp)) 2834 goto fail_update; 2835 2836 /* 2837 * Connect ports to new card. 2838 */ 2839 WRITE_ENTER(&plist->lockrw); 2840 for (port = plist->head; port != NULL; port = port->p_next) { 2841 if (vsw_set_hw(vswp, port)) { 2842 RW_EXIT(&plist->lockrw); 2843 goto fail_update; 2844 } 2845 } 2846 RW_EXIT(&plist->lockrw); 2847 } 2848 2849 if (updated & MD_macaddr) { 2850 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 2851 vswp->instance, macaddr); 2852 2853 WRITE_ENTER(&vswp->if_lockrw); 2854 for (i = ETHERADDRL - 1; i >= 0; i--) { 2855 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 2856 macaddr >>= 8; 2857 } 2858 RW_EXIT(&vswp->if_lockrw); 2859 2860 /* 2861 * Notify the MAC layer of the changed address. 2862 */ 2863 mac_unicst_update(vswp->if_mh, (uint8_t *)&vswp->if_addr); 2864 } 2865 2866 return; 2867 2868 fail_reconf: 2869 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 2870 return; 2871 2872 fail_update: 2873 cmn_err(CE_WARN, "!vsw%d: update of configuration failed", 2874 vswp->instance); 2875 } 2876 2877 /* 2878 * Add a new port to the system. 2879 * 2880 * Returns 0 on success, 1 on failure. 2881 */ 2882 int 2883 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 2884 { 2885 uint64_t ldc_id; 2886 uint8_t *addrp; 2887 int i, addrsz; 2888 int num_nodes = 0, nchan = 0; 2889 int listsz = 0; 2890 mde_cookie_t *listp = NULL; 2891 struct ether_addr ea; 2892 uint64_t macaddr; 2893 uint64_t inst = 0; 2894 vsw_port_t *port; 2895 2896 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 2897 DWARN(vswp, "%s: prop(%s) not found", __func__, 2898 id_propname); 2899 return (1); 2900 } 2901 2902 /* 2903 * Find the channel endpoint node(s) (which should be under this 2904 * port node) which contain the channel id(s). 
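 *
 * The expected MD layout, for illustration, is:
 *
 *	vsw-port node (its 'id' property is the port instance)
 *	  |
 *	  +-- channel endpoint node(s) (each named by chan_propname,
 *	      with an 'id' property holding the ldc id; only the
 *	      first one found is used below)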
 */
	if ((num_nodes = md_node_count(mdp)) <= 0) {
		DERR(vswp, "%s: invalid number of nodes found (%d)",
		    __func__, num_nodes);
		return (1);
	}

	D2(vswp, "%s: %d nodes found", __func__, num_nodes);

	/* allocate enough space for node list */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

	nchan = md_scan_dag(mdp, *node,
	    md_find_name(mdp, chan_propname),
	    md_find_name(mdp, "fwd"), listp);

	if (nchan <= 0) {
		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
		kmem_free(listp, listsz);
		return (1);
	}

	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);

	/* use property from first node found */
	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
		    id_propname);
		kmem_free(listp, listsz);
		return (1);
	}

	/* don't need the list any more */
	kmem_free(listp, listsz);

	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);

	/* read mac-address property */
	if (md_get_prop_data(mdp, *node, remaddr_propname,
	    &addrp, &addrsz)) {
		DWARN(vswp, "%s: prop(%s) not found",
		    __func__, remaddr_propname);
		return (1);
	}

	if (addrsz < ETHERADDRL) {
		DWARN(vswp, "%s: invalid address size", __func__);
		return (1);
	}

	macaddr = *((uint64_t *)addrp);
	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);

	for (i = ETHERADDRL - 1; i >= 0; i--) {
		ea.ether_addr_octet[i] = macaddr & 0xFF;
		macaddr >>= 8;
	}

	if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) {
		DERR(vswp, "%s: failed to attach port", __func__);
		return (1);
	}

	port = vsw_lookup_port(vswp, (int)inst);

	/* we just successfully created the port, so it should exist */
	ASSERT(port != NULL);

	return (0);
}

/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids,
	struct ether_addr *macaddr)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port, **prev_port;
	int		i;

	D1(vswp, "%s: enter : port %d", __func__, p_instance);

	/* port already exists?
*/ 2993 READ_ENTER(&plist->lockrw); 2994 for (port = plist->head; port != NULL; port = port->p_next) { 2995 if (port->p_instance == p_instance) { 2996 DWARN(vswp, "%s: port instance %d already attached", 2997 __func__, p_instance); 2998 RW_EXIT(&plist->lockrw); 2999 return (1); 3000 } 3001 } 3002 RW_EXIT(&plist->lockrw); 3003 3004 port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 3005 port->p_vswp = vswp; 3006 port->p_instance = p_instance; 3007 port->p_ldclist.num_ldcs = 0; 3008 port->p_ldclist.head = NULL; 3009 port->addr_set = VSW_ADDR_UNSET; 3010 3011 rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL); 3012 3013 mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL); 3014 mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL); 3015 3016 mutex_init(&port->ref_lock, NULL, MUTEX_DRIVER, NULL); 3017 cv_init(&port->ref_cv, NULL, CV_DRIVER, NULL); 3018 3019 mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL); 3020 cv_init(&port->state_cv, NULL, CV_DRIVER, NULL); 3021 port->state = VSW_PORT_INIT; 3022 3023 if (nids > VSW_PORT_MAX_LDCS) { 3024 D2(vswp, "%s: using first of %d ldc ids", 3025 __func__, nids); 3026 nids = VSW_PORT_MAX_LDCS; 3027 } 3028 3029 D2(vswp, "%s: %d nids", __func__, nids); 3030 for (i = 0; i < nids; i++) { 3031 D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]); 3032 if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) { 3033 DERR(vswp, "%s: ldc_attach failed", __func__); 3034 3035 rw_destroy(&port->p_ldclist.lockrw); 3036 3037 cv_destroy(&port->ref_cv); 3038 mutex_destroy(&port->ref_lock); 3039 3040 cv_destroy(&port->state_cv); 3041 mutex_destroy(&port->state_lock); 3042 3043 mutex_destroy(&port->tx_lock); 3044 mutex_destroy(&port->mca_lock); 3045 kmem_free(port, sizeof (vsw_port_t)); 3046 return (1); 3047 } 3048 } 3049 3050 ether_copy(macaddr, &port->p_macaddr); 3051 3052 WRITE_ENTER(&plist->lockrw); 3053 3054 /* create the fdb entry for this port/mac address */ 3055 (void) vsw_add_fdb(vswp, port); 3056 3057 (void) vsw_set_hw(vswp, port); 3058 3059 /* link it into the list of ports for this vsw instance */ 3060 prev_port = (vsw_port_t **)(&plist->head); 3061 port->p_next = *prev_port; 3062 *prev_port = port; 3063 plist->num_ports++; 3064 RW_EXIT(&plist->lockrw); 3065 3066 /* 3067 * Initialise the port and any ldc's under it. 3068 */ 3069 (void) vsw_init_ldcs(port); 3070 3071 D1(vswp, "%s: exit", __func__); 3072 return (0); 3073 } 3074 3075 /* 3076 * Detach the specified port. 3077 * 3078 * Returns 0 on success, 1 on failure. 3079 */ 3080 static int 3081 vsw_port_detach(vsw_t *vswp, int p_instance) 3082 { 3083 vsw_port_t *port = NULL; 3084 vsw_port_list_t *plist = &vswp->plist; 3085 3086 D1(vswp, "%s: enter: port id %d", __func__, p_instance); 3087 3088 WRITE_ENTER(&plist->lockrw); 3089 3090 if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) { 3091 RW_EXIT(&plist->lockrw); 3092 return (1); 3093 } 3094 3095 if (vsw_plist_del_node(vswp, port)) { 3096 RW_EXIT(&plist->lockrw); 3097 return (1); 3098 } 3099 3100 /* Remove address if was programmed into HW. */ 3101 (void) vsw_unset_hw(vswp, port); 3102 3103 /* Remove the fdb entry for this port/mac address */ 3104 (void) vsw_del_fdb(vswp, port); 3105 3106 /* Remove any multicast addresses.. */ 3107 vsw_del_mcst_port(port); 3108 3109 /* 3110 * No longer need to hold writer lock on port list now 3111 * that we have unlinked the target port from the list. 
 */
	RW_EXIT(&plist->lockrw);

	READ_ENTER(&plist->lockrw);

	if (vswp->recfg_reqd)
		(void) vsw_reconfig_hw(vswp);

	RW_EXIT(&plist->lockrw);

	if (vsw_port_delete(port)) {
		return (1);
	}

	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
	return (0);
}

/*
 * Detach all active ports.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_detach_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port = NULL;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&plist->lockrw);

	while ((port = plist->head) != NULL) {
		if (vsw_plist_del_node(vswp, port)) {
			DERR(vswp, "%s: Error deleting port %d"
			    " from port list", __func__,
			    port->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}

		/* Remove the address if it was programmed into HW. */
		(void) vsw_unset_hw(vswp, port);

		/* Remove the fdb entry for this port/mac address */
		(void) vsw_del_fdb(vswp, port);

		/* Remove any multicast addresses.. */
		vsw_del_mcst_port(port);

		/*
		 * No longer need to hold the lock on the port list
		 * now that we have unlinked the target port from the
		 * list.
		 */
		RW_EXIT(&plist->lockrw);
		if (vsw_port_delete(port)) {
			DERR(vswp, "%s: Error deleting port %d",
			    __func__, port->p_instance);
			return (1);
		}
		WRITE_ENTER(&plist->lockrw);
	}
	RW_EXIT(&plist->lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Delete the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_delete(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

	(void) vsw_uninit_ldcs(port);

	/*
	 * Wait for any pending ctrl msg tasks which reference this
	 * port to finish.
	 */
	if (vsw_drain_port_taskq(port))
		return (1);

	/*
	 * Wait for the port reference count to hit zero.
	 */
	mutex_enter(&port->ref_lock);
	while (port->ref_cnt != 0)
		cv_wait(&port->ref_cv, &port->ref_lock);
	mutex_exit(&port->ref_lock);

	/*
	 * Wait for any active callbacks to finish.
	 */
	if (vsw_drain_ldcs(port))
		return (1);

	ldcl = &port->p_ldclist;
	WRITE_ENTER(&ldcl->lockrw);
	while (ldcl->num_ldcs > 0) {
		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {
			cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld",
			    vswp->instance, ldcl->head->ldc_id);
			RW_EXIT(&ldcl->lockrw);
			return (1);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	rw_destroy(&port->p_ldclist.lockrw);

	mutex_destroy(&port->mca_lock);
	mutex_destroy(&port->tx_lock);
	cv_destroy(&port->ref_cv);
	mutex_destroy(&port->ref_lock);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	kmem_free(port, sizeof (vsw_port_t));

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
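 *
 * On failure the code below only unwinds what it has already set up,
 * guided by the 'progress' bitmask; roughly:
 *
 *	PROG_mblks	- destroy the receive mblk pool (or, if that
 *			  fails, park it on vswp->rxh for a later retry)
 *	PROG_callback	- unregister the ldc callback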
3252 */ 3253 static int 3254 vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id) 3255 { 3256 vsw_t *vswp = port->p_vswp; 3257 vsw_ldc_list_t *ldcl = &port->p_ldclist; 3258 vsw_ldc_t *ldcp = NULL; 3259 ldc_attr_t attr; 3260 ldc_status_t istatus; 3261 int status = DDI_FAILURE; 3262 int rv; 3263 enum { PROG_init = 0x0, PROG_mblks = 0x1, 3264 PROG_callback = 0x2} 3265 progress; 3266 3267 progress = PROG_init; 3268 3269 D1(vswp, "%s: enter", __func__); 3270 3271 ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP); 3272 if (ldcp == NULL) { 3273 DERR(vswp, "%s: kmem_zalloc failed", __func__); 3274 return (1); 3275 } 3276 ldcp->ldc_id = ldc_id; 3277 3278 /* allocate pool of receive mblks */ 3279 rv = vio_create_mblks(vsw_num_mblks, vsw_mblk_size, &(ldcp->rxh)); 3280 if (rv) { 3281 DWARN(vswp, "%s: unable to create free mblk pool for" 3282 " channel %ld (rv %d)", __func__, ldc_id, rv); 3283 kmem_free(ldcp, sizeof (vsw_ldc_t)); 3284 return (1); 3285 } 3286 3287 progress |= PROG_mblks; 3288 3289 mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL); 3290 mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL); 3291 mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL); 3292 cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL); 3293 rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL); 3294 rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL); 3295 3296 /* required for handshake with peer */ 3297 ldcp->local_session = (uint64_t)ddi_get_lbolt(); 3298 ldcp->peer_session = 0; 3299 ldcp->session_status = 0; 3300 3301 mutex_init(&ldcp->hss_lock, NULL, MUTEX_DRIVER, NULL); 3302 ldcp->hss_id = 1; /* Initial handshake session id */ 3303 3304 /* only set for outbound lane, inbound set by peer */ 3305 mutex_init(&ldcp->lane_in.seq_lock, NULL, MUTEX_DRIVER, NULL); 3306 mutex_init(&ldcp->lane_out.seq_lock, NULL, MUTEX_DRIVER, NULL); 3307 vsw_set_lane_attr(vswp, &ldcp->lane_out); 3308 3309 attr.devclass = LDC_DEV_NT_SVC; 3310 attr.instance = ddi_get_instance(vswp->dip); 3311 attr.mode = LDC_MODE_UNRELIABLE; 3312 attr.mtu = VSW_LDC_MTU; 3313 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); 3314 if (status != 0) { 3315 DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)", 3316 __func__, ldc_id, status); 3317 goto ldc_attach_fail; 3318 } 3319 3320 status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp); 3321 if (status != 0) { 3322 DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)", 3323 __func__, ldc_id, status); 3324 (void) ldc_fini(ldcp->ldc_handle); 3325 goto ldc_attach_fail; 3326 } 3327 3328 progress |= PROG_callback; 3329 3330 mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL); 3331 3332 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 3333 DERR(vswp, "%s: ldc_status failed", __func__); 3334 mutex_destroy(&ldcp->status_lock); 3335 goto ldc_attach_fail; 3336 } 3337 3338 ldcp->ldc_status = istatus; 3339 ldcp->ldc_port = port; 3340 ldcp->ldc_vswp = vswp; 3341 3342 /* link it into the list of channels for this port */ 3343 WRITE_ENTER(&ldcl->lockrw); 3344 ldcp->ldc_next = ldcl->head; 3345 ldcl->head = ldcp; 3346 ldcl->num_ldcs++; 3347 RW_EXIT(&ldcl->lockrw); 3348 3349 D1(vswp, "%s: exit", __func__); 3350 return (0); 3351 3352 ldc_attach_fail: 3353 mutex_destroy(&ldcp->ldc_txlock); 3354 mutex_destroy(&ldcp->ldc_cblock); 3355 3356 cv_destroy(&ldcp->drain_cv); 3357 3358 rw_destroy(&ldcp->lane_in.dlistrw); 3359 rw_destroy(&ldcp->lane_out.dlistrw); 3360 3361 if (progress & PROG_callback) { 3362 (void) ldc_unreg_callback(ldcp->ldc_handle); 3363 } 3364 3365 if ((progress & PROG_mblks) && 
	    (ldcp->rxh != NULL)) {
		if (vio_destroy_mblks(ldcp->rxh) != 0) {
			/*
			 * Something odd has happened, as the destroy
			 * will only fail if some mblks have been allocated
			 * from the pool already (which shouldn't happen)
			 * and have not been returned.
			 *
			 * Add the pool pointer to a list maintained in
			 * the device instance. Another attempt will be made
			 * to free the pool when the device itself detaches.
			 */
			cmn_err(CE_WARN, "!vsw%d: Creation of ldc channel %ld "
			    "failed and cannot destroy associated mblk "
			    "pool", vswp->instance, ldc_id);
			ldcp->rxh->nextp = vswp->rxh;
			vswp->rxh = ldcp->rxh;
		}
	}
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->hss_lock);

	mutex_destroy(&ldcp->lane_in.seq_lock);
	mutex_destroy(&ldcp->lane_out.seq_lock);
	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (1);
}

/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_t	*ldcp, *prev_ldcp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	int		rv;

	prev_ldcp = NULL;
	for (ldcp = ldcl->head; ldcp != NULL; ldcp = ldcp->ldc_next) {
		if (ldcp->ldc_id == ldc_id) {
			break;
		}
		prev_ldcp = ldcp;
	}

	/* specified ldc id not found */
	if (ldcp == NULL) {
		DERR(vswp, "%s: ldcp = NULL", __func__);
		return (1);
	}

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
	 */
	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	/*
	 * If the close fails we are in serious trouble, as we won't
	 * be able to delete the parent port.
	 */
	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
		DERR(vswp, "%s: error %d closing channel %lld",
		    __func__, rv, ldcp->ldc_id);
		return (1);
	}

	(void) ldc_fini(ldcp->ldc_handle);

	ldcp->ldc_status = LDC_INIT;
	ldcp->ldc_handle = NULL;
	ldcp->ldc_vswp = NULL;

	if (ldcp->rxh != NULL) {
		if (vio_destroy_mblks(ldcp->rxh)) {
			/*
			 * Most likely some mblks are still in use and
			 * have not been returned to the pool. Add the pool
			 * to the list maintained in the device instance.
			 * Another attempt will be made to destroy the pool
			 * when the device detaches.
			 */
			ldcp->rxh->nextp = vswp->rxh;
			vswp->rxh = ldcp->rxh;
		}
	}

	/* unlink the channel from the list */
	if (prev_ldcp == NULL)
		ldcl->head = ldcp->ldc_next;
	else
		prev_ldcp->ldc_next = ldcp->ldc_next;
	ldcl->num_ldcs--;

	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_cblock);
	cv_destroy(&ldcp->drain_cv);
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->hss_lock);
	mutex_destroy(&ldcp->lane_in.seq_lock);
	mutex_destroy(&ldcp->lane_out.seq_lock);
	mutex_destroy(&ldcp->status_lock);
	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (0);
}

/*
 * Open and attempt to bring up the channel. Note that the channel
 * can only be brought up if the peer has also opened the channel.
 *
 * Returns 0 if it can open and bring up the channel, otherwise
 * returns 1.
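 *
 * Note that ldc_up() below is only a request and is non-blocking; the
 * handshake is restarted only once ldc_status() actually reports that
 * the channel has transitioned to LDC_UP.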
3486 */ 3487 static int 3488 vsw_ldc_init(vsw_ldc_t *ldcp) 3489 { 3490 vsw_t *vswp = ldcp->ldc_vswp; 3491 ldc_status_t istatus = 0; 3492 int rv; 3493 3494 D1(vswp, "%s: enter", __func__); 3495 3496 LDC_ENTER_LOCK(ldcp); 3497 3498 /* don't start at 0 in case clients don't like that */ 3499 ldcp->next_ident = 1; 3500 3501 rv = ldc_open(ldcp->ldc_handle); 3502 if (rv != 0) { 3503 DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)", 3504 __func__, ldcp->ldc_id, rv); 3505 LDC_EXIT_LOCK(ldcp); 3506 return (1); 3507 } 3508 3509 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 3510 DERR(vswp, "%s: unable to get status", __func__); 3511 LDC_EXIT_LOCK(ldcp); 3512 return (1); 3513 3514 } else if (istatus != LDC_OPEN && istatus != LDC_READY) { 3515 DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY", 3516 __func__, ldcp->ldc_id, istatus); 3517 LDC_EXIT_LOCK(ldcp); 3518 return (1); 3519 } 3520 3521 mutex_enter(&ldcp->status_lock); 3522 ldcp->ldc_status = istatus; 3523 mutex_exit(&ldcp->status_lock); 3524 3525 rv = ldc_up(ldcp->ldc_handle); 3526 if (rv != 0) { 3527 /* 3528 * Not a fatal error for ldc_up() to fail, as peer 3529 * end point may simply not be ready yet. 3530 */ 3531 D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__, 3532 ldcp->ldc_id, rv); 3533 LDC_EXIT_LOCK(ldcp); 3534 return (1); 3535 } 3536 3537 /* 3538 * ldc_up() call is non-blocking so need to explicitly 3539 * check channel status to see if in fact the channel 3540 * is UP. 3541 */ 3542 mutex_enter(&ldcp->status_lock); 3543 istatus = ldcp->ldc_status; 3544 if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) { 3545 DERR(vswp, "%s: unable to get status", __func__); 3546 mutex_exit(&ldcp->status_lock); 3547 LDC_EXIT_LOCK(ldcp); 3548 return (1); 3549 3550 } 3551 mutex_exit(&ldcp->status_lock); 3552 LDC_EXIT_LOCK(ldcp); 3553 3554 if ((istatus != LDC_UP) && (ldcp->ldc_status == LDC_UP)) { 3555 D2(vswp, "%s: channel %ld now UP (%ld)", __func__, 3556 ldcp->ldc_id, istatus); 3557 vsw_restart_handshake(ldcp); 3558 } 3559 3560 D1(vswp, "%s: exit", __func__); 3561 return (0); 3562 } 3563 3564 /* disable callbacks on the channel */ 3565 static int 3566 vsw_ldc_uninit(vsw_ldc_t *ldcp) 3567 { 3568 vsw_t *vswp = ldcp->ldc_vswp; 3569 int rv; 3570 3571 D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id); 3572 3573 LDC_ENTER_LOCK(ldcp); 3574 3575 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 3576 if (rv != 0) { 3577 DERR(vswp, "vsw_ldc_uninit(%lld): error disabling " 3578 "interrupts (rv = %d)\n", ldcp->ldc_id, rv); 3579 LDC_EXIT_LOCK(ldcp); 3580 return (1); 3581 } 3582 3583 mutex_enter(&ldcp->status_lock); 3584 ldcp->ldc_status = LDC_INIT; 3585 mutex_exit(&ldcp->status_lock); 3586 3587 LDC_EXIT_LOCK(ldcp); 3588 3589 D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id); 3590 3591 return (0); 3592 } 3593 3594 static int 3595 vsw_init_ldcs(vsw_port_t *port) 3596 { 3597 vsw_ldc_list_t *ldcl = &port->p_ldclist; 3598 vsw_ldc_t *ldcp; 3599 3600 READ_ENTER(&ldcl->lockrw); 3601 ldcp = ldcl->head; 3602 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 3603 (void) vsw_ldc_init(ldcp); 3604 } 3605 RW_EXIT(&ldcl->lockrw); 3606 3607 return (0); 3608 } 3609 3610 static int 3611 vsw_uninit_ldcs(vsw_port_t *port) 3612 { 3613 vsw_ldc_list_t *ldcl = &port->p_ldclist; 3614 vsw_ldc_t *ldcp; 3615 3616 D1(NULL, "vsw_uninit_ldcs: enter\n"); 3617 3618 READ_ENTER(&ldcl->lockrw); 3619 ldcp = ldcl->head; 3620 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 3621 (void) vsw_ldc_uninit(ldcp); 3622 } 3623 RW_EXIT(&ldcl->lockrw); 3624 3625 D1(NULL, 
"vsw_uninit_ldcs: exit\n"); 3626 3627 return (0); 3628 } 3629 3630 /* 3631 * Wait until the callback(s) associated with the ldcs under the specified 3632 * port have completed. 3633 * 3634 * Prior to this function being invoked each channel under this port 3635 * should have been quiesced via ldc_set_cb_mode(DISABLE). 3636 * 3637 * A short explaination of what we are doing below.. 3638 * 3639 * The simplest approach would be to have a reference counter in 3640 * the ldc structure which is increment/decremented by the callbacks as 3641 * they use the channel. The drain function could then simply disable any 3642 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately 3643 * there is a tiny window here - before the callback is able to get the lock 3644 * on the channel it is interrupted and this function gets to execute. It 3645 * sees that the ref count is zero and believes its free to delete the 3646 * associated data structures. 3647 * 3648 * We get around this by taking advantage of the fact that before the ldc 3649 * framework invokes a callback it sets a flag to indicate that there is a 3650 * callback active (or about to become active). If when we attempt to 3651 * unregister a callback when this active flag is set then the unregister 3652 * will fail with EWOULDBLOCK. 3653 * 3654 * If the unregister fails we do a cv_timedwait. We will either be signaled 3655 * by the callback as it is exiting (note we have to wait a short period to 3656 * allow the callback to return fully to the ldc framework and it to clear 3657 * the active flag), or by the timer expiring. In either case we again attempt 3658 * the unregister. We repeat this until we can succesfully unregister the 3659 * callback. 3660 * 3661 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch 3662 * the case where the callback has finished but the ldc framework has not yet 3663 * cleared the active flag. In this case we would never get a cv_signal. 3664 */ 3665 static int 3666 vsw_drain_ldcs(vsw_port_t *port) 3667 { 3668 vsw_ldc_list_t *ldcl = &port->p_ldclist; 3669 vsw_ldc_t *ldcp; 3670 vsw_t *vswp = port->p_vswp; 3671 3672 D1(vswp, "%s: enter", __func__); 3673 3674 READ_ENTER(&ldcl->lockrw); 3675 3676 ldcp = ldcl->head; 3677 3678 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 3679 /* 3680 * If we can unregister the channel callback then we 3681 * know that there is no callback either running or 3682 * scheduled to run for this channel so move on to next 3683 * channel in the list. 3684 */ 3685 mutex_enter(&ldcp->drain_cv_lock); 3686 3687 /* prompt active callbacks to quit */ 3688 ldcp->drain_state = VSW_LDC_DRAINING; 3689 3690 if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) { 3691 D2(vswp, "%s: unreg callback for chan %ld", __func__, 3692 ldcp->ldc_id); 3693 mutex_exit(&ldcp->drain_cv_lock); 3694 continue; 3695 } else { 3696 /* 3697 * If we end up here we know that either 1) a callback 3698 * is currently executing, 2) is about to start (i.e. 3699 * the ldc framework has set the active flag but 3700 * has not actually invoked the callback yet, or 3) 3701 * has finished and has returned to the ldc framework 3702 * but the ldc framework has not yet cleared the 3703 * active bit. 3704 * 3705 * Wait for it to finish. 
3706 */ 3707 while (ldc_unreg_callback(ldcp->ldc_handle) 3708 == EWOULDBLOCK) 3709 (void) cv_timedwait(&ldcp->drain_cv, 3710 &ldcp->drain_cv_lock, lbolt + hz); 3711 3712 mutex_exit(&ldcp->drain_cv_lock); 3713 D2(vswp, "%s: unreg callback for chan %ld after " 3714 "timeout", __func__, ldcp->ldc_id); 3715 } 3716 } 3717 RW_EXIT(&ldcl->lockrw); 3718 3719 D1(vswp, "%s: exit", __func__); 3720 return (0); 3721 } 3722 3723 /* 3724 * Wait until all tasks which reference this port have completed. 3725 * 3726 * Prior to this function being invoked each channel under this port 3727 * should have been quiesced via ldc_set_cb_mode(DISABLE). 3728 */ 3729 static int 3730 vsw_drain_port_taskq(vsw_port_t *port) 3731 { 3732 vsw_t *vswp = port->p_vswp; 3733 3734 D1(vswp, "%s: enter", __func__); 3735 3736 /* 3737 * Mark the port as in the process of being detached, and 3738 * dispatch a marker task to the queue so we know when all 3739 * relevant tasks have completed. 3740 */ 3741 mutex_enter(&port->state_lock); 3742 port->state = VSW_PORT_DETACHING; 3743 3744 if ((vswp->taskq_p == NULL) || 3745 (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task, 3746 port, DDI_NOSLEEP) != DDI_SUCCESS)) { 3747 DERR(vswp, "%s: unable to dispatch marker task", 3748 __func__); 3749 mutex_exit(&port->state_lock); 3750 return (1); 3751 } 3752 3753 /* 3754 * Wait for the marker task to finish. 3755 */ 3756 while (port->state != VSW_PORT_DETACHABLE) 3757 cv_wait(&port->state_cv, &port->state_lock); 3758 3759 mutex_exit(&port->state_lock); 3760 3761 D1(vswp, "%s: exit", __func__); 3762 3763 return (0); 3764 } 3765 3766 static void 3767 vsw_marker_task(void *arg) 3768 { 3769 vsw_port_t *port = arg; 3770 vsw_t *vswp = port->p_vswp; 3771 3772 D1(vswp, "%s: enter", __func__); 3773 3774 mutex_enter(&port->state_lock); 3775 3776 /* 3777 * No further tasks should be dispatched which reference 3778 * this port so ok to mark it as safe to detach. 3779 */ 3780 port->state = VSW_PORT_DETACHABLE; 3781 3782 cv_signal(&port->state_cv); 3783 3784 mutex_exit(&port->state_lock); 3785 3786 D1(vswp, "%s: exit", __func__); 3787 } 3788 3789 static vsw_port_t * 3790 vsw_lookup_port(vsw_t *vswp, int p_instance) 3791 { 3792 vsw_port_list_t *plist = &vswp->plist; 3793 vsw_port_t *port; 3794 3795 for (port = plist->head; port != NULL; port = port->p_next) { 3796 if (port->p_instance == p_instance) { 3797 D2(vswp, "vsw_lookup_port: found p_instance\n"); 3798 return (port); 3799 } 3800 } 3801 3802 return (NULL); 3803 } 3804 3805 /* 3806 * Search for and remove the specified port from the port 3807 * list. Returns 0 if able to locate and remove port, otherwise 3808 * returns 1. 3809 */ 3810 static int 3811 vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port) 3812 { 3813 vsw_port_list_t *plist = &vswp->plist; 3814 vsw_port_t *curr_p, *prev_p; 3815 3816 if (plist->head == NULL) 3817 return (1); 3818 3819 curr_p = prev_p = plist->head; 3820 3821 while (curr_p != NULL) { 3822 if (curr_p == port) { 3823 if (prev_p == curr_p) { 3824 plist->head = curr_p->p_next; 3825 } else { 3826 prev_p->p_next = curr_p->p_next; 3827 } 3828 plist->num_ports--; 3829 break; 3830 } else { 3831 prev_p = curr_p; 3832 curr_p = curr_p->p_next; 3833 } 3834 } 3835 return (0); 3836 } 3837 3838 /* 3839 * Interrupt handler for ldc messages. 
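 *
 * A rough map of the event handling below:
 *
 *	LDC_EVT_UP			- restart the handshake if the
 *					  channel has just come up
 *	LDC_EVT_READ			- hand the data off to
 *					  vsw_process_pkt()
 *	LDC_EVT_DOWN | LDC_EVT_RESET	- vsw_restart_ldc(), then restart
 *					  the handshake if the channel came
 *					  back up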
 */
static uint_t
vsw_ldc_cb(uint64_t event, caddr_t arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	ldc_status_t	lstatus;
	int		rv;

	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	mutex_enter(&ldcp->ldc_cblock);

	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		mutex_exit(&ldcp->ldc_cblock);
		return (LDC_SUCCESS);
	}

	mutex_enter(&ldcp->status_lock);
	lstatus = ldcp->ldc_status;
	rv = ldc_status(ldcp->ldc_handle, &ldcp->ldc_status);
	mutex_exit(&ldcp->status_lock);
	if (rv != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read channel state",
		    vswp->instance);
		goto vsw_cb_exit;
	}

	if (event & LDC_EVT_UP) {
		/*
		 * Channel has come up, get the state and then start
		 * the handshake.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
		    __func__, ldcp->ldc_id, event, lstatus);
		D2(vswp, "%s: UP: old status %ld : cur status %ld",
		    __func__, lstatus, ldcp->ldc_status);
		if ((ldcp->ldc_status != lstatus) &&
		    (ldcp->ldc_status == LDC_UP)) {
			ldcp->reset_active = 0;
			vsw_restart_handshake(ldcp);
		}

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		/*
		 * Data available for reading.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) data READ",
		    __func__, ldcp->ldc_id, event);

		vsw_process_pkt(ldcp);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		goto vsw_cb_exit;
	}

	if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) {
		D2(vswp, "%s: id(%ld) event(%llx) DOWN/RESET",
		    __func__, ldcp->ldc_id, event);

		/* attempt to restart the connection */
		vsw_restart_ldc(ldcp);

		/*
		 * vsw_restart_ldc() will attempt to bring the channel
		 * back up. Check here to see if that succeeded.
		 */
		mutex_enter(&ldcp->status_lock);
		lstatus = ldcp->ldc_status;
		rv = ldc_status(ldcp->ldc_handle, &ldcp->ldc_status);
		mutex_exit(&ldcp->status_lock);
		if (rv != 0) {
			DERR(vswp, "%s: unable to read status for channel %ld",
			    __func__, ldcp->ldc_id);
			goto vsw_cb_exit;
		}

		D2(vswp, "%s: id(%ld) event(%llx) DOWN/RESET event:"
		    " old status %ld : cur status %ld", __func__,
		    ldcp->ldc_id, event, lstatus, ldcp->ldc_status);

		/*
		 * If the channel was not previously UP then (re)start the
		 * handshake.
		 */
		if ((ldcp->ldc_status == LDC_UP) && (lstatus != LDC_UP)) {
			D2(vswp, "%s: channel %ld now UP, restarting "
			    "handshake", __func__, ldcp->ldc_id);
			ldcp->reset_active = 0;
			vsw_restart_handshake(ldcp);
		}
	}

	/*
	 * Catch either LDC_EVT_WRITE which we don't support or any
	 * unknown event.
	 */
	if (event &
	    ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) {
		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
	}

vsw_cb_exit:
	mutex_exit(&ldcp->ldc_cblock);

	/*
	 * Let the drain function know we are finishing if it
	 * is waiting.
	 */
	mutex_enter(&ldcp->drain_cv_lock);
	if (ldcp->drain_state == VSW_LDC_DRAINING)
		cv_signal(&ldcp->drain_cv);
	mutex_exit(&ldcp->drain_cv_lock);

	return (LDC_SUCCESS);
}

/*
 * Restart the connection with our peer. Free any existing
 * data structures and then attempt to bring the channel back up.
 */
static void
vsw_restart_ldc(vsw_ldc_t *ldcp)
{
	int		rv;
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_port_t	*port;
	vsw_ldc_list_t	*ldcl;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Check if a reset is already in progress for this channel.
	 */
	if (ldstub((uint8_t *)&ldcp->reset_active))
		return;

	port = ldcp->ldc_port;
	ldcl = &port->p_ldclist;

	READ_ENTER(&ldcl->lockrw);

	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
	    ldcp->lane_in.lstate, ldcp->lane_out.lstate);

	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);
	RW_EXIT(&ldcl->lockrw);

	ldcp->lane_in.lstate = 0;
	ldcp->lane_out.lstate = 0;

	/*
	 * Remove the parent port from any multicast groups
	 * it may have registered with. The client must resend
	 * the multicast add command after the handshake completes.
	 */
	(void) vsw_del_fdb(vswp, port);

	vsw_del_mcst_port(port);

	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hcnt = 0;
	ldcp->hphase = VSW_MILESTONE0;

	rv = ldc_up(ldcp->ldc_handle);
	if (rv != 0) {
		/*
		 * Not a fatal error for ldc_up() to fail, as the peer
		 * end point may simply not be ready yet.
		 */
		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
		    ldcp->ldc_id, rv);
	}

	D1(vswp, "%s: exit", __func__);
}

/*
 * (Re)start a handshake with our peer by sending them
 * our version info.
 */
static void
vsw_restart_handshake(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "vsw_restart_handshake: enter");

	if (ldcp->hphase != VSW_MILESTONE0) {
		vsw_restart_ldc(ldcp);
	}

	/*
	 * We now increment the transaction group id. This allows
	 * us to identify and discard any tasks which are still pending
	 * on the taskq and refer to the handshake session we are about
	 * to restart. These stale messages no longer have any real
	 * meaning.
	 */
	mutex_enter(&ldcp->hss_lock);
	ldcp->hss_id++;
	mutex_exit(&ldcp->hss_lock);

	if (ldcp->hcnt++ > vsw_num_handshakes) {
		cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted "
		    "handshake attempts (%d) on channel %ld",
		    vswp->instance, ldcp->hcnt, ldcp->ldc_id);
		return;
	}

	if ((vswp->taskq_p == NULL) ||
	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
	    DDI_NOSLEEP) != DDI_SUCCESS)) {
		cmn_err(CE_WARN, "!vsw%d: Can't dispatch version handshake "
		    "task", vswp->instance);
	}

	D1(vswp, "vsw_restart_handshake: exit");
}

/*
 * Deal appropriately with an ECONNRESET event encountered in an ldc_*
 * call.
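 *
 * A minimal sketch of the intended calling pattern (illustrative
 * only, with a hypothetical message buffer):
 *
 *	if (ldc_write(ldcp->ldc_handle, (caddr_t)&msg, &msglen)
 *	    == ECONNRESET)
 *		vsw_handle_reset(ldcp);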
 */
static void
vsw_handle_reset(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	ldc_status_t	lstatus;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&ldcp->status_lock);
	lstatus = ldcp->ldc_status;
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to read status for channel %ld",
		    __func__, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}
	mutex_exit(&ldcp->status_lock);

	/*
	 * Check the channel's previous recorded state to
	 * determine if this is the first ECONNRESET event
	 * we've gotten for this particular channel (i.e. was
	 * previously up but is no longer). If so, restart
	 * the channel.
	 */
	if ((ldcp->ldc_status != LDC_UP) && (lstatus == LDC_UP)) {
		vsw_restart_ldc(ldcp);
	}

	/*
	 * vsw_restart_ldc() will also attempt to bring the channel
	 * back up. Check here if that succeeds.
	 */
	mutex_enter(&ldcp->status_lock);
	lstatus = ldcp->ldc_status;
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to read status for channel %ld",
		    __func__, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}
	mutex_exit(&ldcp->status_lock);

	/*
	 * If the channel is now up and no one else (i.e. the callback
	 * routine) has dealt with it then we restart the handshake here.
	 */
	if ((lstatus != LDC_UP) && (ldcp->ldc_status == LDC_UP)) {
		ldcp->reset_active = 0;
		vsw_restart_handshake(ldcp);
	}

	D1(vswp, "%s: exit", __func__);
}

/*
 * Returns 0 if it is legal for the event signified by flag to have
 * occurred at the time it did. Otherwise returns 1.
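 *
 * For example, a VER_ACK (VSW_VER_ACK_RECV) is only legal while a
 * VER_INFO of ours is outstanding, i.e. while VSW_VER_INFO_SENT is
 * set in the lane state; any other arrival restarts the handshake.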
4131  */
4132 int
4133 vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
4134 {
4135 	vsw_t		*vswp = ldcp->ldc_vswp;
4136 	uint64_t	state;
4137 	uint64_t	phase;
4138 
4139 	if (dir == INBOUND)
4140 		state = ldcp->lane_in.lstate;
4141 	else
4142 		state = ldcp->lane_out.lstate;
4143 
4144 	phase = ldcp->hphase;
4145 
4146 	switch (flag) {
4147 	case VSW_VER_INFO_RECV:
4148 		if (phase > VSW_MILESTONE0) {
4149 			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
4150 			    " when in state %d\n", ldcp->ldc_id, phase);
4151 			vsw_restart_handshake(ldcp);
4152 			return (1);
4153 		}
4154 		break;
4155 
4156 	case VSW_VER_ACK_RECV:
4157 	case VSW_VER_NACK_RECV:
4158 		if (!(state & VSW_VER_INFO_SENT)) {
4159 			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK"
4160 			    " or VER_NACK when in state %d\n",
4161 			    ldcp->ldc_id, phase);
4162 			vsw_restart_handshake(ldcp);
4163 			return (1);
4164 		} else
4165 			state &= ~VSW_VER_INFO_SENT;
4166 		break;
4167 
4168 	case VSW_ATTR_INFO_RECV:
4169 		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
4170 			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
4171 			    " when in state %d\n", ldcp->ldc_id, phase);
4172 			vsw_restart_handshake(ldcp);
4173 			return (1);
4174 		}
4175 		break;
4176 
4177 	case VSW_ATTR_ACK_RECV:
4178 	case VSW_ATTR_NACK_RECV:
4179 		if (!(state & VSW_ATTR_INFO_SENT)) {
4180 			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
4181 			    " or ATTR_NACK when in state %d\n",
4182 			    ldcp->ldc_id, phase);
4183 			vsw_restart_handshake(ldcp);
4184 			return (1);
4185 		} else
4186 			state &= ~VSW_ATTR_INFO_SENT;
4187 		break;
4188 
4189 	case VSW_DRING_INFO_RECV:
4190 		if (phase < VSW_MILESTONE1) {
4191 			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
4192 			    " when in state %d\n", ldcp->ldc_id, phase);
4193 			vsw_restart_handshake(ldcp);
4194 			return (1);
4195 		}
4196 		break;
4197 
4198 	case VSW_DRING_ACK_RECV:
4199 	case VSW_DRING_NACK_RECV:
4200 		if (!(state & VSW_DRING_INFO_SENT)) {
4201 			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK"
4202 			    " or DRING_NACK when in state %d\n",
4203 			    ldcp->ldc_id, phase);
4204 			vsw_restart_handshake(ldcp);
4205 			return (1);
4206 		} else
4207 			state &= ~VSW_DRING_INFO_SENT;
4208 		break;
4209 
4210 	case VSW_RDX_INFO_RECV:
4211 		if (phase < VSW_MILESTONE3) {
4212 			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
4213 			    " when in state %d\n", ldcp->ldc_id, phase);
4214 			vsw_restart_handshake(ldcp);
4215 			return (1);
4216 		}
4217 		break;
4218 
4219 	case VSW_RDX_ACK_RECV:
4220 	case VSW_RDX_NACK_RECV:
4221 		if (!(state & VSW_RDX_INFO_SENT)) {
4222 			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK"
4223 			    " or RDX_NACK when in state %d\n",
4224 			    ldcp->ldc_id, phase);
4225 			vsw_restart_handshake(ldcp);
4226 			return (1);
4227 		} else
4228 			state &= ~VSW_RDX_INFO_SENT;
4229 		break;
4230 
4231 	case VSW_MCST_INFO_RECV:
4232 		if (phase < VSW_MILESTONE3) {
4233 			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
4234 			    " when in state %d\n", ldcp->ldc_id, phase);
4235 			vsw_restart_handshake(ldcp);
4236 			return (1);
4237 		}
4238 		break;
4239 
4240 	default:
4241 		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
4242 		    ldcp->ldc_id, flag);
4243 		return (1);
4244 	}
4245 
4246 	if (dir == INBOUND)
4247 		ldcp->lane_in.lstate = state;
4248 	else
4249 		ldcp->lane_out.lstate = state;
4250 
4251 	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);
4252 
4253 	return (0);
4254 }
4255 
4256 void
4257 vsw_next_milestone(vsw_ldc_t *ldcp)
4258 {
4259 	vsw_t		*vswp = ldcp->ldc_vswp;
4260 
4261 	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
4262 	    ldcp->ldc_id, ldcp->hphase);
4263 
4264 	DUMP_FLAGS(ldcp->lane_in.lstate);
4265 	DUMP_FLAGS(ldcp->lane_out.lstate);
4266 
4267 	switch (ldcp->hphase) {
4268 
4269 	case VSW_MILESTONE0:
4270 		/*
4271 		 * If we haven't started to handshake with our peer,
4272 		 * start to do so now.
4273 		 */
4274 		if (ldcp->lane_out.lstate == 0) {
4275 			D2(vswp, "%s: (chan %lld) starting handshake "
4276 			    "with peer", __func__, ldcp->ldc_id);
4277 			vsw_restart_handshake(ldcp);
4278 		}
4279 
4280 		/*
4281 		 * Only way to pass this milestone is to have successfully
4282 		 * negotiated version info.
4283 		 */
4284 		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
4285 		    (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {
4286 
4287 			D2(vswp, "%s: (chan %lld) leaving milestone 0",
4288 			    __func__, ldcp->ldc_id);
4289 
4290 			/*
4291 			 * Next milestone is passed when attribute
4292 			 * information has been successfully exchanged.
4293 			 */
4294 			ldcp->hphase = VSW_MILESTONE1;
4295 			vsw_send_attr(ldcp);
4296 
4297 		}
4298 		break;
4299 
4300 	case VSW_MILESTONE1:
4301 		/*
4302 		 * Only way to pass this milestone is to have successfully
4303 		 * negotiated attribute information.
4304 		 */
4305 		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {
4306 
4307 			ldcp->hphase = VSW_MILESTONE2;
4308 
4309 			/*
4310 			 * If the peer device has said it wishes to
4311 			 * use descriptor rings then we send it our ring
4312 			 * info, otherwise we just set up a private ring
4313 			 * which uses an internal buffer.
4314 			 */
4315 			if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE)
4316 				vsw_send_dring_info(ldcp);
4317 		}
4318 		break;
4319 
4320 	case VSW_MILESTONE2:
4321 		/*
4322 		 * If peer has indicated in its attribute message that
4323 		 * it wishes to use descriptor rings then the only way
4324 		 * to pass this milestone is for us to have received
4325 		 * valid dring info.
4326 		 *
4327 		 * If peer is not using descriptor rings then just fall
4328 		 * through.
4329 		 */
4330 		if ((ldcp->lane_in.xfer_mode == VIO_DRING_MODE) &&
4331 		    (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT)))
4332 			break;
4333 
4334 		D2(vswp, "%s: (chan %lld) leaving milestone 2",
4335 		    __func__, ldcp->ldc_id);
4336 
4337 		ldcp->hphase = VSW_MILESTONE3;
4338 		vsw_send_rdx(ldcp);
4339 		break;
4340 
4341 	case VSW_MILESTONE3:
4342 		/*
4343 		 * Pass this milestone when all parameters have been
4344 		 * successfully exchanged and RDX sent in both directions.
4345 		 *
4346 		 * Mark outbound lane as available to transmit data.
4347 		 */
4348 		if ((ldcp->lane_in.lstate & VSW_RDX_ACK_SENT) &&
4349 		    (ldcp->lane_out.lstate & VSW_RDX_ACK_RECV)) {
4350 
4351 			D2(vswp, "%s: (chan %lld) leaving milestone 3",
4352 			    __func__, ldcp->ldc_id);
4353 			D2(vswp, "%s: ** handshake complete (0x%llx : "
4354 			    "0x%llx) **", __func__, ldcp->lane_in.lstate,
4355 			    ldcp->lane_out.lstate);
4356 			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
4357 			ldcp->hphase = VSW_MILESTONE4;
4358 			ldcp->hcnt = 0;
4359 			DISPLAY_STATE();
4360 		} else {
4361 			D2(vswp, "%s: still in milestone 3 (0x%llx :"
4362 			    " 0x%llx)", __func__, ldcp->lane_in.lstate,
4363 			    ldcp->lane_out.lstate);
4364 		}
4365 		break;
4366 
4367 	case VSW_MILESTONE4:
4368 		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
4369 		    ldcp->ldc_id);
4370 		break;
4371 
4372 	default:
4373 		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
4374 		    ldcp->ldc_id, ldcp->hphase);
4375 	}
4376 
4377 	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
4378 	    ldcp->hphase);
4379 }
4380 
4381 /*
4382  * Check if major version is supported.
4383  *
4384  * Returns 0 if it finds a supported major number, and if necessary
4385  * adjusts the minor field.
4386  *
4387  * Returns 1 if it can't match the major number exactly. Sets major/minor
4388  * to the next lowest supported values, or to zero if no other values
4389  * are possible.
 */
4390 static int
4391 vsw_supported_version(vio_ver_msg_t *vp)
4392 {
4393 	int	i;
4394 
4395 	D1(NULL, "vsw_supported_version: enter");
4396 
4397 	for (i = 0; i < VSW_NUM_VER; i++) {
4398 		if (vsw_versions[i].ver_major == vp->ver_major) {
4399 			/*
4400 			 * Matching or lower major version found. Update
4401 			 * minor number if necessary.
4402 			 */
4403 			if (vp->ver_minor > vsw_versions[i].ver_minor) {
4404 				D2(NULL, "%s: adjusting minor value"
4405 				    " from %d to %d", __func__,
4406 				    vp->ver_minor,
4407 				    vsw_versions[i].ver_minor);
4408 				vp->ver_minor = vsw_versions[i].ver_minor;
4409 			}
4410 
4411 			return (0);
4412 		}
4413 
4414 		if (vsw_versions[i].ver_major < vp->ver_major) {
4415 			if (vp->ver_minor > vsw_versions[i].ver_minor) {
4416 				D2(NULL, "%s: adjusting minor value"
4417 				    " from %d to %d", __func__,
4418 				    vp->ver_minor,
4419 				    vsw_versions[i].ver_minor);
4420 				vp->ver_minor = vsw_versions[i].ver_minor;
4421 			}
4422 			return (1);
4423 		}
4424 	}
4425 
4426 	/* No match was possible, zero out fields */
4427 	vp->ver_major = 0;
4428 	vp->ver_minor = 0;
4429 
4430 	D1(NULL, "vsw_supported_version: exit");
4431 
4432 	return (1);
4433 }
4434 
4435 /*
4436  * Main routine for processing messages received over LDC.
4437  */
4438 static void
4439 vsw_process_pkt(void *arg)
4440 {
4441 	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
4442 	vsw_t		*vswp = ldcp->ldc_vswp;
4443 	size_t		msglen;
4444 	vio_msg_tag_t	tag;
4445 	def_msg_t	dmsg;
4446 	int		rv = 0;
4447 
4448 
4449 	D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
4450 
4451 	/*
4452 	 * If channel is up read messages until channel is empty.
4453 	 */
4454 	do {
4455 		msglen = sizeof (dmsg);
4456 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&dmsg, &msglen);
4457 
4458 		if (rv != 0) {
4459 			DERR(vswp, "%s: ldc_read err id(%lld) rv(%d) "
4460 			    "len(%d)\n", __func__, ldcp->ldc_id,
4461 			    rv, msglen);
4462 		}
4463 
4464 		/* channel has been reset */
4465 		if (rv == ECONNRESET) {
4466 			vsw_handle_reset(ldcp);
4467 			break;
4468 		}
4469 
4470 		if (msglen == 0) {
4471 			D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__,
4472 			    ldcp->ldc_id);
4473 			break;
4474 		}
4475 
4476 		D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__,
4477 		    ldcp->ldc_id, msglen);
4478 
4479 		/*
4480 		 * Figure out what sort of packet we have gotten by
4481 		 * examining the msg tag, and then switch it appropriately.
4482 		 */
4483 		bcopy(&dmsg, &tag, sizeof (vio_msg_tag_t));
4484 
4485 		switch (tag.vio_msgtype) {
4486 		case VIO_TYPE_CTRL:
4487 			vsw_dispatch_ctrl_task(ldcp, &dmsg, tag);
4488 			break;
4489 		case VIO_TYPE_DATA:
4490 			vsw_process_data_pkt(ldcp, &dmsg, tag);
4491 			break;
4492 		case VIO_TYPE_ERR:
4493 			vsw_process_err_pkt(ldcp, &dmsg, tag);
4494 			break;
4495 		default:
4496 			DERR(vswp, "%s: Unknown tag(%lx) "
4497 			    "id(%lx)\n", __func__, tag.vio_msgtype,
			    ldcp->ldc_id);
4498 			break;
4499 		}
4500 	} while (msglen);
4501 
4502 	D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id);
4503 }
4504 
4505 /*
4506  * Dispatch a task to process a VIO control message.
4507  */
4508 static void
4509 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t tag)
4510 {
4511 	vsw_ctrl_task_t	*ctaskp = NULL;
4512 	vsw_port_t	*port = ldcp->ldc_port;
4513 	vsw_t		*vswp = port->p_vswp;
4514 
4515 	D1(vswp, "%s: enter", __func__);
4516 
4517 	/*
4518 	 * We need to handle RDX ACK messages in-band as once they
4519 	 * are exchanged it is possible that we will get an
4520 	 * immediate (legitimate) data packet.
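	 *
	 * (An RDX ACK is identified by the tag fields tested below:
	 * vio_subtype VIO_SUBTYPE_ACK with vio_subtype_env VIO_RDX.
	 * Every VIO message begins with a vio_msg_tag_t, which is why
	 * the dispatcher can peek at the tag before queueing the packet.)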
4521 	 */
4522 	if ((tag.vio_subtype_env == VIO_RDX) &&
4523 	    (tag.vio_subtype == VIO_SUBTYPE_ACK)) {
4524 
4525 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_ACK_RECV))
4526 			return;
4527 
4528 		ldcp->lane_out.lstate |= VSW_RDX_ACK_RECV;
4529 		D2(vswp, "%s (%ld) handling RDX_ACK in place "
4530 		    "(ostate 0x%llx : hphase %d)", __func__,
4531 		    ldcp->ldc_id, ldcp->lane_out.lstate, ldcp->hphase);
4532 		vsw_next_milestone(ldcp);
4533 		return;
4534 	}
4535 
4536 	ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP);
4537 
4538 	if (ctaskp == NULL) {
4539 		DERR(vswp, "%s: unable to alloc space for ctrl"
4540 		    " msg", __func__);
4541 		vsw_restart_handshake(ldcp);
4542 		return;
4543 	}
4544 
4545 	ctaskp->ldcp = ldcp;
4546 	bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t));
4547 	mutex_enter(&ldcp->hss_lock);
4548 	ctaskp->hss_id = ldcp->hss_id;
4549 	mutex_exit(&ldcp->hss_lock);
4550 
4551 	/*
4552 	 * Dispatch task to processing taskq if port is not in
4553 	 * the process of being detached.
4554 	 */
4555 	mutex_enter(&port->state_lock);
4556 	if (port->state == VSW_PORT_INIT) {
4557 		if ((vswp->taskq_p == NULL) ||
4558 		    (ddi_taskq_dispatch(vswp->taskq_p,
4559 		    vsw_process_ctrl_pkt, ctaskp, DDI_NOSLEEP)
4560 		    != DDI_SUCCESS)) {
4561 			DERR(vswp, "%s: unable to dispatch task to taskq",
4562 			    __func__);
4563 			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
4564 			mutex_exit(&port->state_lock);
4565 			vsw_restart_handshake(ldcp);
4566 			return;
4567 		}
4568 	} else {
4569 		DWARN(vswp, "%s: port %d detaching, not dispatching "
4570 		    "task", __func__, port->p_instance);
		/* task was never dispatched, so free it here */
		kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
4571 	}
4572 
4573 	mutex_exit(&port->state_lock);
4574 
4575 	D2(vswp, "%s: dispatched task to taskq for chan %d", __func__,
4576 	    ldcp->ldc_id);
4577 	D1(vswp, "%s: exit", __func__);
4578 }
4579 
4580 /*
4581  * Process a VIO ctrl message. Invoked from taskq.
4582  */
4583 static void
4584 vsw_process_ctrl_pkt(void *arg)
4585 {
4586 	vsw_ctrl_task_t	*ctaskp = (vsw_ctrl_task_t *)arg;
4587 	vsw_ldc_t	*ldcp = ctaskp->ldcp;
4588 	vsw_t		*vswp = ldcp->ldc_vswp;
4589 	vio_msg_tag_t	tag;
4590 	uint16_t	env;
4591 
4592 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
4593 
4594 	bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t));
4595 	env = tag.vio_subtype_env;
4596 
4597 	/* stale pkt check */
4598 	mutex_enter(&ldcp->hss_lock);
4599 	if (ctaskp->hss_id < ldcp->hss_id) {
4600 		DWARN(vswp, "%s: discarding stale packet belonging to"
4601 		    " earlier (%ld) handshake session", __func__,
4602 		    ctaskp->hss_id);
4603 		mutex_exit(&ldcp->hss_lock);
		/* discarded without processing, so free the task here */
		kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
4604 		return;
4605 	}
4606 	mutex_exit(&ldcp->hss_lock);
4607 
4608 	/* session id check */
4609 	if (ldcp->session_status & VSW_PEER_SESSION) {
4610 		if (ldcp->peer_session != tag.vio_sid) {
4611 			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
4612 			    __func__, ldcp->ldc_id, tag.vio_sid);
4613 			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
4614 			vsw_restart_handshake(ldcp);
4615 			return;
4616 		}
4617 	}
4618 
4619 	/*
4620 	 * Switch on vio_subtype envelope, then let lower routines
4621 	 * decide if it's an INFO, ACK or NACK packet.
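	 *
	 * (The envelopes handled below, namely VIO_VER_INFO,
	 * VIO_DRING_REG, VIO_DRING_UNREG, VIO_ATTR_INFO, VNET_MCAST_INFO
	 * and VIO_RDX, cover every control message the handshake
	 * currently uses.)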
4622 	 */
4623 	switch (env) {
4624 	case VIO_VER_INFO:
4625 		vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp);
4626 		break;
4627 	case VIO_DRING_REG:
4628 		vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp);
4629 		break;
4630 	case VIO_DRING_UNREG:
4631 		vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp);
4632 		break;
4633 	case VIO_ATTR_INFO:
4634 		vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp);
4635 		break;
4636 	case VNET_MCAST_INFO:
4637 		vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp);
4638 		break;
4639 	case VIO_RDX:
4640 		vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp);
4641 		break;
4642 	default:
4643 		DERR(vswp, "%s: unknown vio_subtype_env (%x)\n",
4644 		    __func__, env);
4645 	}
4646 
4647 	kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
4648 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
4649 }
4650 
4651 /*
4652  * Version negotiation. We can end up here either because our peer
4653  * has responded to a handshake message we have sent it, or our peer
4654  * has initiated a handshake with us. If it's the former then it can
4655  * only be an ACK or NACK; if it's the latter it can only be an INFO.
4656  *
4657  * If it's an ACK we move to the next stage of the handshake, namely
4658  * attribute exchange. If it's a NACK we see if we can specify another
4659  * version; if we can't we stop.
4660  *
4661  * If it is an INFO we reset all params associated with communication
4662  * in that direction over this channel (remember the connection is
4663  * essentially 2 independent simplex channels).
4664  */
4665 void
4666 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt)
4667 {
4668 	vio_ver_msg_t	*ver_pkt;
4669 	vsw_t		*vswp = ldcp->ldc_vswp;
4670 
4671 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
4672 
4673 	/*
4674 	 * We know this is a ctrl/version packet so
4675 	 * cast it into the correct structure.
4676 	 */
4677 	ver_pkt = (vio_ver_msg_t *)pkt;
4678 
4679 	switch (ver_pkt->tag.vio_subtype) {
4680 	case VIO_SUBTYPE_INFO:
4681 		D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n");
4682 
4683 		/*
4684 		 * Record the session id, which we will use from now
4685 		 * until we see another VER_INFO msg. Even then the
4686 		 * session id in most cases will be unchanged, except
4687 		 * if the channel was reset.
4688 		 */
4689 		if ((ldcp->session_status & VSW_PEER_SESSION) &&
4690 		    (ldcp->peer_session != ver_pkt->tag.vio_sid)) {
4691 			DERR(vswp, "%s: updating session id for chan %lld "
4692 			    "from %llx to %llx", __func__, ldcp->ldc_id,
4693 			    ldcp->peer_session, ver_pkt->tag.vio_sid);
4694 		}
4695 
4696 		ldcp->peer_session = ver_pkt->tag.vio_sid;
4697 		ldcp->session_status |= VSW_PEER_SESSION;
4698 
4699 		/* Legal message at this time ? */
4700 		if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV))
4701 			return;
4702 
4703 		/*
4704 		 * First check the device class. Currently only expect
4705 		 * to be talking to a network device. In the future may
4706 		 * also talk to another switch.
4707 		 */
4708 		if (ver_pkt->dev_class != VDEV_NETWORK) {
4709 			DERR(vswp, "%s: illegal device class %d", __func__,
4710 			    ver_pkt->dev_class);
4711 
4712 			ver_pkt->tag.vio_sid = ldcp->local_session;
4713 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
4714 
4715 			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
4716 
4717 			vsw_send_msg(ldcp, (void *)ver_pkt,
4718 			    sizeof (vio_ver_msg_t));
4719 
4720 			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
4721 			vsw_next_milestone(ldcp);
4722 			return;
4723 		} else {
4724 			ldcp->dev_class = ver_pkt->dev_class;
4725 		}
4726 
4727 		/*
4728 		 * Now check the version.
4729 		 */
4730 		if (vsw_supported_version(ver_pkt) == 0) {
4731 			/*
4732 			 * Support this major version and possibly
4733 			 * adjusted minor version.
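			 *
			 * Example: if the peer proposed 1.5 but the
			 * highest minor we support for major 1 is 0,
			 * vsw_supported_version() will already have
			 * pulled the minor down, so the ACK sent below
			 * carries version 1.0.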
4734 			 */
4735 
4736 			D2(vswp, "%s: accepted ver %d:%d", __func__,
4737 			    ver_pkt->ver_major, ver_pkt->ver_minor);
4738 
4739 			/* Store accepted values */
4740 			ldcp->lane_in.ver_major = ver_pkt->ver_major;
4741 			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
4742 
4743 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
4744 
4745 			ldcp->lane_in.lstate |= VSW_VER_ACK_SENT;
4746 		} else {
4747 			/*
4748 			 * NACK back with the next lower major/minor
4749 			 * pairing we support (if we don't support any
4750 			 * more versions then they will be set to zero).
4751 			 */
4752 
4753 			D2(vswp, "%s: replying with ver %d:%d", __func__,
4754 			    ver_pkt->ver_major, ver_pkt->ver_minor);
4755 
4756 			/* Store updated values */
4757 			ldcp->lane_in.ver_major = ver_pkt->ver_major;
4758 			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
4759 
4760 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
4761 
4762 			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
4763 		}
4764 
4765 		DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
4766 		ver_pkt->tag.vio_sid = ldcp->local_session;
4767 		vsw_send_msg(ldcp, (void *)ver_pkt, sizeof (vio_ver_msg_t));
4768 
4769 		vsw_next_milestone(ldcp);
4770 		break;
4771 
4772 	case VIO_SUBTYPE_ACK:
4773 		D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__);
4774 
4775 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV))
4776 			return;
4777 
4778 		/* Store updated values */
4779 		ldcp->lane_in.ver_major = ver_pkt->ver_major;
4780 		ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
4781 
4782 
4783 		ldcp->lane_out.lstate |= VSW_VER_ACK_RECV;
4784 		vsw_next_milestone(ldcp);
4785 
4786 		break;
4787 
4788 	case VIO_SUBTYPE_NACK:
4789 		D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__);
4790 
4791 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV))
4792 			return;
4793 
4794 		/*
4795 		 * If our peer sent us a NACK with the ver fields set to
4796 		 * zero then there is nothing more we can do. Otherwise see
4797 		 * if we support either the version suggested, or a lesser
4798 		 * one.
4799 		 */
4800 		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
4801 			DERR(vswp, "%s: peer unable to negotiate any "
4802 			    "further.", __func__);
4803 			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
4804 			vsw_next_milestone(ldcp);
4805 			return;
4806 		}
4807 
4808 		/*
4809 		 * Check to see if we support this major version or
4810 		 * a lower one. If we don't then maj/min will be set
4811 		 * to zero.
4812 		 */
4813 		(void) vsw_supported_version(ver_pkt);
4814 		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
4815 			/* Nothing more we can do */
4816 			DERR(vswp, "%s: version negotiation failed.\n",
4817 			    __func__);
4818 			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
4819 			vsw_next_milestone(ldcp);
4820 		} else {
4821 			/* found a supported major version */
4822 			ldcp->lane_out.ver_major = ver_pkt->ver_major;
4823 			ldcp->lane_out.ver_minor = ver_pkt->ver_minor;
4824 
4825 			D2(vswp, "%s: resending with updated values (%x, %x)",
4826 			    __func__, ver_pkt->ver_major,
4827 			    ver_pkt->ver_minor);
4828 
4829 			ldcp->lane_out.lstate |= VSW_VER_INFO_SENT;
4830 			ver_pkt->tag.vio_sid = ldcp->local_session;
4831 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
4832 
4833 			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
4834 
4835 			vsw_send_msg(ldcp, (void *)ver_pkt,
4836 			    sizeof (vio_ver_msg_t));
4837 
4838 			vsw_next_milestone(ldcp);
4839 
4840 		}
4841 		break;
4842 
4843 	default:
4844 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
4845 		    ver_pkt->tag.vio_subtype);
4846 	}
4847 
4848 	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
4849 }
4850 
4851 /*
4852  * Process an attribute packet. We can end up here either because our peer
4853  * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our
4854  * peer has sent us an attribute INFO message.
4855  *
4856  * If it's an ACK we then move to the next stage of the handshake which
4857  * is to send our descriptor ring info to our peer. If it's a NACK then
4858  * there is nothing more we can (currently) do.
4859  *
4860  * If we get a valid/acceptable INFO packet (and we have already negotiated
4861  * a version) we ACK back and set channel state to ATTR_RECV, otherwise we
4862  * NACK back and reset channel state to INACTIVE.
4863  *
4864  * FUTURE: in time we will probably negotiate over attributes, but for
4865  * the moment unacceptable attributes are regarded as a fatal error.
4866  *
4867  */
4868 void
4869 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt)
4870 {
4871 	vnet_attr_msg_t		*attr_pkt;
4872 	vsw_t			*vswp = ldcp->ldc_vswp;
4873 	vsw_port_t		*port = ldcp->ldc_port;
4874 	uint64_t		macaddr = 0;
4875 	int			i;
4876 
4877 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
4878 
4879 	/*
4880 	 * We know this is a ctrl/attr packet so
4881 	 * cast it into the correct structure.
4882 	 */
4883 	attr_pkt = (vnet_attr_msg_t *)pkt;
4884 
4885 	switch (attr_pkt->tag.vio_subtype) {
4886 	case VIO_SUBTYPE_INFO:
4887 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
4888 
4889 		if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV))
4890 			return;
4891 
4892 		/*
4893 		 * If the attributes are unacceptable then we NACK back.
4894 		 */
4895 		if (vsw_check_attr(attr_pkt, ldcp->ldc_port)) {
4896 
4897 			DERR(vswp, "%s (chan %d): invalid attributes",
4898 			    __func__, ldcp->ldc_id);
4899 
4900 			vsw_free_lane_resources(ldcp, INBOUND);
4901 
4902 			attr_pkt->tag.vio_sid = ldcp->local_session;
4903 			attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
4904 
4905 			DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
4906 			ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT;
4907 			vsw_send_msg(ldcp, (void *)attr_pkt,
4908 			    sizeof (vnet_attr_msg_t));
4909 
4910 			vsw_next_milestone(ldcp);
4911 			return;
4912 		}
4913 
4914 		/*
4915 		 * Otherwise store attributes for this lane and update
4916 		 * lane state.
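		 *
		 * Note that the peer's MAC address arrives packed into
		 * a 64-bit integer; the loop below unpacks it into
		 * p_macaddr.ether_addr_octet[], placing the least
		 * significant byte in the final octet.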
4917 		 */
4918 		ldcp->lane_in.mtu = attr_pkt->mtu;
4919 		ldcp->lane_in.addr = attr_pkt->addr;
4920 		ldcp->lane_in.addr_type = attr_pkt->addr_type;
4921 		ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode;
4922 		ldcp->lane_in.ack_freq = attr_pkt->ack_freq;
4923 
4924 		macaddr = ldcp->lane_in.addr;
4925 		for (i = ETHERADDRL - 1; i >= 0; i--) {
4926 			port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF;
4927 			macaddr >>= 8;
4928 		}
4929 
4930 		/* create the fdb entry for this port/mac address */
4931 		(void) vsw_add_fdb(vswp, port);
4932 
4933 		/* set up device-specific xmit routines */
4934 		mutex_enter(&port->tx_lock);
4935 		if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) {
4936 			D2(vswp, "%s: mode = VIO_DRING_MODE", __func__);
4937 			port->transmit = vsw_dringsend;
4938 		} else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) {
4939 			D2(vswp, "%s: mode = VIO_DESC_MODE", __func__);
4940 			vsw_create_privring(ldcp);
4941 			port->transmit = vsw_descrsend;
4942 		}
4943 		mutex_exit(&port->tx_lock);
4944 
4945 		attr_pkt->tag.vio_sid = ldcp->local_session;
4946 		attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
4947 
4948 		DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
4949 
4950 		ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT;
4951 
4952 		vsw_send_msg(ldcp, (void *)attr_pkt,
4953 		    sizeof (vnet_attr_msg_t));
4954 
4955 		vsw_next_milestone(ldcp);
4956 		break;
4957 
4958 	case VIO_SUBTYPE_ACK:
4959 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
4960 
4961 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV))
4962 			return;
4963 
4964 		ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV;
4965 		vsw_next_milestone(ldcp);
4966 		break;
4967 
4968 	case VIO_SUBTYPE_NACK:
4969 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
4970 
4971 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV))
4972 			return;
4973 
4974 		ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV;
4975 		vsw_next_milestone(ldcp);
4976 		break;
4977 
4978 	default:
4979 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
4980 		    attr_pkt->tag.vio_subtype);
4981 	}
4982 
4983 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
4984 }
4985 
4986 /*
4987  * Process a dring info packet. We can end up here either because our peer
4988  * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our
4989  * peer has sent us a dring INFO message.
4990  *
4991  * If we get a valid/acceptable INFO packet (and we have already negotiated
4992  * a version) we ACK back and update the lane state, otherwise we NACK back.
4993  *
4994  * FUTURE: nothing to stop client from sending us info on multiple drings
4995  * but for the moment we will just use the first one we are given.
4996  *
4997  */
4998 void
4999 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt)
5000 {
5001 	vio_dring_reg_msg_t	*dring_pkt;
5002 	vsw_t			*vswp = ldcp->ldc_vswp;
5003 	ldc_mem_info_t		minfo;
5004 	dring_info_t		*dp, *dbp;
5005 	int			dring_found = 0;
5006 
5007 	/*
5008 	 * We know this is a ctrl/dring packet so
5009 	 * cast it into the correct structure.
5010 	 */
5011 	dring_pkt = (vio_dring_reg_msg_t *)pkt;
5012 
5013 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
5014 
5015 	switch (dring_pkt->tag.vio_subtype) {
5016 	case VIO_SUBTYPE_INFO:
5017 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
5018 
5019 		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
5020 			return;
5021 
5022 		/*
5023 		 * If the dring params are unacceptable then we NACK back.
5024 		 */
5025 		if (vsw_check_dring_info(dring_pkt)) {
5026 
5027 			DERR(vswp, "%s (%lld): invalid dring info",
5028 			    __func__, ldcp->ldc_id);
5029 
5030 			vsw_free_lane_resources(ldcp, INBOUND);
5031 
5032 			dring_pkt->tag.vio_sid = ldcp->local_session;
5033 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
5034 
5035 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
5036 
5037 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
5038 
5039 			vsw_send_msg(ldcp, (void *)dring_pkt,
5040 			    sizeof (vio_dring_reg_msg_t));
5041 
5042 			vsw_next_milestone(ldcp);
5043 			return;
5044 		}
5045 
5046 		/*
5047 		 * Otherwise, attempt to map in the dring using the
5048 		 * cookie. If that succeeds we send back a unique dring
5049 		 * identifier that the sending side will use in future
5050 		 * to refer to this descriptor ring.
5051 		 */
5052 		dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
5053 
5054 		dp->num_descriptors = dring_pkt->num_descriptors;
5055 		dp->descriptor_size = dring_pkt->descriptor_size;
5056 		dp->options = dring_pkt->options;
5057 		dp->ncookies = dring_pkt->ncookies;
5058 
5059 		/*
5060 		 * Note: should only get one cookie. Enforced in
5061 		 * the ldc layer.
5062 		 */
5063 		bcopy(&dring_pkt->cookie[0], &dp->cookie[0],
5064 		    sizeof (ldc_mem_cookie_t));
5065 
5066 		D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__,
5067 		    dp->num_descriptors, dp->descriptor_size);
5068 		D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__,
5069 		    dp->options, dp->ncookies);
5070 
5071 		if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0],
5072 		    dp->ncookies, dp->num_descriptors,
5073 		    dp->descriptor_size, LDC_SHADOW_MAP,
5074 		    &(dp->handle))) != 0) {
5075 
5076 			DERR(vswp, "%s: dring_map failed\n", __func__);
5077 
5078 			kmem_free(dp, sizeof (dring_info_t));
5079 			vsw_free_lane_resources(ldcp, INBOUND);
5080 
5081 			dring_pkt->tag.vio_sid = ldcp->local_session;
5082 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
5083 
5084 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
5085 
5086 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
5087 			vsw_send_msg(ldcp, (void *)dring_pkt,
5088 			    sizeof (vio_dring_reg_msg_t));
5089 
5090 			vsw_next_milestone(ldcp);
5091 			return;
5092 		}
5093 
5094 		if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
5095 
5096 			DERR(vswp, "%s: dring_info failed\n", __func__);
5097 
5098 			kmem_free(dp, sizeof (dring_info_t));
5099 			vsw_free_lane_resources(ldcp, INBOUND);
5100 
5101 			dring_pkt->tag.vio_sid = ldcp->local_session;
5102 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
5103 
5104 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
5105 
5106 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
5107 			vsw_send_msg(ldcp, (void *)dring_pkt,
5108 			    sizeof (vio_dring_reg_msg_t));
5109 
5110 			vsw_next_milestone(ldcp);
5111 			return;
5112 		} else {
5113 			/* store the address of the pub part of ring */
5114 			dp->pub_addr = minfo.vaddr;
5115 		}
5116 
5117 		/* no private section as we are importing */
5118 		dp->priv_addr = NULL;
5119 
5120 		/*
5121 		 * Using a simple monotonically increasing int for ident
5122 		 * at the moment.
5123 		 */
5124 		dp->ident = ldcp->next_ident;
5125 		ldcp->next_ident++;
5126 
5127 		dp->end_idx = 0;
5128 		dp->next = NULL;
5129 
5130 		/*
5131 		 * Link it onto the end of the list of drings
5132 		 * for this lane.
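		 *
		 * (The list is singly linked through dp->next; since
		 * only one dring per lane is expected today, as noted
		 * above, the walk below is short.)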
5133 		 */
5134 		if (ldcp->lane_in.dringp == NULL) {
5135 			D2(vswp, "%s: adding first INBOUND dring", __func__);
5136 			ldcp->lane_in.dringp = dp;
5137 		} else {
5138 			dbp = ldcp->lane_in.dringp;
5139 
5140 			while (dbp->next != NULL)
5141 				dbp = dbp->next;
5142 
5143 			dbp->next = dp;
5144 		}
5145 
5146 		/* acknowledge it */
5147 		dring_pkt->tag.vio_sid = ldcp->local_session;
5148 		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
5149 		dring_pkt->dring_ident = dp->ident;
5150 
5151 		vsw_send_msg(ldcp, (void *)dring_pkt,
5152 		    sizeof (vio_dring_reg_msg_t));
5153 
5154 		ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT;
5155 		vsw_next_milestone(ldcp);
5156 		break;
5157 
5158 	case VIO_SUBTYPE_ACK:
5159 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
5160 
5161 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV))
5162 			return;
5163 
5164 		/*
5165 		 * Peer is acknowledging our dring info and will have
5166 		 * sent us a dring identifier which we will use to
5167 		 * refer to this ring w.r.t. our peer.
5168 		 */
5169 		dp = ldcp->lane_out.dringp;
5170 		if (dp != NULL) {
5171 			/*
5172 			 * Find the ring this ident should be associated
5173 			 * with.
5174 			 */
5175 			if (vsw_dring_match(dp, dring_pkt)) {
5176 				dring_found = 1;
5177 
5178 			} else while (dp != NULL) {
5179 				if (vsw_dring_match(dp, dring_pkt)) {
5180 					dring_found = 1;
5181 					break;
5182 				}
5183 				dp = dp->next;
5184 			}
5185 
5186 			if (dring_found == 0) {
5187 				DERR(NULL, "%s: unrecognised ring cookie",
5188 				    __func__);
5189 				vsw_restart_handshake(ldcp);
5190 				return;
5191 			}
5192 
5193 		} else {
5194 			DERR(vswp, "%s: DRING ACK received but no drings "
5195 			    "allocated", __func__);
5196 			vsw_restart_handshake(ldcp);
5197 			return;
5198 		}
5199 
5200 		/* store ident */
5201 		dp->ident = dring_pkt->dring_ident;
5202 		ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV;
5203 		vsw_next_milestone(ldcp);
5204 		break;
5205 
5206 	case VIO_SUBTYPE_NACK:
5207 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
5208 
5209 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV))
5210 			return;
5211 
5212 		ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV;
5213 		vsw_next_milestone(ldcp);
5214 		break;
5215 
5216 	default:
5217 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
5218 		    dring_pkt->tag.vio_subtype);
5219 	}
5220 
5221 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
5222 }
5223 
5224 /*
5225  * Process a request from peer to unregister a dring.
5226  *
5227  * For the moment we just restart the handshake if our
5228  * peer endpoint attempts to unregister a dring.
5229  */
5230 void
5231 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt)
5232 {
5233 	vsw_t			*vswp = ldcp->ldc_vswp;
5234 	vio_dring_unreg_msg_t	*dring_pkt;
5235 
5236 	/*
5237 	 * We know this is a ctrl/dring packet so
5238 	 * cast it into the correct structure.
5239 	 */
5240 	dring_pkt = (vio_dring_unreg_msg_t *)pkt;
5241 
5242 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
5243 
5244 	switch (dring_pkt->tag.vio_subtype) {
5245 	case VIO_SUBTYPE_INFO:
5246 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
5247 
5248 		DWARN(vswp, "%s: restarting handshake...", __func__);
5249 		vsw_restart_handshake(ldcp);
5250 		break;
5251 
5252 	case VIO_SUBTYPE_ACK:
5253 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
5254 
5255 		DWARN(vswp, "%s: restarting handshake...", __func__);
5256 		vsw_restart_handshake(ldcp);
5257 		break;
5258 
5259 	case VIO_SUBTYPE_NACK:
5260 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
5261 
5262 		DWARN(vswp, "%s: restarting handshake...", __func__);
5263 		vsw_restart_handshake(ldcp);
5264 		break;
5265 
5266 	default:
5267 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
5268 		    dring_pkt->tag.vio_subtype);
5269 		vsw_restart_handshake(ldcp);
5270 	}
5271 
5272 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
5273 }
5274 
5275 #define	SND_MCST_NACK(ldcp, pkt) \
5276 	pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
5277 	pkt->tag.vio_sid = ldcp->local_session; \
5278 	vsw_send_msg(ldcp, (void *)pkt, sizeof (vnet_mcast_msg_t));
5279 
5280 /*
5281  * Process a multicast request from a vnet.
5282  *
5283  * Vnets specify a multicast address that they are interested in. This
5284  * address is used as a key into the hash table which forms the multicast
5285  * forwarding database (mFDB).
5286  *
5287  * The table keys are the multicast addresses, while the table entries
5288  * are pointers to lists of ports which wish to receive packets for the
5289  * specified multicast address.
5290  *
5291  * When a multicast packet is being switched we use the address as a key
5292  * into the hash table, and then walk the appropriate port list forwarding
5293  * the pkt to each port in turn.
5294  *
5295  * If a vnet is no longer interested in a particular multicast grouping
5296  * we simply find the correct location in the hash table and then delete
5297  * the relevant port from the port list.
5298  *
5299  * To deal with the case whereby a port is being deleted without first
5300  * removing itself from the lists in the hash table, we maintain a list
5301  * of multicast addresses the port has registered an interest in, within
5302  * the port structure itself. We then simply walk that list of addresses
5303  * using them as keys into the hash table and remove the port from the
5304  * appropriate lists.
5305  */
5306 static void
5307 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt)
5308 {
5309 	vnet_mcast_msg_t	*mcst_pkt;
5310 	vsw_port_t		*port = ldcp->ldc_port;
5311 	vsw_t			*vswp = ldcp->ldc_vswp;
5312 	int			i;
5313 
5314 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
5315 
5316 	/*
5317 	 * We know this is a ctrl/mcast packet so
5318 	 * cast it into the correct structure.
5319 	 */
5320 	mcst_pkt = (vnet_mcast_msg_t *)pkt;
5321 
5322 	switch (mcst_pkt->tag.vio_subtype) {
5323 	case VIO_SUBTYPE_INFO:
5324 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
5325 
5326 		/*
5327 		 * Check if in correct state to receive a multicast
5328 		 * message (i.e. handshake complete). If not reset
5329 		 * the handshake.
5330 		 */
5331 		if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV))
5332 			return;
5333 
5334 		/*
5335 		 * Before attempting to add or remove address check
5336 		 * that they are valid multicast addresses.
5337 		 * If not, then NACK back.
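		 *
		 * (A valid Ethernet multicast address has the group bit,
		 * the least significant bit of the first octet, set;
		 * that is exactly what the test below checks for.)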
5338 		 */
5339 		for (i = 0; i < mcst_pkt->count; i++) {
5340 			if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) {
5341 				DERR(vswp, "%s: invalid multicast address",
5342 				    __func__);
5343 				SND_MCST_NACK(ldcp, mcst_pkt);
5344 				return;
5345 			}
5346 		}
5347 
5348 		/*
5349 		 * Now add/remove the addresses. If this fails we
5350 		 * NACK back.
5351 		 */
5352 		if (vsw_add_rem_mcst(mcst_pkt, port) != 0) {
5353 			SND_MCST_NACK(ldcp, mcst_pkt);
5354 			return;
5355 		}
5356 
5357 		mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
5358 		mcst_pkt->tag.vio_sid = ldcp->local_session;
5359 
5360 		DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt);
5361 
5362 		vsw_send_msg(ldcp, (void *)mcst_pkt,
5363 		    sizeof (vnet_mcast_msg_t));
5364 		break;
5365 
5366 	case VIO_SUBTYPE_ACK:
5367 		DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
5368 
5369 		/*
5370 		 * We shouldn't ever get a multicast ACK message as
5371 		 * at the moment we never request multicast addresses
5372 		 * to be set on some other device. This may change in
5373 		 * the future if we have cascading switches.
5374 		 */
5375 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV))
5376 			return;
5377 
5378 		/* Do nothing */
5379 		break;
5380 
5381 	case VIO_SUBTYPE_NACK:
5382 		DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
5383 
5384 		/*
5385 		 * We shouldn't get a multicast NACK packet for the
5386 		 * same reasons as we shouldn't get an ACK packet.
5387 		 */
5388 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV))
5389 			return;
5390 
5391 		/* Do nothing */
5392 		break;
5393 
5394 	default:
5395 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
5396 		    mcst_pkt->tag.vio_subtype);
5397 	}
5398 
5399 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
5400 }
5401 
5402 static void
5403 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt)
5404 {
5405 	vio_rdx_msg_t	*rdx_pkt;
5406 	vsw_t		*vswp = ldcp->ldc_vswp;
5407 
5408 	/*
5409 	 * We know this is a ctrl/rdx packet so
5410 	 * cast it into the correct structure.
5411 	 */
5412 	rdx_pkt = (vio_rdx_msg_t *)pkt;
5413 
5414 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
5415 
5416 	switch (rdx_pkt->tag.vio_subtype) {
5417 	case VIO_SUBTYPE_INFO:
5418 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
5419 
5420 		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_INFO_RECV))
5421 			return;
5422 
5423 		rdx_pkt->tag.vio_sid = ldcp->local_session;
5424 		rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
5425 
5426 		DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt);
5427 
5428 		ldcp->lane_in.lstate |= VSW_RDX_ACK_SENT;
5429 
5430 		vsw_send_msg(ldcp, (void *)rdx_pkt,
5431 		    sizeof (vio_rdx_msg_t));
5432 
5433 		vsw_next_milestone(ldcp);
5434 		break;
5435 
5436 	case VIO_SUBTYPE_ACK:
5437 		/*
5438 		 * Should be handled in-band by callback handler.
5439 		 */
5440 		DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__);
5441 		vsw_restart_handshake(ldcp);
5442 		break;
5443 
5444 	case VIO_SUBTYPE_NACK:
5445 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
5446 
5447 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_NACK_RECV))
5448 			return;
5449 
5450 		ldcp->lane_out.lstate |= VSW_RDX_NACK_RECV;
5451 		vsw_next_milestone(ldcp);
5452 		break;
5453 
5454 	default:
5455 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
5456 		    rdx_pkt->tag.vio_subtype);
5457 	}
5458 
5459 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
5460 }
5461 
5462 static void
5463 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t tag)
5464 {
5465 	uint16_t	env = tag.vio_subtype_env;
5466 	vsw_t		*vswp = ldcp->ldc_vswp;
5467 
5468 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
5469 
5470 	/* session id check */
5471 	if (ldcp->session_status & VSW_PEER_SESSION) {
5472 		if (ldcp->peer_session != tag.vio_sid) {
5473 			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
5474 			    __func__, ldcp->ldc_id, tag.vio_sid);
5475 			vsw_restart_handshake(ldcp);
5476 			return;
5477 		}
5478 	}
5479 
5480 	/*
5481 	 * It is an error for us to be getting data packets
5482 	 * before the handshake has completed.
5483 	 */
5484 	if (ldcp->hphase != VSW_MILESTONE4) {
5485 		DERR(vswp, "%s: got data packet before handshake complete "
5486 		    "hphase %d (%x: %x)", __func__, ldcp->hphase,
5487 		    ldcp->lane_in.lstate, ldcp->lane_out.lstate);
5488 		DUMP_FLAGS(ldcp->lane_in.lstate);
5489 		DUMP_FLAGS(ldcp->lane_out.lstate);
5490 		vsw_restart_handshake(ldcp);
5491 		return;
5492 	}
5493 
5494 	/*
5495 	 * Switch on vio_subtype envelope, then let lower routines
5496 	 * decide if it's an INFO, ACK or NACK packet.
5497 	 */
5498 	if (env == VIO_DRING_DATA) {
5499 		vsw_process_data_dring_pkt(ldcp, dpkt);
5500 	} else if (env == VIO_PKT_DATA) {
5501 		vsw_process_data_raw_pkt(ldcp, dpkt);
5502 	} else if (env == VIO_DESC_DATA) {
5503 		vsw_process_data_ibnd_pkt(ldcp, dpkt);
5504 	} else {
5505 		DERR(vswp, "%s : unknown vio_subtype_env (%x)\n",
5506 		    __func__, env);
5507 	}
5508 
5509 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
5510 }
5511 
5512 #define	SND_DRING_NACK(ldcp, pkt) \
5513 	pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
5514 	pkt->tag.vio_sid = ldcp->local_session; \
5515 	vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_dring_msg_t));
5516 
5517 static void
5518 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt)
5519 {
5520 	vio_dring_msg_t		*dring_pkt;
5521 	vnet_public_desc_t	*pub_addr = NULL;
5522 	vsw_private_desc_t	*priv_addr = NULL;
5523 	dring_info_t		*dp = NULL;
5524 	vsw_t			*vswp = ldcp->ldc_vswp;
5525 	mblk_t			*mp = NULL;
5526 	mblk_t			*bp = NULL;
5527 	mblk_t			*bpt = NULL;
5528 	size_t			nbytes = 0;
5529 	size_t			off = 0;
5530 	uint64_t		ncookies = 0;
5531 	uint64_t		chain = 0;
5532 	uint64_t		j, len;
5533 	uint32_t		pos, start, datalen;
5534 	uint32_t		range_start, range_end;
5535 	int32_t			end, num, cnt = 0;
5536 	int			i, rv;
5537 	boolean_t		ack_needed = B_FALSE;
5538 	boolean_t		prev_desc_ack = B_FALSE;
5539 	int			read_attempts = 0;
5540 
5541 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
5542 
5543 	/*
5544 	 * We know this is a data/dring packet so
5545 	 * cast it into the correct structure.
5546 	 */
5547 	dring_pkt = (vio_dring_msg_t *)dpkt;
5548 
5549 	/*
5550 	 * Switch on the vio_subtype. If it's INFO then we need to
5551 	 * process the data. If it's an ACK we need to make sure
5552 	 * it makes sense (i.e. did we send an earlier data/info),
5553 	 * and if it's a NACK then we may attempt a retry.
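	 *
	 * Ranges may wrap around the end of the ring, so the number of
	 * descriptors in a range is computed as
	 *
	 *	num = (end >= start) ? end - start + 1
	 *	    : (len - start + 1) + end;
	 *
	 * e.g. with len = 8, start = 6 and end = 1 this yields 4
	 * descriptors: 6, 7, 0 and 1.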
5554 	 */
5555 	switch (dring_pkt->tag.vio_subtype) {
5556 	case VIO_SUBTYPE_INFO:
5557 		D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);
5558 
5559 		READ_ENTER(&ldcp->lane_in.dlistrw);
5560 		if ((dp = vsw_ident2dring(&ldcp->lane_in,
5561 		    dring_pkt->dring_ident)) == NULL) {
5562 			RW_EXIT(&ldcp->lane_in.dlistrw);
5563 
5564 			DERR(vswp, "%s(%lld): unable to find dring from "
5565 			    "ident 0x%llx", __func__, ldcp->ldc_id,
5566 			    dring_pkt->dring_ident);
5567 
5568 			SND_DRING_NACK(ldcp, dring_pkt);
5569 			return;
5570 		}
5571 
5572 		start = pos = dring_pkt->start_idx;
5573 		end = dring_pkt->end_idx;
5574 		len = dp->num_descriptors;
5575 
5576 		range_start = range_end = pos;
5577 
5578 		D2(vswp, "%s(%lld): start index %ld : end %ld\n",
5579 		    __func__, ldcp->ldc_id, start, end);
5580 
5581 		if (end == -1) {
5582 			num = -1;
5583 		} else if (end >= 0) {
5584 			num = end >= pos ?
5585 			    end - pos + 1 : (len - pos + 1) + end;
5586 
5587 			/* basic sanity check */
5588 			if (end > len) {
5589 				RW_EXIT(&ldcp->lane_in.dlistrw);
5590 				DERR(vswp, "%s(%lld): endpoint %lld outside "
5591 				    "ring length %lld", __func__,
5592 				    ldcp->ldc_id, end, len);
5593 
5594 				SND_DRING_NACK(ldcp, dring_pkt);
5595 				return;
5596 			}
5597 		} else {
5598 			RW_EXIT(&ldcp->lane_in.dlistrw);
5599 			DERR(vswp, "%s(%lld): invalid endpoint %lld",
5600 			    __func__, ldcp->ldc_id, end);
5601 			SND_DRING_NACK(ldcp, dring_pkt);
5602 			return;
5603 		}
5604 
5605 		while (cnt != num) {
5606 vsw_recheck_desc:
5607 			if ((rv = ldc_mem_dring_acquire(dp->handle,
5608 			    pos, pos)) != 0) {
5609 				RW_EXIT(&ldcp->lane_in.dlistrw);
5610 				DERR(vswp, "%s(%lld): unable to acquire "
5611 				    "descriptor at pos %d: err %d",
5612 				    __func__, ldcp->ldc_id, pos, rv);
5613 				SND_DRING_NACK(ldcp, dring_pkt);
5614 				return;
5615 			}
5616 
5617 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos;
5618 
5619 			/*
5620 			 * When given a bounded range of descriptors
5621 			 * to process, it's an error to hit a descriptor
5622 			 * which is not ready. In the non-bounded case
5623 			 * (end_idx == -1) this simply indicates we have
5624 			 * reached the end of the current active range.
5625 			 */
5626 			if (pub_addr->hdr.dstate != VIO_DESC_READY) {
5627 				/* unbound - no error */
5628 				if (end == -1) {
5629 					if (read_attempts == vsw_read_attempts)
5630 						break;
5631 
5632 					delay(drv_usectohz(vsw_desc_delay));
5633 					read_attempts++;
5634 					goto vsw_recheck_desc;
5635 				}
5636 
5637 				/* bounded - error - so NACK back */
5638 				RW_EXIT(&ldcp->lane_in.dlistrw);
5639 				DERR(vswp, "%s(%lld): descriptor not READY "
5640 				    "(%d)", __func__, ldcp->ldc_id,
5641 				    pub_addr->hdr.dstate);
5642 				SND_DRING_NACK(ldcp, dring_pkt);
5643 				return;
5644 			}
5645 
5646 			DTRACE_PROBE1(read_attempts, int, read_attempts);
5647 
5648 			range_end = pos;
5649 
5650 			/*
5651 			 * If we ACK'd the previous descriptor then now
5652 			 * record the new range start position for later
5653 			 * ACK's.
5654 			 */
5655 			if (prev_desc_ack) {
5656 				range_start = pos;
5657 
5658 				D2(vswp, "%s(%lld): updating range start "
5659 				    "to be %d", __func__, ldcp->ldc_id,
5660 				    range_start);
5661 
5662 				prev_desc_ack = B_FALSE;
5663 			}
5664 
5665 			/*
5666 			 * Data is padded to align on 8 byte boundary,
5667 			 * datalen is actual data length, i.e. minus that
5668 			 * padding.
5669 			 */
5670 			datalen = pub_addr->nbytes;
5671 
5672 			/*
5673 			 * Does peer wish us to ACK when we have finished
5674 			 * with this descriptor?
5675 			 */
5676 			if (pub_addr->hdr.ack)
5677 				ack_needed = B_TRUE;
5678 
5679 			D2(vswp, "%s(%lld): processing desc %lld at pos"
5680 			    " 0x%llx : dstate 0x%lx : datalen 0x%lx",
5681 			    __func__, ldcp->ldc_id, pos, pub_addr,
5682 			    pub_addr->hdr.dstate, datalen);
5683 
5684 			/*
5685 			 * Mark that we are starting to process descriptor.
5686 			 */
5687 			pub_addr->hdr.dstate = VIO_DESC_ACCEPTED;
5688 
5689 			mp = vio_allocb(ldcp->rxh);
5690 			if (mp == NULL) {
5691 				/*
5692 				 * No free receive buffers available, so
5693 				 * fallback onto allocb(9F). Make sure that
5694 				 * we get a data buffer which is a multiple
5695 				 * of 8 as this is required by ldc_mem_copy.
5696 				 */
5697 				DTRACE_PROBE(allocb);
5698 				mp = allocb(datalen + VNET_IPALIGN + 8,
5699 				    BPRI_MED);
5700 			}
			if (mp == NULL) {
				/*
				 * The allocb(9F) fallback failed too; drop
				 * this descriptor and stop processing.
				 */
				DERR(vswp, "%s(%lld): allocb failed",
				    __func__, ldcp->ldc_id);
				pub_addr->hdr.dstate = VIO_DESC_DONE;
				(void) ldc_mem_dring_release(dp->handle,
				    pos, pos);
				break;
			}
5701 
5702 			/*
5703 			 * Ensure that we ask ldc for an aligned
5704 			 * number of bytes.
5705 			 */
5706 			nbytes = datalen + VNET_IPALIGN;
5707 			if (nbytes & 0x7) {
5708 				off = 8 - (nbytes & 0x7);
5709 				nbytes += off;
5710 			}
5711 
5712 			ncookies = pub_addr->ncookies;
5713 			rv = ldc_mem_copy(ldcp->ldc_handle,
5714 			    (caddr_t)mp->b_rptr, 0, &nbytes,
5715 			    pub_addr->memcookie, ncookies,
5716 			    LDC_COPY_IN);
5717 
5718 			if (rv != 0) {
5719 				DERR(vswp, "%s(%d): unable to copy in "
5720 				    "data from %d cookies in desc %d"
5721 				    " (rv %d)", __func__, ldcp->ldc_id,
5722 				    ncookies, pos, rv);
5723 				freemsg(mp);
5724 
5725 				pub_addr->hdr.dstate = VIO_DESC_DONE;
5726 				(void) ldc_mem_dring_release(dp->handle,
5727 				    pos, pos);
5728 				break;
5729 			} else {
5730 				D2(vswp, "%s(%d): copied in %ld bytes"
5731 				    " using %d cookies", __func__,
5732 				    ldcp->ldc_id, nbytes, ncookies);
5733 			}
5734 
5735 			/* adjust the read pointer to skip over the padding */
5736 			mp->b_rptr += VNET_IPALIGN;
5737 
5738 			/* point to the actual end of data */
5739 			mp->b_wptr = mp->b_rptr + datalen;
5740 
5741 			/* build a chain of received packets */
5742 			if (bp == NULL) {
5743 				/* first pkt */
5744 				bp = mp;
5745 				bp->b_next = bp->b_prev = NULL;
5746 				bpt = bp;
5747 				chain = 1;
5748 			} else {
5749 				mp->b_next = NULL;
5750 				mp->b_prev = bpt;
5751 				bpt->b_next = mp;
5752 				bpt = mp;
5753 				chain++;
5754 			}
5755 
5756 			/* mark we are finished with this descriptor */
5757 			pub_addr->hdr.dstate = VIO_DESC_DONE;
5758 
5759 			(void) ldc_mem_dring_release(dp->handle, pos, pos);
5760 
5761 			/*
5762 			 * Send an ACK back to peer if requested.
5763 			 */
5764 			if (ack_needed) {
5765 				ack_needed = B_FALSE;
5766 
5767 				dring_pkt->start_idx = range_start;
5768 				dring_pkt->end_idx = range_end;
5769 
5770 				DERR(vswp, "%s(%lld): processed %d %d, ACK"
5771 				    " requested", __func__, ldcp->ldc_id,
5772 				    dring_pkt->start_idx,
5773 				    dring_pkt->end_idx);
5774 
5775 				dring_pkt->dring_process_state = VIO_DP_ACTIVE;
5776 				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
5777 				dring_pkt->tag.vio_sid = ldcp->local_session;
5778 				vsw_send_msg(ldcp, (void *)dring_pkt,
5779 				    sizeof (vio_dring_msg_t));
5780 
5781 				prev_desc_ack = B_TRUE;
5782 				range_start = pos;
5783 			}
5784 
5785 			/* next descriptor */
5786 			pos = (pos + 1) % len;
5787 			cnt++;
5788 
5789 			/*
5790 			 * Break out of loop here and stop processing to
5791 			 * allow some other network device (or disk) to
5792 			 * get access to the cpu.
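			 *
			 * (Received packets are accumulated into a b_next
			 * chain and handed to vsw_switch_frame() in batches
			 * bounded by vsw_chain_len, rather than being
			 * switched one at a time.)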
5793 			 */
5794 			/* send the chain of packets to be switched */
5795 			if (chain > vsw_chain_len) {
5796 				D3(vswp, "%s(%lld): switching chain of %d "
5797 				    "msgs", __func__, ldcp->ldc_id, chain);
5798 				vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT,
5799 				    ldcp->ldc_port, NULL);
5800 				bp = NULL;
5801 				break;
5802 			}
5803 		}
5804 		RW_EXIT(&ldcp->lane_in.dlistrw);
5805 
5806 		/* send the chain of packets to be switched */
5807 		if (bp != NULL) {
5808 			D3(vswp, "%s(%lld): switching chain of %d msgs",
5809 			    __func__, ldcp->ldc_id, chain);
5810 			vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT,
5811 			    ldcp->ldc_port, NULL);
5812 		}
5813 
5814 		DTRACE_PROBE1(msg_cnt, int, cnt);
5815 
5816 		/*
5817 		 * We are now finished so ACK back with the state
5818 		 * set to STOPPED so our peer knows we are finished.
5819 		 */
5820 		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
5821 		dring_pkt->tag.vio_sid = ldcp->local_session;
5822 
5823 		dring_pkt->dring_process_state = VIO_DP_STOPPED;
5824 
5825 		DTRACE_PROBE(stop_process_sent);
5826 
5827 		/*
5828 		 * We have not processed any more descriptors beyond
5829 		 * the last one we ACK'd.
5830 		 */
5831 		if (prev_desc_ack)
5832 			range_start = range_end;
5833 
5834 		dring_pkt->start_idx = range_start;
5835 		dring_pkt->end_idx = range_end;
5836 
5837 		D2(vswp, "%s(%lld) processed : %d : %d, now stopping",
5838 		    __func__, ldcp->ldc_id, dring_pkt->start_idx,
5839 		    dring_pkt->end_idx);
5840 
5841 		vsw_send_msg(ldcp, (void *)dring_pkt,
5842 		    sizeof (vio_dring_msg_t));
5843 		break;
5844 
5845 	case VIO_SUBTYPE_ACK:
5846 		D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id);
5847 		/*
5848 		 * Verify that the relevant descriptors are all
5849 		 * marked as DONE.
5850 		 */
5851 		READ_ENTER(&ldcp->lane_out.dlistrw);
5852 		if ((dp = vsw_ident2dring(&ldcp->lane_out,
5853 		    dring_pkt->dring_ident)) == NULL) {
5854 			RW_EXIT(&ldcp->lane_out.dlistrw);
5855 			DERR(vswp, "%s: unknown ident in ACK", __func__);
5856 			return;
5857 		}
5858 
5859 		pub_addr = (vnet_public_desc_t *)dp->pub_addr;
5860 		priv_addr = (vsw_private_desc_t *)dp->priv_addr;
5861 
5862 		start = end = 0;
5863 		start = dring_pkt->start_idx;
5864 		end = dring_pkt->end_idx;
5865 		len = dp->num_descriptors;
5866 
5867 		j = num = 0;
5868 		/* calculate # descriptors taking into account wrap around */
5869 		num = end >= start ? end - start + 1 : (len - start + 1) + end;
5870 
5871 		D2(vswp, "%s(%lld): start index %ld : end %ld : num %ld\n",
5872 		    __func__, ldcp->ldc_id, start, end, num);
5873 
5874 		mutex_enter(&dp->dlock);
5875 		dp->last_ack_recv = end;
5876 		mutex_exit(&dp->dlock);
5877 
5878 		for (i = start; j < num; i = (i + 1) % len, j++) {
5879 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
5880 			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
5881 
5882 			/*
5883 			 * If the last descriptor in a range has the ACK
5884 			 * bit set then we will get two messages from our
5885 			 * peer relating to it. The normal ACK msg and then
5886 			 * a subsequent STOP msg. The first message will have
5887 			 * resulted in the descriptor being reclaimed and
5888 			 * its state set to FREE so when we encounter a non
5889 			 * DONE descriptor we need to check to see if it's
5890 			 * because we have just reclaimed it.
5891 			 */
5892 			mutex_enter(&priv_addr->dstate_lock);
5893 			if (pub_addr->hdr.dstate == VIO_DESC_DONE) {
5894 				/* clear all the fields */
5895 				bzero(priv_addr->datap, priv_addr->datalen);
5896 				priv_addr->datalen = 0;
5897 
5898 				pub_addr->hdr.dstate = VIO_DESC_FREE;
5899 				pub_addr->hdr.ack = 0;
5900 
5901 				priv_addr->dstate = VIO_DESC_FREE;
5902 				mutex_exit(&priv_addr->dstate_lock);
5903 
5904 				D3(vswp, "clearing descp %d : pub state "
5905 				    "0x%llx : priv state 0x%llx", i,
5906 				    pub_addr->hdr.dstate,
5907 				    priv_addr->dstate);
5908 
5909 			} else {
5910 				mutex_exit(&priv_addr->dstate_lock);
5911 
5912 				if (dring_pkt->dring_process_state !=
5913 				    VIO_DP_STOPPED) {
5914 					DERR(vswp, "%s: descriptor %lld at pos "
5915 					    " 0x%llx not DONE (0x%lx)\n",
5916 					    __func__, i, pub_addr,
5917 					    pub_addr->hdr.dstate);
5918 					RW_EXIT(&ldcp->lane_out.dlistrw);
5919 					return;
5920 				}
5921 			}
5922 		}
5923 
5924 		/*
5925 		 * If our peer is stopping processing descriptors then
5926 		 * we check to make sure it has processed all the descriptors
5927 		 * we have updated. If not then we send it a new message
5928 		 * to prompt it to restart.
5929 		 */
5930 		if (dring_pkt->dring_process_state == VIO_DP_STOPPED) {
5931 			DTRACE_PROBE(stop_process_recv);
5932 			D2(vswp, "%s(%lld): got stopping msg : %d : %d",
5933 			    __func__, ldcp->ldc_id, dring_pkt->start_idx,
5934 			    dring_pkt->end_idx);
5935 
5936 			/*
5937 			 * Check next descriptor in public section of ring.
5938 			 * If it's marked as READY then we need to prompt our
5939 			 * peer to start processing the ring again.
5940 			 */
5941 			i = (end + 1) % len;
5942 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
5943 			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
5944 
5945 			/*
5946 			 * Hold the restart lock across all of this to
5947 			 * make sure that it's not possible for us to
5948 			 * decide that a msg needs to be sent in the future
5949 			 * but the sending code having already checked is
5950 			 * about to exit.
5951 			 */
5952 			mutex_enter(&dp->restart_lock);
5953 			mutex_enter(&priv_addr->dstate_lock);
5954 			if (pub_addr->hdr.dstate == VIO_DESC_READY) {
5955 
5956 				mutex_exit(&priv_addr->dstate_lock);
5957 
5958 				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
5959 				dring_pkt->tag.vio_sid = ldcp->local_session;
5960 
5961 				mutex_enter(&ldcp->lane_out.seq_lock);
5962 				dring_pkt->seq_num = ldcp->lane_out.seq_num++;
5963 				mutex_exit(&ldcp->lane_out.seq_lock);
5964 
5965 				dring_pkt->start_idx = (end + 1) % len;
5966 				dring_pkt->end_idx = -1;
5967 
5968 				D2(vswp, "%s(%lld) : sending restart msg:"
5969 				    " %d : %d", __func__, ldcp->ldc_id,
5970 				    dring_pkt->start_idx,
5971 				    dring_pkt->end_idx);
5972 
5973 				vsw_send_msg(ldcp, (void *)dring_pkt,
5974 				    sizeof (vio_dring_msg_t));
5975 			} else {
5976 				mutex_exit(&priv_addr->dstate_lock);
5977 				dp->restart_reqd = B_TRUE;
5978 			}
5979 			mutex_exit(&dp->restart_lock);
5980 		}
5981 		RW_EXIT(&ldcp->lane_out.dlistrw);
5982 		break;
5983 
5984 	case VIO_SUBTYPE_NACK:
5985 		DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK",
5986 		    __func__, ldcp->ldc_id);
5987 		/*
5988 		 * Something is badly wrong if we are getting NACKs
5989 		 * for our data pkts. So reset the channel.
5990 		 */
5991 		vsw_restart_handshake(ldcp);
5992 
5993 		break;
5994 
5995 	default:
5996 		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
5997 		    ldcp->ldc_id, dring_pkt->tag.vio_subtype);
5998 	}
5999 
6000 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
6001 }
6002 
6003 /*
6004  * VIO_PKT_DATA (a.k.a. raw data mode)
6005  *
6006  * Note - currently not supported. Do nothing.
6007  */
6008 static void
6009 vsw_process_data_raw_pkt(vsw_ldc_t *ldcp, void *dpkt)
6010 {
6011 	_NOTE(ARGUNUSED(dpkt))
6012 
6013 	D1(NULL, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
6014 
6015 	DERR(NULL, "%s (%lld): currently not supported",
6016 	    __func__, ldcp->ldc_id);
6017 
6018 	D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
6019 }
6020 
6021 /*
6022  * Process an in-band descriptor message (most likely from
6023  * OBP).
6024  */
6025 static void
6026 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt)
6027 {
6028 	vnet_ibnd_desc_t	*ibnd_desc;
6029 	dring_info_t		*dp = NULL;
6030 	vsw_private_desc_t	*priv_addr = NULL;
6031 	vsw_t			*vswp = ldcp->ldc_vswp;
6032 	mblk_t			*mp = NULL;
6033 	size_t			nbytes = 0;
6034 	size_t			off = 0;
6035 	uint64_t		idx = 0;
6036 	uint32_t		num = 1, len, datalen = 0;
6037 	uint64_t		ncookies = 0;
6038 	int			i, rv;
6039 	int			j = 0;
6040 
6041 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
6042 
6043 	ibnd_desc = (vnet_ibnd_desc_t *)pkt;
6044 
6045 	switch (ibnd_desc->hdr.tag.vio_subtype) {
6046 	case VIO_SUBTYPE_INFO:
6047 		D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
6048 
6049 		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
6050 			return;
6051 
6052 		/*
6053 		 * Data is padded to align on an 8 byte boundary,
6054 		 * nbytes is actual data length, i.e. minus that
6055 		 * padding.
6056 		 */
6057 		datalen = ibnd_desc->nbytes;
6058 
6059 		D2(vswp, "%s(%lld): processing inband desc : "
6060 		    ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen);
6061 
6062 		ncookies = ibnd_desc->ncookies;
6063 
6064 		/*
6065 		 * allocb(9F) returns an aligned data block. We
6066 		 * need to ensure that we ask ldc for an aligned
6067 		 * number of bytes also.
6068 		 */
6069 		nbytes = datalen;
6070 		if (nbytes & 0x7) {
6071 			off = 8 - (nbytes & 0x7);
6072 			nbytes += off;
6073 		}
6074 
6075 		mp = allocb(datalen, BPRI_MED);
6076 		if (mp == NULL) {
6077 			DERR(vswp, "%s(%lld): allocb failed",
6078 			    __func__, ldcp->ldc_id);
6079 			return;
6080 		}
6081 
6082 		rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr,
6083 		    0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies,
6084 		    LDC_COPY_IN);
6085 
6086 		if (rv != 0) {
6087 			DERR(vswp, "%s(%d): unable to copy in data from "
6088 			    "%d cookie(s)", __func__,
6089 			    ldcp->ldc_id, ncookies);
6090 			freemsg(mp);
6091 			return;
6092 		} else {
6093 			D2(vswp, "%s(%d): copied in %ld bytes using %d "
6094 			    "cookies", __func__, ldcp->ldc_id, nbytes,
6095 			    ncookies);
6096 		}
6097 
6098 		/* point to the actual end of data */
6099 		mp->b_wptr = mp->b_rptr + datalen;
6100 
6101 		/*
6102 		 * We ACK back every in-band descriptor message we process
6103 		 */
6104 		ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK;
6105 		ibnd_desc->hdr.tag.vio_sid = ldcp->local_session;
6106 		vsw_send_msg(ldcp, (void *)ibnd_desc,
6107 		    sizeof (vnet_ibnd_desc_t));
6108 
6109 		/* send the packet to be switched */
6110 		vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT,
6111 		    ldcp->ldc_port, NULL);
6112 
6113 		break;
6114 
6115 	case VIO_SUBTYPE_ACK:
6116 		D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
6117 
6118 		/* Verify the ACK is valid */
6119 		idx = ibnd_desc->hdr.desc_handle;
6120 
6121 		if (idx >= VSW_RING_NUM_EL) {
6122 			cmn_err(CE_WARN, "!vsw%d: corrupted ACK received "
6123 			    "(idx %ld)", vswp->instance, idx);
6124 			return;
6125 		}
6126 
6127 		if ((dp = ldcp->lane_out.dringp) == NULL) {
6128 			DERR(vswp, "%s: no dring found", __func__);
6129 			return;
6130 		}
6131 
6132 		len = dp->num_descriptors;
6133 		/*
6134 		 * If the descriptor we are being ACK'ed for is not the
6135 		 * one we expected, then pkts were lost somewhere, either
6136 		 * when we tried to send a msg, or a previous ACK msg from
from 6137 our peer. In either case we now reclaim the descriptors 6138 * in the range from the last ACK we received up to the 6139 * current ACK. (E.g. with len 512, last_ack_recv 510 and idx 1, the num computed below is (512 - 510 + 1) + 1 = 4, covering slots 510, 511, 0 and 1.) 6140 */ 6141 if (idx != dp->last_ack_recv) { 6142 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 6143 __func__, dp->last_ack_recv, idx); 6144 num = idx >= dp->last_ack_recv ? 6145 idx - dp->last_ack_recv + 1: 6146 (len - dp->last_ack_recv + 1) + idx; 6147 } 6148 6149 /* 6150 * When we sent the in-band message to our peer we 6151 * marked the copy in our private ring as READY. We now 6152 * check that the descriptor we are being ACK'ed for is in 6153 * fact READY, i.e. it is one we have shared with our peer. 6154 * 6155 * If it's not, we flag an error, but still reset the descr 6156 * back to FREE. 6157 */ 6158 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 6159 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 6160 mutex_enter(&priv_addr->dstate_lock); 6161 if (priv_addr->dstate != VIO_DESC_READY) { 6162 DERR(vswp, "%s: (%ld) desc at index %ld not " 6163 "READY (0x%lx)", __func__, 6164 ldcp->ldc_id, idx, priv_addr->dstate); 6165 DERR(vswp, "%s: bound %d: ncookies %ld : " 6166 "datalen %ld", __func__, 6167 priv_addr->bound, priv_addr->ncookies, 6168 priv_addr->datalen); 6169 } 6170 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 6171 ldcp->ldc_id, idx); 6172 /* release resources associated with sent msg */ 6173 bzero(priv_addr->datap, priv_addr->datalen); 6174 priv_addr->datalen = 0; 6175 priv_addr->dstate = VIO_DESC_FREE; 6176 mutex_exit(&priv_addr->dstate_lock); 6177 } 6178 /* update to next expected value */ 6179 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 6180 6181 break; 6182 6183 case VIO_SUBTYPE_NACK: 6184 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 6185 6186 /* 6187 * We should only get a NACK if our peer doesn't like 6188 * something about a message we have sent it. If this 6189 * happens we just release the resources associated with 6190 * the message. (We are relying on higher layers to decide 6191 * whether or not to resend.) 6192 */ 6193 6194 /* limit check */ 6195 idx = ibnd_desc->hdr.desc_handle; 6196 6197 if (idx >= VSW_RING_NUM_EL) { 6198 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 6199 __func__, idx); 6200 return; 6201 } 6202 6203 if ((dp = ldcp->lane_out.dringp) == NULL) { 6204 DERR(vswp, "%s: no dring found", __func__); 6205 return; 6206 } 6207 6208 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 6209 6210 /* move to correct location in ring */ 6211 priv_addr += idx; 6212 6213 /* release resources associated with sent msg */ 6214 mutex_enter(&priv_addr->dstate_lock); 6215 bzero(priv_addr->datap, priv_addr->datalen); 6216 priv_addr->datalen = 0; 6217 priv_addr->dstate = VIO_DESC_FREE; 6218 mutex_exit(&priv_addr->dstate_lock); 6219 6220 break; 6221 6222 default: 6223 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 6224 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 6225 } 6226 6227 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 6228 } 6229 6230 static void 6231 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t tag) 6232 { 6233 _NOTE(ARGUNUSED(epkt)) 6234 6235 vsw_t *vswp = ldcp->ldc_vswp; 6236 uint16_t env = tag.vio_subtype_env; 6237 6238 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 6239 6240 /* 6241 * Error vio_subtypes have yet to be defined. So for 6242 * the moment we can't do anything.
6243 */ 6244 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 6245 6246 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 6247 } 6248 6249 /* 6250 * Switch the given ethernet frame when operating in layer 2 mode. 6251 * 6252 * vswp: pointer to the vsw instance 6253 * mp: pointer to chain of ethernet frame(s) to be switched 6254 * caller: identifies the source of this frame as: 6255 * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 6256 * 2. VSW_PHYSDEV - the physical ethernet device 6257 * 3. VSW_LOCALDEV - vsw configured as a virtual interface 6258 * arg: argument provided by the caller. 6259 * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 6260 * 2. for PHYSDEV - NULL 6261 * 3. for LOCALDEV - pointer to this vsw_t (self) 6262 */ 6263 void 6264 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 6265 vsw_port_t *arg, mac_resource_handle_t mrh) 6266 { 6267 struct ether_header *ehp; 6268 vsw_port_t *port = NULL; 6269 mblk_t *bp, *ret_m; 6270 mblk_t *nmp = NULL; 6271 vsw_port_list_t *plist = &vswp->plist; 6272 6273 D1(vswp, "%s: enter (caller %d)", __func__, caller); 6274 6275 /* 6276 * PERF: rather than breaking up the chain here, scan it 6277 * to find all mblks heading to same destination and then 6278 * pass that sub-chain to the lower transmit functions. 6279 */ 6280 6281 /* process the chain of packets */ 6282 bp = mp; 6283 while (bp) { 6284 mp = bp; 6285 bp = bp->b_next; 6286 mp->b_next = mp->b_prev = NULL; 6287 ehp = (struct ether_header *)mp->b_rptr; 6288 6289 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 6290 __func__, MBLKSIZE(mp), MBLKL(mp)); 6291 6292 READ_ENTER(&vswp->if_lockrw); 6293 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 6294 /* 6295 * If destination is VSW_LOCALDEV (vsw as an eth 6296 * interface) and if the device is up & running, 6297 * send the packet up the stack on this host. 6298 * If the virtual interface is down, drop the packet. 6299 */ 6300 if (caller != VSW_LOCALDEV) { 6301 if (vswp->if_state & VSW_IF_UP) { 6302 RW_EXIT(&vswp->if_lockrw); 6303 mac_rx(vswp->if_mh, mrh, mp); 6304 } else { 6305 RW_EXIT(&vswp->if_lockrw); 6306 /* Interface down, drop pkt */ 6307 freemsg(mp); 6308 } 6309 } else { 6310 RW_EXIT(&vswp->if_lockrw); 6311 freemsg(mp); 6312 } 6313 continue; 6314 } 6315 RW_EXIT(&vswp->if_lockrw); 6316 6317 READ_ENTER(&plist->lockrw); 6318 port = vsw_lookup_fdb(vswp, ehp); 6319 if (port) { 6320 /* 6321 * Mark the port as in-use. 6322 */ 6323 mutex_enter(&port->ref_lock); 6324 port->ref_cnt++; 6325 mutex_exit(&port->ref_lock); 6326 RW_EXIT(&plist->lockrw); 6327 6328 /* 6329 * If plumbed and in promisc mode then copy msg 6330 * and send up the stack. 6331 */ 6332 READ_ENTER(&vswp->if_lockrw); 6333 if (VSW_U_P(vswp->if_state)) { 6334 RW_EXIT(&vswp->if_lockrw); 6335 nmp = copymsg(mp); 6336 if (nmp) 6337 mac_rx(vswp->if_mh, mrh, nmp); 6338 } else { 6339 RW_EXIT(&vswp->if_lockrw); 6340 } 6341 6342 /* 6343 * If the destination is in FDB, the packet 6344 * should be forwarded to the corresponding 6345 * vsw_port (connected to a vnet device - 6346 * VSW_VNETPORT) 6347 */ 6348 (void) vsw_portsend(port, mp); 6349 6350 /* 6351 * Decrement use count in port and check if 6352 * should wake delete thread. 6353 */ 6354 mutex_enter(&port->ref_lock); 6355 port->ref_cnt--; 6356 if (port->ref_cnt == 0) 6357 cv_signal(&port->ref_cv); 6358 mutex_exit(&port->ref_lock); 6359 } else { 6360 RW_EXIT(&plist->lockrw); 6361 /* 6362 * Destination not in FDB.
6363 * 6364 * If the destination is broadcast or 6365 * multicast forward the packet to all 6366 * (VNETPORTs, PHYSDEV, LOCALDEV), 6367 * except the caller. 6368 */ 6369 if (IS_BROADCAST(ehp)) { 6370 D3(vswp, "%s: BROADCAST pkt", __func__); 6371 (void) vsw_forward_all(vswp, mp, 6372 caller, arg); 6373 } else if (IS_MULTICAST(ehp)) { 6374 D3(vswp, "%s: MULTICAST pkt", __func__); 6375 (void) vsw_forward_grp(vswp, mp, 6376 caller, arg); 6377 } else { 6378 /* 6379 * If the destination is unicast, and came 6380 * from either a logical network device or 6381 * the switch itself when it is plumbed, then 6382 * send it out on the physical device and also 6383 * up the stack if the logical interface is 6384 * in promiscuous mode. 6385 * 6386 * NOTE: The assumption here is that if we 6387 * cannot find the destination in our fdb, it's 6388 * a unicast address that came from either a 6389 * vnet or down the stack (when plumbed), so it 6390 * must be destined for an ethernet device 6391 * outside our ldoms. 6392 */ 6393 if (caller == VSW_VNETPORT) { 6394 READ_ENTER(&vswp->if_lockrw); 6395 if (VSW_U_P(vswp->if_state)) { 6396 RW_EXIT(&vswp->if_lockrw); 6397 nmp = copymsg(mp); 6398 if (nmp) 6399 mac_rx(vswp->if_mh, 6400 mrh, nmp); 6401 } else { 6402 RW_EXIT(&vswp->if_lockrw); 6403 } 6404 if ((ret_m = vsw_tx_msg(vswp, mp)) 6405 != NULL) { 6406 DERR(vswp, "%s: drop mblks to " 6407 "phys dev", __func__); 6408 freemsg(ret_m); 6409 } 6410 6411 } else if (caller == VSW_PHYSDEV) { 6412 /* 6413 * Pkt seen because card in promisc 6414 * mode. Send up stack if plumbed in 6415 * promisc mode, else drop it. 6416 */ 6417 READ_ENTER(&vswp->if_lockrw); 6418 if (VSW_U_P(vswp->if_state)) { 6419 RW_EXIT(&vswp->if_lockrw); 6420 mac_rx(vswp->if_mh, mrh, mp); 6421 } else { 6422 RW_EXIT(&vswp->if_lockrw); 6423 freemsg(mp); 6424 } 6425 6426 } else if (caller == VSW_LOCALDEV) { 6427 /* 6428 * Pkt came down the stack, send out 6429 * over physical device. 6430 */ 6431 if ((ret_m = vsw_tx_msg(vswp, mp)) 6432 != NULL) { 6433 DERR(vswp, "%s: drop mblks to " 6434 "phys dev", __func__); 6435 freemsg(ret_m); 6436 } 6437 } 6438 } 6439 } 6440 } 6441 D1(vswp, "%s: exit\n", __func__); 6442 } 6443 6444 /* 6445 * Switch ethernet frame when in layer 3 mode (i.e. using IP 6446 * layer to do the routing). 6447 * 6448 * There is a large amount of overlap between this function and 6449 * vsw_switch_l2_frame. At some stage we need to revisit and refactor 6450 * both these functions. 6451 */ 6452 void 6453 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 6454 vsw_port_t *arg, mac_resource_handle_t mrh) 6455 { 6456 struct ether_header *ehp; 6457 vsw_port_t *port = NULL; 6458 mblk_t *bp = NULL; 6459 vsw_port_list_t *plist = &vswp->plist; 6460 6461 D1(vswp, "%s: enter (caller %d)", __func__, caller); 6462 6463 /* 6464 * In layer 3 mode we should only ever be switching packets 6465 * between the IP layer and vnet devices. So make sure that's 6466 * who is invoking us.
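 *
 * Editor's note: for reference, the two legitimate entry points look
 * roughly like this (the VSW_VNETPORT call is taken from the rx path
 * above; the VSW_LOCALDEV one is presumed to come via the vsw_m_tx()
 * mac entry point, with arg pointing back at this vsw_t itself):
 *
 *	vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, port, NULL);
 *	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV,
 *	    (vsw_port_t *)vswp, NULL);
 *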
6467 */ 6468 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) { 6469 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller); 6470 freemsgchain(mp); 6471 return; 6472 } 6473 6474 /* process the chain of packets */ 6475 bp = mp; 6476 while (bp) { 6477 mp = bp; 6478 bp = bp->b_next; 6479 mp->b_next = mp->b_prev = NULL; 6480 ehp = (struct ether_header *)mp->b_rptr; 6481 6482 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 6483 __func__, MBLKSIZE(mp), MBLKL(mp)); 6484 6485 READ_ENTER(&plist->lockrw); 6486 port = vsw_lookup_fdb(vswp, ehp); 6487 if (port) { 6488 /* 6489 * Mark port as in-use. 6490 */ 6491 mutex_enter(&port->ref_lock); 6492 port->ref_cnt++; 6493 mutex_exit(&port->ref_lock); 6494 RW_EXIT(&plist->lockrw); 6495 6496 D2(vswp, "%s: sending to target port", __func__); 6497 (void) vsw_portsend(port, mp); 6498 6499 /* 6500 * Finished with port so decrement ref count and 6501 * check if should wake delete thread. 6502 */ 6503 mutex_enter(&port->ref_lock); 6504 port->ref_cnt--; 6505 if (port->ref_cnt == 0) 6506 cv_signal(&port->ref_cv); 6507 mutex_exit(&port->ref_lock); 6508 } else { 6509 RW_EXIT(&plist->lockrw); 6510 /* 6511 * Destination not in FDB 6512 * 6513 * If the destination is broadcast or 6514 * multicast forward the packet to all 6515 * (VNETPORTs, PHYSDEV, LOCALDEV), 6516 * except the caller. 6517 */ 6518 if (IS_BROADCAST(ehp)) { 6519 D2(vswp, "%s: BROADCAST pkt", __func__); 6520 (void) vsw_forward_all(vswp, mp, 6521 caller, arg); 6522 } else if (IS_MULTICAST(ehp)) { 6523 D2(vswp, "%s: MULTICAST pkt", __func__); 6524 (void) vsw_forward_grp(vswp, mp, 6525 caller, arg); 6526 } else { 6527 /* 6528 * Unicast pkt from vnet that we don't have 6529 * an FDB entry for, so must be destinded for 6530 * the outside world. Attempt to send up to the 6531 * IP layer to allow it to deal with it. 6532 */ 6533 if (caller == VSW_VNETPORT) { 6534 READ_ENTER(&vswp->if_lockrw); 6535 if (vswp->if_state & VSW_IF_UP) { 6536 RW_EXIT(&vswp->if_lockrw); 6537 D2(vswp, "%s: sending up", 6538 __func__); 6539 mac_rx(vswp->if_mh, mrh, mp); 6540 } else { 6541 RW_EXIT(&vswp->if_lockrw); 6542 /* Interface down, drop pkt */ 6543 D2(vswp, "%s I/F down", 6544 __func__); 6545 freemsg(mp); 6546 } 6547 } 6548 } 6549 } 6550 } 6551 6552 D1(vswp, "%s: exit", __func__); 6553 } 6554 6555 /* 6556 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV), 6557 * except the caller (port on which frame arrived). 6558 */ 6559 static int 6560 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 6561 { 6562 vsw_port_list_t *plist = &vswp->plist; 6563 vsw_port_t *portp; 6564 mblk_t *nmp = NULL; 6565 mblk_t *ret_m = NULL; 6566 int skip_port = 0; 6567 6568 D1(vswp, "vsw_forward_all: enter\n"); 6569 6570 /* 6571 * Broadcast message from inside ldoms so send to outside 6572 * world if in either of layer 2 modes. 6573 */ 6574 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 6575 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 6576 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 6577 6578 nmp = dupmsg(mp); 6579 if (nmp) { 6580 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 6581 DERR(vswp, "%s: dropping pkt(s) " 6582 "consisting of %ld bytes of data for" 6583 " physical device", __func__, MBLKL(ret_m)); 6584 freemsg(ret_m); 6585 } 6586 } 6587 } 6588 6589 if (caller == VSW_VNETPORT) 6590 skip_port = 1; 6591 6592 /* 6593 * Broadcast message from other vnet (layer 2 or 3) or outside 6594 * world (layer 2 only), send up stack if plumbed. 
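 *
 * Editor's summary of where vsw_forward_all() copies a frame, derived
 * from the code below ("layer 2" means the current smode is either
 * VSW_LAYER2 or VSW_LAYER2_PROMISC):
 *
 *	caller		-> phys dev	-> own stack	-> vnet ports
 *	VSW_VNETPORT	layer 2 only	if plumbed/up	all but sender
 *	VSW_PHYSDEV	never		if plumbed/up	all
 *	VSW_LOCALDEV	layer 2 only	never		all
 *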
6595 */ 6596 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 6597 READ_ENTER(&vswp->if_lockrw); 6598 if (vswp->if_state & VSW_IF_UP) { 6599 RW_EXIT(&vswp->if_lockrw); 6600 nmp = copymsg(mp); 6601 if (nmp) 6602 mac_rx(vswp->if_mh, NULL, nmp); 6603 } else { 6604 RW_EXIT(&vswp->if_lockrw); 6605 } 6606 } 6607 6608 /* send it to all VNETPORTs */ 6609 READ_ENTER(&plist->lockrw); 6610 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 6611 D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 6612 /* 6613 * Caution ! - don't reorder these two checks as arg 6614 * will be NULL if the caller is PHYSDEV. skip_port is 6615 * only set if caller is VNETPORT. 6616 */ 6617 if ((skip_port) && (portp == arg)) 6618 continue; 6619 else { 6620 nmp = dupmsg(mp); 6621 if (nmp) { 6622 (void) vsw_portsend(portp, nmp); 6623 } else { 6624 DERR(vswp, "vsw_forward_all: nmp NULL"); 6625 } 6626 } 6627 } 6628 RW_EXIT(&plist->lockrw); 6629 6630 freemsg(mp); 6631 6632 D1(vswp, "vsw_forward_all: exit\n"); 6633 return (0); 6634 } 6635 6636 /* 6637 * Forward pkts to any devices or interfaces which have registered 6638 * an interest in them (i.e. multicast groups). 6639 */ 6640 static int 6641 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 6642 { 6643 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 6644 mfdb_ent_t *entp = NULL; 6645 mfdb_ent_t *tpp = NULL; 6646 vsw_port_t *port; 6647 uint64_t key = 0; 6648 mblk_t *nmp = NULL; 6649 mblk_t *ret_m = NULL; 6650 boolean_t check_if = B_TRUE; 6651 6652 /* 6653 * Convert address to hash table key 6654 */ 6655 KEY_HASH(key, ehp->ether_dhost); 6656 6657 D1(vswp, "%s: key 0x%llx", __func__, key); 6658 6659 /* 6660 * If pkt came from either a vnet or down the stack (if we are 6661 * plumbed) and we are in layer 2 mode, then we send the pkt out 6662 * over the physical adapter, and then check to see if any other 6663 * vnets are interested in it. 6664 */ 6665 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 6666 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 6667 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 6668 nmp = dupmsg(mp); 6669 if (nmp) { 6670 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 6671 DERR(vswp, "%s: dropping pkt(s) " 6672 "consisting of %ld bytes of " 6673 "data for physical device", 6674 __func__, MBLKL(ret_m)); 6675 freemsg(ret_m); 6676 } 6677 } 6678 } 6679 6680 READ_ENTER(&vswp->mfdbrw); 6681 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 6682 (mod_hash_val_t *)&entp) != 0) { 6683 D3(vswp, "%s: no table entry found for addr 0x%llx", 6684 __func__, key); 6685 } else { 6686 /* 6687 * Send to list of devices associated with this address... 
6688 */ 6689 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 6690 6691 /* don't send to ourselves */ 6692 if ((caller == VSW_VNETPORT) && 6693 (tpp->d_addr == (void *)arg)) { 6694 port = (vsw_port_t *)tpp->d_addr; 6695 D3(vswp, "%s: not sending to ourselves" 6696 " : port %d", __func__, 6697 port->p_instance); 6698 continue; 6699 6700 } else if ((caller == VSW_LOCALDEV) && 6701 (tpp->d_type == VSW_LOCALDEV)) { 6702 D3(vswp, "%s: not sending back up stack", 6703 __func__); 6704 continue; 6705 } 6706 6707 if (tpp->d_type == VSW_VNETPORT) { 6708 port = (vsw_port_t *)tpp->d_addr; 6709 D3(vswp, "%s: sending to port %ld for " 6710 "addr 0x%llx", __func__, 6711 port->p_instance, key); 6712 6713 nmp = dupmsg(mp); 6714 if (nmp) 6715 (void) vsw_portsend(port, nmp); 6716 } else { 6717 if (vswp->if_state & VSW_IF_UP) { 6718 nmp = copymsg(mp); 6719 if (nmp) 6720 mac_rx(vswp->if_mh, NULL, nmp); 6721 check_if = B_FALSE; 6722 D3(vswp, "%s: sending up stack" 6723 " for addr 0x%llx", __func__, 6724 key); 6725 } 6726 } 6727 } 6728 } 6729 6730 RW_EXIT(&vswp->mfdbrw); 6731 6732 /* 6733 * If the pkt came from either a vnet or from physical device, 6734 * and if we haven't already sent the pkt up the stack then we 6735 * check now if we can/should (i.e. the interface is plumbed 6736 * and in promisc mode). 6737 */ 6738 if ((check_if) && 6739 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 6740 READ_ENTER(&vswp->if_lockrw); 6741 if (VSW_U_P(vswp->if_state)) { 6742 RW_EXIT(&vswp->if_lockrw); 6743 D3(vswp, "%s: (caller %d) finally sending up stack" 6744 " for addr 0x%llx", __func__, caller, key); 6745 nmp = copymsg(mp); 6746 if (nmp) 6747 mac_rx(vswp->if_mh, NULL, nmp); 6748 } else { 6749 RW_EXIT(&vswp->if_lockrw); 6750 } 6751 } 6752 6753 freemsg(mp); 6754 6755 D1(vswp, "%s: exit", __func__); 6756 6757 return (0); 6758 } 6759 6760 /* transmit the packet over the given port */ 6761 static int 6762 vsw_portsend(vsw_port_t *port, mblk_t *mp) 6763 { 6764 vsw_ldc_list_t *ldcl = &port->p_ldclist; 6765 vsw_ldc_t *ldcp; 6766 int status = 0; 6767 6768 6769 READ_ENTER(&ldcl->lockrw); 6770 /* 6771 * Note: for now we have a single channel. 6772 */ 6773 ldcp = ldcl->head; 6774 if (ldcp == NULL) { 6775 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 6776 freemsg(mp); 6777 RW_EXIT(&ldcl->lockrw); 6778 return (1); 6779 } 6780 6781 /* 6782 * Send the message out using the appropriate 6783 * transmit function which will free the mblk when it 6784 * is finished with it. 6785 */ 6786 mutex_enter(&port->tx_lock); 6787 if (port->transmit != NULL) 6788 status = (*port->transmit)(ldcp, mp); 6789 else { 6790 freemsg(mp); 6791 } 6792 mutex_exit(&port->tx_lock); 6793 6794 RW_EXIT(&ldcl->lockrw); 6795 6796 return (status); 6797 } 6798 6799 /* 6800 * Send packet out via descriptor ring to a logical device.
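 *
 * Editor's outline of the transmit path implemented below:
 *
 *	1) find a free private descriptor (vsw_dring_find_free_desc());
 *	2) bcopy the mblk chain into that descriptor's data buffer;
 *	3) mark the matching public descriptor VIO_DESC_READY;
 *	4) if the peer asked to be prompted (restart_reqd), send it a
 *	   VIO_DRING_DATA message pointing at the first unacked slot.
 *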
6801 */ 6802 static int 6803 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 6804 { 6805 vio_dring_msg_t dring_pkt; 6806 dring_info_t *dp = NULL; 6807 vsw_private_desc_t *priv_desc = NULL; 6808 vnet_public_desc_t *pub = NULL; 6809 vsw_t *vswp = ldcp->ldc_vswp; 6810 mblk_t *bp; 6811 size_t n, size; 6812 caddr_t bufp; 6813 int idx; 6814 int status = LDC_TX_SUCCESS; 6815 6816 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 6817 6818 /* TODO: make test a macro */ 6819 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 6820 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 6821 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 6822 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 6823 ldcp->lane_out.lstate); 6824 freemsg(mp); 6825 return (LDC_TX_FAILURE); 6826 } 6827 6828 /* 6829 * Note - using first ring only, this may change 6830 * in the future. 6831 */ 6832 READ_ENTER(&ldcp->lane_out.dlistrw); 6833 if ((dp = ldcp->lane_out.dringp) == NULL) { 6834 RW_EXIT(&ldcp->lane_out.dlistrw); 6835 DERR(vswp, "%s(%lld): no dring for outbound lane on" 6836 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 6837 freemsg(mp); 6838 return (LDC_TX_FAILURE); 6839 } 6840 6841 size = msgsize(mp); 6842 if (size > (size_t)ETHERMAX) { 6843 RW_EXIT(&ldcp->lane_out.dlistrw); 6844 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 6845 ldcp->ldc_id, size); 6846 freemsg(mp); 6847 return (LDC_TX_FAILURE); 6848 } 6849 6850 /* 6851 * Find a free descriptor 6852 * 6853 * Note: for the moment we are assuming that we will only 6854 * have one dring going from the switch to each of its 6855 * peers. This may change in the future. 6856 */ 6857 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 6858 D2(vswp, "%s(%lld): no descriptor available for ring " 6859 "at 0x%llx", __func__, ldcp->ldc_id, dp); 6860 6861 /* nothing more we can do */ 6862 status = LDC_TX_NORESOURCES; 6863 goto vsw_dringsend_free_exit; 6864 } else { 6865 D2(vswp, "%s(%lld): free private descriptor found at pos " 6866 "%ld addr 0x%llx\n", __func__, ldcp->ldc_id, idx, 6867 priv_desc); 6868 } 6869 6870 /* copy data into the descriptor */ 6871 bufp = priv_desc->datap; 6872 bufp += VNET_IPALIGN; 6873 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 6874 n = MBLKL(bp); 6875 bcopy(bp->b_rptr, bufp, n); 6876 bufp += n; 6877 } 6878 6879 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 6880 6881 pub = priv_desc->descp; 6882 pub->nbytes = priv_desc->datalen; 6883 6884 mutex_enter(&priv_desc->dstate_lock); 6885 pub->hdr.dstate = VIO_DESC_READY; 6886 mutex_exit(&priv_desc->dstate_lock); 6887 6888 /* 6889 * Determine whether or not we need to send a message to our 6890 * peer prompting them to read our newly updated descriptor(s). 6891 */ 6892 mutex_enter(&dp->restart_lock); 6893 if (dp->restart_reqd) { 6894 dp->restart_reqd = B_FALSE; 6895 mutex_exit(&dp->restart_lock); 6896 6897 /* 6898 * Send a vio_dring_msg to peer to prompt them to read 6899 * the updated descriptor ring. 
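 *
 * Editor's example (hypothetical sizes): with a 512 entry ring and
 * last_ack_recv == 510, the message built below carries
 *
 *	start_idx = (510 + 1) % 512 = 511;	end_idx = -1;
 *
 * i.e. "process everything from slot 511 onwards until told to stop".
 *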
6900 */ 6901 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 6902 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 6903 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 6904 dring_pkt.tag.vio_sid = ldcp->local_session; 6905 6906 /* Note - for now using first ring */ 6907 dring_pkt.dring_ident = dp->ident; 6908 6909 mutex_enter(&ldcp->lane_out.seq_lock); 6910 dring_pkt.seq_num = ldcp->lane_out.seq_num++; 6911 mutex_exit(&ldcp->lane_out.seq_lock); 6912 6913 /* 6914 * If last_ack_recv is -1 then we know we've not 6915 * received any ACKs yet, so this must be the first 6916 * msg sent, so set the start to the beginning of the ring. 6917 */ 6918 mutex_enter(&dp->dlock); 6919 if (dp->last_ack_recv == -1) { 6920 dring_pkt.start_idx = 0; 6921 } else { 6922 dring_pkt.start_idx = (dp->last_ack_recv + 1) % 6923 dp->num_descriptors; 6924 } 6925 dring_pkt.end_idx = -1; 6926 mutex_exit(&dp->dlock); 6927 6928 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 6929 ldcp->ldc_id, dp, dring_pkt.dring_ident); 6930 D3(vswp, "%s(%lld): start %lld : end %lld : seq %lld\n", 6931 __func__, ldcp->ldc_id, dring_pkt.start_idx, 6932 dring_pkt.end_idx, dring_pkt.seq_num); 6933 6934 vsw_send_msg(ldcp, (void *)&dring_pkt, 6935 sizeof (vio_dring_msg_t)); 6936 } else { 6937 mutex_exit(&dp->restart_lock); 6938 D2(vswp, "%s(%lld): updating descp %d", __func__, 6939 ldcp->ldc_id, idx); 6940 } 6941 6942 vsw_dringsend_free_exit: 6943 6944 RW_EXIT(&ldcp->lane_out.dlistrw); 6945 6946 /* free the message block */ 6947 freemsg(mp); 6948 6949 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 6950 return (status); 6951 } 6952 6953 /* 6954 * Send an in-band descriptor message over ldc. 6955 */ 6956 static int 6957 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 6958 { 6959 vsw_t *vswp = ldcp->ldc_vswp; 6960 vnet_ibnd_desc_t ibnd_msg; 6961 vsw_private_desc_t *priv_desc = NULL; 6962 dring_info_t *dp = NULL; 6963 size_t n, size = 0; 6964 caddr_t bufp; 6965 mblk_t *bp; 6966 int idx, i; 6967 int status = LDC_TX_SUCCESS; 6968 static int warn_msg = 1; 6969 6970 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 6971 6972 ASSERT(mp != NULL); 6973 6974 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 6975 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 6976 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 6977 __func__, ldcp->ldc_id, ldcp->ldc_status, 6978 ldcp->lane_out.lstate); 6979 freemsg(mp); 6980 return (LDC_TX_FAILURE); 6981 } 6982 6983 /* 6984 * only expect a single dring to exist, which we use 6985 * as an internal buffer, rather than a transfer channel.
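 *
 * Editor's note: this differs from vsw_dringsend() above. In dring
 * mode the peer reads the shared descriptors directly; in in-band
 * (VIO_DESC_MODE) transfer the memory cookies for each packet travel
 * inside the vnet_ibnd_desc_t message itself, so this ring is purely
 * local bookkeeping for the outstanding buffers.
 *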
6986 */ 6987 READ_ENTER(&ldcp->lane_out.dlistrw); 6988 if ((dp = ldcp->lane_out.dringp) == NULL) { 6989 DERR(vswp, "%s(%lld): no dring for outbound lane", 6990 __func__, ldcp->ldc_id); 6991 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", 6992 __func__, ldcp->ldc_id, ldcp->ldc_status, 6993 ldcp->lane_out.lstate); 6994 RW_EXIT(&ldcp->lane_out.dlistrw); 6995 freemsg(mp); 6996 return (LDC_TX_FAILURE); 6997 } 6998 6999 size = msgsize(mp); 7000 if (size > (size_t)ETHERMAX) { 7001 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 7002 ldcp->ldc_id, size); 7003 RW_EXIT(&ldcp->lane_out.dlistrw); /* drop dlistrw before returning */ freemsg(mp); 7004 return (LDC_TX_FAILURE); 7005 } 7006 7007 /* 7008 * Find a free descriptor in our buffer ring 7009 */ 7010 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 7011 if (warn_msg) { 7012 DERR(vswp, "%s(%lld): no descriptor available for ring " 7013 "at 0x%llx", __func__, ldcp->ldc_id, dp); 7014 warn_msg = 0; 7015 } 7016 7017 /* nothing more we can do */ 7018 status = LDC_TX_NORESOURCES; 7019 goto vsw_descrsend_free_exit; 7020 } else { 7021 D2(vswp, "%s(%lld): free private descriptor found at pos " 7022 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, 7023 priv_desc); 7024 warn_msg = 1; 7025 } 7026 7027 /* copy data into the descriptor */ 7028 bufp = priv_desc->datap; 7029 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 7030 n = MBLKL(bp); 7031 bcopy(bp->b_rptr, bufp, n); 7032 bufp += n; 7033 } 7034 7035 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 7036 7037 /* create and send the in-band descp msg */ 7038 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 7039 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 7040 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 7041 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 7042 7043 mutex_enter(&ldcp->lane_out.seq_lock); 7044 ibnd_msg.hdr.seq_num = ldcp->lane_out.seq_num++; 7045 mutex_exit(&ldcp->lane_out.seq_lock); 7046 7047 /* 7048 * Copy the mem cookies describing the data from the 7049 * private region of the descriptor ring into the inband 7050 * descriptor.
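 *
 * Editor's note: each ldc_mem_cookie_t is an (addr, size) pair
 * describing one contiguous piece of the exported buffer, as filled
 * in by ldc_mem_bind_handle() and ldc_mem_nextcookie() in
 * vsw_setup_ring() below.
 *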
7051 */ 7052 for (i = 0; i < priv_desc->ncookies; i++) { 7053 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 7054 sizeof (ldc_mem_cookie_t)); 7055 } 7056 7057 ibnd_msg.hdr.desc_handle = idx; 7058 ibnd_msg.ncookies = priv_desc->ncookies; 7059 ibnd_msg.nbytes = size; 7060 7061 vsw_send_msg(ldcp, (void *)&ibnd_msg, sizeof (vnet_ibnd_desc_t)); 7062 7063 vsw_descrsend_free_exit: 7064 7065 RW_EXIT(&ldcp->lane_out.dlistrw); 7066 7067 /* free the allocated message blocks */ 7068 freemsg(mp); 7069 7070 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 7071 return (status); 7072 } 7073 7074 static void 7075 vsw_send_ver(void *arg) 7076 { 7077 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 7078 vsw_t *vswp = ldcp->ldc_vswp; 7079 lane_t *lp = &ldcp->lane_out; 7080 vio_ver_msg_t ver_msg; 7081 7082 D1(vswp, "%s enter", __func__); 7083 7084 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 7085 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 7086 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 7087 ver_msg.tag.vio_sid = ldcp->local_session; 7088 7089 ver_msg.ver_major = vsw_versions[0].ver_major; 7090 ver_msg.ver_minor = vsw_versions[0].ver_minor; 7091 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 7092 7093 lp->lstate |= VSW_VER_INFO_SENT; 7094 lp->ver_major = ver_msg.ver_major; 7095 lp->ver_minor = ver_msg.ver_minor; 7096 7097 DUMP_TAG(ver_msg.tag); 7098 7099 vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t)); 7100 7101 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 7102 } 7103 7104 static void 7105 vsw_send_attr(vsw_ldc_t *ldcp) 7106 { 7107 vsw_t *vswp = ldcp->ldc_vswp; 7108 lane_t *lp = &ldcp->lane_out; 7109 vnet_attr_msg_t attr_msg; 7110 7111 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 7112 7113 /* 7114 * Subtype is set to INFO by default 7115 */ 7116 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 7117 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 7118 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 7119 attr_msg.tag.vio_sid = ldcp->local_session; 7120 7121 /* payload copied from default settings for lane */ 7122 attr_msg.mtu = lp->mtu; 7123 attr_msg.addr_type = lp->addr_type; 7124 attr_msg.xfer_mode = lp->xfer_mode; 7125 attr_msg.ack_freq = lp->ack_freq; 7126 7127 READ_ENTER(&vswp->if_lockrw); 7128 bcopy(&(vswp->if_addr), &(attr_msg.addr), ETHERADDRL); 7129 RW_EXIT(&vswp->if_lockrw); 7130 7131 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 7132 7133 DUMP_TAG(attr_msg.tag); 7134 7135 vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t)); 7136 7137 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 7138 } 7139 7140 /* 7141 * Create dring info msg (which also results in the creation of 7142 * a dring). 7143 */ 7144 static vio_dring_reg_msg_t * 7145 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 7146 { 7147 vio_dring_reg_msg_t *mp; 7148 dring_info_t *dp; 7149 vsw_t *vswp = ldcp->ldc_vswp; 7150 7151 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 7152 7153 /* 7154 * If we can't create a dring, obviously no point sending 7155 * a message.
7156 */ 7157 if ((dp = vsw_create_dring(ldcp)) == NULL) 7158 return (NULL); 7159 7160 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 7161 7162 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 7163 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 7164 mp->tag.vio_subtype_env = VIO_DRING_REG; 7165 mp->tag.vio_sid = ldcp->local_session; 7166 7167 /* payload */ 7168 mp->num_descriptors = dp->num_descriptors; 7169 mp->descriptor_size = dp->descriptor_size; 7170 mp->options = dp->options; 7171 mp->ncookies = dp->ncookies; 7172 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 7173 7174 mp->dring_ident = 0; 7175 7176 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 7177 7178 return (mp); 7179 } 7180 7181 static void 7182 vsw_send_dring_info(vsw_ldc_t *ldcp) 7183 { 7184 vio_dring_reg_msg_t *dring_msg; 7185 vsw_t *vswp = ldcp->ldc_vswp; 7186 7187 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 7188 7189 dring_msg = vsw_create_dring_info_pkt(ldcp); 7190 if (dring_msg == NULL) { 7191 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 7192 vswp->instance, __func__); 7193 return; 7194 } 7195 7196 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 7197 7198 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 7199 7200 vsw_send_msg(ldcp, dring_msg, 7201 sizeof (vio_dring_reg_msg_t)); 7202 7203 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 7204 7205 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 7206 } 7207 7208 static void 7209 vsw_send_rdx(vsw_ldc_t *ldcp) 7210 { 7211 vsw_t *vswp = ldcp->ldc_vswp; 7212 vio_rdx_msg_t rdx_msg; 7213 7214 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 7215 7216 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 7217 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 7218 rdx_msg.tag.vio_subtype_env = VIO_RDX; 7219 rdx_msg.tag.vio_sid = ldcp->local_session; 7220 7221 ldcp->lane_out.lstate |= VSW_RDX_INFO_SENT; 7222 7223 DUMP_TAG(rdx_msg.tag); 7224 7225 vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t)); 7226 7227 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 7228 } 7229 7230 /* 7231 * Generic routine to send message out over ldc channel. 7232 */ 7233 static void 7234 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size) 7235 { 7236 int rv, retries = vsw_wretries; 7237 size_t msglen = size; 7238 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 7239 vsw_t *vswp = ldcp->ldc_vswp; 7240 7241 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 7242 ldcp->ldc_id, size); 7243 7244 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 7245 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 7246 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 7247 7248 mutex_enter(&ldcp->ldc_txlock); 7249 do { /* retry on a local count so the vsw_wretries tunable is preserved */ 7250 msglen = size; 7251 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 7252 } while (rv == EWOULDBLOCK && --retries > 0); 7253 7254 if ((rv != 0) || (msglen != size)) { 7255 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) " 7256 "rv(%d) size (%d) msglen(%d)\n", ldcp->ldc_id, 7257 rv, size, msglen); 7258 } 7259 mutex_exit(&ldcp->ldc_txlock); 7260 7261 /* channel has been reset */ 7262 if (rv == ECONNRESET) { 7263 vsw_handle_reset(ldcp); 7264 } 7265 7266 D1(vswp, "vsw_send_msg (%lld) exit : sent %d bytes", 7267 ldcp->ldc_id, msglen); 7268 } 7269 7270 /* 7271 * Add an entry into FDB, for the given mac address and port_id. 7272 * Returns 0 on success, 1 on failure. 7273 * 7274 * Lock protecting FDB must be held by calling process.
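 *
 * Editor's note: KEY_HASH() (defined in vsw.h, not shown here) folds
 * the six octets of the MAC address into a single 64-bit mod_hash
 * key; conceptually something like
 *
 *	key = 0;
 *	for (i = 0; i < ETHERADDRL; i++)
 *		key = (key << 8) | addr.ether_addr_octet[i];
 *
 * so that aa:bb:cc:dd:ee:ff maps to the key 0xaabbccddeeff.
 *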
7275 */ 7276 static int 7277 vsw_add_fdb(vsw_t *vswp, vsw_port_t *port) 7278 { 7279 uint64_t addr = 0; 7280 7281 D1(vswp, "%s: enter", __func__); 7282 7283 KEY_HASH(addr, port->p_macaddr); 7284 7285 D2(vswp, "%s: key = 0x%llx", __func__, addr); 7286 7287 /* 7288 * Note: duplicate keys will be rejected by mod_hash. 7289 */ 7290 if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)addr, 7291 (mod_hash_val_t)port) != 0) { 7292 DERR(vswp, "%s: unable to add entry into fdb.", __func__); 7293 return (1); 7294 } 7295 7296 D1(vswp, "%s: exit", __func__); 7297 return (0); 7298 } 7299 7300 /* 7301 * Remove an entry from FDB. 7302 * Returns 0 on success, 1 on failure. 7303 */ 7304 static int 7305 vsw_del_fdb(vsw_t *vswp, vsw_port_t *port) 7306 { 7307 uint64_t addr = 0; 7308 7309 D1(vswp, "%s: enter", __func__); 7310 7311 KEY_HASH(addr, port->p_macaddr); 7312 7313 D2(vswp, "%s: key = 0x%llx", __func__, addr); 7314 7315 (void) mod_hash_destroy(vswp->fdb, (mod_hash_key_t)addr); 7316 7317 D1(vswp, "%s: exit", __func__); 7318 7319 return (0); 7320 } 7321 7322 /* 7323 * Search fdb for a given mac address. 7324 * Returns pointer to the entry if found, else returns NULL. 7325 */ 7326 static vsw_port_t * 7327 vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp) 7328 { 7329 uint64_t key = 0; 7330 vsw_port_t *port = NULL; 7331 7332 D1(vswp, "%s: enter", __func__); 7333 7334 KEY_HASH(key, ehp->ether_dhost); 7335 7336 D2(vswp, "%s: key = 0x%llx", __func__, key); 7337 7338 if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key, 7339 (mod_hash_val_t *)&port) != 0) { 7340 D2(vswp, "%s: no port found", __func__); 7341 return (NULL); 7342 } 7343 7344 D1(vswp, "%s: exit", __func__); 7345 7346 return (port); 7347 } 7348 7349 /* 7350 * Add or remove multicast address(es). 7351 * 7352 * Returns 0 on success, 1 on failure. 7353 */ 7354 static int 7355 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port) 7356 { 7357 mcst_addr_t *mcst_p = NULL; 7358 vsw_t *vswp = port->p_vswp; 7359 uint64_t addr = 0x0; 7360 int i; 7361 7362 D1(vswp, "%s: enter", __func__); 7363 7364 D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count); 7365 7366 mutex_enter(&vswp->mac_lock); 7367 if (vswp->mh == NULL) { 7368 mutex_exit(&vswp->mac_lock); 7369 return (1); 7370 } 7371 mutex_exit(&vswp->mac_lock); 7372 7373 for (i = 0; i < mcst_pkt->count; i++) { 7374 /* 7375 * Convert address into form that can be used 7376 * as hash table key. 7377 */ 7378 KEY_HASH(addr, mcst_pkt->mca[i]); 7379 7380 /* 7381 * Add or delete the specified address/port combination. 7382 */ 7383 if (mcst_pkt->set == 0x1) { 7384 D3(vswp, "%s: adding multicast address 0x%llx for " 7385 "port %ld", __func__, addr, port->p_instance); 7386 if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 7387 /* 7388 * Update the list of multicast 7389 * addresses contained within the 7390 * port structure to include this new 7391 * one. 7392 */ 7393 mcst_p = kmem_alloc(sizeof (mcst_addr_t), 7394 KM_NOSLEEP); 7395 if (mcst_p == NULL) { 7396 DERR(vswp, "%s: unable to alloc mem", 7397 __func__); 7398 return (1); 7399 } 7400 7401 mcst_p->nextp = NULL; 7402 mcst_p->addr = addr; 7403 7404 mutex_enter(&port->mca_lock); 7405 mcst_p->nextp = port->mcap; 7406 port->mcap = mcst_p; 7407 mutex_exit(&port->mca_lock); 7408 7409 /* 7410 * Program the address into HW.
If the addr 7411 * has already been programmed then the MAC 7412 * just increments a ref counter (which is 7413 * used when the address is being deleted) 7414 */ 7415 mutex_enter(&vswp->mac_lock); 7416 if ((vswp->mh == NULL) || 7417 mac_multicst_add(vswp->mh, 7418 (uchar_t *)&mcst_pkt->mca[i])) { 7419 mutex_exit(&vswp->mac_lock); 7420 cmn_err(CE_WARN, "!vsw%d: unable to " 7421 "add multicast address", 7422 vswp->instance); 7423 (void) vsw_del_mcst(vswp, VSW_VNETPORT, 7424 addr, port); 7425 vsw_del_addr(VSW_VNETPORT, port, addr); 7426 return (1); 7427 } 7428 mutex_exit(&vswp->mac_lock); 7429 7430 } else { 7431 DERR(vswp, "%s: error adding multicast " 7432 "address 0x%llx for port %ld", 7433 __func__, addr, port->p_instance); 7434 return (1); 7435 } 7436 } else { 7437 /* 7438 * Delete an entry from the multicast hash 7439 * table and update the address list 7440 * appropriately. 7441 */ 7442 if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 7443 D3(vswp, "%s: deleting multicast address " 7444 "0x%llx for port %ld", __func__, addr, 7445 port->p_instance); 7446 7447 vsw_del_addr(VSW_VNETPORT, port, addr); 7448 7449 /* 7450 * Remove the address from HW. The address 7451 * will actually only be removed once the ref 7452 * count within the MAC layer has dropped to 7453 * zero. I.e. we can safely call this fn even 7454 * if other ports are interested in this 7455 * address. 7456 */ 7457 mutex_enter(&vswp->mac_lock); 7458 if ((vswp->mh == NULL) || 7459 mac_multicst_remove(vswp->mh, 7460 (uchar_t *)&mcst_pkt->mca[i])) { 7461 mutex_exit(&vswp->mac_lock); 7462 cmn_err(CE_WARN, "!vsw%d: unable to " 7463 "remove multicast address", 7464 vswp->instance); 7465 return (1); 7466 } 7467 mutex_exit(&vswp->mac_lock); 7468 7469 } else { 7470 DERR(vswp, "%s: error deleting multicast " 7471 "addr 0x%llx for port %ld", 7472 __func__, addr, port->p_instance); 7473 return (1); 7474 } 7475 } 7476 } 7477 D1(vswp, "%s: exit", __func__); 7478 return (0); 7479 } 7480 7481 /* 7482 * Add a new multicast entry. 7483 * 7484 * Search hash table based on address. If match found then 7485 * update associated val (which is chain of ports), otherwise 7486 * create new key/val (addr/port) pair and insert into table. 7487 */ 7488 static int 7489 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 7490 { 7491 int dup = 0; 7492 int rv = 0; 7493 mfdb_ent_t *ment = NULL; 7494 mfdb_ent_t *tmp_ent = NULL; 7495 mfdb_ent_t *new_ent = NULL; 7496 void *tgt = NULL; 7497 7498 if (devtype == VSW_VNETPORT) { 7499 /* 7500 * Being invoked from a vnet. 7501 */ 7502 ASSERT(arg != NULL); 7503 tgt = arg; 7504 D2(NULL, "%s: port %d : address 0x%llx", __func__, 7505 ((vsw_port_t *)arg)->p_instance, addr); 7506 } else { 7507 /* 7508 * We are being invoked via the m_multicst mac entry 7509 * point. 
7510 */ 7511 D2(NULL, "%s: address 0x%llx", __func__, addr); 7512 tgt = (void *)vswp; 7513 } 7514 7515 WRITE_ENTER(&vswp->mfdbrw); 7516 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 7517 (mod_hash_val_t *)&ment) != 0) { 7518 7519 /* address not currently in table */ 7520 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 7521 ment->d_addr = (void *)tgt; 7522 ment->d_type = devtype; 7523 ment->nextp = NULL; 7524 7525 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr, 7526 (mod_hash_val_t)ment) != 0) { 7527 DERR(vswp, "%s: hash table insertion failed", __func__); 7528 kmem_free(ment, sizeof (mfdb_ent_t)); 7529 rv = 1; 7530 } else { 7531 D2(vswp, "%s: added initial entry for 0x%llx to " 7532 "table", __func__, addr); 7533 } 7534 } else { 7535 /* 7536 * Address in table. Check to see if specified port 7537 * is already associated with the address. If not add 7538 * it now. 7539 */ 7540 tmp_ent = ment; 7541 while (tmp_ent != NULL) { 7542 if (tmp_ent->d_addr == (void *)tgt) { 7543 if (devtype == VSW_VNETPORT) { 7544 DERR(vswp, "%s: duplicate port entry " 7545 "found for portid %ld and key " 7546 "0x%llx", __func__, 7547 ((vsw_port_t *)arg)->p_instance, 7548 addr); 7549 } else { 7550 DERR(vswp, "%s: duplicate entry found " 7551 "for key 0x%llx", 7552 __func__, addr); 7553 } 7554 rv = 1; 7555 dup = 1; 7556 break; 7557 } 7558 tmp_ent = tmp_ent->nextp; 7559 } 7560 7561 /* 7562 * Port not on list, so add it to the end now. 7563 */ 7564 if (0 == dup) { 7565 D2(vswp, "%s: added entry for 0x%llx to table", 7566 __func__, addr); 7567 new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 7568 new_ent->d_addr = (void *)tgt; 7569 new_ent->d_type = devtype; 7570 new_ent->nextp = NULL; 7571 7572 tmp_ent = ment; 7573 while (tmp_ent->nextp != NULL) 7574 tmp_ent = tmp_ent->nextp; 7575 7576 tmp_ent->nextp = new_ent; 7577 } 7578 } 7579 7580 RW_EXIT(&vswp->mfdbrw); 7581 return (rv); 7582 } 7583 7584 /* 7585 * Remove a multicast entry from the hashtable. 7586 * 7587 * Search hash table based on address. If match found, scan 7588 * list of ports associated with address. If specified port 7589 * found remove it from list. 7590 */ 7591 static int 7592 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 7593 { 7594 mfdb_ent_t *ment = NULL; 7595 mfdb_ent_t *curr_p, *prev_p; 7596 void *tgt = NULL; 7597 7598 D1(vswp, "%s: enter", __func__); 7599 7600 if (devtype == VSW_VNETPORT) { 7601 tgt = (vsw_port_t *)arg; 7602 D2(vswp, "%s: removing port %d from mFDB for address" 7603 " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, 7604 addr); 7605 } else { 7606 D2(vswp, "%s: removing entry", __func__); 7607 tgt = (void *)vswp; 7608 } 7609 7610 WRITE_ENTER(&vswp->mfdbrw); 7611 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 7612 (mod_hash_val_t *)&ment) != 0) { 7613 D2(vswp, "%s: address 0x%llx not in table", __func__, addr); 7614 RW_EXIT(&vswp->mfdbrw); 7615 return (1); 7616 } 7617 7618 prev_p = curr_p = ment; 7619 7620 while (curr_p != NULL) { 7621 if (curr_p->d_addr == (void *)tgt) { 7622 if (devtype == VSW_VNETPORT) { 7623 D2(vswp, "%s: port %d found", __func__, 7624 ((vsw_port_t *)tgt)->p_instance); 7625 } else { 7626 D2(vswp, "%s: instance found", __func__); 7627 } 7628 7629 if (prev_p == curr_p) { 7630 /* 7631 * head of list, if no other element is in 7632 * list then destroy this entry, otherwise 7633 * just replace it with updated value.
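 *
 * Editor's illustration (hypothetical entries A and B chained under
 * hash key K):
 *
 *	[K] -> A -> B	becomes	[K] -> B    (mod_hash_replace)
 *	[K] -> A	becomes	(unmapped)  (mod_hash_destroy)
 *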
7634 */ 7635 ment = curr_p->nextp; 7636 kmem_free(curr_p, sizeof (mfdb_ent_t)); 7637 if (ment == NULL) { 7638 (void) mod_hash_destroy(vswp->mfdb, 7639 (mod_hash_key_t)addr); 7640 } else { 7641 (void) mod_hash_replace(vswp->mfdb, 7642 (mod_hash_key_t)addr, 7643 (mod_hash_val_t)ment); 7644 } 7645 } else { 7646 /* 7647 * Not head of list, no need to do 7648 * replacement, just adjust list pointers. 7649 */ 7650 prev_p->nextp = curr_p->nextp; 7651 kmem_free(curr_p, sizeof (mfdb_ent_t)); 7652 } 7653 break; 7654 } 7655 7656 prev_p = curr_p; 7657 curr_p = curr_p->nextp; 7658 } 7659 7660 RW_EXIT(&vswp->mfdbrw); 7661 7662 D1(vswp, "%s: exit", __func__); 7663 7664 return (0); 7665 } 7666 7667 /* 7668 * Port is being deleted, but has registered an interest in one 7669 * or more multicast groups. Using the list of addresses maintained 7670 * within the port structure find the appropriate entry in the hash 7671 * table and remove this port from the list of interested ports. 7672 */ 7673 static void 7674 vsw_del_mcst_port(vsw_port_t *port) 7675 { 7676 mcst_addr_t *mcst_p = NULL; 7677 vsw_t *vswp = port->p_vswp; 7678 7679 D1(vswp, "%s: enter", __func__); 7680 7681 mutex_enter(&port->mca_lock); 7682 while (port->mcap != NULL) { 7683 (void) vsw_del_mcst(vswp, VSW_VNETPORT, 7684 port->mcap->addr, port); 7685 7686 mcst_p = port->mcap->nextp; 7687 kmem_free(port->mcap, sizeof (mcst_addr_t)); 7688 port->mcap = mcst_p; 7689 } 7690 mutex_exit(&port->mca_lock); 7691 7692 D1(vswp, "%s: exit", __func__); 7693 } 7694 7695 /* 7696 * This vsw instance is detaching, but has registered an interest in one 7697 * or more multicast groups. Using the list of addresses maintained 7698 * within the vsw structure find the appropriate entry in the hash 7699 * table and remove this instance from the list of interested ports. 7700 */ 7701 static void 7702 vsw_del_mcst_vsw(vsw_t *vswp) 7703 { 7704 mcst_addr_t *next_p = NULL; 7705 7706 D1(vswp, "%s: enter", __func__); 7707 7708 mutex_enter(&vswp->mca_lock); 7709 7710 while (vswp->mcap != NULL) { 7711 DERR(vswp, "%s: deleting addr 0x%llx", 7712 __func__, vswp->mcap->addr); 7713 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, 7714 vswp->mcap->addr, NULL); 7715 7716 next_p = vswp->mcap->nextp; 7717 kmem_free(vswp->mcap, sizeof (mcst_addr_t)); 7718 vswp->mcap = next_p; 7719 } 7720 7721 vswp->mcap = NULL; 7722 mutex_exit(&vswp->mca_lock); 7723 7724 D1(vswp, "%s: exit", __func__); 7725 } 7726 7727 7728 /* 7729 * Remove the specified address from the list of addresses maintained 7730 * in this port node.
7731 */ 7732 static void 7733 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 7734 { 7735 vsw_t *vswp = NULL; 7736 vsw_port_t *port = NULL; 7737 mcst_addr_t *prev_p = NULL; 7738 mcst_addr_t *curr_p = NULL; 7739 7740 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 7741 __func__, devtype, addr); 7742 7743 if (devtype == VSW_VNETPORT) { 7744 port = (vsw_port_t *)arg; 7745 mutex_enter(&port->mca_lock); 7746 prev_p = curr_p = port->mcap; 7747 } else { 7748 vswp = (vsw_t *)arg; 7749 mutex_enter(&vswp->mca_lock); 7750 prev_p = curr_p = vswp->mcap; 7751 } 7752 7753 while (curr_p != NULL) { 7754 if (curr_p->addr == addr) { 7755 D2(NULL, "%s: address found", __func__); 7756 /* match found */ 7757 if (prev_p == curr_p) { 7758 /* list head */ 7759 if (devtype == VSW_VNETPORT) 7760 port->mcap = curr_p->nextp; 7761 else 7762 vswp->mcap = curr_p->nextp; 7763 } else { 7764 prev_p->nextp = curr_p->nextp; 7765 } 7766 kmem_free(curr_p, sizeof (mcst_addr_t)); 7767 break; 7768 } else { 7769 prev_p = curr_p; 7770 curr_p = curr_p->nextp; 7771 } 7772 } 7773 7774 if (devtype == VSW_VNETPORT) 7775 mutex_exit(&port->mca_lock); 7776 else 7777 mutex_exit(&vswp->mca_lock); 7778 7779 D1(NULL, "%s: exit", __func__); 7780 } 7781 7782 /* 7783 * Creates a descriptor ring (dring) and links it into the 7784 * link of outbound drings for this channel. 7785 * 7786 * Returns NULL if creation failed. 7787 */ 7788 static dring_info_t * 7789 vsw_create_dring(vsw_ldc_t *ldcp) 7790 { 7791 vsw_private_desc_t *priv_addr = NULL; 7792 vsw_t *vswp = ldcp->ldc_vswp; 7793 ldc_mem_info_t minfo; 7794 dring_info_t *dp, *tp; 7795 int i; 7796 7797 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 7798 7799 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 7800 7801 /* create public section of ring */ 7802 if ((ldc_mem_dring_create(VSW_RING_NUM_EL, 7803 VSW_PUB_SIZE, &dp->handle)) != 0) { 7804 7805 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 7806 "failed", ldcp->ldc_id); 7807 goto create_fail_exit; 7808 } 7809 7810 ASSERT(dp->handle != NULL); 7811 7812 /* 7813 * Get the base address of the public section of the ring. 7814 */ 7815 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 7816 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 7817 ldcp->ldc_id); 7818 goto dring_fail_exit; 7819 } else { 7820 ASSERT(minfo.vaddr != 0); 7821 dp->pub_addr = minfo.vaddr; 7822 } 7823 7824 dp->num_descriptors = VSW_RING_NUM_EL; 7825 dp->descriptor_size = VSW_PUB_SIZE; 7826 dp->options = VIO_TX_DRING; 7827 dp->ncookies = 1; /* guaranteed by ldc */ 7828 7829 /* 7830 * create private portion of ring 7831 */ 7832 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 7833 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP); 7834 7835 if (vsw_setup_ring(ldcp, dp)) { 7836 DERR(vswp, "%s: unable to setup ring", __func__); 7837 goto dring_fail_exit; 7838 } 7839 7840 /* haven't used any descriptors yet */ 7841 dp->end_idx = 0; 7842 dp->last_ack_recv = -1; 7843 7844 /* bind dring to the channel */ 7845 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 7846 LDC_SHADOW_MAP, LDC_MEM_RW, 7847 &dp->cookie[0], &dp->ncookies)) != 0) { 7848 DERR(vswp, "vsw_create_dring: unable to bind to channel " 7849 "%lld", ldcp->ldc_id); 7850 goto dring_fail_exit; 7851 } 7852 7853 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 7854 dp->restart_reqd = B_TRUE; 7855 7856 /* 7857 * Only ever create rings for outgoing lane. Link it onto 7858 * end of list. 
7859 */ 7860 WRITE_ENTER(&ldcp->lane_out.dlistrw); 7861 if (ldcp->lane_out.dringp == NULL) { 7862 D2(vswp, "vsw_create_dring: adding first outbound ring"); 7863 ldcp->lane_out.dringp = dp; 7864 } else { 7865 tp = ldcp->lane_out.dringp; 7866 while (tp->next != NULL) 7867 tp = tp->next; 7868 7869 tp->next = dp; 7870 } 7871 RW_EXIT(&ldcp->lane_out.dlistrw); 7872 7873 return (dp); 7874 7875 dring_fail_exit: 7876 (void) ldc_mem_dring_destroy(dp->handle); 7877 7878 create_fail_exit: 7879 if (dp->priv_addr != NULL) { 7880 priv_addr = dp->priv_addr; 7881 for (i = 0; i < VSW_RING_NUM_EL; i++) { 7882 if (priv_addr->memhandle != NULL) 7883 (void) ldc_mem_free_handle( 7884 priv_addr->memhandle); 7885 priv_addr++; 7886 } 7887 kmem_free(dp->priv_addr, 7888 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL)); 7889 } 7890 mutex_destroy(&dp->dlock); 7891 7892 kmem_free(dp, sizeof (dring_info_t)); 7893 return (NULL); 7894 } 7895 7896 /* 7897 * Create a ring consisting of just a private portion and link 7898 * it into the list of rings for the outbound lane. 7899 * 7900 * These type of rings are used primarily for temporary data 7901 * storage (i.e. as data buffers). 7902 */ 7903 void 7904 vsw_create_privring(vsw_ldc_t *ldcp) 7905 { 7906 dring_info_t *dp, *tp; 7907 vsw_t *vswp = ldcp->ldc_vswp; 7908 7909 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 7910 7911 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 7912 7913 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 7914 7915 /* no public section */ 7916 dp->pub_addr = NULL; 7917 7918 dp->priv_addr = kmem_zalloc((sizeof (vsw_private_desc_t) * 7919 VSW_RING_NUM_EL), KM_SLEEP); 7920 7921 dp->num_descriptors = VSW_RING_NUM_EL; 7922 7923 if (vsw_setup_ring(ldcp, dp)) { 7924 DERR(vswp, "%s: setup of ring failed", __func__); 7925 kmem_free(dp->priv_addr, 7926 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL)); 7927 mutex_destroy(&dp->dlock); 7928 kmem_free(dp, sizeof (dring_info_t)); 7929 return; 7930 } 7931 7932 /* haven't used any descriptors yet */ 7933 dp->end_idx = 0; 7934 7935 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 7936 dp->restart_reqd = B_TRUE; 7937 7938 /* 7939 * Only ever create rings for outgoing lane. Link it onto 7940 * end of list. 7941 */ 7942 WRITE_ENTER(&ldcp->lane_out.dlistrw); 7943 if (ldcp->lane_out.dringp == NULL) { 7944 D2(vswp, "%s: adding first outbound privring", __func__); 7945 ldcp->lane_out.dringp = dp; 7946 } else { 7947 tp = ldcp->lane_out.dringp; 7948 while (tp->next != NULL) 7949 tp = tp->next; 7950 7951 tp->next = dp; 7952 } 7953 RW_EXIT(&ldcp->lane_out.dlistrw); 7954 7955 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 7956 } 7957 7958 /* 7959 * Setup the descriptors in the dring. Returns 0 on success, 1 on 7960 * failure. 7961 */ 7962 int 7963 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 7964 { 7965 vnet_public_desc_t *pub_addr = NULL; 7966 vsw_private_desc_t *priv_addr = NULL; 7967 vsw_t *vswp = ldcp->ldc_vswp; 7968 uint64_t *tmpp; 7969 uint64_t offset = 0; 7970 uint32_t ncookies = 0; 7971 static char *name = "vsw_setup_ring"; 7972 int i, j, nc, rv; 7973 7974 priv_addr = dp->priv_addr; 7975 pub_addr = dp->pub_addr; 7976 7977 /* public section may be null but private should never be */ 7978 ASSERT(priv_addr != NULL); 7979 7980 /* 7981 * Allocate the region of memory which will be used to hold 7982 * the data the descriptors will refer to. 
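 *
 * Editor's example (hypothetical sizes): if VSW_RING_NUM_EL were 512
 * and VSW_RING_EL_DATA_SZ 2048 bytes, a single 1MB region would be
 * allocated and descriptor i's private datap would simply point at
 * byte offset i * VSW_RING_EL_DATA_SZ within it, as set up by the
 * tmpp/offset arithmetic below.
 *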
7983 */ 7984 dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ); 7985 dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 7986 7987 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 7988 dp->data_sz, dp->data_addr); 7989 7990 tmpp = (uint64_t *)dp->data_addr; 7991 offset = VSW_RING_EL_DATA_SZ / sizeof (tmpp); 7992 7993 /* 7994 * Initialise some of the private and public (if they exist) 7995 * descriptor fields. 7996 */ 7997 for (i = 0; i < VSW_RING_NUM_EL; i++) { 7998 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 7999 8000 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 8001 &priv_addr->memhandle)) != 0) { 8002 DERR(vswp, "%s: alloc mem handle failed", name); 8003 goto setup_ring_cleanup; 8004 } 8005 8006 priv_addr->datap = (void *)tmpp; 8007 8008 rv = ldc_mem_bind_handle(priv_addr->memhandle, 8009 (caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ, 8010 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 8011 &(priv_addr->memcookie[0]), &ncookies); 8012 if (rv != 0) { 8013 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 8014 "(rv %d)", name, ldcp->ldc_id, rv); 8015 goto setup_ring_cleanup; 8016 } 8017 priv_addr->bound = 1; 8018 8019 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 8020 name, i, priv_addr->memcookie[0].addr, 8021 priv_addr->memcookie[0].size); 8022 8023 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 8024 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 8025 "invalid num of cookies (%d) for size 0x%llx", 8026 name, ldcp->ldc_id, ncookies, 8027 VSW_RING_EL_DATA_SZ); 8028 8029 goto setup_ring_cleanup; 8030 } else { 8031 for (j = 1; j < ncookies; j++) { 8032 rv = ldc_mem_nextcookie(priv_addr->memhandle, 8033 &(priv_addr->memcookie[j])); 8034 if (rv != 0) { 8035 DERR(vswp, "%s: ldc_mem_nextcookie " 8036 "failed rv (%d)", name, rv); 8037 goto setup_ring_cleanup; 8038 } 8039 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 8040 "size 0x%llx", name, j, 8041 priv_addr->memcookie[j].addr, 8042 priv_addr->memcookie[j].size); 8043 } 8044 8045 } 8046 priv_addr->ncookies = ncookies; 8047 priv_addr->dstate = VIO_DESC_FREE; 8048 8049 if (pub_addr != NULL) { 8050 8051 /* link pub and private sides */ 8052 priv_addr->descp = pub_addr; 8053 8054 pub_addr->ncookies = priv_addr->ncookies; 8055 8056 for (nc = 0; nc < pub_addr->ncookies; nc++) { 8057 bcopy(&priv_addr->memcookie[nc], 8058 &pub_addr->memcookie[nc], 8059 sizeof (ldc_mem_cookie_t)); 8060 } 8061 8062 pub_addr->hdr.dstate = VIO_DESC_FREE; 8063 pub_addr++; 8064 } 8065 8066 /* 8067 * move to next element in the dring and the next 8068 * position in the data buffer. 8069 */ 8070 priv_addr++; 8071 tmpp += offset; 8072 } 8073 8074 return (0); 8075 8076 setup_ring_cleanup: 8077 priv_addr = dp->priv_addr; 8078 8079 for (j = 0; j < i; j++) { 8080 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 8081 (void) ldc_mem_free_handle(priv_addr->memhandle); 8082 8083 mutex_destroy(&priv_addr->dstate_lock); 8084 8085 priv_addr++; 8086 } 8087 kmem_free(dp->data_addr, dp->data_sz); 8088 8089 return (1); 8090 } 8091 8092 /* 8093 * Searches the private section of a ring for a free descriptor, 8094 * starting at the location of the last free descriptor found 8095 * previously. 8096 * 8097 * Returns 0 if free descriptor is available, and updates state 8098 * of private descriptor to VIO_DESC_READY, otherwise returns 1. 8099 * 8100 * FUTURE: might need to return contiguous range of descriptors 8101 * as dring info msg assumes all will be contiguous. 
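 *
 * Editor's usage sketch, as on both transmit paths above (on the
 * in-band path, idx is later echoed back to us in the peer's ACK as
 * desc_handle):
 *
 *	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0)
 *		return (LDC_TX_NORESOURCES);	(ring is full)
 *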
8102 */ 8103 static int 8104 vsw_dring_find_free_desc(dring_info_t *dringp, 8105 vsw_private_desc_t **priv_p, int *idx) 8106 { 8107 vsw_private_desc_t *addr = NULL; 8108 int num = VSW_RING_NUM_EL; 8109 int ret = 1; 8110 8111 D1(NULL, "%s enter\n", __func__); 8112 8113 ASSERT(dringp->priv_addr != NULL); 8114 8115 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 8116 __func__, dringp, dringp->end_idx); 8117 8118 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 8119 8120 mutex_enter(&addr->dstate_lock); 8121 if (addr->dstate == VIO_DESC_FREE) { 8122 addr->dstate = VIO_DESC_READY; 8123 *priv_p = addr; 8124 *idx = dringp->end_idx; 8125 dringp->end_idx = (dringp->end_idx + 1) % num; 8126 ret = 0; 8127 8128 } 8129 mutex_exit(&addr->dstate_lock); 8130 8131 /* ring full */ 8132 if (ret == 1) { 8133 D2(NULL, "%s: no desp free: started at %d", __func__, 8134 dringp->end_idx); 8135 } 8136 8137 D1(NULL, "%s: exit\n", __func__); 8138 8139 return (ret); 8140 } 8141 8142 /* 8143 * Map from a dring identifier to the ring itself. Returns 8144 * pointer to ring or NULL if no match found. 8145 * 8146 * Should be called with dlistrw rwlock held as reader. 8147 */ 8148 static dring_info_t * 8149 vsw_ident2dring(lane_t *lane, uint64_t ident) 8150 { 8151 dring_info_t *dp = NULL; 8152 8153 if ((dp = lane->dringp) == NULL) { 8154 return (NULL); 8155 } else { 8156 if (dp->ident == ident) 8157 return (dp); 8158 8159 while (dp != NULL) { 8160 if (dp->ident == ident) 8161 break; 8162 dp = dp->next; 8163 } 8164 } 8165 8166 return (dp); 8167 } 8168 8169 /* 8170 * Set the default lane attributes. These are copied into 8171 * the attr msg we send to our peer. If they are not acceptable 8172 * then (currently) the handshake ends. 8173 */ 8174 static void 8175 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 8176 { 8177 bzero(lp, sizeof (lane_t)); 8178 8179 READ_ENTER(&vswp->if_lockrw); 8180 ether_copy(&(vswp->if_addr), &(lp->addr)); 8181 RW_EXIT(&vswp->if_lockrw); 8182 8183 lp->mtu = VSW_MTU; 8184 lp->addr_type = ADDR_TYPE_MAC; 8185 lp->xfer_mode = VIO_DRING_MODE; 8186 lp->ack_freq = 0; /* for shared mode */ 8187 8188 mutex_enter(&lp->seq_lock); 8189 lp->seq_num = VNET_ISS; 8190 mutex_exit(&lp->seq_lock); 8191 } 8192 8193 /* 8194 * Verify that the attributes are acceptable. 8195 * 8196 * FUTURE: If some attributes are not acceptable, change them 8197 * our desired values. 8198 */ 8199 static int 8200 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port) 8201 { 8202 int ret = 0; 8203 8204 D1(NULL, "vsw_check_attr enter\n"); 8205 8206 /* 8207 * Note we currently only support in-band descriptors 8208 * and descriptor rings, not packet based transfer (VIO_PKT_MODE) 8209 */ 8210 if ((pkt->xfer_mode != VIO_DESC_MODE) && 8211 (pkt->xfer_mode != VIO_DRING_MODE)) { 8212 D2(NULL, "vsw_check_attr: unknown mode %x\n", 8213 pkt->xfer_mode); 8214 ret = 1; 8215 } 8216 8217 /* Only support MAC addresses at moment. */ 8218 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 8219 D2(NULL, "vsw_check_attr: invalid addr_type %x, " 8220 "or address 0x%llx\n", pkt->addr_type, 8221 pkt->addr); 8222 ret = 1; 8223 } 8224 8225 /* 8226 * MAC address supplied by device should match that stored 8227 * in the vsw-port OBP node. Need to decide what to do if they 8228 * don't match, for the moment just warn but don't fail. 

/*
 * Map from a dring identifier to the ring itself. Returns
 * a pointer to the ring or NULL if no match is found.
 *
 * Should be called with dlistrw rwlock held as reader.
 */
static dring_info_t *
vsw_ident2dring(lane_t *lane, uint64_t ident)
{
	dring_info_t	*dp;

	for (dp = lane->dringp; dp != NULL; dp = dp->next) {
		if (dp->ident == ident)
			break;
	}

	return (dp);
}

/*
 * Set the default lane attributes. These are copied into
 * the attr msg we send to our peer. If they are not acceptable
 * then (currently) the handshake ends.
 */
static void
vsw_set_lane_attr(vsw_t *vswp, lane_t *lp)
{
	bzero(lp, sizeof (lane_t));

	READ_ENTER(&vswp->if_lockrw);
	ether_copy(&(vswp->if_addr), &(lp->addr));
	RW_EXIT(&vswp->if_lockrw);

	lp->mtu = VSW_MTU;
	lp->addr_type = ADDR_TYPE_MAC;
	lp->xfer_mode = VIO_DRING_MODE;
	lp->ack_freq = 0;	/* for shared mode */

	mutex_enter(&lp->seq_lock);
	lp->seq_num = VNET_ISS;
	mutex_exit(&lp->seq_lock);
}

/*
 * Verify that the attributes are acceptable.
 *
 * FUTURE: If some attributes are not acceptable, change them
 * to our desired values.
 */
static int
vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port)
{
	int	ret = 0;

	D1(NULL, "vsw_check_attr enter\n");

	/*
	 * Note we currently only support in-band descriptors
	 * and descriptor rings, not packet based transfer (VIO_PKT_MODE)
	 */
	if ((pkt->xfer_mode != VIO_DESC_MODE) &&
	    (pkt->xfer_mode != VIO_DRING_MODE)) {
		D2(NULL, "vsw_check_attr: unknown mode %x\n",
		    pkt->xfer_mode);
		ret = 1;
	}

	/* Only support MAC addresses at the moment. */
	if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) {
		D2(NULL, "vsw_check_attr: invalid addr_type %x, "
		    "or address 0x%llx\n", pkt->addr_type,
		    pkt->addr);
		ret = 1;
	}

	/*
	 * The MAC address supplied by the device should match that
	 * stored in the vsw-port OBP node. Need to decide what to do
	 * if they don't match; for the moment just warn but don't fail.
	 */
	if (bcmp(&pkt->addr, &port->p_macaddr, ETHERADDRL) != 0) {
		DERR(NULL, "vsw_check_attr: device supplied address "
		    "0x%llx doesn't match node address 0x%llx\n",
		    pkt->addr, port->p_macaddr);
	}

	/*
	 * Ack freq only makes sense in pkt mode; in shared
	 * mode the ring descriptors say whether or not to
	 * send back an ACK.
	 */
	if ((pkt->xfer_mode == VIO_DRING_MODE) &&
	    (pkt->ack_freq > 0)) {
		D2(NULL, "vsw_check_attr: non-zero ack freq "
		    "in SHM mode\n");
		ret = 1;
	}

	/*
	 * Note: for the moment we only support ETHER
	 * frames. This may change in the future.
	 */
	if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) {
		D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n",
		    pkt->mtu);
		ret = 1;
	}

	D1(NULL, "vsw_check_attr exit\n");

	return (ret);
}

/*
 * Returns 1 if there is a problem, 0 otherwise.
 */
static int
vsw_check_dring_info(vio_dring_reg_msg_t *pkt)
{
	int	ret = 0;

	D1(NULL, "vsw_check_dring_info enter\n");

	if ((pkt->num_descriptors == 0) ||
	    (pkt->descriptor_size == 0) ||
	    (pkt->ncookies != 1)) {
		DERR(NULL, "vsw_check_dring_info: invalid dring msg");
		ret = 1;
	}

	D1(NULL, "vsw_check_dring_info exit\n");

	return (ret);
}

/*
 * Returns 1 if the two memory cookies match. Otherwise returns 0.
 */
static int
vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2)
{
	if ((m1->addr != m2->addr) ||
	    (m1->size != m2->size)) {
		return (0);
	} else {
		return (1);
	}
}

/*
 * Returns 1 if the ring described in the reg message matches that
 * described by the dring_info structure. Otherwise returns 0.
 */
static int
vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg)
{
	if ((msg->descriptor_size != dp->descriptor_size) ||
	    (msg->num_descriptors != dp->num_descriptors) ||
	    (msg->ncookies != dp->ncookies) ||
	    !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) {
		return (0);
	} else {
		return (1);
	}
}

static caddr_t
vsw_print_ethaddr(uint8_t *a, char *ebuf)
{
	(void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
	    a[0], a[1], a[2], a[3], a[4], a[5]);
	return (ebuf);
}
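
/*
 * Illustrative usage (sketch, not taken from this file): the caller
 * supplies the buffer that vsw_print_ethaddr() sprintf()s into; it
 * must hold at least ETHERADDRL * 3 bytes ("xx:xx:xx:xx:xx:xx" plus
 * the terminating NUL):
 *
 *	char	ebuf[ETHERADDRL * 3];
 *
 *	cmn_err(CE_NOTE, "port addr %s",
 *	    vsw_print_ethaddr((uint8_t *)&port->p_macaddr, ebuf));
 */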

/*
 * Reset and free all the resources associated with
 * the channel.
 */
static void
vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir)
{
	dring_info_t	*dp, *dpp;
	lane_t		*lp = NULL;
	int		rv = 0;

	ASSERT(ldcp != NULL);

	D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id);

	if (dir == INBOUND) {
		D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane"
		    " of channel %lld", __func__, ldcp->ldc_id);
		lp = &ldcp->lane_in;
	} else {
		D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane"
		    " of channel %lld", __func__, ldcp->ldc_id);
		lp = &ldcp->lane_out;
	}

	lp->lstate = VSW_LANE_INACTIV;
	mutex_enter(&lp->seq_lock);
	lp->seq_num = VNET_ISS;
	mutex_exit(&lp->seq_lock);
	if (lp->dringp) {
		if (dir == INBOUND) {
			WRITE_ENTER(&lp->dlistrw);
			dp = lp->dringp;
			while (dp != NULL) {
				dpp = dp->next;
				if (dp->handle != NULL)
					(void) ldc_mem_dring_unmap(dp->handle);
				kmem_free(dp, sizeof (dring_info_t));
				dp = dpp;
			}
			RW_EXIT(&lp->dlistrw);
		} else {
			/*
			 * Unbind and destroy the exported dring, and
			 * free the dring struct itself.
			 */
			WRITE_ENTER(&lp->dlistrw);
			dp = lp->dringp;
			rv = vsw_free_ring(dp);
			RW_EXIT(&lp->dlistrw);
		}
		if (rv == 0) {
			lp->dringp = NULL;
		}
	}

	D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
}
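
/*
 * Illustrative sketch (assumed caller, not taken from this file): on
 * a channel reset both lanes are typically torn down before the
 * handshake is restarted:
 *
 *	vsw_free_lane_resources(ldcp, INBOUND);
 *	vsw_free_lane_resources(ldcp, OUTBOUND);
 */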

/*
 * Free the ring and all associated resources.
 *
 * Should be called with dlistrw rwlock held as writer.
 */
static int
vsw_free_ring(dring_info_t *dp)
{
	vsw_private_desc_t	*paddr = NULL;
	dring_info_t		*dpp;
	int			i, rv = 1;

	while (dp != NULL) {
		mutex_enter(&dp->dlock);
		dpp = dp->next;
		if (dp->priv_addr != NULL) {
			/*
			 * First unbind and free the memory handles
			 * stored in each descriptor within the ring.
			 */
			for (i = 0; i < VSW_RING_NUM_EL; i++) {
				paddr = (vsw_private_desc_t *)
				    dp->priv_addr + i;
				if (paddr->memhandle != NULL) {
					if (paddr->bound == 1) {
						rv = ldc_mem_unbind_handle(
						    paddr->memhandle);

						if (rv != 0) {
							DERR(NULL, "error "
							    "unbinding handle "
							    "for ring 0x%llx "
							    "at pos %d",
							    dp, i);
							mutex_exit(&dp->dlock);
							return (rv);
						}
						paddr->bound = 0;
					}

					rv = ldc_mem_free_handle(
					    paddr->memhandle);
					if (rv != 0) {
						DERR(NULL, "error freeing "
						    "handle for ring "
						    "0x%llx at pos %d",
						    dp, i);
						mutex_exit(&dp->dlock);
						return (rv);
					}
					paddr->memhandle = NULL;
				}
				mutex_destroy(&paddr->dstate_lock);
			}
			kmem_free(dp->priv_addr, (sizeof (vsw_private_desc_t)
			    * VSW_RING_NUM_EL));
		}

		/*
		 * Now unbind and destroy the ring itself.
		 */
		if (dp->handle != NULL) {
			(void) ldc_mem_dring_unbind(dp->handle);
			(void) ldc_mem_dring_destroy(dp->handle);
		}

		if (dp->data_addr != NULL) {
			kmem_free(dp->data_addr, dp->data_sz);
		}

		mutex_exit(&dp->dlock);
		mutex_destroy(&dp->dlock);
		mutex_destroy(&dp->restart_lock);
		kmem_free(dp, sizeof (dring_info_t));

		dp = dpp;
	}
	return (0);
}

/*
 * Debugging routines
 */
static void
display_state(void)
{
	vsw_t		*vswp;
	vsw_port_list_t	*plist;
	vsw_port_t	*port;
	vsw_ldc_list_t	*ldcl;
	vsw_ldc_t	*ldcp;

	cmn_err(CE_NOTE, "***** system state *****");

	for (vswp = vsw_head; vswp; vswp = vswp->next) {
		plist = &vswp->plist;
		READ_ENTER(&plist->lockrw);
		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
		    vswp->instance, plist->num_ports);

		for (port = plist->head; port != NULL; port = port->p_next) {
			ldcl = &port->p_ldclist;
			cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
			    port->p_instance, ldcl->num_ldcs);
			READ_ENTER(&ldcl->lockrw);
			ldcp = ldcl->head;
			for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
				cmn_err(CE_CONT, "chan %lu : dev %d : "
				    "status %d : phase %u\n",
				    ldcp->ldc_id, ldcp->dev_class,
				    ldcp->ldc_status, ldcp->hphase);
				cmn_err(CE_CONT, "chan %lu : lsession %lu : "
				    "psession %lu\n",
				    ldcp->ldc_id,
				    ldcp->local_session,
				    ldcp->peer_session);

				cmn_err(CE_CONT, "Inbound lane:\n");
				display_lane(&ldcp->lane_in);
				cmn_err(CE_CONT, "Outbound lane:\n");
				display_lane(&ldcp->lane_out);
			}
			RW_EXIT(&ldcl->lockrw);
		}
		RW_EXIT(&plist->lockrw);
	}
	cmn_err(CE_NOTE, "***** system state *****");
}

static void
display_lane(lane_t *lp)
{
	dring_info_t	*drp;

	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
	    lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
	    lp->addr_type, lp->addr, lp->xfer_mode);
	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);

	cmn_err(CE_CONT, "Dring info:\n");
	for (drp = lp->dringp; drp != NULL; drp = drp->next) {
		cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
		    drp->num_descriptors, drp->descriptor_size);
		cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
		cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
		    (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
		cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
		    drp->ident, drp->end_idx);
		display_ring(drp);
	}
}

static void
display_ring(dring_info_t *dringp)
{
	uint64_t		i;
	uint64_t		priv_count = 0;
	uint64_t		pub_count = 0;
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;

	for (i = 0; i < VSW_RING_NUM_EL; i++) {
		if (dringp->pub_addr != NULL) {
			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;

			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
				pub_count++;
		}

		if (dringp->priv_addr != NULL) {
			priv_addr =
			    (vsw_private_desc_t *)dringp->priv_addr + i;

			if (priv_addr->dstate == VIO_DESC_FREE)
				priv_count++;
		}
	}
	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
	    i, priv_count, pub_count);
}
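
/*
 * Note (summary, not taken from this file): display_ring() above only
 * counts descriptors in the VIO_DESC_FREE state; entries that are
 * mid-transfer sit in other states (e.g. VIO_DESC_READY once a sender
 * has claimed them via vsw_dring_find_free_desc()), so the private
 * and public free counts can legitimately differ while traffic is in
 * flight.
 */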

static void
dump_flags(uint64_t state)
{
	int	i;

	typedef struct flag_name {
		int	flag_val;
		char	*flag_name;
	} flag_name_t;

	flag_name_t	flags[] = {
		{ VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT" },
		{ VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV" },
		{ VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV" },
		{ VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT" },
		{ VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV" },
		{ VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT" },
		{ VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT" },
		{ VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV" },
		{ VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT" },
		{ VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV" },
		{ VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT" },
		{ VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV" },
		{ VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT" },
		{ VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV" },
		{ VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT" },
		{ VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV" },
		{ VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT" },
		{ VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV" },
		{ VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT" },
		{ VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV" },
		{ VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT" },
		{ VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV" },
		{ VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT" },
		{ VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV" },
		{ VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT" },
		{ VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV" },
		{ VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT" },
		{ VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV" },
		{ VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT" },
		{ VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV" },
		{ VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE" }
	};

	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
	for (i = 0; i < sizeof (flags) / sizeof (flag_name_t); i++) {
		if (state & flags[i].flag_val)
			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
	}
}
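
/*
 * Illustrative sketch (hypothetical debug call site, not taken from
 * this file): the routines above are typically invoked by hand from
 * debug code paths, e.g. after a handshake failure:
 *
 *	display_state();
 *	dump_flags(ldcp->lane_out.lstate);
 */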