/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
/*
 * Function prototypes.
 */
static int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
static int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
static int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *);
static int vsw_get_physaddr(vsw_t *);
static int vsw_setup_switching(vsw_t *);
static int vsw_setup_layer2(vsw_t *);
static int vsw_setup_layer3(vsw_t *);

/* MAC Ring table functions. */
static void vsw_mac_ring_tbl_init(vsw_t *vswp);
static void vsw_mac_ring_tbl_destroy(vsw_t *vswp);
static void vsw_queue_worker(vsw_mac_ring_t *rrp);
static void vsw_queue_stop(vsw_queue_t *vqp);
static vsw_queue_t *vsw_queue_create();
static void vsw_queue_destroy(vsw_queue_t *vqp);

/* MAC layer routines */
static mac_resource_handle_t vsw_mac_ring_add_cb(void *arg,
    mac_resource_t *mrp);
static int vsw_get_hw_maddr(vsw_t *);
static int vsw_set_hw(vsw_t *, vsw_port_t *);
static int vsw_set_hw_promisc(vsw_t *, vsw_port_t *);
static int vsw_unset_hw(vsw_t *, vsw_port_t *);
static int vsw_unset_hw_promisc(vsw_t *, vsw_port_t *);
static int vsw_reconfig_hw(vsw_t *);
static int vsw_mac_attach(vsw_t *vswp);
static void vsw_mac_detach(vsw_t *vswp);

static void vsw_rx_queue_cb(void *, mac_resource_handle_t, mblk_t *);
static void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *);
static mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
static int vsw_mac_register(vsw_t *);
static int vsw_mac_unregister(vsw_t *);
static int vsw_m_stat(void *, uint_t, uint64_t *);
static void vsw_m_stop(void *arg);
static int vsw_m_start(void *arg);
static int vsw_m_unicst(void *arg, const uint8_t *);
static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
static int vsw_m_promisc(void *arg, boolean_t);
static mblk_t *vsw_m_tx(void *arg, mblk_t *);

/* MDEG routines */
static int vsw_mdeg_register(vsw_t *vswp);
static void vsw_mdeg_unregister(vsw_t *vswp);
static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
static void vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);

/* Port add/deletion routines */
static int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
static int vsw_port_attach(vsw_t *vswp, int p_instance,
    uint64_t *ldcids, int nids, struct ether_addr *macaddr);
static int vsw_detach_ports(vsw_t *vswp);
static int vsw_port_detach(vsw_t *vswp, int p_instance);
static int vsw_port_delete(vsw_port_t *port);
static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_init_ldcs(vsw_port_t *port);
static int vsw_uninit_ldcs(vsw_port_t *port);
static int vsw_ldc_init(vsw_ldc_t *ldcp);
static int vsw_ldc_uninit(vsw_ldc_t *ldcp);
static int vsw_drain_ldcs(vsw_port_t *port);
static int vsw_drain_port_taskq(vsw_port_t *port);
static void vsw_marker_task(void *);
static vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
static int vsw_plist_del_node(vsw_t *, vsw_port_t *port);

/* Interrupt routines */
static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);

/* Handshake routines */
static void vsw_restart_ldc(vsw_ldc_t *);
static void vsw_restart_handshake(vsw_ldc_t *);
static void vsw_handle_reset(vsw_ldc_t *);
static int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static void vsw_next_milestone(vsw_ldc_t *);
static int vsw_supported_version(vio_ver_msg_t *);

/* Data processing routines */
static void vsw_process_pkt(void *);
static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t);
static void vsw_process_ctrl_pkt(void *);
static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_raw_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
/* Switching/data transmit routines */
static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port, mac_resource_handle_t);
static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port, mac_resource_handle_t);
static int vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port);
static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port);
static int vsw_portsend(vsw_port_t *, mblk_t *);
static int vsw_dringsend(vsw_ldc_t *, mblk_t *);
static int vsw_descrsend(vsw_ldc_t *, mblk_t *);

/* Packet creation routines */
static void vsw_send_ver(void *);
static void vsw_send_attr(vsw_ldc_t *);
static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);

static void vsw_send_msg(vsw_ldc_t *, void *, int);

/* Forwarding database (FDB) routines */
static int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
static int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
static vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *);
static int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
static int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
static int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
static void vsw_del_addr(uint8_t, void *, uint64_t);
static void vsw_del_mcst_port(vsw_port_t *);
static void vsw_del_mcst_vsw(vsw_t *);

/* Dring routines */
static dring_info_t *vsw_create_dring(vsw_ldc_t *);
static void vsw_create_privring(vsw_ldc_t *);
static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
    int *);
static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);

static void vsw_set_lane_attr(vsw_t *, lane_t *);
static int vsw_check_attr(vnet_attr_msg_t *, vsw_port_t *);
static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
static int vsw_check_dring_info(vio_dring_reg_msg_t *);

/* Misc support routines */
static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static int vsw_free_ring(dring_info_t *);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);

int	vsw_num_handshakes = 3;		/* # of handshake attempts */
int	vsw_wretries = 100;		/* # of write attempts */
int	vsw_chain_len = 150;		/* max # of mblks in msg chain */
int	vsw_desc_delay = 0;		/* delay in us */
int	vsw_read_attempts = 5;		/* # of reads of descriptor */

uint32_t	vsw_mblk_size = VSW_MBLK_SIZE;
uint32_t	vsw_num_mblks = VSW_NUM_MBLKS;
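/*
 * The tunables above are plain global ints, so (as a hedged example,
 * assuming the standard Solaris /etc/system tuning mechanism) they
 * could be overridden at boot with lines such as:
 *
 *	set vsw:vsw_chain_len = 300
 *	set vsw:vsw_wretries = 50
 */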
static mac_callbacks_t vsw_m_callbacks = {
	0,
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx,
	NULL,
	NULL,
	NULL
};

static struct cb_ops vsw_cb_ops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_MP,			/* cb_flag */
	CB_REV,			/* rev */
	nodev,			/* int (*cb_aread)() */
	nodev			/* int (*cb_awrite)() */
};

static struct dev_ops vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	vsw_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};

extern struct mod_ops mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch Driver %I%",
	&vsw_ops,
};

#define	LDC_ENTER_LOCK(ldcp)	\
	mutex_enter(&((ldcp)->ldc_cblock));\
	mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
	mutex_exit(&((ldcp)->ldc_txlock));\
	mutex_exit(&((ldcp)->ldc_cblock));
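/*
 * Intended usage of the locking macros above (an illustrative sketch
 * only; 'ldcp' is any vsw_ldc_t pointer). The callback lock is taken
 * before the tx lock, and the two are dropped in reverse order:
 *
 *	LDC_ENTER_LOCK(ldcp);
 *	... manipulate channel state ...
 *	LDC_EXIT_LOCK(ldcp);
 */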
/* Driver soft state ptr */
static void	*vsw_state;

/*
 * Linked list of "vsw_t" structures - one per instance.
 */
vsw_t		*vsw_head = NULL;
krwlock_t	vsw_rw;

/*
 * Property names
 */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";

/* supported versions */
static ver_sup_t vsw_versions[] = { {1, 0} };

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device-port' nodes identified by their
 * 'id' property.
 */
static md_prop_match_t vport_prop_match[] = {
	{ MDET_PROP_VAL,	"id" },
	{ MDET_LIST_END,	NULL }
};

static mdeg_node_match_t vport_match = { "virtual-device-port",
    vport_prop_match };

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
 * by their 'name' and 'cfg-handle' properties.
 */
static md_prop_match_t vdev_prop_match[] = {
	{ MDET_PROP_STR,	"name" },
	{ MDET_PROP_VAL,	"cfg-handle" },
	{ MDET_LIST_END,	NULL }
};

static mdeg_node_match_t vdev_match = { "virtual-device",
    vdev_prop_match };


/*
 * Specification of an MD node passed to the MDEG to filter any
 * 'vport' nodes that do not belong to the specified node. This
 * template is copied for each vsw instance and filled in with
 * the appropriate 'cfg-handle' value before being passed to the MDEG.
 */
static mdeg_prop_spec_t vsw_prop_template[] = {
	{ MDET_PROP_STR,	"name",		vsw_propname },
	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
	{ MDET_LIST_END,	NULL,		NULL }
};

#define	VSW_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val);

/*
 * From /etc/system enable/disable thread per ring. This is a mode
 * selection that is done at vsw driver attach time.
 */
boolean_t vsw_multi_ring_enable = B_FALSE;
int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;

/*
 * Print debug messages - set to 0x1f to enable all msgs
 * or 0x0 to turn all off.
 */
int vswdbg = 0x0;

/*
 * debug levels:
 * 0x01:	Function entry/exit tracing
 * 0x02:	Internal function messages
 * 0x04:	Verbose internal messages
 * 0x08:	Warning messages
 * 0x10:	Error messages
 */

static void
vswdebug(vsw_t *vswp, const char *fmt, ...)
{
	char buf[512];
	va_list ap;

	va_start(ap, fmt);
	(void) vsprintf(buf, fmt, ap);
	va_end(ap);

	if (vswp == NULL)
		cmn_err(CE_CONT, "%s\n", buf);
	else
		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
}

/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

#define	DUMP_TAG(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \
}

#define	DUMP_TAG_PTR(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \
}

#define	DUMP_FLAGS(flags)	dump_flags(flags);
#define	DISPLAY_STATE()		display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */

#ifdef DEBUG

#define	D1		\
if (vswdbg & 0x01)	\
	vswdebug

#define	D2		\
if (vswdbg & 0x02)	\
	vswdebug

#define	D3		\
if (vswdbg & 0x04)	\
	vswdebug

#define	DWARN		\
if (vswdbg & 0x08)	\
	vswdebug

#define	DERR		\
if (vswdbg & 0x10)	\
	vswdebug

#else

#define	DERR	if (0)	vswdebug
#define	DWARN	if (0)	vswdebug
#define	D1	if (0)	vswdebug
#define	D2	if (0)	vswdebug
#define	D3	if (0)	vswdebug

#endif	/* DEBUG */
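/*
 * For example (a hedged sketch, assuming the usual /etc/system
 * syntax), all debug output could be enabled on a DEBUG build with:
 *
 *	set vsw:vswdbg = 0x1f
 *
 * or just warnings and errors with 0x18.
 */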
static struct modlinkage modlinkage = {
	MODREV_1,
	&vswmodldrv,
	NULL
};

int
_init(void)
{
	int status;

	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);

	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
	if (status != 0) {
		return (status);
	}

	mac_init_ops(&vsw_ops, "vsw");
	status = mod_install(&modlinkage);
	if (status != 0) {
		ddi_soft_state_fini(&vsw_state);
	}
	return (status);
}

int
_fini(void)
{
	int status;

	status = mod_remove(&modlinkage);
	if (status != 0)
		return (status);
	mac_fini_ops(&vsw_ops);
	ddi_soft_state_fini(&vsw_state);

	rw_destroy(&vsw_rw);

	return (status);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	vsw_t		*vswp;
	int		instance;
	char		hashname[MAXNAMELEN];
	char		qname[TASKQ_NAMELEN];
	enum		{ PROG_init = 0x00,
			    PROG_if_lock = 0x01,
			    PROG_fdb = 0x02,
			    PROG_mfdb = 0x04,
			    PROG_report_dev = 0x08,
			    PROG_plist = 0x10,
			    PROG_taskq = 0x20 }
			progress;

	progress = PROG_init;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	case DDI_PM_RESUME:
	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
		return (DDI_FAILURE);
	}
	vswp = ddi_get_soft_state(vsw_state, instance);

	if (vswp == NULL) {
		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
		goto vsw_attach_fail;
	}

	vswp->dip = dip;
	vswp->instance = instance;
	ddi_set_driver_private(dip, (caddr_t)vswp);

	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
	progress |= PROG_if_lock;

	/* setup the unicast forwarding database */
	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
	    vswp->instance);
	D2(vswp, "creating unicast hash table (%s)...", hashname);
	vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
	    mod_hash_null_valdtor, sizeof (void *));

	progress |= PROG_fdb;

	/* setup the multicast forwarding database */
	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
	    vswp->instance);
	D2(vswp, "creating multicast hash table (%s)...", hashname);
	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
	vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
	    mod_hash_null_valdtor, sizeof (void *));

	progress |= PROG_mfdb;

	/*
	 * create lock protecting list of multicast addresses
	 * which could come via m_multicst() entry point when plumbed.
	 */
	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
	vswp->mcap = NULL;

	ddi_report_dev(vswp->dip);

	progress |= PROG_report_dev;

	WRITE_ENTER(&vsw_rw);
	vswp->next = vsw_head;
	vsw_head = vswp;
	RW_EXIT(&vsw_rw);

	/* setup the port list */
	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
	vswp->plist.head = NULL;

	progress |= PROG_plist;

	/*
	 * Create the taskq which will process all the VIO
	 * control messages.
	 */
	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
	    TASKQ_DEFAULTPRI, 0)) == NULL) {
		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
		    vswp->instance);
		goto vsw_attach_fail;
	}

	progress |= PROG_taskq;

	/* prevent auto-detaching */
	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
		    "instance %u", DDI_NO_AUTODETACH, instance);
	}

	/*
	 * Now we have everything setup, register an interest in
	 * specific MD nodes.
	 *
	 * The callback is invoked in 2 cases, firstly if upon mdeg
	 * registration there are existing nodes which match our specified
	 * criteria, and secondly if the MD is changed (and again, there
	 * are nodes which we are interested in present within it. Note
	 * that our callback will be invoked even if our specified nodes
	 * have not actually changed).
	 *
	 * Until the callback is invoked we cannot switch any pkts as
	 * we don't know basic information such as what mode we are
	 * operating in. However we expect the callback to be invoked
	 * immediately upon registration as this driver should only
	 * be attaching if there are vsw nodes in the MD.
	 */
	if (vsw_mdeg_register(vswp))
		goto vsw_attach_fail;

	return (DDI_SUCCESS);
vsw_attach_fail:
	DERR(NULL, "vsw_attach: failed");

	if (progress & PROG_taskq)
		ddi_taskq_destroy(vswp->taskq_p);

	if (progress & PROG_plist)
		rw_destroy(&vswp->plist.lockrw);

	if (progress & PROG_report_dev) {
		ddi_remove_minor_node(dip, NULL);
		mutex_destroy(&vswp->mca_lock);
	}

	if (progress & PROG_mfdb) {
		mod_hash_destroy_hash(vswp->mfdb);
		vswp->mfdb = NULL;
		rw_destroy(&vswp->mfdbrw);
	}

	if (progress & PROG_fdb) {
		mod_hash_destroy_hash(vswp->fdb);
		vswp->fdb = NULL;
	}

	if (progress & PROG_if_lock) {
		rw_destroy(&vswp->if_lockrw);
		mutex_destroy(&vswp->mac_lock);
	}

	ddi_soft_state_free(vsw_state, instance);
	return (DDI_FAILURE);
}

static int
vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vio_mblk_pool_t	*poolp, *npoolp;
	vsw_t		**vswpp, *vswp;
	int		instance;

	instance = ddi_get_instance(dip);
	vswp = ddi_get_soft_state(vsw_state, instance);

	if (vswp == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
	case DDI_PM_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	D2(vswp, "detaching instance %d", instance);

	if (vswp->if_state & VSW_IF_REG) {
		if (vsw_mac_unregister(vswp) != 0) {
			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
			    "MAC layer", vswp->instance);
			return (DDI_FAILURE);
		}
	}

	vsw_mdeg_unregister(vswp);

	/* remove mac layer callback */
	mutex_enter(&vswp->mac_lock);
	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
		mac_rx_remove(vswp->mh, vswp->mrh);
		vswp->mrh = NULL;
	}
	mutex_exit(&vswp->mac_lock);

	if (vsw_detach_ports(vswp) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to detach ports",
		    vswp->instance);
		return (DDI_FAILURE);
	}

	rw_destroy(&vswp->if_lockrw);

	/*
	 * Now that the ports have been deleted, stop and close
	 * the physical device.
	 */
	mutex_enter(&vswp->mac_lock);
	if (vswp->mh != NULL) {
		if (vswp->mstarted)
			mac_stop(vswp->mh);
		if (vswp->mresources)
			mac_resource_set(vswp->mh, NULL, NULL);
		mac_close(vswp->mh);

		vswp->mh = NULL;
		vswp->txinfo = NULL;
	}
	mutex_exit(&vswp->mac_lock);
	mutex_destroy(&vswp->mac_lock);

	/*
	 * Destroy any free pools that may still exist.
	 */
	poolp = vswp->rxh;
	while (poolp != NULL) {
		npoolp = vswp->rxh = poolp->nextp;
		if (vio_destroy_mblks(poolp) != 0) {
			vswp->rxh = poolp;
			return (DDI_FAILURE);
		}
		poolp = npoolp;
	}

	/*
	 * Remove this instance from any entries it may be on in
	 * the hash table by using the list of addresses maintained
	 * in the vsw_t structure.
	 */
	vsw_del_mcst_vsw(vswp);
	vswp->mcap = NULL;
	mutex_destroy(&vswp->mca_lock);

	/*
	 * By now any pending tasks have finished and the underlying
	 * ldc's have been destroyed, so it's safe to delete the control
	 * message taskq.
	 */
	if (vswp->taskq_p != NULL)
		ddi_taskq_destroy(vswp->taskq_p);

	/*
	 * At this stage all the data pointers in the hash table
	 * should be NULL, as all the ports have been removed and will
	 * have deleted themselves from the port lists which the data
	 * pointers point to. Hence we can destroy the table using the
	 * default destructors.
	 */
	D2(vswp, "vsw_detach: destroying hash tables..");
	mod_hash_destroy_hash(vswp->fdb);
	vswp->fdb = NULL;

	WRITE_ENTER(&vswp->mfdbrw);
	mod_hash_destroy_hash(vswp->mfdb);
	vswp->mfdb = NULL;
	RW_EXIT(&vswp->mfdbrw);
	rw_destroy(&vswp->mfdbrw);

	ddi_remove_minor_node(dip, NULL);

	rw_destroy(&vswp->plist.lockrw);
	WRITE_ENTER(&vsw_rw);
	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
		if (*vswpp == vswp) {
			*vswpp = vswp->next;
			break;
		}
	}
	RW_EXIT(&vsw_rw);
	ddi_soft_state_free(vsw_state, instance);

	return (DDI_SUCCESS);
}

static int
vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	_NOTE(ARGUNUSED(dip))

	vsw_t	*vswp = NULL;
	dev_t	dev = (dev_t)arg;
	int	instance;

	instance = getminor(dev);

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
			*result = NULL;
			return (DDI_FAILURE);
		}
		*result = vswp->dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);

	default:
		*result = NULL;
		return (DDI_FAILURE);
	}
}

/*
 * Get the value of the "vsw-phys-dev" property in the specified
 * node. This property is the name of the physical device that
 * the virtual switch will use to talk to the outside world.
 *
 * Note it is valid for this property to be NULL (but the property
 * itself must exist). Callers of this routine should verify that
 * the value returned is what they expected (i.e. either NULL or non NULL).
 *
 * On success returns value of the property in region pointed to by
 * the 'name' argument, and with return value of 0. Otherwise returns 1.
 */
static int
vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
{
	int	len = 0;
	char	*physname = NULL;
	char	*dev;

	if (md_get_prop_data(mdp, node, physdev_propname,
	    (uint8_t **)(&physname), &len) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
		    "device(s) from MD", vswp->instance);
		return (1);
	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
		    vswp->instance, physname);
		return (1);
	} else {
		(void) strncpy(name, physname, strlen(physname) + 1);
		D2(vswp, "%s: using first device specified (%s)",
		    __func__, physname);
	}

#ifdef DEBUG
	/*
	 * As a temporary measure to aid testing we check to see if there
	 * is a vsw.conf file present. If there is we use the value of the
	 * vsw_physname property in the file as the name of the physical
	 * device, overriding the value from the MD.
	 *
	 * There may be multiple devices listed, but for the moment
	 * we just use the first one.
	 */
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
		if ((strlen(dev) + 1) > LIFNAMSIZ) {
			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
			    vswp->instance, dev);
			ddi_prop_free(dev);
			return (1);
		} else {
			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
			    "config file", vswp->instance, dev);

			(void) strncpy(name, dev, strlen(dev) + 1);
		}

		ddi_prop_free(dev);
	}
#endif

	return (0);
}
/*
 * Read the 'vsw-switch-mode' property from the specified MD node.
 *
 * Returns 0 on success and the number of modes found in 'found',
 * otherwise returns 1.
 */
static int
vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
    uint8_t *modes, int *found)
{
	int	len = 0;
	int	smode_num = 0;
	char	*smode = NULL;
	char	*curr_mode = NULL;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Get the switch-mode property. The modes are listed in
	 * decreasing order of preference, i.e. the preferred mode is
	 * the first item in the list.
	 */
	len = 0;
	smode_num = 0;
	if (md_get_prop_data(mdp, node, smode_propname,
	    (uint8_t **)(&smode), &len) != 0) {
		/*
		 * Unable to get switch-mode property from MD, nothing
		 * more we can do.
		 */
		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
		    " from the MD", vswp->instance);
		*found = 0;
		return (1);
	}

	curr_mode = smode;
	/*
	 * Modes of operation:
	 * 'switched'	 - layer 2 switching, underlying HW in
	 *		   programmed mode.
	 * 'promiscuous' - layer 2 switching, underlying HW in
	 *		   promiscuous mode.
	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
	 *		   in non-promiscuous mode.
	 */
	while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) {
		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
		if (strcmp(curr_mode, "switched") == 0) {
			modes[smode_num++] = VSW_LAYER2;
		} else if (strcmp(curr_mode, "promiscuous") == 0) {
			modes[smode_num++] = VSW_LAYER2_PROMISC;
		} else if (strcmp(curr_mode, "routed") == 0) {
			modes[smode_num++] = VSW_LAYER3;
		} else {
			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
			    "setting to default switched mode",
			    vswp->instance, curr_mode);
			modes[smode_num++] = VSW_LAYER2;
		}
		curr_mode += strlen(curr_mode) + 1;
	}
	*found = smode_num;

	D2(vswp, "%s: %d modes found", __func__, smode_num);

	D1(vswp, "%s: exit", __func__);

	return (0);
}
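/*
 * Worked example of the parsing loop above: the 'vsw-switch-mode'
 * property arrives as a packed list of NUL-terminated strings, so a
 * 16-byte buffer containing "switched\0routed\0" (len == 16) yields
 * modes[0] == VSW_LAYER2, modes[1] == VSW_LAYER3 and *found == 2.
 */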
/*
 * Get the mac address of the physical device.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_get_physaddr(vsw_t *vswp)
{
	mac_handle_t	mh;
	char		drv[LIFNAMSIZ];
	uint_t		ddi_instance;

	D1(vswp, "%s: enter", __func__);

	if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS)
		return (1);

	if (mac_open(vswp->physname, ddi_instance, &mh) != 0) {
		cmn_err(CE_WARN, "!vsw%d: mac_open %s failed",
		    vswp->instance, vswp->physname);
		return (1);
	}

	READ_ENTER(&vswp->if_lockrw);
	mac_unicst_get(mh, vswp->if_addr.ether_addr_octet);
	RW_EXIT(&vswp->if_lockrw);

	mac_close(mh);

	vswp->mdprops |= VSW_DEV_MACADDR;

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Check to see if the card supports the setting of multiple unicst
 * addresses.
 *
 * Returns 0 if card supports the programming of multiple unicast addresses
 * and there are free address slots available, otherwise returns 1.
 */
static int
vsw_get_hw_maddr(vsw_t *vswp)
{
	D1(vswp, "%s: enter", __func__);

	mutex_enter(&vswp->mac_lock);
	if (vswp->mh == NULL) {
		mutex_exit(&vswp->mac_lock);
		return (1);
	}

	if (!mac_capab_get(vswp->mh, MAC_CAPAB_MULTIADDRESS, &vswp->maddr)) {
		DWARN(vswp, "Unable to get capabilities of"
		    " underlying device (%s)", vswp->physname);
		mutex_exit(&vswp->mac_lock);
		return (1);
	}
	mutex_exit(&vswp->mac_lock);

	if (vswp->maddr.maddr_naddrfree == 0) {
		cmn_err(CE_WARN,
		    "!vsw%d: device %s has no free unicast address slots",
		    vswp->instance, vswp->physname);
		return (1);
	}

	D2(vswp, "%s: %d addrs : %d free", __func__,
	    vswp->maddr.maddr_naddr, vswp->maddr.maddr_naddrfree);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Setup the required switching mode.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_setup_switching(vsw_t *vswp)
{
	int	i, rv = 1;

	D1(vswp, "%s: enter", __func__);

	/* select best switching mode */
	for (i = 0; i < vswp->smode_num; i++) {
		vswp->smode_idx = i;
		switch (vswp->smode[i]) {
		case VSW_LAYER2:
		case VSW_LAYER2_PROMISC:
			rv = vsw_setup_layer2(vswp);
			break;

		case VSW_LAYER3:
			rv = vsw_setup_layer3(vswp);
			break;

		default:
			DERR(vswp, "unknown switch mode");
			rv = 1;
			break;
		}

		if (rv == 0)
			break;
	}

	if (rv == 1) {
		cmn_err(CE_WARN, "!vsw%d: Unable to setup specified "
		    "switching mode", vswp->instance);
		return (rv);
	}

	D2(vswp, "%s: Operating in mode %d", __func__,
	    vswp->smode[vswp->smode_idx]);

	D1(vswp, "%s: exit", __func__);

	return (0);
}
/*
 * Setup for layer 2 switching.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_setup_layer2(vsw_t *vswp)
{
	D1(vswp, "%s: enter", __func__);

	vswp->vsw_switch_frame = vsw_switch_l2_frame;

	/*
	 * Attempt to link into the MAC layer so we can get
	 * and send packets out over the physical adapter.
	 */
	if (vswp->mdprops & VSW_MD_PHYSNAME) {
		if (vsw_mac_attach(vswp) != 0) {
			/*
			 * Registration with the MAC layer has failed,
			 * so return 1 so that we can fall back to the
			 * next preferred switching method.
			 */
			cmn_err(CE_WARN, "!vsw%d: Unable to join as MAC layer "
			    "client", vswp->instance);
			return (1);
		}

		if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) {
			/*
			 * Verify that underlying device can support multiple
			 * unicast mac addresses, and has free capacity.
			 */
			if (vsw_get_hw_maddr(vswp) != 0) {
				cmn_err(CE_WARN, "!vsw%d: Unable to setup "
				    "switching", vswp->instance);
				vsw_mac_detach(vswp);
				return (1);
			}
		}

	} else {
		/*
		 * No physical device name found in MD which is
		 * required for layer 2.
		 */
		cmn_err(CE_WARN, "!vsw%d: no physical device name specified",
		    vswp->instance);
		return (1);
	}

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static int
vsw_setup_layer3(vsw_t *vswp)
{
	D1(vswp, "%s: enter", __func__);

	D2(vswp, "%s: operating in layer 3 mode", __func__);
	vswp->vsw_switch_frame = vsw_switch_l3_frame;

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Link into the MAC layer to gain access to the services provided by
 * the underlying physical device driver (which should also have
 * registered with the MAC layer).
 *
 * Only when in layer 2 mode.
 */
static int
vsw_mac_attach(vsw_t *vswp)
{
	char	drv[LIFNAMSIZ];
	uint_t	ddi_instance;

	D1(vswp, "%s: enter", __func__);

	ASSERT(vswp->mh == NULL);
	ASSERT(vswp->mrh == NULL);
	ASSERT(vswp->mstarted == B_FALSE);
	ASSERT(vswp->mresources == B_FALSE);

	ASSERT(vswp->mdprops & VSW_MD_PHYSNAME);

	mutex_enter(&vswp->mac_lock);
	if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "!vsw%d: invalid device name: %s",
		    vswp->instance, vswp->physname);
		goto mac_fail_exit;
	}

	if ((mac_open(vswp->physname, ddi_instance, &vswp->mh)) != 0) {
		cmn_err(CE_WARN, "!vsw%d: mac_open %s failed",
		    vswp->instance, vswp->physname);
		goto mac_fail_exit;
	}

	ASSERT(vswp->mh != NULL);

	D2(vswp, "vsw_mac_attach: using device %s", vswp->physname);

	if (vsw_multi_ring_enable) {
		/*
		 * Initialize the ring table.
		 */
		vsw_mac_ring_tbl_init(vswp);

		/*
		 * Register our rx callback function.
		 */
		vswp->mrh = mac_rx_add(vswp->mh,
		    vsw_rx_queue_cb, (void *)vswp);
		ASSERT(vswp->mrh != NULL);

		/*
		 * Register our mac resource callback.
		 */
		mac_resource_set(vswp->mh, vsw_mac_ring_add_cb, (void *)vswp);
		vswp->mresources = B_TRUE;

		/*
		 * Get the ring resources available to us from
		 * the mac below us.
		 */
		mac_resources(vswp->mh);
	} else {
		/*
		 * Just register our rx callback function
		 */
		vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp);
		ASSERT(vswp->mrh != NULL);
	}

	/* Get the MAC tx fn */
	vswp->txinfo = mac_tx_get(vswp->mh);

	/* start the interface */
	if (mac_start(vswp->mh) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Could not start mac interface",
		    vswp->instance);
		goto mac_fail_exit;
	}

	mutex_exit(&vswp->mac_lock);

	vswp->mstarted = B_TRUE;

	D1(vswp, "%s: exit", __func__);
	return (0);

mac_fail_exit:
	mutex_exit(&vswp->mac_lock);
	vsw_mac_detach(vswp);

	D1(vswp, "%s: exit", __func__);
	return (1);
}
static void
vsw_mac_detach(vsw_t *vswp)
{
	D1(vswp, "vsw_mac_detach: enter");

	ASSERT(vswp != NULL);

	if (vsw_multi_ring_enable) {
		vsw_mac_ring_tbl_destroy(vswp);
	}

	mutex_enter(&vswp->mac_lock);

	if (vswp->mh != NULL) {
		if (vswp->mstarted)
			mac_stop(vswp->mh);
		if (vswp->mrh != NULL)
			mac_rx_remove(vswp->mh, vswp->mrh);
		if (vswp->mresources)
			mac_resource_set(vswp->mh, NULL, NULL);
		mac_close(vswp->mh);
	}

	vswp->mrh = NULL;
	vswp->mh = NULL;
	vswp->txinfo = NULL;
	vswp->mstarted = B_FALSE;

	mutex_exit(&vswp->mac_lock);

	D1(vswp, "vsw_mac_detach: exit");
}
/*
 * Depending on the mode specified and on the capabilities and
 * capacity of the underlying device, set up the physical device.
 *
 * If in layer 3 mode, then do nothing.
 *
 * If in layer 2 programmed mode attempt to program the unicast address
 * associated with the port into the physical device. If this is not
 * possible due to resource exhaustion or simply because the device does
 * not support multiple unicast addresses then, if required, fall back
 * onto putting the card into promisc mode.
 *
 * If in promisc mode then simply set the card into promisc mode.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_set_hw(vsw_t *vswp, vsw_port_t *port)
{
	mac_multi_addr_t	mac_addr;
	void			*mah;
	int			err;

	D1(vswp, "%s: enter", __func__);

	if (vswp->smode[vswp->smode_idx] == VSW_LAYER3)
		return (0);

	if (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) {
		return (vsw_set_hw_promisc(vswp, port));
	}

	if (vswp->maddr.maddr_handle == NULL)
		return (1);

	mah = vswp->maddr.maddr_handle;

	/*
	 * Attempt to program the unicast address into the HW.
	 */
	mac_addr.mma_addrlen = ETHERADDRL;
	ether_copy(&port->p_macaddr, &mac_addr.mma_addr);

	err = vswp->maddr.maddr_add(mah, &mac_addr);
	if (err != 0) {
		cmn_err(CE_WARN, "!vsw%d: failed to program addr "
		    "%x:%x:%x:%x:%x:%x for port %d into device %s "
		    ": err %d", vswp->instance,
		    port->p_macaddr.ether_addr_octet[0],
		    port->p_macaddr.ether_addr_octet[1],
		    port->p_macaddr.ether_addr_octet[2],
		    port->p_macaddr.ether_addr_octet[3],
		    port->p_macaddr.ether_addr_octet[4],
		    port->p_macaddr.ether_addr_octet[5],
		    port->p_instance, vswp->physname, err);

		/*
		 * Mark that attempt should be made to re-config sometime
		 * in future if a port is deleted.
		 */
		vswp->recfg_reqd = B_TRUE;

		/*
		 * Only 1 mode specified, nothing more to do.
		 */
		if (vswp->smode_num == 1)
			return (err);

		/*
		 * If promiscuous was next mode specified try to
		 * set the card into that mode.
		 */
		if ((vswp->smode_idx <= (vswp->smode_num - 2)) &&
		    (vswp->smode[vswp->smode_idx + 1]
		    == VSW_LAYER2_PROMISC)) {
			vswp->smode_idx += 1;
			return (vsw_set_hw_promisc(vswp, port));
		}
		return (err);
	}

	port->addr_slot = mac_addr.mma_slot;
	port->addr_set = VSW_ADDR_HW;

	D2(vswp, "programmed addr %x:%x:%x:%x:%x:%x for port %d "
	    "into slot %d of device %s",
	    port->p_macaddr.ether_addr_octet[0],
	    port->p_macaddr.ether_addr_octet[1],
	    port->p_macaddr.ether_addr_octet[2],
	    port->p_macaddr.ether_addr_octet[3],
	    port->p_macaddr.ether_addr_octet[4],
	    port->p_macaddr.ether_addr_octet[5],
	    port->p_instance, port->addr_slot, vswp->physname);

	D1(vswp, "%s: exit", __func__);

	return (0);
}
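/*
 * Worked example of the fallback in vsw_set_hw() above: with an MD
 * mode list of "switched" then "promiscuous" (smode_num == 2,
 * smode_idx == 0), a maddr_add() failure due to exhausted unicast
 * slots bumps smode_idx to 1 and programs the port via
 * vsw_set_hw_promisc() instead; recfg_reqd is left set so that
 * vsw_reconfig_hw() can later fail back to programmed mode once a
 * port is deleted and a slot frees up.
 */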
/*
 * If in layer 3 mode do nothing.
 *
 * If in layer 2 switched mode remove the address from the physical
 * device.
 *
 * If in layer 2 promiscuous mode disable promisc mode.
 *
 * Returns 0 on success.
 */
static int
vsw_unset_hw(vsw_t *vswp, vsw_port_t *port)
{
	int	err;
	void	*mah;

	D1(vswp, "%s: enter", __func__);

	if (vswp->smode[vswp->smode_idx] == VSW_LAYER3)
		return (0);

	if (port->addr_set == VSW_ADDR_PROMISC) {
		return (vsw_unset_hw_promisc(vswp, port));
	}

	if (port->addr_set == VSW_ADDR_HW) {
		if (vswp->maddr.maddr_handle == NULL)
			return (1);

		mah = vswp->maddr.maddr_handle;

		err = vswp->maddr.maddr_remove(mah, port->addr_slot);
		if (err != 0) {
			cmn_err(CE_WARN, "!vsw%d: Unable to remove addr "
			    "%x:%x:%x:%x:%x:%x for port %d from device %s"
			    " : (err %d)", vswp->instance,
			    port->p_macaddr.ether_addr_octet[0],
			    port->p_macaddr.ether_addr_octet[1],
			    port->p_macaddr.ether_addr_octet[2],
			    port->p_macaddr.ether_addr_octet[3],
			    port->p_macaddr.ether_addr_octet[4],
			    port->p_macaddr.ether_addr_octet[5],
			    port->p_instance, vswp->physname, err);
			return (err);
		}

		port->addr_set = VSW_ADDR_UNSET;

		D2(vswp, "removed addr %x:%x:%x:%x:%x:%x for "
		    "port %d from device %s",
		    port->p_macaddr.ether_addr_octet[0],
		    port->p_macaddr.ether_addr_octet[1],
		    port->p_macaddr.ether_addr_octet[2],
		    port->p_macaddr.ether_addr_octet[3],
		    port->p_macaddr.ether_addr_octet[4],
		    port->p_macaddr.ether_addr_octet[5],
		    port->p_instance, vswp->physname);
	}

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Set network card into promisc mode.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_set_hw_promisc(vsw_t *vswp, vsw_port_t *port)
{
	D1(vswp, "%s: enter", __func__);

	mutex_enter(&vswp->mac_lock);
	if (vswp->mh == NULL) {
		mutex_exit(&vswp->mac_lock);
		return (1);
	}

	if (vswp->promisc_cnt++ == 0) {
		if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) {
			vswp->promisc_cnt--;
			mutex_exit(&vswp->mac_lock);
			return (1);
		}
		cmn_err(CE_NOTE, "!vsw%d: switching device %s into "
		    "promiscuous mode", vswp->instance, vswp->physname);
	}
	mutex_exit(&vswp->mac_lock);
	port->addr_set = VSW_ADDR_PROMISC;

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Turn off promiscuous mode on network card.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_unset_hw_promisc(vsw_t *vswp, vsw_port_t *port)
{
	vsw_port_list_t	*plist = &vswp->plist;

	D2(vswp, "%s: enter", __func__);

	mutex_enter(&vswp->mac_lock);
	if (vswp->mh == NULL) {
		mutex_exit(&vswp->mac_lock);
		return (1);
	}

	ASSERT(port->addr_set == VSW_ADDR_PROMISC);

	if (--vswp->promisc_cnt == 0) {
		if (mac_promisc_set(vswp->mh, B_FALSE, MAC_DEVPROMISC) != 0) {
			vswp->promisc_cnt++;
			mutex_exit(&vswp->mac_lock);
			return (1);
		}

		/*
		 * We are exiting promisc mode either because we were
		 * only in promisc mode because we had failed over from
		 * switched mode due to HW resource issues, or the user
		 * wanted the card in promisc mode for all the ports and
		 * the last port is now being deleted. Tweak the message
		 * accordingly.
		 */
		if (plist->num_ports != 0) {
			cmn_err(CE_NOTE, "!vsw%d: switching device %s back to "
			    "programmed mode", vswp->instance,
			    vswp->physname);
		} else {
			cmn_err(CE_NOTE, "!vsw%d: switching device %s out of "
			    "promiscuous mode", vswp->instance,
			    vswp->physname);
		}
	}
	mutex_exit(&vswp->mac_lock);
	port->addr_set = VSW_ADDR_UNSET;

	D1(vswp, "%s: exit", __func__);
	return (0);
}
/*
 * Determine whether or not we are operating in our preferred
 * mode and if not whether the physical resources now allow us
 * to operate in it.
 *
 * Should only be invoked after port which is being deleted has been
 * removed from the port list.
 */
static int
vsw_reconfig_hw(vsw_t *vswp)
{
	vsw_port_list_t		*plist = &vswp->plist;
	mac_multi_addr_t	mac_addr;
	vsw_port_t		*tp;
	void			*mah;
	int			rv = 0;
	int			s_idx;

	D1(vswp, "%s: enter", __func__);

	if (vswp->maddr.maddr_handle == NULL)
		return (1);

	/*
	 * Check if there are now sufficient HW resources to
	 * attempt a re-config.
	 */
	if (plist->num_ports > vswp->maddr.maddr_naddrfree)
		return (1);

	/*
	 * If we are in layer 2 (i.e. switched) or would like to be
	 * in layer 2 then check if any ports need to be programmed
	 * into the HW.
	 *
	 * This can happen in two cases - switched was specified as
	 * the preferred mode of operation but we exhausted the HW
	 * resources and so failed over to the next specified mode,
	 * or switched was the only mode specified so after HW
	 * resources were exhausted there was nothing more we
	 * could do.
	 */
	if (vswp->smode_idx > 0)
		s_idx = vswp->smode_idx - 1;
	else
		s_idx = vswp->smode_idx;

	if (vswp->smode[s_idx] == VSW_LAYER2) {
		mah = vswp->maddr.maddr_handle;

		D2(vswp, "%s: attempting reconfig..", __func__);

		/*
		 * Scan the port list for any port whose address has not
		 * been programmed in HW - there should be a max of one.
		 */
		for (tp = plist->head; tp != NULL; tp = tp->p_next) {
			if (tp->addr_set != VSW_ADDR_HW) {
				mac_addr.mma_addrlen = ETHERADDRL;
				ether_copy(&tp->p_macaddr, &mac_addr.mma_addr);

				rv = vswp->maddr.maddr_add(mah, &mac_addr);
				if (rv != 0) {
					DWARN(vswp, "Error setting addr in "
					    "HW for port %d err %d",
					    tp->p_instance, rv);
					goto reconfig_err_exit;
				}
				tp->addr_slot = mac_addr.mma_slot;

				D2(vswp, "re-programmed port %d "
				    "addr %x:%x:%x:%x:%x:%x into slot %d"
				    " of device %s", tp->p_instance,
				    tp->p_macaddr.ether_addr_octet[0],
				    tp->p_macaddr.ether_addr_octet[1],
				    tp->p_macaddr.ether_addr_octet[2],
				    tp->p_macaddr.ether_addr_octet[3],
				    tp->p_macaddr.ether_addr_octet[4],
				    tp->p_macaddr.ether_addr_octet[5],
				    tp->addr_slot, vswp->physname);

				/*
				 * If up to now we had to put the card into
				 * promisc mode to see this address, we
				 * can now safely disable promisc mode.
				 */
				if (tp->addr_set == VSW_ADDR_PROMISC)
					(void) vsw_unset_hw_promisc(vswp, tp);

				tp->addr_set = VSW_ADDR_HW;
			}
		}

		/* no further re-config needed */
		vswp->recfg_reqd = B_FALSE;

		vswp->smode_idx = s_idx;

		return (0);
	}

reconfig_err_exit:
	return (rv);
}

static void
vsw_mac_ring_tbl_entry_init(vsw_t *vswp, vsw_mac_ring_t *ringp)
{
	ringp->ring_state = VSW_MAC_RING_FREE;
	ringp->ring_arg = NULL;
	ringp->ring_blank = NULL;
	ringp->ring_vqp = NULL;
	ringp->ring_vswp = vswp;
}

static void
vsw_mac_ring_tbl_init(vsw_t *vswp)
{
	int	i;

	mutex_init(&vswp->mac_ring_lock, NULL, MUTEX_DRIVER, NULL);

	vswp->mac_ring_tbl_sz = vsw_mac_rx_rings;
	vswp->mac_ring_tbl =
	    kmem_alloc(vsw_mac_rx_rings * sizeof (vsw_mac_ring_t),
	    KM_SLEEP);

	for (i = 0; i < vswp->mac_ring_tbl_sz; i++)
		vsw_mac_ring_tbl_entry_init(vswp, &vswp->mac_ring_tbl[i]);
}

static void
vsw_mac_ring_tbl_destroy(vsw_t *vswp)
{
	int		i;
	vsw_mac_ring_t	*ringp;

	mutex_enter(&vswp->mac_ring_lock);
	for (i = 0; i < vswp->mac_ring_tbl_sz; i++) {
		ringp = &vswp->mac_ring_tbl[i];

		if (ringp->ring_state != VSW_MAC_RING_FREE) {
			/*
			 * Destroy the queue.
			 */
			vsw_queue_stop(ringp->ring_vqp);
			vsw_queue_destroy(ringp->ring_vqp);

			/*
			 * Re-initialize the structure.
			 */
			vsw_mac_ring_tbl_entry_init(vswp, ringp);
		}
	}
	mutex_exit(&vswp->mac_ring_lock);

	mutex_destroy(&vswp->mac_ring_lock);
	kmem_free(vswp->mac_ring_tbl,
	    vswp->mac_ring_tbl_sz * sizeof (vsw_mac_ring_t));
	vswp->mac_ring_tbl_sz = 0;
}

/*
 * Handle resource add callbacks from the driver below.
 */
static mac_resource_handle_t
vsw_mac_ring_add_cb(void *arg, mac_resource_t *mrp)
{
	vsw_t		*vswp = (vsw_t *)arg;
	mac_rx_fifo_t	*mrfp = (mac_rx_fifo_t *)mrp;
	vsw_mac_ring_t	*ringp;
	vsw_queue_t	*vqp;
	int		i;

	ASSERT(vswp != NULL);
	ASSERT(mrp != NULL);
	ASSERT(vswp->mac_ring_tbl != NULL);

	D1(vswp, "%s: enter", __func__);

	/*
	 * Check to make sure we have the correct resource type.
	 */
	if (mrp->mr_type != MAC_RX_FIFO)
		return (NULL);

	/*
	 * Find an open entry in the ring table.
	 */
	mutex_enter(&vswp->mac_ring_lock);
	for (i = 0; i < vswp->mac_ring_tbl_sz; i++) {
		ringp = &vswp->mac_ring_tbl[i];

		/*
		 * Check for an empty slot; if found, then setup queue
		 * and thread.
		 */
		if (ringp->ring_state == VSW_MAC_RING_FREE) {
			/*
			 * Create the queue for this ring.
			 */
			vqp = vsw_queue_create();

			/*
			 * Initialize the ring data structure.
			 */
			ringp->ring_vqp = vqp;
			ringp->ring_arg = mrfp->mrf_arg;
			ringp->ring_blank = mrfp->mrf_blank;
			ringp->ring_state = VSW_MAC_RING_INUSE;

			/*
			 * Create the worker thread.
			 */
			vqp->vq_worker = thread_create(NULL, 0,
			    vsw_queue_worker, ringp, 0, &p0,
			    TS_RUN, minclsyspri);
			if (vqp->vq_worker == NULL) {
				vsw_queue_destroy(vqp);
				vsw_mac_ring_tbl_entry_init(vswp, ringp);
				ringp = NULL;
			}

			if (ringp != NULL) {
				/*
				 * Make sure the thread reaches running
				 * state for this ring.
				 */
				mutex_enter(&vqp->vq_lock);
				while ((vqp->vq_state != VSW_QUEUE_RUNNING) &&
				    (vqp->vq_state != VSW_QUEUE_DRAINED)) {
					cv_wait(&vqp->vq_cv, &vqp->vq_lock);
				}

				/*
				 * If the thread is not running, cleanup.
				 */
				if (vqp->vq_state == VSW_QUEUE_DRAINED) {
					vsw_queue_destroy(vqp);
					vsw_mac_ring_tbl_entry_init(vswp,
					    ringp);
					ringp = NULL;
				}
				mutex_exit(&vqp->vq_lock);
			}

			mutex_exit(&vswp->mac_ring_lock);
			D1(vswp, "%s: exit", __func__);
			return ((mac_resource_handle_t)ringp);
		}
	}
	mutex_exit(&vswp->mac_ring_lock);

	/*
	 * No slots in the ring table available.
	 */
	D1(vswp, "%s: exit", __func__);
	return (NULL);
}
static void
vsw_queue_stop(vsw_queue_t *vqp)
{
	mutex_enter(&vqp->vq_lock);

	if (vqp->vq_state == VSW_QUEUE_RUNNING) {
		vqp->vq_state = VSW_QUEUE_STOP;
		cv_signal(&vqp->vq_cv);

		while (vqp->vq_state != VSW_QUEUE_DRAINED)
			cv_wait(&vqp->vq_cv, &vqp->vq_lock);
	}

	vqp->vq_state = VSW_QUEUE_STOPPED;

	mutex_exit(&vqp->vq_lock);
}

static vsw_queue_t *
vsw_queue_create()
{
	vsw_queue_t *vqp;

	vqp = kmem_zalloc(sizeof (vsw_queue_t), KM_SLEEP);

	mutex_init(&vqp->vq_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vqp->vq_cv, NULL, CV_DRIVER, NULL);
	vqp->vq_first = NULL;
	vqp->vq_last = NULL;
	vqp->vq_state = VSW_QUEUE_STOPPED;

	return (vqp);
}

static void
vsw_queue_destroy(vsw_queue_t *vqp)
{
	cv_destroy(&vqp->vq_cv);
	mutex_destroy(&vqp->vq_lock);
	kmem_free(vqp, sizeof (vsw_queue_t));
}

static void
vsw_queue_worker(vsw_mac_ring_t *rrp)
{
	mblk_t		*mp;
	vsw_queue_t	*vqp = rrp->ring_vqp;
	vsw_t		*vswp = rrp->ring_vswp;

	mutex_enter(&vqp->vq_lock);

	ASSERT(vqp->vq_state == VSW_QUEUE_STOPPED);

	/*
	 * Set the state to running, since the thread is now active.
	 */
	vqp->vq_state = VSW_QUEUE_RUNNING;
	cv_signal(&vqp->vq_cv);

	while (vqp->vq_state == VSW_QUEUE_RUNNING) {
		/*
		 * Wait for work to do or for the state to change
		 * to not running.
		 */
		while ((vqp->vq_state == VSW_QUEUE_RUNNING) &&
		    (vqp->vq_first == NULL)) {
			cv_wait(&vqp->vq_cv, &vqp->vq_lock);
		}

		/*
		 * Process packets that we received from the interface.
		 */
		if (vqp->vq_first != NULL) {
			mp = vqp->vq_first;

			vqp->vq_first = NULL;
			vqp->vq_last = NULL;

			mutex_exit(&vqp->vq_lock);

			/* switch the chain of packets received */
			vswp->vsw_switch_frame(vswp, mp,
			    VSW_PHYSDEV, NULL, NULL);

			mutex_enter(&vqp->vq_lock);
		}
	}

	/*
	 * We are drained and signal we are done.
	 */
	vqp->vq_state = VSW_QUEUE_DRAINED;
	cv_signal(&vqp->vq_cv);

	/*
	 * Exit lock and drain the remaining packets.
	 */
	mutex_exit(&vqp->vq_lock);

	/*
	 * Exit the thread
	 */
	thread_exit();
}
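/*
 * Summary of the vsw_queue_t state machine implemented by the
 * routines above: vsw_queue_create() starts a queue in
 * VSW_QUEUE_STOPPED; the worker thread moves it to VSW_QUEUE_RUNNING
 * and cv_signals its creator; vsw_queue_stop() requests VSW_QUEUE_STOP
 * and waits until the worker acknowledges with VSW_QUEUE_DRAINED,
 * after which the queue is marked VSW_QUEUE_STOPPED again and may be
 * destroyed.
 */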
/*
 * static void
 * vsw_rx_queue_cb() - Receive callback routine when
 * vsw_multi_ring_enable is non-zero. Queue the packets
 * to a packet queue for a worker thread to process.
 */
static void
vsw_rx_queue_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
{
	vsw_mac_ring_t	*ringp = (vsw_mac_ring_t *)mrh;
	vsw_t		*vswp = (vsw_t *)arg;
	vsw_queue_t	*vqp;
	mblk_t		*bp, *last;

	ASSERT(mrh != NULL);
	ASSERT(vswp != NULL);
	ASSERT(mp != NULL);

	D1(vswp, "%s: enter", __func__);

	/*
	 * Find the last element in the mblk chain.
	 */
	bp = mp;
	do {
		last = bp;
		bp = bp->b_next;
	} while (bp != NULL);

	/* Get the queue for the packets */
	vqp = ringp->ring_vqp;

	/*
	 * Grab the lock such that we can queue the packets.
	 */
	mutex_enter(&vqp->vq_lock);

	if (vqp->vq_state != VSW_QUEUE_RUNNING) {
		freemsg(mp);
		mutex_exit(&vqp->vq_lock);
		goto vsw_rx_queue_cb_exit;
	}

	/*
	 * Add the mblk chain to the queue. If there are
	 * already mblks in the queue, then add the new
	 * chain to the end.
	 */
	if (vqp->vq_first == NULL)
		vqp->vq_first = mp;
	else
		vqp->vq_last->b_next = mp;

	vqp->vq_last = last;

	/*
	 * Signal the worker thread that there is work to
	 * do.
	 */
	cv_signal(&vqp->vq_cv);

	/*
	 * Let go of the lock and exit.
	 */
	mutex_exit(&vqp->vq_lock);

vsw_rx_queue_cb_exit:
	D1(vswp, "%s: exit", __func__);
}

/*
 * receive callback routine. Invoked by MAC layer when there
 * are pkts being passed up from physical device.
 *
 * PERF: It may be more efficient when the card is in promisc
 * mode to check the dest address of the pkts here (against
 * the FDB) rather than checking later. Needs to be investigated.
 */
static void
vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
{
	_NOTE(ARGUNUSED(mrh))

	vsw_t		*vswp = (vsw_t *)arg;

	ASSERT(vswp != NULL);

	D1(vswp, "vsw_rx_cb: enter");

	/* switch the chain of packets received */
	vswp->vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL);

	D1(vswp, "vsw_rx_cb: exit");
}

/*
 * Send a message out over the physical device via the MAC layer.
 *
 * Returns any mblks that it was unable to transmit.
 */
static mblk_t *
vsw_tx_msg(vsw_t *vswp, mblk_t *mp)
{
	const mac_txinfo_t	*mtp;
	mblk_t			*nextp;

	mutex_enter(&vswp->mac_lock);
	if (vswp->mh == NULL) {
		DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail");
		mutex_exit(&vswp->mac_lock);
		return (mp);
	} else {
		for (;;) {
			nextp = mp->b_next;
			mp->b_next = NULL;

			mtp = vswp->txinfo;

			if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) {
				mp->b_next = nextp;
				break;
			}

			if ((mp = nextp) == NULL)
				break;
		}
	}
	mutex_exit(&vswp->mac_lock);

	return (mp);
}
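/*
 * A minimal sketch of how a caller might use vsw_tx_msg(); the
 * drop-on-failure policy shown here is an illustrative assumption,
 * not code from this driver:
 *
 *	mblk_t *ret;
 *
 *	if ((ret = vsw_tx_msg(vswp, mp)) != NULL) {
 *		// the device could not take 'ret' and its
 *		// successors; drop them
 *		freemsgchain(ret);
 *	}
 */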

/*
 * Register with the MAC layer as a network device, so we
 * can be plumbed if necessary.
 */
static int
vsw_mac_register(vsw_t *vswp)
{
	mac_register_t	*macp;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vswp;
	macp->m_dip = vswp->dip;
	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
	macp->m_callbacks = &vsw_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;
	rv = mac_register(macp, &vswp->if_mh);
	mac_free(macp);
	if (rv == 0)
		vswp->if_state |= VSW_IF_REG;

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

static int
vsw_mac_unregister(vsw_t *vswp)
{
	int		rv = 0;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);

	if (vswp->if_state & VSW_IF_REG) {
		rv = mac_unregister(vswp->if_mh);
		if (rv != 0) {
			DWARN(vswp, "%s: unable to unregister from MAC "
			    "framework", __func__);

			RW_EXIT(&vswp->if_lockrw);
			D1(vswp, "%s: fail exit", __func__);
			return (rv);
		}

		/* mark i/f as down and unregistered */
		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
	}
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

static int
vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&vswp->mac_lock);
	if (vswp->mh == NULL) {
		mutex_exit(&vswp->mac_lock);
		return (EINVAL);
	}

	/* return stats from underlying device */
	*val = mac_stat_get(vswp->mh, stat);

	mutex_exit(&vswp->mac_lock);

	return (0);
}

static void
vsw_m_stop(void *arg)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	vswp->if_state &= ~VSW_IF_UP;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
}

static int
vsw_m_start(void *arg)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	vswp->if_state |= VSW_IF_UP;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
	return (0);
}

/*
 * Change the local interface address.
 */
static int
vsw_m_unicst(void *arg, const uint8_t *macaddr)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	ether_copy(macaddr, &vswp->if_addr);
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static int
vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	vsw_t		*vswp = (vsw_t *)arg;
	mcst_addr_t	*mcst_p = NULL;
	uint64_t	addr = 0x0;
	int		i, ret = 0;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Convert the address into a form that can be used
	 * as a hash table key.
	 */
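	/*
	 * For example (illustrative only): the multicast address
	 * 01:00:5e:00:00:01 packs into the 64-bit key 0x01005e000001,
	 * with the first octet ending up most significant.
	 */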
	for (i = 0; i < ETHERADDRL; i++) {
		addr = (addr << 8) | mca[i];
	}

	D2(vswp, "%s: addr = 0x%llx", __func__, addr);

	if (add) {
		D2(vswp, "%s: adding multicast", __func__);
		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
			/*
			 * Update the list of multicast addresses
			 * contained within the vsw_t structure to
			 * include this new one.
			 */
			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
			if (mcst_p == NULL) {
				DERR(vswp, "%s unable to alloc mem", __func__);
				return (1);
			}
			mcst_p->addr = addr;

			mutex_enter(&vswp->mca_lock);
			mcst_p->nextp = vswp->mcap;
			vswp->mcap = mcst_p;
			mutex_exit(&vswp->mca_lock);

			/*
			 * Call into the underlying driver to program the
			 * address into HW.
			 */
			mutex_enter(&vswp->mac_lock);
			if (vswp->mh != NULL) {
				ret = mac_multicst_add(vswp->mh, mca);
				if (ret != 0) {
					cmn_err(CE_WARN, "!vsw%d: unable to "
					    "add multicast address",
					    vswp->instance);
					mutex_exit(&vswp->mac_lock);
					goto vsw_remove_addr;
				}
			}
			mutex_exit(&vswp->mac_lock);
		} else {
			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
			    "address", vswp->instance);
		}
		return (ret);
	}

vsw_remove_addr:

	D2(vswp, "%s: removing multicast", __func__);
	/*
	 * Remove the address from the hash table..
	 */
	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {

		/*
		 * ..and then from the list maintained in the
		 * vsw_t structure.
		 */
		vsw_del_addr(VSW_LOCALDEV, vswp, addr);

		mutex_enter(&vswp->mac_lock);
		if (vswp->mh != NULL)
			(void) mac_multicst_remove(vswp->mh, mca);
		mutex_exit(&vswp->mac_lock);
	}

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static int
vsw_m_promisc(void *arg, boolean_t on)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	if (on)
		vswp->if_state |= VSW_IF_PROMISC;
	else
		vswp->if_state &= ~VSW_IF_PROMISC;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static mblk_t *
vsw_m_tx(void *arg, mblk_t *mp)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);

	D1(vswp, "%s: exit", __func__);

	return (NULL);
}

/*
 * Register for machine description (MD) updates.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_mdeg_register(vsw_t *vswp)
{
	mdeg_prop_spec_t	*pspecp;
	mdeg_node_spec_t	*inst_specp;
	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
	size_t			templatesz;
	int			inst, rv;

	D1(vswp, "%s: enter", __func__);

	/*
	 * In each 'virtual-device' node in the MD there is a
	 * 'cfg-handle' property which is the MD's concept of
	 * an instance number (this may be completely different from
	 * the device driver's instance number). OBP reads that value
	 * and stores it in the 'reg' property of the appropriate node
	 * in the device tree. So we use the 'reg' value when registering
	 * with the mdeg framework, to ensure we get events for the
	 * correct nodes.
	 */
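	/*
	 * For example (illustrative only): a 'virtual-device' MD node
	 * with name 'virtual-network-switch' and cfg-handle = 0 shows
	 * up in the OBP device tree with reg = 0, which is the value
	 * read back here.
	 */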
	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
	    DDI_PROP_DONTPASS, reg_propname, -1);
	if (inst == -1) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from "
		    "OBP device tree", vswp->instance, reg_propname);
		return (1);
	}

	D2(vswp, "%s: instance %d registering with mdeg", __func__, inst);

	/*
	 * Allocate and initialize a per-instance copy
	 * of the global property spec array that will
	 * uniquely identify this vsw instance.
	 */
	templatesz = sizeof (vsw_prop_template);
	pspecp = kmem_zalloc(templatesz, KM_SLEEP);

	bcopy(vsw_prop_template, pspecp, templatesz);

	VSW_SET_MDEG_PROP_INST(pspecp, inst);

	/* initialize the complete prop spec structure */
	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
	inst_specp->namep = "virtual-device";
	inst_specp->specp = pspecp;

	/*
	 * Register an interest in 'virtual-device' nodes with a
	 * 'name' property of 'virtual-network-switch'
	 */
	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
	    (void *)vswp, &mdeg_hdl);
	if (rv != MDEG_SUCCESS) {
		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
		    __func__, rv);
		goto mdeg_reg_fail;
	}

	/*
	 * Register an interest in 'vsw-port' nodes.
	 */
	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
	    (void *)vswp, &mdeg_port_hdl);
	if (rv != MDEG_SUCCESS) {
		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
		(void) mdeg_unregister(mdeg_hdl);
		goto mdeg_reg_fail;
	}

	/* save off data that will be needed later */
	vswp->inst_spec = inst_specp;
	vswp->mdeg_hdl = mdeg_hdl;
	vswp->mdeg_port_hdl = mdeg_port_hdl;

	D1(vswp, "%s: exit", __func__);
	return (0);

mdeg_reg_fail:
	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
	    vswp->instance);
	kmem_free(pspecp, templatesz);
	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));

	vswp->mdeg_hdl = NULL;
	vswp->mdeg_port_hdl = NULL;

	return (1);
}

static void
vsw_mdeg_unregister(vsw_t *vswp)
{
	D1(vswp, "vsw_mdeg_unregister: enter");

	if (vswp->mdeg_hdl != NULL)
		(void) mdeg_unregister(vswp->mdeg_hdl);

	if (vswp->mdeg_port_hdl != NULL)
		(void) mdeg_unregister(vswp->mdeg_port_hdl);

	if (vswp->inst_spec != NULL) {
		if (vswp->inst_spec->specp != NULL) {
			kmem_free(vswp->inst_spec->specp,
			    sizeof (vsw_prop_template));
			vswp->inst_spec->specp = NULL;
		}

		kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
		vswp->inst_spec = NULL;
	}

	D1(vswp, "vsw_mdeg_unregister: exit");
}

/*
 * Mdeg callback invoked for the vsw node itself.
 */
static int
vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
{
	vsw_t		*vswp;
	int		idx;
	md_t		*mdp;
	mde_cookie_t	node;
	uint64_t	inst;
	char		*node_name = NULL;

	if (resp == NULL)
		return (MDEG_FAILURE);

	vswp = (vsw_t *)cb_argp;

	D1(vswp, "%s: added %d : removed %d : curr matched %d"
	    " : prev matched %d", __func__, resp->added.nelem,
	    resp->removed.nelem, resp->match_curr.nelem,
	    resp->match_prev.nelem);

	/*
	 * Expect 'added' to be non-zero if virtual-network-switch
	 * nodes exist in the MD when the driver attaches.
	 */
	for (idx = 0; idx < resp->added.nelem; idx++) {
		mdp = resp->added.mdp;
		node = resp->added.mdep[idx];

		if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
			DERR(vswp, "%s: unable to get node name for "
			    "node(%d) 0x%lx", __func__, idx, node);
			continue;
		}

		if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
			DERR(vswp, "%s: prop(cfg-handle) not found port(%d)",
			    __func__, idx);
			continue;
		}

		D2(vswp, "%s: added node(%d) 0x%lx with name %s "
		    "and inst %ld", __func__, idx, node, node_name, inst);

		vsw_get_initial_md_properties(vswp, mdp, node);
	}

	/*
	 * A non-zero 'match' value indicates that the MD has been
	 * updated and that a virtual-network-switch node is present
	 * which may or may not have been updated. It is up to the clients
	 * to examine their own nodes and determine if they have changed.
	 */
	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
		mdp = resp->match_curr.mdp;
		node = resp->match_curr.mdep[idx];

		if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
			DERR(vswp, "%s: unable to get node name for "
			    "node(%d) 0x%lx", __func__, idx, node);
			continue;
		}

		if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
			DERR(vswp, "%s: prop(cfg-handle) not found port(%d)",
			    __func__, idx);
			continue;
		}

		D2(vswp, "%s: changed node(%d) 0x%lx with name %s "
		    "and inst %ld", __func__, idx, node, node_name, inst);

		vsw_update_md_prop(vswp, mdp, node);
	}

	return (MDEG_SUCCESS);
}

/*
 * Mdeg callback invoked for changes to the vsw-port nodes
 * under the vsw node.
 */
static int
vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
{
	vsw_t		*vswp;
	int		idx;
	md_t		*mdp;
	mde_cookie_t	node;
	uint64_t	inst;

	if ((resp == NULL) || (cb_argp == NULL))
		return (MDEG_FAILURE);

	vswp = (vsw_t *)cb_argp;

	D2(vswp, "%s: added %d : removed %d : curr matched %d"
	    " : prev matched %d", __func__, resp->added.nelem,
	    resp->removed.nelem, resp->match_curr.nelem,
	    resp->match_prev.nelem);

	/* process added ports */
	for (idx = 0; idx < resp->added.nelem; idx++) {
		mdp = resp->added.mdp;
		node = resp->added.mdep[idx];

		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);

		if (vsw_port_add(vswp, mdp, &node) != 0) {
			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
			    "(0x%lx)", vswp->instance, node);
		}
	}

	/* process removed ports */
	for (idx = 0; idx < resp->removed.nelem; idx++) {
		mdp = resp->removed.mdp;
		node = resp->removed.mdep[idx];

		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
			DERR(vswp, "%s: prop(%s) not found in port(%d)",
			    __func__, id_propname, idx);
			continue;
		}

		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);

		if (vsw_port_detach(vswp, inst) != 0) {
			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
			    vswp->instance, inst);
		}
	}

	/*
	 * Currently no support for updating already active ports.
	 * So, ignore the match_curr and match_prev arrays for now.
	 */

	D1(vswp, "%s: exit", __func__);

	return (MDEG_SUCCESS);
}
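
/*
 * For example (illustrative only): a domain reconfiguration which adds
 * one vsw-port node and removes another is delivered to
 * vsw_port_mdeg_cb() as a single mdeg_result_t with
 * resp->added.nelem == 1 and resp->removed.nelem == 1, and each array
 * entry is handled independently above.
 */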

/*
 * Read the initial start-of-day values from the specified MD node.
 */
static void
vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
{
	int		i;
	uint64_t	macaddr = 0;

	D1(vswp, "%s: enter", __func__);

	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) == 0) {
		/*
		 * Note it is valid for the physname property to
		 * be NULL so check the actual name length to determine
		 * if we have an actual device name.
		 */
		if (strlen(vswp->physname) > 0)
			vswp->mdprops |= VSW_MD_PHYSNAME;
	} else {
		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
		    "device from MD", vswp->instance);
		return;
	}

	/* mac address for vswitch device itself */
	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
		    vswp->instance);

		/*
		 * Fall back to using the mac address of the physical
		 * device.
		 */
		if (vsw_get_physaddr(vswp) == 0) {
			cmn_err(CE_NOTE, "!vsw%d: Using MAC address from "
			    "physical device (%s)", vswp->instance,
			    vswp->physname);
		} else {
			cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address "
			    "from device %s", vswp->instance,
			    vswp->physname);
		}
	} else {
		WRITE_ENTER(&vswp->if_lockrw);
		for (i = ETHERADDRL - 1; i >= 0; i--) {
			vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
			macaddr >>= 8;
		}
		RW_EXIT(&vswp->if_lockrw);
		vswp->mdprops |= VSW_MD_MACADDR;
	}

	if (vsw_get_md_smodes(vswp, mdp, node,
	    vswp->smode, &vswp->smode_num)) {
		cmn_err(CE_WARN, "vsw%d: Unable to read %s property from "
		    "MD, defaulting to programmed mode", vswp->instance,
		    smode_propname);

		for (i = 0; i < NUM_SMODES; i++)
			vswp->smode[i] = VSW_LAYER2;

		vswp->smode_num = NUM_SMODES;
	} else {
		ASSERT(vswp->smode_num != 0);
		vswp->mdprops |= VSW_MD_SMODE;
	}

	/*
	 * If we are unable to set up any switching mode there is
	 * nothing more we can do.
	 */
	if (vsw_setup_switching(vswp))
		return;

	WRITE_ENTER(&vswp->if_lockrw);
	vswp->if_state &= ~VSW_IF_UP;
	RW_EXIT(&vswp->if_lockrw);
	if (vswp->mdprops & (VSW_MD_MACADDR | VSW_DEV_MACADDR)) {
		if (vsw_mac_register(vswp) != 0) {
			/*
			 * Treat this as a non-fatal error as we may be
			 * able to operate in some other mode.
			 */
			cmn_err(CE_WARN, "vsw%d: Unable to register as "
			    "provider with MAC layer", vswp->instance);
		}
	}

	D1(vswp, "%s: exit", __func__);
}

/*
 * Check to see if the relevant properties in the specified node have
 * changed, and if so take the appropriate action.
 *
 * If any of the properties are missing or invalid we don't take
 * any action, as this function should only be invoked when modifications
 * have been made to what we assume is a working configuration, which
 * we leave active.
 *
 * Note it is legal for this routine to be invoked even if none of the
 * properties in the port node within the MD have actually changed.
 */
static void
vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
{
	char		physname[LIFNAMSIZ];
	char		drv[LIFNAMSIZ];
	uint_t		ddi_instance;
	uint8_t		new_smode[NUM_SMODES];
	int		i, smode_num = 0;
	uint64_t	macaddr = 0;
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port = NULL;
	enum		{MD_init = 0x1,
			    MD_physname = 0x2,
			    MD_macaddr = 0x4,
			    MD_smode = 0x8} updated;

	updated = MD_init;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Check if the name of the physical device in the MD has changed.
	 */
	if (vsw_get_md_physname(vswp, mdp, node, physname) == 0) {
		/*
		 * Do a basic sanity check on the new device name/instance,
		 * if it is non-NULL. It is valid for the device name to
		 * have changed from a non-NULL to a NULL value, i.e.
		 * the vsw is being changed to 'routed' mode.
		 */
		if ((strlen(physname) != 0) &&
		    (ddi_parse(physname, drv,
		    &ddi_instance) != DDI_SUCCESS)) {
			cmn_err(CE_WARN, "!vsw%d: new device name %s is not"
			    " a valid device name/instance",
			    vswp->instance, physname);
			goto fail_reconf;
		}

		if (strcmp(physname, vswp->physname)) {
			D2(vswp, "%s: device name changed from %s to %s",
			    __func__, vswp->physname, physname);

			updated |= MD_physname;
		} else {
			D2(vswp, "%s: device name unchanged at %s",
			    __func__, vswp->physname);
		}
	} else {
		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
		    "device from updated MD.", vswp->instance);
		goto fail_reconf;
	}

	/*
	 * Check if the MAC address has changed.
	 */
	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
		    vswp->instance);
		goto fail_reconf;
	} else {
		/*
		 * Compare against a copy so that the update code
		 * below still sees the unshifted value.
		 */
		uint64_t maddr = macaddr;

		READ_ENTER(&vswp->if_lockrw);
		for (i = ETHERADDRL - 1; i >= 0; i--) {
			if (vswp->if_addr.ether_addr_octet[i]
			    != (maddr & 0xFF)) {
				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
				    __func__, i,
				    vswp->if_addr.ether_addr_octet[i],
				    (maddr & 0xFF));
				updated |= MD_macaddr;
				break;
			}
			maddr >>= 8;
		}
		RW_EXIT(&vswp->if_lockrw);
	}

	/*
	 * Check if the switching modes have changed.
	 */
	if (vsw_get_md_smodes(vswp, mdp, node,
	    new_smode, &smode_num)) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
		    vswp->instance, smode_propname);
		goto fail_reconf;
	} else {
		ASSERT(smode_num != 0);
		if (smode_num != vswp->smode_num) {
			D2(vswp, "%s: number of modes changed from %d to %d",
			    __func__, vswp->smode_num, smode_num);
		}

		for (i = 0; i < smode_num; i++) {
			if (new_smode[i] != vswp->smode[i]) {
				D2(vswp, "%s: mode changed from %d to %d",
				    __func__, vswp->smode[i], new_smode[i]);
				updated |= MD_smode;
				break;
			}
		}
	}

	/*
	 * Now make any changes which are needed...
	 */

	if (updated & (MD_physname | MD_smode)) {
		/*
		 * Disconnect all ports from the current card
		 */
		WRITE_ENTER(&plist->lockrw);
		for (port = plist->head; port != NULL; port = port->p_next) {
			/* Remove address if it was programmed into HW. */
			if (vsw_unset_hw(vswp, port)) {
				RW_EXIT(&plist->lockrw);
				goto fail_update;
			}
		}
		RW_EXIT(&plist->lockrw);

		/*
		 * Stop and detach the old device..
		 */
		vsw_mac_detach(vswp);

		/*
		 * Update phys name.
		 */
		if (updated & MD_physname) {
			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
			    vswp->instance, vswp->physname, physname);
			(void) strncpy(vswp->physname,
			    physname, strlen(physname) + 1);

			if (strlen(vswp->physname) > 0)
				vswp->mdprops |= VSW_MD_PHYSNAME;
		}

		/*
		 * Update array with the new switch mode values.
		 */
		if (updated & MD_smode) {
			for (i = 0; i < smode_num; i++)
				vswp->smode[i] = new_smode[i];

			vswp->smode_num = smode_num;
			vswp->smode_idx = 0;
		}

		/*
		 * ..and attach and start the new device.
		 */
		if (vsw_setup_switching(vswp))
			goto fail_update;

		/*
		 * Connect ports to the new card.
		 */
		WRITE_ENTER(&plist->lockrw);
		for (port = plist->head; port != NULL; port = port->p_next) {
			if (vsw_set_hw(vswp, port)) {
				RW_EXIT(&plist->lockrw);
				goto fail_update;
			}
		}
		RW_EXIT(&plist->lockrw);
	}

	if (updated & MD_macaddr) {
		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
		    vswp->instance, macaddr);

		WRITE_ENTER(&vswp->if_lockrw);
		for (i = ETHERADDRL - 1; i >= 0; i--) {
			vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
			macaddr >>= 8;
		}
		RW_EXIT(&vswp->if_lockrw);

		/*
		 * Notify the MAC layer of the changed address.
		 */
		mac_unicst_update(vswp->if_mh, (uint8_t *)&vswp->if_addr);
	}

	return;

fail_reconf:
	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
	return;

fail_update:
	cmn_err(CE_WARN, "!vsw%d: update of configuration failed",
	    vswp->instance);
}

/*
 * Add a new port to the system.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
{
	uint64_t		ldc_id;
	uint8_t			*addrp;
	int			i, addrsz;
	int			num_nodes = 0, nchan = 0;
	int			listsz = 0;
	mde_cookie_t		*listp = NULL;
	struct ether_addr	ea;
	uint64_t		macaddr;
	uint64_t		inst = 0;
	vsw_port_t		*port;

	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
		DWARN(vswp, "%s: prop(%s) not found", __func__,
		    id_propname);
		return (1);
	}

	/*
	 * Find the channel endpoint node(s) (which should be under this
	 * port node) which contain the channel id(s).
	 */
	if ((num_nodes = md_node_count(mdp)) <= 0) {
		DERR(vswp, "%s: invalid number of nodes found (%d)",
		    __func__, num_nodes);
		return (1);
	}

	D2(vswp, "%s: %d nodes found", __func__, num_nodes);

	/* allocate enough space for node list */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

	nchan = md_scan_dag(mdp, *node,
	    md_find_name(mdp, chan_propname),
	    md_find_name(mdp, "fwd"), listp);

	if (nchan <= 0) {
		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
		kmem_free(listp, listsz);
		return (1);
	}

	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);

	/* use property from first node found */
	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
		    id_propname);
		kmem_free(listp, listsz);
		return (1);
	}

	/* don't need the list any more */
	kmem_free(listp, listsz);

	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);

	/* read mac-address property */
	if (md_get_prop_data(mdp, *node, remaddr_propname,
	    &addrp, &addrsz)) {
		DWARN(vswp, "%s: prop(%s) not found",
		    __func__, remaddr_propname);
		return (1);
	}

	if (addrsz < ETHERADDRL) {
		DWARN(vswp, "%s: invalid address size", __func__);
		return (1);
	}

	macaddr = *((uint64_t *)addrp);
	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);

	for (i = ETHERADDRL - 1; i >= 0; i--) {
		ea.ether_addr_octet[i] = macaddr & 0xFF;
		macaddr >>= 8;
	}

	if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) {
		DERR(vswp, "%s: failed to attach port", __func__);
		return (1);
	}

	port = vsw_lookup_port(vswp, (int)inst);

	/* we just successfully created the port, so it should exist */
	ASSERT(port != NULL);

	return (0);
}

/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids,
    struct ether_addr *macaddr)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port, **prev_port;
	int		i;

	D1(vswp, "%s: enter : port %d", __func__, p_instance);

	/* port already exists? */
	READ_ENTER(&plist->lockrw);
	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			DWARN(vswp, "%s: port instance %d already attached",
			    __func__, p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}
	}
	RW_EXIT(&plist->lockrw);

	port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
	port->p_vswp = vswp;
	port->p_instance = p_instance;
	port->p_ldclist.num_ldcs = 0;
	port->p_ldclist.head = NULL;
	port->addr_set = VSW_ADDR_UNSET;

	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);

	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&port->ref_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->ref_cv, NULL, CV_DRIVER, NULL);

	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
	port->state = VSW_PORT_INIT;

	if (nids > VSW_PORT_MAX_LDCS) {
		D2(vswp, "%s: using first of %d ldc ids",
		    __func__, nids);
		nids = VSW_PORT_MAX_LDCS;
	}

	D2(vswp, "%s: %d nids", __func__, nids);
	for (i = 0; i < nids; i++) {
		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
			DERR(vswp, "%s: ldc_attach failed", __func__);

			rw_destroy(&port->p_ldclist.lockrw);

			cv_destroy(&port->ref_cv);
			mutex_destroy(&port->ref_lock);

			cv_destroy(&port->state_cv);
			mutex_destroy(&port->state_lock);

			mutex_destroy(&port->tx_lock);
			mutex_destroy(&port->mca_lock);
			kmem_free(port, sizeof (vsw_port_t));
			return (1);
		}
	}

	ether_copy(macaddr, &port->p_macaddr);

	WRITE_ENTER(&plist->lockrw);

	/* create the fdb entry for this port/mac address */
	(void) vsw_add_fdb(vswp, port);

	(void) vsw_set_hw(vswp, port);

	/* link it into the list of ports for this vsw instance */
	prev_port = (vsw_port_t **)(&plist->head);
	port->p_next = *prev_port;
	*prev_port = port;
	plist->num_ports++;
	RW_EXIT(&plist->lockrw);

	/*
	 * Initialise the port and any ldc's under it.
	 */
	(void) vsw_init_ldcs(port);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
	vsw_port_t	*port = NULL;
	vsw_port_list_t	*plist = &vswp->plist;

	D1(vswp, "%s: enter: port id %d", __func__, p_instance);

	WRITE_ENTER(&plist->lockrw);

	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	if (vsw_plist_del_node(vswp, port)) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	/* Remove address if it was programmed into HW. */
	(void) vsw_unset_hw(vswp, port);

	/* Remove the fdb entry for this port/mac address */
	(void) vsw_del_fdb(vswp, port);

	/* Remove any multicast addresses.. */
	vsw_del_mcst_port(port);

	/*
	 * No longer need to hold the writer lock on the port list now
	 * that we have unlinked the target port from the list.
	 */
	RW_EXIT(&plist->lockrw);

	READ_ENTER(&plist->lockrw);

	if (vswp->recfg_reqd)
		(void) vsw_reconfig_hw(vswp);

	RW_EXIT(&plist->lockrw);

	if (vsw_port_delete(port)) {
		return (1);
	}

	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
	return (0);
}

/*
 * Detach all active ports.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_detach_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port = NULL;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&plist->lockrw);

	while ((port = plist->head) != NULL) {
		if (vsw_plist_del_node(vswp, port)) {
			DERR(vswp, "%s: Error deleting port %d"
			    " from port list", __func__,
			    port->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}

		/* Remove address if it was programmed into HW. */
		(void) vsw_unset_hw(vswp, port);

		/* Remove the fdb entry for this port/mac address */
		(void) vsw_del_fdb(vswp, port);

		/* Remove any multicast addresses.. */
		vsw_del_mcst_port(port);

		/*
		 * No longer need to hold the lock on the port list
		 * now that we have unlinked the target port from the
		 * list.
		 */
		RW_EXIT(&plist->lockrw);
		if (vsw_port_delete(port)) {
			DERR(vswp, "%s: Error deleting port %d",
			    __func__, port->p_instance);
			return (1);
		}
		WRITE_ENTER(&plist->lockrw);
	}
	RW_EXIT(&plist->lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Delete the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_delete(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

	(void) vsw_uninit_ldcs(port);

	/*
	 * Wait for any pending ctrl msg tasks which reference this
	 * port to finish.
	 */
	if (vsw_drain_port_taskq(port))
		return (1);

	/*
	 * Wait for port reference count to hit zero.
	 */
	mutex_enter(&port->ref_lock);
	while (port->ref_cnt != 0)
		cv_wait(&port->ref_cv, &port->ref_lock);
	mutex_exit(&port->ref_lock);

	/*
	 * Wait for any active callbacks to finish
	 */
	if (vsw_drain_ldcs(port))
		return (1);

	ldcl = &port->p_ldclist;
	WRITE_ENTER(&ldcl->lockrw);
	while (ldcl->num_ldcs > 0) {
		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {
			cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld",
			    vswp->instance, ldcl->head->ldc_id);
			RW_EXIT(&ldcl->lockrw);
			return (1);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	rw_destroy(&port->p_ldclist.lockrw);

	mutex_destroy(&port->mca_lock);
	mutex_destroy(&port->tx_lock);
	cv_destroy(&port->ref_cv);
	mutex_destroy(&port->ref_lock);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	kmem_free(port, sizeof (vsw_port_t));

	D1(vswp, "%s: exit", __func__);

	return (0);
}
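
/*
 * Teardown on failure in vsw_ldc_attach() below is driven by a
 * 'progress' bitmask: each step that acquires a resource sets a bit,
 * and the failure path only undoes the steps whose bits are set.
 * In outline (illustrative only):
 *
 *	progress |= PROG_mblks;		on failure -> vio_destroy_mblks()
 *	progress |= PROG_callback;	on failure -> ldc_unreg_callback()
 */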

/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp = NULL;
	ldc_attr_t	attr;
	ldc_status_t	istatus;
	int		status = DDI_FAILURE;
	int		rv;
	enum		{ PROG_init = 0x0, PROG_mblks = 0x1,
			    PROG_callback = 0x2}
			progress;

	progress = PROG_init;

	D1(vswp, "%s: enter", __func__);

	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		DERR(vswp, "%s: kmem_zalloc failed", __func__);
		return (1);
	}
	ldcp->ldc_id = ldc_id;

	/* allocate pool of receive mblks */
	rv = vio_create_mblks(vsw_num_mblks, vsw_mblk_size, &(ldcp->rxh));
	if (rv) {
		DWARN(vswp, "%s: unable to create free mblk pool for"
		    " channel %ld (rv %d)", __func__, ldc_id, rv);
		kmem_free(ldcp, sizeof (vsw_ldc_t));
		return (1);
	}

	progress |= PROG_mblks;

	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);

	/* required for handshake with peer */
	ldcp->local_session = (uint64_t)ddi_get_lbolt();
	ldcp->peer_session = 0;
	ldcp->session_status = 0;

	mutex_init(&ldcp->hss_lock, NULL, MUTEX_DRIVER, NULL);
	ldcp->hss_id = 1;	/* Initial handshake session id */

	/* only set for outbound lane, inbound set by peer */
	mutex_init(&ldcp->lane_in.seq_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->lane_out.seq_lock, NULL, MUTEX_DRIVER, NULL);
	vsw_set_lane_attr(vswp, &ldcp->lane_out);

	attr.devclass = LDC_DEV_NT_SVC;
	attr.instance = ddi_get_instance(vswp->dip);
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = VSW_LDC_MTU;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
		    __func__, ldc_id, status);
		goto ldc_attach_fail;
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
		    __func__, ldc_id, status);
		(void) ldc_fini(ldcp->ldc_handle);
		goto ldc_attach_fail;
	}

	progress |= PROG_callback;

	mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: ldc_status failed", __func__);
		mutex_destroy(&ldcp->status_lock);
		goto ldc_attach_fail;
	}

	ldcp->ldc_status = istatus;
	ldcp->ldc_port = port;
	ldcp->ldc_vswp = vswp;

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldcl->lockrw);
	ldcp->ldc_next = ldcl->head;
	ldcl->head = ldcp;
	ldcl->num_ldcs++;
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);

ldc_attach_fail:
	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_cblock);

	cv_destroy(&ldcp->drain_cv);

	if (progress & PROG_callback) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
	}

	if ((progress & PROG_mblks) && (ldcp->rxh != NULL)) {
		if (vio_destroy_mblks(ldcp->rxh) != 0) {
			/*
			 * Something odd has happened, as the destroy
			 * will only fail if some mblks have been allocated
			 * from the pool already (which shouldn't happen)
			 * and have not been returned.
			 *
			 * Add the pool pointer to a list maintained in
			 * the device instance. Another attempt will be made
			 * to free the pool when the device itself detaches.
			 */
			cmn_err(CE_WARN, "!vsw%d: Creation of ldc channel %ld "
			    "failed and cannot destroy associated mblk "
			    "pool", vswp->instance, ldc_id);
			ldcp->rxh->nextp = vswp->rxh;
			vswp->rxh = ldcp->rxh;
		}
	}
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->hss_lock);

	mutex_destroy(&ldcp->lane_in.seq_lock);
	mutex_destroy(&ldcp->lane_out.seq_lock);
	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (1);
}

/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_t	*ldcp, **prev_ldcp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	int		rv;

	/*
	 * Walk the list keeping a pointer to the link that will need
	 * updating, so the channel can be unlinked below.
	 */
	prev_ldcp = &ldcl->head;
	for (; (ldcp = *prev_ldcp) != NULL; prev_ldcp = &ldcp->ldc_next) {
		if (ldcp->ldc_id == ldc_id) {
			break;
		}
	}

	/* specified ldc id not found */
	if (ldcp == NULL) {
		DERR(vswp, "%s: ldcp = NULL", __func__);
		return (1);
	}

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
	 */
	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	/*
	 * If the close fails we are in serious trouble, as we won't
	 * be able to delete the parent port.
	 */
	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
		DERR(vswp, "%s: error %d closing channel %lld",
		    __func__, rv, ldcp->ldc_id);
		return (1);
	}

	(void) ldc_fini(ldcp->ldc_handle);

	ldcp->ldc_status = LDC_INIT;
	ldcp->ldc_handle = NULL;
	ldcp->ldc_vswp = NULL;

	if (ldcp->rxh != NULL) {
		if (vio_destroy_mblks(ldcp->rxh)) {
			/*
			 * Most likely some mblks are still in use and
			 * have not been returned to the pool. Add the pool
			 * to the list maintained in the device instance.
			 * Another attempt will be made to destroy the pool
			 * when the device detaches.
			 */
			ldcp->rxh->nextp = vswp->rxh;
			vswp->rxh = ldcp->rxh;
		}
	}

	/* unlink it from the list */
	*prev_ldcp = ldcp->ldc_next;
	ldcl->num_ldcs--;

	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_cblock);
	cv_destroy(&ldcp->drain_cv);
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->hss_lock);
	mutex_destroy(&ldcp->lane_in.seq_lock);
	mutex_destroy(&ldcp->lane_out.seq_lock);
	mutex_destroy(&ldcp->status_lock);

	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (0);
}
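
/*
 * Illustrative call sequence for a successful bring-up in
 * vsw_ldc_init() below (assuming the peer also opens its end):
 *
 *	ldc_open()	-> ldc_status() returns LDC_OPEN or LDC_READY
 *	ldc_up()	-> ldc_status() returns LDC_UP
 *			-> vsw_restart_handshake() starts the handshake
 */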

/*
 * Open and attempt to bring up the channel. Note that the channel
 * can only be brought up if the peer has also opened the channel.
 *
 * Returns 0 if it can open and bring up the channel, otherwise
 * returns 1.
 */
static int
vsw_ldc_init(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	ldc_status_t	istatus = 0;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	LDC_ENTER_LOCK(ldcp);

	/* don't start at 0 in case clients don't like that */
	ldcp->next_ident = 1;

	rv = ldc_open(ldcp->ldc_handle);
	if (rv != 0) {
		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
		    __func__, ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
		    __func__, ldcp->ldc_id, istatus);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = istatus;
	mutex_exit(&ldcp->status_lock);

	rv = ldc_up(ldcp->ldc_handle);
	if (rv != 0) {
		/*
		 * Not a fatal error for ldc_up() to fail, as the peer
		 * end point may simply not be ready yet.
		 */
		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
		    ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	/*
	 * The ldc_up() call is non-blocking so we need to explicitly
	 * check the channel status to see if in fact the channel
	 * is UP.
	 */
	mutex_enter(&ldcp->status_lock);
	istatus = ldcp->ldc_status;
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	}
	mutex_exit(&ldcp->status_lock);
	LDC_EXIT_LOCK(ldcp);

	if ((istatus != LDC_UP) && (ldcp->ldc_status == LDC_UP)) {
		D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
		    ldcp->ldc_id, istatus);
		vsw_restart_handshake(ldcp);
	}

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/* disable callbacks on the channel */
static int
vsw_ldc_uninit(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	int	rv;

	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);

	LDC_ENTER_LOCK(ldcp);

	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		DERR(vswp, "vsw_ldc_uninit(%lld): error disabling "
		    "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = LDC_INIT;
	mutex_exit(&ldcp->status_lock);

	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);

	return (0);
}

static int
vsw_init_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_init(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	return (0);
}

static int
vsw_uninit_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	D1(NULL, "vsw_uninit_ldcs: enter\n");

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_uninit(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	D1(NULL, "vsw_uninit_ldcs: exit\n");

	return (0);
}
"vsw_uninit_ldcs: exit\n"); 3619 3620 return (0); 3621 } 3622 3623 /* 3624 * Wait until the callback(s) associated with the ldcs under the specified 3625 * port have completed. 3626 * 3627 * Prior to this function being invoked each channel under this port 3628 * should have been quiesced via ldc_set_cb_mode(DISABLE). 3629 * 3630 * A short explaination of what we are doing below.. 3631 * 3632 * The simplest approach would be to have a reference counter in 3633 * the ldc structure which is increment/decremented by the callbacks as 3634 * they use the channel. The drain function could then simply disable any 3635 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately 3636 * there is a tiny window here - before the callback is able to get the lock 3637 * on the channel it is interrupted and this function gets to execute. It 3638 * sees that the ref count is zero and believes its free to delete the 3639 * associated data structures. 3640 * 3641 * We get around this by taking advantage of the fact that before the ldc 3642 * framework invokes a callback it sets a flag to indicate that there is a 3643 * callback active (or about to become active). If when we attempt to 3644 * unregister a callback when this active flag is set then the unregister 3645 * will fail with EWOULDBLOCK. 3646 * 3647 * If the unregister fails we do a cv_timedwait. We will either be signaled 3648 * by the callback as it is exiting (note we have to wait a short period to 3649 * allow the callback to return fully to the ldc framework and it to clear 3650 * the active flag), or by the timer expiring. In either case we again attempt 3651 * the unregister. We repeat this until we can succesfully unregister the 3652 * callback. 3653 * 3654 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch 3655 * the case where the callback has finished but the ldc framework has not yet 3656 * cleared the active flag. In this case we would never get a cv_signal. 3657 */ 3658 static int 3659 vsw_drain_ldcs(vsw_port_t *port) 3660 { 3661 vsw_ldc_list_t *ldcl = &port->p_ldclist; 3662 vsw_ldc_t *ldcp; 3663 vsw_t *vswp = port->p_vswp; 3664 3665 D1(vswp, "%s: enter", __func__); 3666 3667 READ_ENTER(&ldcl->lockrw); 3668 3669 ldcp = ldcl->head; 3670 3671 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 3672 /* 3673 * If we can unregister the channel callback then we 3674 * know that there is no callback either running or 3675 * scheduled to run for this channel so move on to next 3676 * channel in the list. 3677 */ 3678 mutex_enter(&ldcp->drain_cv_lock); 3679 3680 /* prompt active callbacks to quit */ 3681 ldcp->drain_state = VSW_LDC_DRAINING; 3682 3683 if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) { 3684 D2(vswp, "%s: unreg callback for chan %ld", __func__, 3685 ldcp->ldc_id); 3686 mutex_exit(&ldcp->drain_cv_lock); 3687 continue; 3688 } else { 3689 /* 3690 * If we end up here we know that either 1) a callback 3691 * is currently executing, 2) is about to start (i.e. 3692 * the ldc framework has set the active flag but 3693 * has not actually invoked the callback yet, or 3) 3694 * has finished and has returned to the ldc framework 3695 * but the ldc framework has not yet cleared the 3696 * active bit. 3697 * 3698 * Wait for it to finish. 
			while (ldc_unreg_callback(ldcp->ldc_handle)
			    == EWOULDBLOCK)
				(void) cv_timedwait(&ldcp->drain_cv,
				    &ldcp->drain_cv_lock, lbolt + hz);

			mutex_exit(&ldcp->drain_cv_lock);
			D2(vswp, "%s: unreg callback for chan %ld after "
			    "timeout", __func__, ldcp->ldc_id);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Wait until all tasks which reference this port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 */
static int
vsw_drain_port_taskq(vsw_port_t *port)
{
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Mark the port as in the process of being detached, and
	 * dispatch a marker task to the queue so we know when all
	 * relevant tasks have completed.
	 */
	mutex_enter(&port->state_lock);
	port->state = VSW_PORT_DETACHING;

	if ((vswp->taskq_p == NULL) ||
	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
	    port, DDI_NOSLEEP) != DDI_SUCCESS)) {
		DERR(vswp, "%s: unable to dispatch marker task",
		    __func__);
		mutex_exit(&port->state_lock);
		return (1);
	}

	/*
	 * Wait for the marker task to finish.
	 */
	while (port->state != VSW_PORT_DETACHABLE)
		cv_wait(&port->state_cv, &port->state_lock);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static void
vsw_marker_task(void *arg)
{
	vsw_port_t	*port = arg;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&port->state_lock);

	/*
	 * No further tasks should be dispatched which reference
	 * this port so it is ok to mark it as safe to detach.
	 */
	port->state = VSW_PORT_DETACHABLE;

	cv_signal(&port->state_cv);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);
}

static vsw_port_t *
vsw_lookup_port(vsw_t *vswp, int p_instance)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port;

	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			D2(vswp, "vsw_lookup_port: found p_instance\n");
			return (port);
		}
	}

	return (NULL);
}

/*
 * Search for and remove the specified port from the port
 * list. Returns 0 if able to locate and remove port, otherwise
 * returns 1.
 */
static int
vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*curr_p, *prev_p;

	if (plist->head == NULL)
		return (1);

	curr_p = prev_p = plist->head;

	while (curr_p != NULL) {
		if (curr_p == port) {
			if (prev_p == curr_p) {
				plist->head = curr_p->p_next;
			} else {
				prev_p->p_next = curr_p->p_next;
			}
			plist->num_ports--;
			return (0);
		} else {
			prev_p = curr_p;
			curr_p = curr_p->p_next;
		}
	}

	/* specified port was not found in the list */
	return (1);
}

/*
 * Interrupt handler for ldc messages.
 */
static uint_t
vsw_ldc_cb(uint64_t event, caddr_t arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	ldc_status_t	lstatus;
	int		rv;

	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	mutex_enter(&ldcp->ldc_cblock);

	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		mutex_exit(&ldcp->ldc_cblock);
		return (LDC_SUCCESS);
	}

	mutex_enter(&ldcp->status_lock);
	lstatus = ldcp->ldc_status;
	rv = ldc_status(ldcp->ldc_handle, &ldcp->ldc_status);
	mutex_exit(&ldcp->status_lock);
	if (rv != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read channel state",
		    vswp->instance);
		goto vsw_cb_exit;
	}

	if (event & LDC_EVT_UP) {
		/*
		 * Channel has come up, get the state and then start
		 * the handshake.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
		    __func__, ldcp->ldc_id, event, lstatus);
		D2(vswp, "%s: UP: old status %ld : cur status %ld",
		    __func__, lstatus, ldcp->ldc_status);
		if ((ldcp->ldc_status != lstatus) &&
		    (ldcp->ldc_status == LDC_UP)) {
			vsw_restart_handshake(ldcp);
		}

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		/*
		 * Data available for reading.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) data READ",
		    __func__, ldcp->ldc_id, event);

		vsw_process_pkt(ldcp);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		goto vsw_cb_exit;
	}

	if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) {
		D2(vswp, "%s: id(%ld) event(%llx) DOWN/RESET",
		    __func__, ldcp->ldc_id, event);

		/* attempt to restart the connection */
		vsw_restart_ldc(ldcp);

		/*
		 * vsw_restart_ldc() will attempt to bring the channel
		 * back up. Check here to see if that succeeded.
		 */
		mutex_enter(&ldcp->status_lock);
		lstatus = ldcp->ldc_status;
		rv = ldc_status(ldcp->ldc_handle, &ldcp->ldc_status);
		mutex_exit(&ldcp->status_lock);
		if (rv != 0) {
			DERR(vswp, "%s: unable to read status for channel %ld",
			    __func__, ldcp->ldc_id);
			goto vsw_cb_exit;
		}

		D2(vswp, "%s: id(%ld) event(%llx) DOWN/RESET event:"
		    " old status %ld : cur status %ld", __func__,
		    ldcp->ldc_id, event, lstatus, ldcp->ldc_status);

		/*
		 * If the channel was not previously UP then (re)start the
		 * handshake.
		 */
		if ((ldcp->ldc_status == LDC_UP) && (lstatus != LDC_UP)) {
			D2(vswp, "%s: channel %ld now UP, restarting "
			    "handshake", __func__, ldcp->ldc_id);
			vsw_restart_handshake(ldcp);
		}
	}

	/*
	 * Catch either LDC_EVT_WRITE which we don't support or any
	 * unknown event.
	 */
	if (event & ~(LDC_EVT_UP | LDC_EVT_RESET
	    | LDC_EVT_DOWN | LDC_EVT_READ)) {

		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
	}

vsw_cb_exit:
	mutex_exit(&ldcp->ldc_cblock);

	/*
	 * Let the drain function know we are finishing if it
	 * is waiting.
	 */
	mutex_enter(&ldcp->drain_cv_lock);
	if (ldcp->drain_state == VSW_LDC_DRAINING)
		cv_signal(&ldcp->drain_cv);
	mutex_exit(&ldcp->drain_cv_lock);

	return (LDC_SUCCESS);
}
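
/*
 * Note that events may be combined in a single invocation of
 * vsw_ldc_cb(), e.g. (illustrative) a callback delivered with
 * (LDC_EVT_UP | LDC_EVT_READ) first handles the UP transition and
 * then drains the queued packets before returning.
 */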

/*
 * Restart the connection with our peer. Free any existing
 * data structures and then attempt to bring the channel back
 * up.
 */
static void
vsw_restart_ldc(vsw_ldc_t *ldcp)
{
	int		rv;
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_port_t	*port;
	vsw_ldc_list_t	*ldcl;

	D1(vswp, "%s: enter", __func__);

	port = ldcp->ldc_port;
	ldcl = &port->p_ldclist;

	READ_ENTER(&ldcl->lockrw);

	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
	    ldcp->lane_in.lstate, ldcp->lane_out.lstate);

	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);
	RW_EXIT(&ldcl->lockrw);

	ldcp->lane_in.lstate = 0;
	ldcp->lane_out.lstate = 0;

	/*
	 * Remove the parent port from any multicast groups
	 * it may have registered with. The client must resend
	 * the multicast add command after the handshake completes.
	 */
	(void) vsw_del_fdb(vswp, port);

	vsw_del_mcst_port(port);

	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hcnt = 0;
	ldcp->hphase = VSW_MILESTONE0;

	rv = ldc_up(ldcp->ldc_handle);
	if (rv != 0) {
		/*
		 * Not a fatal error for ldc_up() to fail, as the peer
		 * end point may simply not be ready yet.
		 */
		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
		    ldcp->ldc_id, rv);
	}

	D1(vswp, "%s: exit", __func__);
}

/*
 * (Re)start a handshake with our peer by sending them
 * our version info.
 */
static void
vsw_restart_handshake(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "vsw_restart_handshake: enter");

	if (ldcp->hphase != VSW_MILESTONE0) {
		vsw_restart_ldc(ldcp);
	}

	/*
	 * We now increment the transaction group id. This allows
	 * us to identify and discard any tasks which are still pending
	 * on the taskq and refer to the handshake session we are about
	 * to restart. These stale messages no longer have any real
	 * meaning.
	 */
	mutex_enter(&ldcp->hss_lock);
	ldcp->hss_id++;
	mutex_exit(&ldcp->hss_lock);

	if (ldcp->hcnt++ > vsw_num_handshakes) {
		cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted "
		    "handshake attempts (%d) on channel %ld",
		    vswp->instance, ldcp->hcnt, ldcp->ldc_id);
		return;
	}

	if ((vswp->taskq_p == NULL) ||
	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
	    DDI_NOSLEEP) != DDI_SUCCESS)) {
		cmn_err(CE_WARN, "!vsw%d: Can't dispatch version handshake "
		    "task", vswp->instance);
	}

	D1(vswp, "vsw_restart_handshake: exit");
}

/*
 * Deal appropriately with an ECONNRESET event encountered in a ldc_*
 * call.
 */
static void
vsw_handle_reset(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	ldc_status_t	lstatus;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&ldcp->status_lock);
	lstatus = ldcp->ldc_status;
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to read status for channel %ld",
		    __func__, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}
	mutex_exit(&ldcp->status_lock);

	/*
	 * Check the channel's previous recorded state to
	 * determine if this is the first ECONNRESET event
	 * we've gotten for this particular channel (i.e. was
	 * previously up but is no longer). If so, terminate
	 * the channel.
	 */
	if ((ldcp->ldc_status != LDC_UP) && (lstatus == LDC_UP)) {
		vsw_restart_ldc(ldcp);
	}

	/*
	 * vsw_restart_ldc() will also attempt to bring the channel
	 * back up. Check here if that succeeds.
	 */
	mutex_enter(&ldcp->status_lock);
	lstatus = ldcp->ldc_status;
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to read status for channel %ld",
		    __func__, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}
	mutex_exit(&ldcp->status_lock);

	/*
	 * If channel is now up and no one else (i.e. the callback routine)
	 * has dealt with it then we restart the handshake here.
	 */
	if ((lstatus != LDC_UP) && (ldcp->ldc_status == LDC_UP)) {
		vsw_restart_handshake(ldcp);
	}

	D1(vswp, "%s: exit", __func__);
}
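
/*
 * For example (illustrative only): after we send a VER_INFO message
 * the outbound lane has VSW_VER_INFO_SENT set in its lstate; a VER_ACK
 * or VER_NACK from the peer is only legal while that bit is set, and
 * vsw_check_flag() below clears the bit when the (N)ACK arrives.
 */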
				ldcp->ldc_id, phase);
			vsw_restart_handshake(ldcp);
			return (1);
		} else
			state &= ~VSW_RDX_INFO_SENT;
		break;

	case VSW_MCST_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
				" when in state %d\n", ldcp->ldc_id, phase);
			vsw_restart_handshake(ldcp);
			return (1);
		}
		break;

	default:
		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
			ldcp->ldc_id, flag);
		return (1);
	}

	if (dir == INBOUND)
		ldcp->lane_in.lstate = state;
	else
		ldcp->lane_out.lstate = state;

	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);

	return (0);
}

void
vsw_next_milestone(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
		ldcp->ldc_id, ldcp->hphase);

	DUMP_FLAGS(ldcp->lane_in.lstate);
	DUMP_FLAGS(ldcp->lane_out.lstate);

	switch (ldcp->hphase) {

	case VSW_MILESTONE0:
		/*
		 * If we haven't started to handshake with our peer,
		 * start to do so now.
		 */
		if (ldcp->lane_out.lstate == 0) {
			D2(vswp, "%s: (chan %lld) starting handshake "
				"with peer", __func__, ldcp->ldc_id);
			vsw_restart_handshake(ldcp);
		}

		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated version info.
		 */
		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
			(ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 0",
				__func__, ldcp->ldc_id);

			/*
			 * Next milestone is passed when attribute
			 * information has been successfully exchanged.
			 */
			ldcp->hphase = VSW_MILESTONE1;
			vsw_send_attr(ldcp);

		}
		break;

	case VSW_MILESTONE1:
		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated attribute information.
		 */
		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {

			ldcp->hphase = VSW_MILESTONE2;

			/*
			 * If the peer device has said it wishes to
			 * use descriptor rings then we send it our ring
			 * info, otherwise we just set up a private ring
			 * which uses an internal buffer.
			 */
			if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE)
				vsw_send_dring_info(ldcp);
		}
		break;


	case VSW_MILESTONE2:
		/*
		 * If peer has indicated in its attribute message that
		 * it wishes to use descriptor rings then the only way
		 * to pass this milestone is for us to have received
		 * valid dring info.
		 *
		 * If peer is not using descriptor rings then just fall
		 * through.
		 */
		if ((ldcp->lane_in.xfer_mode == VIO_DRING_MODE) &&
			(!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT)))
			break;

		D2(vswp, "%s: (chan %lld) leaving milestone 2",
			__func__, ldcp->ldc_id);

		ldcp->hphase = VSW_MILESTONE3;
		vsw_send_rdx(ldcp);
		break;

	case VSW_MILESTONE3:
		/*
		 * Pass this milestone when all parameters have been
		 * successfully exchanged and RDX sent in both directions.
		 *
		 * Mark outbound lane as available to transmit data.
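		 *
		 * For reference, the full progression this routine drives
		 * (a summary of the cases in this switch, nothing new):
		 *
		 *	MILESTONE0: version negotiated	-> send attr info
		 *	MILESTONE1: attr info exchanged	-> send dring info
		 *	MILESTONE2: dring info mapped	-> send RDX
		 *	MILESTONE3: RDX both directions	-> lane active (M4)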
		 */
		if ((ldcp->lane_in.lstate & VSW_RDX_ACK_SENT) &&
			(ldcp->lane_out.lstate & VSW_RDX_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 3",
				__func__, ldcp->ldc_id);
			D2(vswp, "%s: ** handshake complete (0x%llx : "
				"0x%llx) **", __func__, ldcp->lane_in.lstate,
				ldcp->lane_out.lstate);
			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
			ldcp->hphase = VSW_MILESTONE4;
			ldcp->hcnt = 0;
			DISPLAY_STATE();
		} else {
			D2(vswp, "%s: still in milestone 3 (0x%llx :"
				" 0x%llx", __func__, ldcp->lane_in.lstate,
				ldcp->lane_out.lstate);
		}
		break;

	case VSW_MILESTONE4:
		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
			ldcp->ldc_id);
		break;

	default:
		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
			ldcp->ldc_id, ldcp->hphase);
	}

	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
		ldcp->hphase);
}

/*
 * Check if major version is supported.
 *
 * Returns 0 if it finds a supported major number, and if necessary
 * adjusts the minor field.
 *
 * Returns 1 if it can't match the major number exactly. Sets major/minor
 * to the next lowest supported values, or to zero if no other values are
 * possible.
 */
static int
vsw_supported_version(vio_ver_msg_t *vp)
{
	int	i;

	D1(NULL, "vsw_supported_version: enter");

	for (i = 0; i < VSW_NUM_VER; i++) {
		if (vsw_versions[i].ver_major == vp->ver_major) {
			/*
			 * Matching or lower major version found. Update
			 * minor number if necessary.
			 */
			if (vp->ver_minor > vsw_versions[i].ver_minor) {
				D2(NULL, "%s: adjusting minor value"
					" from %d to %d", __func__,
					vp->ver_minor,
					vsw_versions[i].ver_minor);
				vp->ver_minor = vsw_versions[i].ver_minor;
			}

			return (0);
		}

		if (vsw_versions[i].ver_major < vp->ver_major) {
			if (vp->ver_minor > vsw_versions[i].ver_minor) {
				D2(NULL, "%s: adjusting minor value"
					" from %d to %d", __func__,
					vp->ver_minor,
					vsw_versions[i].ver_minor);
				vp->ver_minor = vsw_versions[i].ver_minor;
			}
			return (1);
		}
	}

	/* No match was possible, zero out fields */
	vp->ver_major = 0;
	vp->ver_minor = 0;

	D1(NULL, "vsw_supported_version: exit");

	return (1);
}

/*
 * Main routine for processing messages received over LDC.
 */
static void
vsw_process_pkt(void *arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	size_t		msglen;
	vio_msg_tag_t	tag;
	def_msg_t	dmsg;
	int		rv = 0;


	D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	/*
	 * If channel is up read messages until channel is empty.
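	 *
	 * This is the standard LDC receive idiom: call ldc_read()
	 * repeatedly until it reports no more data, peeking at the
	 * leading vio_msg_tag_t to demultiplex. In outline:
	 *
	 *	do {
	 *		msglen = sizeof (dmsg);
	 *		rv = ldc_read(handle, (caddr_t)&dmsg, &msglen);
	 *		if (rv == ECONNRESET)
	 *			break;		(channel reset: recover)
	 *		if (msglen == 0)
	 *			break;		(channel drained)
	 *		... dispatch on tag.vio_msgtype ...
	 *	} while (msglen);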
	 */
	do {
		msglen = sizeof (dmsg);
		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&dmsg, &msglen);

		if (rv != 0) {
			DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) "
				"len(%d)\n", __func__, ldcp->ldc_id,
				rv, msglen);
		}

		/* channel has been reset */
		if (rv == ECONNRESET) {
			vsw_handle_reset(ldcp);
			break;
		}

		if (msglen == 0) {
			D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__,
				ldcp->ldc_id);
			break;
		}

		D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__,
			ldcp->ldc_id, msglen);

		/*
		 * Figure out what sort of packet we have gotten by
		 * examining the msg tag, and then switch it appropriately.
		 */
		bcopy(&dmsg, &tag, sizeof (vio_msg_tag_t));

		switch (tag.vio_msgtype) {
		case VIO_TYPE_CTRL:
			vsw_dispatch_ctrl_task(ldcp, &dmsg, tag);
			break;
		case VIO_TYPE_DATA:
			vsw_process_data_pkt(ldcp, &dmsg, tag);
			break;
		case VIO_TYPE_ERR:
			vsw_process_err_pkt(ldcp, &dmsg, tag);
			break;
		default:
			DERR(vswp, "%s: Unknown tag(%lx) id(%lx)\n",
				__func__, tag.vio_msgtype, ldcp->ldc_id);
			break;
		}
	} while (msglen);

	D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id);
}

/*
 * Dispatch a task to process a VIO control message.
 */
static void
vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t tag)
{
	vsw_ctrl_task_t		*ctaskp = NULL;
	vsw_port_t		*port = ldcp->ldc_port;
	vsw_t			*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * We need to handle RDX ACK messages in-band as once they
	 * are exchanged it is possible that we will get an
	 * immediate (legitimate) data packet.
	 */
	if ((tag.vio_subtype_env == VIO_RDX) &&
		(tag.vio_subtype == VIO_SUBTYPE_ACK)) {

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_ACK_RECV))
			return;

		ldcp->lane_out.lstate |= VSW_RDX_ACK_RECV;
		D2(vswp, "%s (%ld) handling RDX_ACK in place "
			"(ostate 0x%llx : hphase %d)", __func__,
			ldcp->ldc_id, ldcp->lane_out.lstate, ldcp->hphase);
		vsw_next_milestone(ldcp);
		return;
	}

	ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP);

	if (ctaskp == NULL) {
		DERR(vswp, "%s: unable to alloc space for ctrl"
			" msg", __func__);
		vsw_restart_handshake(ldcp);
		return;
	}

	ctaskp->ldcp = ldcp;
	bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t));
	mutex_enter(&ldcp->hss_lock);
	ctaskp->hss_id = ldcp->hss_id;
	mutex_exit(&ldcp->hss_lock);

	/*
	 * Dispatch task to processing taskq if port is not in
	 * the process of being detached.
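	 *
	 * The dispatch is the usual check-then-act-under-lock pattern,
	 * in outline (DDI_NOSLEEP because we must not block here):
	 *
	 *	mutex_enter(&port->state_lock);
	 *	if (port->state == VSW_PORT_INIT)
	 *		(void) ddi_taskq_dispatch(tq, func, arg, DDI_NOSLEEP);
	 *	mutex_exit(&port->state_lock);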
	 */
	mutex_enter(&port->state_lock);
	if (port->state == VSW_PORT_INIT) {
		if ((vswp->taskq_p == NULL) ||
			(ddi_taskq_dispatch(vswp->taskq_p,
			vsw_process_ctrl_pkt, ctaskp, DDI_NOSLEEP)
			!= DDI_SUCCESS)) {
			DERR(vswp, "%s: unable to dispatch task to taskq",
				__func__);
			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
			mutex_exit(&port->state_lock);
			vsw_restart_handshake(ldcp);
			return;
		}
	} else {
		DWARN(vswp, "%s: port %d detaching, not dispatching "
			"task", __func__, port->p_instance);
	}

	mutex_exit(&port->state_lock);

	D2(vswp, "%s: dispatched task to taskq for chan %d", __func__,
		ldcp->ldc_id);
	D1(vswp, "%s: exit", __func__);
}

/*
 * Process a VIO ctrl message. Invoked from taskq.
 */
static void
vsw_process_ctrl_pkt(void *arg)
{
	vsw_ctrl_task_t	*ctaskp = (vsw_ctrl_task_t *)arg;
	vsw_ldc_t	*ldcp = ctaskp->ldcp;
	vsw_t		*vswp = ldcp->ldc_vswp;
	vio_msg_tag_t	tag;
	uint16_t	env;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t));
	env = tag.vio_subtype_env;

	/* stale pkt check */
	mutex_enter(&ldcp->hss_lock);
	if (ctaskp->hss_id < ldcp->hss_id) {
		DWARN(vswp, "%s: discarding stale packet belonging to"
			" earlier (%ld) handshake session", __func__,
			ctaskp->hss_id);
		mutex_exit(&ldcp->hss_lock);
		return;
	}
	mutex_exit(&ldcp->hss_lock);

	/* session id check */
	if (ldcp->session_status & VSW_PEER_SESSION) {
		if (ldcp->peer_session != tag.vio_sid) {
			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
				__func__, ldcp->ldc_id, tag.vio_sid);
			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
			vsw_restart_handshake(ldcp);
			return;
		}
	}

	/*
	 * Switch on vio_subtype envelope, then let lower routines
	 * decide if it's an INFO, ACK or NACK packet.
	 */
	switch (env) {
	case VIO_VER_INFO:
		vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp);
		break;
	case VIO_DRING_REG:
		vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp);
		break;
	case VIO_DRING_UNREG:
		vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp);
		break;
	case VIO_ATTR_INFO:
		vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp);
		break;
	case VNET_MCAST_INFO:
		vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp);
		break;
	case VIO_RDX:
		vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp);
		break;
	default:
		DERR(vswp, "%s : unknown vio_subtype_env (%x)\n",
			__func__, env);
	}

	kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * Version negotiation. We can end up here either because our peer
 * has responded to a handshake message we have sent it, or because our
 * peer has initiated a handshake with us. If it's the former then it can
 * only be an ACK or NACK; if it's the latter it can only be an INFO.
 *
 * If it's an ACK we move to the next stage of the handshake, namely
 * attribute exchange. If it's a NACK we see if we can specify another
 * version; if we can't we stop.
 *
 * If it is an INFO we reset all params associated with communication
 * in that direction over this channel (remember the connection is
 * essentially 2 independent simplex channels).
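 *
 * A typical exchange with us as responder looks like (sketch only):
 *
 *	peer: VER_INFO(maj, min)  -->
 *	                          <--  us: VER_ACK(maj, min')  [min' <= min]
 *
 * and when we cannot support the proposed major version:
 *
 *	peer: VER_INFO(maj, min)  -->
 *	                          <--  us: VER_NACK(maj', min') [next lower
 *	                               pair we do support, or 0:0 if none]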
 */
void
vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vio_ver_msg_t	*ver_pkt;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a ctrl/version packet so
	 * cast it into the correct structure.
	 */
	ver_pkt = (vio_ver_msg_t *)pkt;

	switch (ver_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n");

		/*
		 * Record the session id, which we will use from now
		 * until we see another VER_INFO msg. Even then the
		 * session id in most cases will be unchanged, except
		 * if the channel was reset.
		 */
		if ((ldcp->session_status & VSW_PEER_SESSION) &&
			(ldcp->peer_session != ver_pkt->tag.vio_sid)) {
			DERR(vswp, "%s: updating session id for chan %lld "
				"from %llx to %llx", __func__, ldcp->ldc_id,
				ldcp->peer_session, ver_pkt->tag.vio_sid);
		}

		ldcp->peer_session = ver_pkt->tag.vio_sid;
		ldcp->session_status |= VSW_PEER_SESSION;

		/* Legal message at this time ? */
		if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV))
			return;

		/*
		 * First check the device class. Currently only expect
		 * to be talking to a network device. In the future may
		 * also talk to another switch.
		 */
		if (ver_pkt->dev_class != VDEV_NETWORK) {
			DERR(vswp, "%s: illegal device class %d", __func__,
				ver_pkt->dev_class);

			ver_pkt->tag.vio_sid = ldcp->local_session;
			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);

			vsw_send_msg(ldcp, (void *)ver_pkt,
				sizeof (vio_ver_msg_t));

			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
			vsw_next_milestone(ldcp);
			return;
		} else {
			ldcp->dev_class = ver_pkt->dev_class;
		}

		/*
		 * Now check the version.
		 */
		if (vsw_supported_version(ver_pkt) == 0) {
			/*
			 * Support this major version and possibly
			 * adjusted minor version.
			 */

			D2(vswp, "%s: accepted ver %d:%d", __func__,
				ver_pkt->ver_major, ver_pkt->ver_minor);

			/* Store accepted values */
			ldcp->lane_in.ver_major = ver_pkt->ver_major;
			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;

			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;

			ldcp->lane_in.lstate |= VSW_VER_ACK_SENT;
		} else {
			/*
			 * NACK back with the next lower major/minor
			 * pairing we support (if we don't support any more
			 * versions then they will be set to zero).
			 */

			D2(vswp, "%s: replying with ver %d:%d", __func__,
				ver_pkt->ver_major, ver_pkt->ver_minor);

			/* Store updated values */
			ldcp->lane_in.ver_major = ver_pkt->ver_major;
			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;

			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
		}

		DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
		ver_pkt->tag.vio_sid = ldcp->local_session;
		vsw_send_msg(ldcp, (void *)ver_pkt, sizeof (vio_ver_msg_t));

		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV))
			return;

		/* Store updated values */
		ldcp->lane_in.ver_major = ver_pkt->ver_major;
		ldcp->lane_in.ver_minor = ver_pkt->ver_minor;


		ldcp->lane_out.lstate |= VSW_VER_ACK_RECV;
		vsw_next_milestone(ldcp);

		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV))
			return;

		/*
		 * If our peer sent us a NACK with the ver fields set to
		 * zero then there is nothing more we can do. Otherwise see
		 * if we support either the version suggested, or a lesser
		 * one.
		 */
		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
			DERR(vswp, "%s: peer unable to negotiate any "
				"further.", __func__);
			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
			vsw_next_milestone(ldcp);
			return;
		}

		/*
		 * Check to see if we support this major version or
		 * a lower one. If we don't then maj/min will be set
		 * to zero.
		 */
		(void) vsw_supported_version(ver_pkt);
		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
			/* Nothing more we can do */
			DERR(vswp, "%s: version negotiation failed.\n",
				__func__);
			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
			vsw_next_milestone(ldcp);
		} else {
			/* found a supported major version */
			ldcp->lane_out.ver_major = ver_pkt->ver_major;
			ldcp->lane_out.ver_minor = ver_pkt->ver_minor;

			D2(vswp, "%s: resending with updated values (%x, %x)",
				__func__, ver_pkt->ver_major,
				ver_pkt->ver_minor);

			ldcp->lane_out.lstate |= VSW_VER_INFO_SENT;
			ver_pkt->tag.vio_sid = ldcp->local_session;
			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;

			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);

			vsw_send_msg(ldcp, (void *)ver_pkt,
				sizeof (vio_ver_msg_t));

			vsw_next_milestone(ldcp);

		}
		break;

	default:
		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
			ver_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
}

/*
 * Process an attribute packet. We can end up here either because our peer
 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or because
 * our peer has sent us an attribute INFO message.
 *
 * If it's an ACK we then move to the next stage of the handshake which
 * is to send our descriptor ring info to our peer. If it's a NACK then
 * there is nothing more we can (currently) do.
 *
 * If we get a valid/acceptable INFO packet (and we have already negotiated
 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we
 * NACK back and reset channel state to INACTIVE.
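 *
 * The acceptability test itself lives in vsw_check_attr(); in spirit it
 * is a series of checks of the following form (illustrative only; the
 * field names are from vnet_attr_msg_t but the exact checks are defined
 * elsewhere in this file):
 *
 *	if (pkt->xfer_mode != VIO_DRING_MODE &&
 *	    pkt->xfer_mode != VIO_DESC_MODE)
 *		return (1);			(unknown mode: reject)
 *	if (pkt->mtu > lane_mtu)
 *		return (1);			(oversized MTU: reject)
 *	return (0);				(acceptable)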
 *
 * FUTURE: in time we will probably negotiate over attributes, but for
 * the moment unacceptable attributes are regarded as a fatal error.
 *
 */
void
vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vnet_attr_msg_t		*attr_pkt;
	vsw_t			*vswp = ldcp->ldc_vswp;
	vsw_port_t		*port = ldcp->ldc_port;
	uint64_t		macaddr = 0;
	int			i;

	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a ctrl/attr packet so
	 * cast it into the correct structure.
	 */
	attr_pkt = (vnet_attr_msg_t *)pkt;

	switch (attr_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV))
			return;

		/*
		 * If the attributes are unacceptable then we NACK back.
		 */
		if (vsw_check_attr(attr_pkt, ldcp->ldc_port)) {

			DERR(vswp, "%s (chan %d): invalid attributes",
				__func__, ldcp->ldc_id);

			vsw_free_lane_resources(ldcp, INBOUND);

			attr_pkt->tag.vio_sid = ldcp->local_session;
			attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
			ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT;
			vsw_send_msg(ldcp, (void *)attr_pkt,
				sizeof (vnet_attr_msg_t));

			vsw_next_milestone(ldcp);
			return;
		}

		/*
		 * Otherwise store attributes for this lane and update
		 * lane state.
		 */
		ldcp->lane_in.mtu = attr_pkt->mtu;
		ldcp->lane_in.addr = attr_pkt->addr;
		ldcp->lane_in.addr_type = attr_pkt->addr_type;
		ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode;
		ldcp->lane_in.ack_freq = attr_pkt->ack_freq;

		macaddr = ldcp->lane_in.addr;
		for (i = ETHERADDRL - 1; i >= 0; i--) {
			port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF;
			macaddr >>= 8;
		}

		/* create the fdb entry for this port/mac address */
		(void) vsw_add_fdb(vswp, port);

		/* setup device specific xmit routines */
		mutex_enter(&port->tx_lock);
		if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) {
			D2(vswp, "%s: mode = VIO_DRING_MODE", __func__);
			port->transmit = vsw_dringsend;
		} else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) {
			D2(vswp, "%s: mode = VIO_DESC_MODE", __func__);
			vsw_create_privring(ldcp);
			port->transmit = vsw_descrsend;
		}
		mutex_exit(&port->tx_lock);

		attr_pkt->tag.vio_sid = ldcp->local_session;
		attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;

		DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);

		ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT;

		vsw_send_msg(ldcp, (void *)attr_pkt,
			sizeof (vnet_attr_msg_t));

		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV))
			return;

		ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV))
			return;

		ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	default:
		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
			attr_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
}

/*
 * Process a dring info packet. We can end up here either because our peer
 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or because
 * our peer has sent us a dring INFO message.
 *
 * If we get a valid/acceptable INFO packet (and we have already negotiated
 * a version) we ACK back and update the lane state, otherwise we NACK back.
 *
 * FUTURE: nothing to stop the client from sending us info on multiple
 * drings, but for the moment we will just use the first one we are given.
 *
 */
void
vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vio_dring_reg_msg_t	*dring_pkt;
	vsw_t			*vswp = ldcp->ldc_vswp;
	ldc_mem_info_t		minfo;
	dring_info_t		*dp, *dbp;
	int			dring_found = 0;

	/*
	 * We know this is a ctrl/dring packet so
	 * cast it into the correct structure.
	 */
	dring_pkt = (vio_dring_reg_msg_t *)pkt;

	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);

	switch (dring_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
			return;

		/*
		 * If the dring params are unacceptable then we NACK back.
		 */
		if (vsw_check_dring_info(dring_pkt)) {

			DERR(vswp, "%s (%lld): invalid dring info",
				__func__, ldcp->ldc_id);

			vsw_free_lane_resources(ldcp, INBOUND);

			dring_pkt->tag.vio_sid = ldcp->local_session;
			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);

			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;

			vsw_send_msg(ldcp, (void *)dring_pkt,
				sizeof (vio_dring_reg_msg_t));

			vsw_next_milestone(ldcp);
			return;
		}

		/*
		 * Otherwise, attempt to map in the dring using the
		 * cookie. If that succeeds we send back a unique dring
		 * identifier that the sending side will use in future
		 * to refer to this descriptor ring.
		 */
		dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);

		dp->num_descriptors = dring_pkt->num_descriptors;
		dp->descriptor_size = dring_pkt->descriptor_size;
		dp->options = dring_pkt->options;
		dp->ncookies = dring_pkt->ncookies;

		/*
		 * Note: should only get one cookie. Enforced in
		 * the ldc layer.
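		 *
		 * The import sequence below is therefore simply: copy
		 * the single exported cookie and hand it to
		 * ldc_mem_dring_map() to get a shadow mapping, e.g.
		 *
		 *	bcopy(&pkt->cookie[0], &dp->cookie[0],
		 *	    sizeof (ldc_mem_cookie_t));
		 *	rv = ldc_mem_dring_map(ldc_handle, &dp->cookie[0],
		 *	    dp->ncookies, dp->num_descriptors,
		 *	    dp->descriptor_size, LDC_SHADOW_MAP, &dp->handle);
		 *
		 * NACK'ing the registration if the mapping fails.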
		 */
		bcopy(&dring_pkt->cookie[0], &dp->cookie[0],
			sizeof (ldc_mem_cookie_t));

		D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__,
			dp->num_descriptors, dp->descriptor_size);
		D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__,
			dp->options, dp->ncookies);

		if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0],
			dp->ncookies, dp->num_descriptors,
			dp->descriptor_size, LDC_SHADOW_MAP,
			&(dp->handle))) != 0) {

			DERR(vswp, "%s: dring_map failed\n", __func__);

			kmem_free(dp, sizeof (dring_info_t));
			vsw_free_lane_resources(ldcp, INBOUND);

			dring_pkt->tag.vio_sid = ldcp->local_session;
			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);

			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
			vsw_send_msg(ldcp, (void *)dring_pkt,
				sizeof (vio_dring_reg_msg_t));

			vsw_next_milestone(ldcp);
			return;
		}

		if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {

			DERR(vswp, "%s: dring_addr failed\n", __func__);

			kmem_free(dp, sizeof (dring_info_t));
			vsw_free_lane_resources(ldcp, INBOUND);

			dring_pkt->tag.vio_sid = ldcp->local_session;
			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);

			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
			vsw_send_msg(ldcp, (void *)dring_pkt,
				sizeof (vio_dring_reg_msg_t));

			vsw_next_milestone(ldcp);
			return;
		} else {
			/* store the address of the pub part of ring */
			dp->pub_addr = minfo.vaddr;
		}

		/* no private section as we are importing */
		dp->priv_addr = NULL;

		/*
		 * Using a simple monotonically increasing int for the
		 * ident at the moment.
		 */
		dp->ident = ldcp->next_ident;
		ldcp->next_ident++;

		dp->end_idx = 0;
		dp->next = NULL;

		/*
		 * Link it onto the end of the list of drings
		 * for this lane.
		 */
		if (ldcp->lane_in.dringp == NULL) {
			D2(vswp, "%s: adding first INBOUND dring", __func__);
			ldcp->lane_in.dringp = dp;
		} else {
			dbp = ldcp->lane_in.dringp;

			while (dbp->next != NULL)
				dbp = dbp->next;

			dbp->next = dp;
		}

		/* acknowledge it */
		dring_pkt->tag.vio_sid = ldcp->local_session;
		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
		dring_pkt->dring_ident = dp->ident;

		vsw_send_msg(ldcp, (void *)dring_pkt,
			sizeof (vio_dring_reg_msg_t));

		ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT;
		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV))
			return;

		/*
		 * Peer is acknowledging our dring info and will have
		 * sent us a dring identifier which we will use to
		 * refer to this ring w.r.t. our peer.
		 */
		dp = ldcp->lane_out.dringp;
		if (dp != NULL) {
			/*
			 * Find the ring this ident should be associated
			 * with.
			 */
			if (vsw_dring_match(dp, dring_pkt)) {
				dring_found = 1;

			} else while (dp != NULL) {
				if (vsw_dring_match(dp, dring_pkt)) {
					dring_found = 1;
					break;
				}
				dp = dp->next;
			}

			if (dring_found == 0) {
				DERR(NULL, "%s: unrecognised ring cookie",
					__func__);
				vsw_restart_handshake(ldcp);
				return;
			}

		} else {
			DERR(vswp, "%s: DRING ACK received but no drings "
				"allocated", __func__);
			vsw_restart_handshake(ldcp);
			return;
		}

		/* store ident */
		dp->ident = dring_pkt->dring_ident;
		ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV))
			return;

		ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	default:
		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
			dring_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
}

/*
 * Process a request from peer to unregister a dring.
 *
 * For the moment we just restart the handshake if our
 * peer endpoint attempts to unregister a dring.
 */
void
vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vsw_t			*vswp = ldcp->ldc_vswp;
	vio_dring_unreg_msg_t	*dring_pkt;

	/*
	 * We know this is a ctrl/dring packet so
	 * cast it into the correct structure.
	 */
	dring_pkt = (vio_dring_unreg_msg_t *)pkt;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	switch (dring_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		DWARN(vswp, "%s: restarting handshake..", __func__);
		vsw_restart_handshake(ldcp);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		DWARN(vswp, "%s: restarting handshake..", __func__);
		vsw_restart_handshake(ldcp);
		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		DWARN(vswp, "%s: restarting handshake..", __func__);
		vsw_restart_handshake(ldcp);
		break;

	default:
		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
			dring_pkt->tag.vio_subtype);
		vsw_restart_handshake(ldcp);
	}

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

#define	SND_MCST_NACK(ldcp, pkt) \
	pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
	pkt->tag.vio_sid = ldcp->local_session; \
	vsw_send_msg(ldcp, (void *)pkt, sizeof (vnet_mcast_msg_t));

/*
 * Process a multicast request from a vnet.
 *
 * Vnets specify a multicast address that they are interested in. This
 * address is used as a key into the hash table which forms the multicast
 * forwarding database (mFDB).
 *
 * The table keys are the multicast addresses, while the table entries
 * are pointers to lists of ports which wish to receive packets for the
 * specified multicast address.
 *
 * When a multicast packet is being switched we use the address as a key
 * into the hash table, and then walk the appropriate port list forwarding
 * the pkt to each port in turn.
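 *
 * In outline, the switching path does the equivalent of (a sketch;
 * mfdb_ent_t and the exact key encoding are illustrative, the real
 * definitions live in vsw_fdb.h):
 *
 *	key = <48-bit multicast address packed into a uint64_t>;
 *	if (mod_hash_find(mfdb, (mod_hash_key_t)key, &val) == 0) {
 *		for (entp = (mfdb_ent_t *)val; entp != NULL;
 *		    entp = entp->next)
 *			(void) vsw_portsend(entp->port, copymsg(mp));
 *	}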
 *
 * If a vnet is no longer interested in a particular multicast grouping
 * we simply find the correct location in the hash table and then delete
 * the relevant port from the port list.
 *
 * To deal with the case whereby a port is being deleted without first
 * removing itself from the lists in the hash table, we maintain a list
 * of multicast addresses the port has registered an interest in, within
 * the port structure itself. We then simply walk that list of addresses
 * using them as keys into the hash table and remove the port from the
 * appropriate lists.
 */
static void
vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vnet_mcast_msg_t	*mcst_pkt;
	vsw_port_t		*port = ldcp->ldc_port;
	vsw_t			*vswp = ldcp->ldc_vswp;
	int			i;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a ctrl/mcast packet so
	 * cast it into the correct structure.
	 */
	mcst_pkt = (vnet_mcast_msg_t *)pkt;

	switch (mcst_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		/*
		 * Check if in correct state to receive a multicast
		 * message (i.e. handshake complete). If not, reset
		 * the handshake.
		 */
		if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV))
			return;

		/*
		 * Before attempting to add or remove address check
		 * that they are valid multicast addresses.
		 * If not, then NACK back.
		 */
		for (i = 0; i < mcst_pkt->count; i++) {
			if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) {
				DERR(vswp, "%s: invalid multicast address",
					__func__);
				SND_MCST_NACK(ldcp, mcst_pkt);
				return;
			}
		}

		/*
		 * Now add/remove the addresses. If this fails we
		 * NACK back.
		 */
		if (vsw_add_rem_mcst(mcst_pkt, port) != 0) {
			SND_MCST_NACK(ldcp, mcst_pkt);
			return;
		}

		mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
		mcst_pkt->tag.vio_sid = ldcp->local_session;

		DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt);

		vsw_send_msg(ldcp, (void *)mcst_pkt,
			sizeof (vnet_mcast_msg_t));
		break;

	case VIO_SUBTYPE_ACK:
		DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		/*
		 * We shouldn't ever get a multicast ACK message as
		 * at the moment we never request multicast addresses
		 * to be set on some other device. This may change in
		 * the future if we have cascading switches.
		 */
		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV))
			return;

		/* Do nothing */
		break;

	case VIO_SUBTYPE_NACK:
		DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		/*
		 * We shouldn't get a multicast NACK packet for the
		 * same reasons as we shouldn't get an ACK packet.
		 */
		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV))
			return;

		/* Do nothing */
		break;

	default:
		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
			mcst_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

static void
vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vio_rdx_msg_t	*rdx_pkt;
	vsw_t		*vswp = ldcp->ldc_vswp;

	/*
	 * We know this is a ctrl/rdx packet so
	 * cast it into the correct structure.
	 */
	rdx_pkt = (vio_rdx_msg_t *)pkt;

	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);

	switch (rdx_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_INFO_RECV))
			return;

		rdx_pkt->tag.vio_sid = ldcp->local_session;
		rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;

		DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt);

		ldcp->lane_in.lstate |= VSW_RDX_ACK_SENT;

		vsw_send_msg(ldcp, (void *)rdx_pkt,
			sizeof (vio_rdx_msg_t));

		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_ACK:
		/*
		 * Should be handled in-band by callback handler.
		 */
		DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__);
		vsw_restart_handshake(ldcp);
		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_NACK_RECV))
			return;

		ldcp->lane_out.lstate |= VSW_RDX_NACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	default:
		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
			rdx_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

static void
vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t tag)
{
	uint16_t	env = tag.vio_subtype_env;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/* session id check */
	if (ldcp->session_status & VSW_PEER_SESSION) {
		if (ldcp->peer_session != tag.vio_sid) {
			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
				__func__, ldcp->ldc_id, tag.vio_sid);
			vsw_restart_handshake(ldcp);
			return;
		}
	}

	/*
	 * It is an error for us to be getting data packets
	 * before the handshake has completed.
	 */
	if (ldcp->hphase != VSW_MILESTONE4) {
		DERR(vswp, "%s: got data packet before handshake complete "
			"hphase %d (%x: %x)", __func__, ldcp->hphase,
			ldcp->lane_in.lstate, ldcp->lane_out.lstate);
		DUMP_FLAGS(ldcp->lane_in.lstate);
		DUMP_FLAGS(ldcp->lane_out.lstate);
		vsw_restart_handshake(ldcp);
		return;
	}

	/*
	 * Switch on vio_subtype envelope, then let lower routines
	 * decide if it's an INFO, ACK or NACK packet.
	 */
	if (env == VIO_DRING_DATA) {
		vsw_process_data_dring_pkt(ldcp, dpkt);
	} else if (env == VIO_PKT_DATA) {
		vsw_process_data_raw_pkt(ldcp, dpkt);
	} else if (env == VIO_DESC_DATA) {
		vsw_process_data_ibnd_pkt(ldcp, dpkt);
	} else {
		DERR(vswp, "%s : unknown vio_subtype_env (%x)\n",
			__func__, env);
	}

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

#define	SND_DRING_NACK(ldcp, pkt) \
	pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
	pkt->tag.vio_sid = ldcp->local_session; \
	vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_dring_msg_t));

static void
vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt)
{
	vio_dring_msg_t		*dring_pkt;
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;
	dring_info_t		*dp = NULL;
	vsw_t			*vswp = ldcp->ldc_vswp;
	mblk_t			*mp = NULL;
	mblk_t			*bp = NULL;
	mblk_t			*bpt = NULL;
	size_t			nbytes = 0;
	size_t			off = 0;
	uint64_t		ncookies = 0;
	uint64_t		chain = 0;
	uint64_t		j, len;
	uint32_t		pos, start, datalen;
	uint32_t		range_start, range_end;
	int32_t			end, num, cnt = 0;
	int			i, rv;
	boolean_t		ack_needed = B_FALSE;
	boolean_t		prev_desc_ack = B_FALSE;
	int			read_attempts = 0;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a data/dring packet so
	 * cast it into the correct structure.
	 */
	dring_pkt = (vio_dring_msg_t *)dpkt;

	/*
	 * Switch on the vio_subtype. If it's INFO then we need to
	 * process the data. If it's an ACK we need to make sure
	 * it makes sense (i.e. did we send an earlier data/info),
	 * and if it's a NACK then we maybe attempt a retry.
	 */
	switch (dring_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);

		if ((dp = vsw_ident2dring(&ldcp->lane_in,
				dring_pkt->dring_ident)) == NULL) {

			DERR(vswp, "%s(%lld): unable to find dring from "
				"ident 0x%llx", __func__, ldcp->ldc_id,
				dring_pkt->dring_ident);

			SND_DRING_NACK(ldcp, dring_pkt);
			return;
		}

		start = pos = dring_pkt->start_idx;
		end = dring_pkt->end_idx;
		len = dp->num_descriptors;

		range_start = range_end = pos;

		D2(vswp, "%s(%lld): start index %ld : end %ld\n",
			__func__, ldcp->ldc_id, start, end);

		if (end == -1) {
			num = -1;
		} else if (end >= 0) {
			num = end >= pos ?
				end - pos + 1: (len - pos + 1) + end;

			/* basic sanity check */
			if (end > len) {
				DERR(vswp, "%s(%lld): endpoint %lld outside "
					"ring length %lld", __func__,
					ldcp->ldc_id, end, len);

				SND_DRING_NACK(ldcp, dring_pkt);
				return;
			}
		} else {
			DERR(vswp, "%s(%lld): invalid endpoint %lld",
				__func__, ldcp->ldc_id, end);
			SND_DRING_NACK(ldcp, dring_pkt);
			return;
		}

		while (cnt != num) {
vsw_recheck_desc:
			if ((rv = ldc_mem_dring_acquire(dp->handle,
					pos, pos)) != 0) {
				DERR(vswp, "%s(%lld): unable to acquire "
					"descriptor at pos %d: err %d",
					__func__, ldcp->ldc_id, pos, rv);
				SND_DRING_NACK(ldcp, dring_pkt);
				return;
			}

			pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos;

			/*
			 * When given a bounded range of descriptors
			 * to process, it's an error to hit a descriptor
			 * which is not ready.
			 * In the non-bounded case (end_idx == -1) this
			 * simply indicates we have reached the end of the
			 * current active range.
			 */
			if (pub_addr->hdr.dstate != VIO_DESC_READY) {
				/* unbound - no error */
				if (end == -1) {
					if (read_attempts == vsw_read_attempts)
						break;

					delay(drv_usectohz(vsw_desc_delay));
					read_attempts++;
					goto vsw_recheck_desc;
				}

				/* bounded - error - so NACK back */
				DERR(vswp, "%s(%lld): descriptor not READY "
					"(%d)", __func__, ldcp->ldc_id,
					pub_addr->hdr.dstate);
				SND_DRING_NACK(ldcp, dring_pkt);
				return;
			}

			DTRACE_PROBE1(read_attempts, int, read_attempts);

			range_end = pos;

			/*
			 * If we ACK'd the previous descriptor then now
			 * record the new range start position for later
			 * ACK's.
			 */
			if (prev_desc_ack) {
				range_start = pos;

				D2(vswp, "%s(%lld): updating range start "
					"to be %d", __func__, ldcp->ldc_id,
					range_start);

				prev_desc_ack = B_FALSE;
			}

			/*
			 * Data is padded to align on 8 byte boundary,
			 * datalen is actual data length, i.e. minus that
			 * padding.
			 */
			datalen = pub_addr->nbytes;

			/*
			 * Does peer wish us to ACK when we have finished
			 * with this descriptor ?
			 */
			if (pub_addr->hdr.ack)
				ack_needed = B_TRUE;

			D2(vswp, "%s(%lld): processing desc %lld at pos"
				" 0x%llx : dstate 0x%lx : datalen 0x%lx",
				__func__, ldcp->ldc_id, pos, pub_addr,
				pub_addr->hdr.dstate, datalen);

			/*
			 * Mark that we are starting to process descriptor.
			 */
			pub_addr->hdr.dstate = VIO_DESC_ACCEPTED;

			mp = vio_allocb(ldcp->rxh);
			if (mp == NULL) {
				/*
				 * No free receive buffers available, so
				 * fallback onto allocb(9F). Make sure that
				 * we get a data buffer which is a multiple
				 * of 8 as this is required by ldc_mem_copy.
				 */
				DTRACE_PROBE(allocb);
				mp = allocb(datalen + VNET_IPALIGN + 8,
					BPRI_MED);
			}

			/*
			 * If even the allocb(9F) fallback failed, give up
			 * on this descriptor: mark it DONE, release it and
			 * stop processing, rather than dereference a NULL
			 * mblk below.
			 */
			if (mp == NULL) {
				DERR(vswp, "%s(%lld): allocb failed",
					__func__, ldcp->ldc_id);
				pub_addr->hdr.dstate = VIO_DESC_DONE;
				(void) ldc_mem_dring_release(dp->handle,
					pos, pos);
				break;
			}

			/*
			 * Ensure that we ask ldc for an aligned
			 * number of bytes.
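			 *
			 * Worked example of the rounding that follows:
			 * with datalen = 1514 and a VNET_IPALIGN of 2
			 * (value illustrative), nbytes starts at 1516;
			 * 1516 & 0x7 = 4, so off = 4 and nbytes becomes
			 * 1520, a multiple of 8 as ldc_mem_copy() requires.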
			 */
			nbytes = datalen + VNET_IPALIGN;
			if (nbytes & 0x7) {
				off = 8 - (nbytes & 0x7);
				nbytes += off;
			}

			ncookies = pub_addr->ncookies;
			rv = ldc_mem_copy(ldcp->ldc_handle,
				(caddr_t)mp->b_rptr, 0, &nbytes,
				pub_addr->memcookie, ncookies,
				LDC_COPY_IN);

			if (rv != 0) {
				DERR(vswp, "%s(%d): unable to copy in "
					"data from %d cookies in desc %d"
					" (rv %d)", __func__, ldcp->ldc_id,
					ncookies, pos, rv);
				freemsg(mp);

				pub_addr->hdr.dstate = VIO_DESC_DONE;
				(void) ldc_mem_dring_release(dp->handle,
					pos, pos);
				break;
			} else {
				D2(vswp, "%s(%d): copied in %ld bytes"
					" using %d cookies", __func__,
					ldcp->ldc_id, nbytes, ncookies);
			}

			/* adjust the read pointer to skip over the padding */
			mp->b_rptr += VNET_IPALIGN;

			/* point to the actual end of data */
			mp->b_wptr = mp->b_rptr + datalen;

			/* build a chain of received packets */
			if (bp == NULL) {
				/* first pkt */
				bp = mp;
				bp->b_next = bp->b_prev = NULL;
				bpt = bp;
				chain = 1;
			} else {
				mp->b_next = NULL;
				mp->b_prev = bpt;
				bpt->b_next = mp;
				bpt = mp;
				chain++;
			}

			/* mark we are finished with this descriptor */
			pub_addr->hdr.dstate = VIO_DESC_DONE;

			(void) ldc_mem_dring_release(dp->handle, pos, pos);

			/*
			 * Send an ACK back to peer if requested.
			 */
			if (ack_needed) {
				ack_needed = B_FALSE;

				dring_pkt->start_idx = range_start;
				dring_pkt->end_idx = range_end;

				DERR(vswp, "%s(%lld): processed %d %d, ACK"
					" requested", __func__, ldcp->ldc_id,
					dring_pkt->start_idx,
					dring_pkt->end_idx);

				dring_pkt->dring_process_state = VIO_DP_ACTIVE;
				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
				dring_pkt->tag.vio_sid = ldcp->local_session;
				vsw_send_msg(ldcp, (void *)dring_pkt,
					sizeof (vio_dring_msg_t));

				prev_desc_ack = B_TRUE;
				range_start = pos;
			}

			/* next descriptor */
			pos = (pos + 1) % len;
			cnt++;

			/*
			 * Break out of loop here and stop processing to
			 * allow some other network device (or disk) to
			 * get access to the cpu.
			 */
			/* send the chain of packets to be switched */
			if (chain > vsw_chain_len) {
				D3(vswp, "%s(%lld): switching chain of %d "
					"msgs", __func__, ldcp->ldc_id, chain);
				vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT,
					ldcp->ldc_port, NULL);
				bp = NULL;
				break;
			}
		}

		/* send the chain of packets to be switched */
		if (bp != NULL) {
			D3(vswp, "%s(%lld): switching chain of %d msgs",
				__func__, ldcp->ldc_id, chain);
			vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT,
				ldcp->ldc_port, NULL);
		}

		DTRACE_PROBE1(msg_cnt, int, cnt);

		/*
		 * We are now finished so ACK back with the state
		 * set to STOPPING so our peer knows we are finished
		 */
		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
		dring_pkt->tag.vio_sid = ldcp->local_session;

		dring_pkt->dring_process_state = VIO_DP_STOPPED;

		DTRACE_PROBE(stop_process_sent);

		/*
		 * We have not processed any more descriptors beyond
		 * the last one we ACK'd.
5808 */ 5809 if (prev_desc_ack) 5810 range_start = range_end; 5811 5812 dring_pkt->start_idx = range_start; 5813 dring_pkt->end_idx = range_end; 5814 5815 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 5816 __func__, ldcp->ldc_id, dring_pkt->start_idx, 5817 dring_pkt->end_idx); 5818 5819 vsw_send_msg(ldcp, (void *)dring_pkt, 5820 sizeof (vio_dring_msg_t)); 5821 break; 5822 5823 case VIO_SUBTYPE_ACK: 5824 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 5825 /* 5826 * Verify that the relevant descriptors are all 5827 * marked as DONE 5828 */ 5829 if ((dp = vsw_ident2dring(&ldcp->lane_out, 5830 dring_pkt->dring_ident)) == NULL) { 5831 DERR(vswp, "%s: unknown ident in ACK", __func__); 5832 return; 5833 } 5834 5835 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 5836 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 5837 5838 start = end = 0; 5839 start = dring_pkt->start_idx; 5840 end = dring_pkt->end_idx; 5841 len = dp->num_descriptors; 5842 5843 j = num = 0; 5844 /* calculate # descriptors taking into a/c wrap around */ 5845 num = end >= start ? end - start + 1: (len - start + 1) + end; 5846 5847 D2(vswp, "%s(%lld): start index %ld : end %ld : num %ld\n", 5848 __func__, ldcp->ldc_id, start, end, num); 5849 5850 mutex_enter(&dp->dlock); 5851 dp->last_ack_recv = end; 5852 mutex_exit(&dp->dlock); 5853 5854 for (i = start; j < num; i = (i + 1) % len, j++) { 5855 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 5856 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 5857 5858 /* 5859 * If the last descriptor in a range has the ACK 5860 * bit set then we will get two messages from our 5861 * peer relating to it. The normal ACK msg and then 5862 * a subsequent STOP msg. The first message will have 5863 * resulted in the descriptor being reclaimed and 5864 * its state set to FREE so when we encounter a non 5865 * DONE descriptor we need to check to see if its 5866 * because we have just reclaimed it. 5867 */ 5868 mutex_enter(&priv_addr->dstate_lock); 5869 if (pub_addr->hdr.dstate == VIO_DESC_DONE) { 5870 /* clear all the fields */ 5871 bzero(priv_addr->datap, priv_addr->datalen); 5872 priv_addr->datalen = 0; 5873 5874 pub_addr->hdr.dstate = VIO_DESC_FREE; 5875 pub_addr->hdr.ack = 0; 5876 5877 priv_addr->dstate = VIO_DESC_FREE; 5878 mutex_exit(&priv_addr->dstate_lock); 5879 5880 D3(vswp, "clearing descp %d : pub state " 5881 "0x%llx : priv state 0x%llx", i, 5882 pub_addr->hdr.dstate, 5883 priv_addr->dstate); 5884 5885 } else { 5886 mutex_exit(&priv_addr->dstate_lock); 5887 5888 if (dring_pkt->dring_process_state != 5889 VIO_DP_STOPPED) { 5890 DERR(vswp, "%s: descriptor %lld at pos " 5891 " 0x%llx not DONE (0x%lx)\n", 5892 __func__, i, pub_addr, 5893 pub_addr->hdr.dstate); 5894 return; 5895 } 5896 } 5897 } 5898 5899 /* 5900 * If our peer is stopping processing descriptors then 5901 * we check to make sure it has processed all the descriptors 5902 * we have updated. If not then we send it a new message 5903 * to prompt it to restart. 5904 */ 5905 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) { 5906 DTRACE_PROBE(stop_process_recv); 5907 D2(vswp, "%s(%lld): got stopping msg : %d : %d", 5908 __func__, ldcp->ldc_id, dring_pkt->start_idx, 5909 dring_pkt->end_idx); 5910 5911 /* 5912 * Check next descriptor in public section of ring. 5913 * If its marked as READY then we need to prompt our 5914 * peer to start processing the ring again. 
5915 */ 5916 i = (end + 1) % len; 5917 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 5918 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 5919 5920 /* 5921 * Hold the restart lock across all of this to 5922 * make sure that its not possible for us to 5923 * decide that a msg needs to be sent in the future 5924 * but the sending code having already checked is 5925 * about to exit. 5926 */ 5927 mutex_enter(&dp->restart_lock); 5928 mutex_enter(&priv_addr->dstate_lock); 5929 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 5930 5931 mutex_exit(&priv_addr->dstate_lock); 5932 5933 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 5934 dring_pkt->tag.vio_sid = ldcp->local_session; 5935 5936 mutex_enter(&ldcp->lane_out.seq_lock); 5937 dring_pkt->seq_num = ldcp->lane_out.seq_num++; 5938 mutex_exit(&ldcp->lane_out.seq_lock); 5939 5940 dring_pkt->start_idx = (end + 1) % len; 5941 dring_pkt->end_idx = -1; 5942 5943 D2(vswp, "%s(%lld) : sending restart msg:" 5944 " %d : %d", __func__, ldcp->ldc_id, 5945 dring_pkt->start_idx, 5946 dring_pkt->end_idx); 5947 5948 vsw_send_msg(ldcp, (void *)dring_pkt, 5949 sizeof (vio_dring_msg_t)); 5950 } else { 5951 mutex_exit(&priv_addr->dstate_lock); 5952 dp->restart_reqd = B_TRUE; 5953 } 5954 mutex_exit(&dp->restart_lock); 5955 } 5956 break; 5957 5958 case VIO_SUBTYPE_NACK: 5959 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 5960 __func__, ldcp->ldc_id); 5961 /* 5962 * Something is badly wrong if we are getting NACK's 5963 * for our data pkts. So reset the channel. 5964 */ 5965 vsw_restart_handshake(ldcp); 5966 5967 break; 5968 5969 default: 5970 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 5971 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 5972 } 5973 5974 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 5975 } 5976 5977 /* 5978 * VIO_PKT_DATA (a.k.a raw data mode ) 5979 * 5980 * Note - currently not supported. Do nothing. 5981 */ 5982 static void 5983 vsw_process_data_raw_pkt(vsw_ldc_t *ldcp, void *dpkt) 5984 { 5985 _NOTE(ARGUNUSED(dpkt)) 5986 5987 D1(NULL, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 5988 5989 DERR(NULL, "%s (%lld): currently not supported", 5990 __func__, ldcp->ldc_id); 5991 5992 D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 5993 } 5994 5995 #define SND_IBND_DESC_NACK(ldcp, pkt) \ 5996 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 5997 pkt->tag.vio_sid = ldcp->local_session; \ 5998 vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_ibnd_desc_t)); 5999 6000 /* 6001 * Process an in-band descriptor message (most likely from 6002 * OBP). 6003 */ 6004 static void 6005 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 6006 { 6007 vio_ibnd_desc_t *ibnd_desc; 6008 dring_info_t *dp = NULL; 6009 vsw_private_desc_t *priv_addr = NULL; 6010 vsw_t *vswp = ldcp->ldc_vswp; 6011 mblk_t *mp = NULL; 6012 size_t nbytes = 0; 6013 size_t off = 0; 6014 uint64_t idx = 0; 6015 uint32_t num = 1, len, datalen = 0; 6016 uint64_t ncookies = 0; 6017 int i, rv; 6018 int j = 0; 6019 6020 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 6021 6022 ibnd_desc = (vio_ibnd_desc_t *)pkt; 6023 6024 switch (ibnd_desc->hdr.tag.vio_subtype) { 6025 case VIO_SUBTYPE_INFO: 6026 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 6027 6028 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 6029 return; 6030 6031 /* 6032 * Data is padded to align on a 8 byte boundary, 6033 * nbytes is actual data length, i.e. minus that 6034 * padding. 
		 */
		datalen = ibnd_desc->nbytes;

		D2(vswp, "%s(%lld): processing inband desc : "
			": datalen 0x%lx", __func__, ldcp->ldc_id, datalen);

		ncookies = ibnd_desc->ncookies;

		/*
		 * allocb(9F) returns an aligned data block. We
		 * need to ensure that we ask ldc for an aligned
		 * number of bytes also.
		 */
		nbytes = datalen;
		if (nbytes & 0x7) {
			off = 8 - (nbytes & 0x7);
			nbytes += off;
		}

		/*
		 * Allocate the rounded-up size, since ldc_mem_copy()
		 * below copies nbytes (not datalen) into the buffer.
		 */
		mp = allocb(nbytes, BPRI_MED);
		if (mp == NULL) {
			DERR(vswp, "%s(%lld): allocb failed",
				__func__, ldcp->ldc_id);
			return;
		}

		rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr,
			0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies,
			LDC_COPY_IN);

		if (rv != 0) {
			DERR(vswp, "%s(%d): unable to copy in data from "
				"%d cookie(s)", __func__,
				ldcp->ldc_id, ncookies);
			freemsg(mp);
			return;
		} else {
			D2(vswp, "%s(%d): copied in %ld bytes using %d "
				"cookies", __func__, ldcp->ldc_id, nbytes,
				ncookies);
		}

		/* point to the actual end of data */
		mp->b_wptr = mp->b_rptr + datalen;

		/*
		 * We ACK back every in-band descriptor message we process
		 */
		ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK;
		ibnd_desc->hdr.tag.vio_sid = ldcp->local_session;
		vsw_send_msg(ldcp, (void *)ibnd_desc,
			sizeof (vio_ibnd_desc_t));

		/* send the packet to be switched */
		vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT,
			ldcp->ldc_port, NULL);

		break;

	case VIO_SUBTYPE_ACK:
		D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		/* Verify the ACK is valid */
		idx = ibnd_desc->hdr.desc_handle;

		if (idx >= VSW_RING_NUM_EL) {
			cmn_err(CE_WARN, "!vsw%d: corrupted ACK received "
				"(idx %ld)", vswp->instance, idx);
			return;
		}

		if ((dp = ldcp->lane_out.dringp) == NULL) {
			DERR(vswp, "%s: no dring found", __func__);
			return;
		}

		len = dp->num_descriptors;
		/*
		 * If the descriptor we are being ACK'ed for is not the
		 * one we expected, then pkts were lost somewhere, either
		 * when we tried to send a msg, or a previous ACK msg from
		 * our peer. In either case we now reclaim the descriptors
		 * in the range from the last ACK we received up to the
		 * current ACK.
		 */
		if (idx != dp->last_ack_recv) {
			DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)",
				__func__, dp->last_ack_recv, idx);
			num = idx >= dp->last_ack_recv ?
				idx - dp->last_ack_recv + 1:
				(len - dp->last_ack_recv + 1) + idx;
		}

		/*
		 * When we sent the in-band message to our peer we
		 * marked the copy in our private ring as READY. We now
		 * check that the descriptor we are being ACK'ed for is in
		 * fact READY, i.e. it is one we have shared with our peer.
		 *
		 * If it's not we flag an error, but still reset the descr
		 * back to FREE.
		 */
		for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) {
			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
			mutex_enter(&priv_addr->dstate_lock);
			if (priv_addr->dstate != VIO_DESC_READY) {
				DERR(vswp, "%s: (%ld) desc at index %ld not "
					"READY (0x%lx)", __func__,
					ldcp->ldc_id, idx, priv_addr->dstate);
				DERR(vswp, "%s: bound %d: ncookies %ld : "
					"datalen %ld", __func__,
					priv_addr->bound, priv_addr->ncookies,
					priv_addr->datalen);
			}
			D2(vswp, "%s: (%lld) freeing descp at %lld", __func__,
				ldcp->ldc_id, idx);
			/* release resources associated with sent msg */
			bzero(priv_addr->datap, priv_addr->datalen);
			priv_addr->datalen = 0;
			priv_addr->dstate = VIO_DESC_FREE;
			mutex_exit(&priv_addr->dstate_lock);
		}
		/* update to next expected value */
		dp->last_ack_recv = (idx + 1) % dp->num_descriptors;

		break;

	case VIO_SUBTYPE_NACK:
		DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		/*
		 * We should only get a NACK if our peer doesn't like
		 * something about a message we have sent it. If this
		 * happens we just release the resources associated with
		 * the message. (We are relying on higher layers to decide
		 * whether or not to resend.)
		 */

		/* limit check */
		idx = ibnd_desc->hdr.desc_handle;

		if (idx >= VSW_RING_NUM_EL) {
			DERR(vswp, "%s: corrupted NACK received (idx %lld)",
				__func__, idx);
			return;
		}

		if ((dp = ldcp->lane_out.dringp) == NULL) {
			DERR(vswp, "%s: no dring found", __func__);
			return;
		}

		priv_addr = (vsw_private_desc_t *)dp->priv_addr;

		/* move to correct location in ring */
		priv_addr += idx;

		/* release resources associated with sent msg */
		mutex_enter(&priv_addr->dstate_lock);
		bzero(priv_addr->datap, priv_addr->datalen);
		priv_addr->datalen = 0;
		priv_addr->dstate = VIO_DESC_FREE;
		mutex_exit(&priv_addr->dstate_lock);

		break;

	default:
		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
			ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype);
	}

	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
}

static void
vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t tag)
{
	_NOTE(ARGUNUSED(epkt))

	vsw_t		*vswp = ldcp->ldc_vswp;
	uint16_t	env = tag.vio_subtype_env;

	D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id);

	/*
	 * Error vio_subtypes have yet to be defined. So for
	 * the moment we can't do anything.
	 */
	D2(vswp, "%s: (%x) vio_subtype env", __func__, env);

	D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
}

/*
 * Switch the given ethernet frame when operating in layer 2 mode.
 *
 * vswp: pointer to the vsw instance
 * mp: pointer to chain of ethernet frame(s) to be switched
 * caller: identifies the source of this frame as:
 *		1. VSW_VNETPORT - a vsw port (connected to a vnet).
 *		2. VSW_PHYSDEV - the physical ethernet device
 *		3. VSW_LOCALDEV - vsw configured as a virtual interface
 * arg: argument provided by the caller.
 *		1. for VNETPORT - pointer to the corresponding vsw_port_t.
 *		2. for PHYSDEV - NULL
 *		3. for LOCALDEV - pointer to this vsw_t (self)
 */
void
vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *arg, mac_resource_handle_t mrh)
{
    struct ether_header *ehp;
    vsw_port_t          *port = NULL;
    mblk_t              *bp, *ret_m;
    mblk_t              *nmp = NULL;
    vsw_port_list_t     *plist = &vswp->plist;

    D1(vswp, "%s: enter (caller %d)", __func__, caller);

    /*
     * PERF: rather than breaking up the chain here, scan it
     * to find all mblks heading to same destination and then
     * pass that sub-chain to the lower transmit functions.
     */

    /* process the chain of packets */
    bp = mp;
    while (bp) {
        mp = bp;
        bp = bp->b_next;
        mp->b_next = mp->b_prev = NULL;
        ehp = (struct ether_header *)mp->b_rptr;

        D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
            __func__, MBLKSIZE(mp), MBLKL(mp));

        READ_ENTER(&vswp->if_lockrw);
        if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) {
            /*
             * If destination is VSW_LOCALDEV (vsw as an eth
             * interface) and if the device is up & running,
             * send the packet up the stack on this host.
             * If the virtual interface is down, drop the packet.
             */
            if (caller != VSW_LOCALDEV) {
                if (vswp->if_state & VSW_IF_UP) {
                    RW_EXIT(&vswp->if_lockrw);
                    mac_rx(vswp->if_mh, mrh, mp);
                } else {
                    RW_EXIT(&vswp->if_lockrw);
                    /* Interface down, drop pkt */
                    freemsg(mp);
                }
            } else {
                RW_EXIT(&vswp->if_lockrw);
                freemsg(mp);
            }
            continue;
        }
        RW_EXIT(&vswp->if_lockrw);

        READ_ENTER(&plist->lockrw);
        port = vsw_lookup_fdb(vswp, ehp);
        if (port) {
            /*
             * Mark the port as in-use.
             */
            mutex_enter(&port->ref_lock);
            port->ref_cnt++;
            mutex_exit(&port->ref_lock);
            RW_EXIT(&plist->lockrw);

            /*
             * If plumbed and in promisc mode then copy msg
             * and send up the stack.
             */
            READ_ENTER(&vswp->if_lockrw);
            if (VSW_U_P(vswp->if_state)) {
                RW_EXIT(&vswp->if_lockrw);
                nmp = copymsg(mp);
                if (nmp)
                    mac_rx(vswp->if_mh, mrh, nmp);
            } else {
                RW_EXIT(&vswp->if_lockrw);
            }

            /*
             * If the destination is in the FDB, the packet
             * should be forwarded to the corresponding
             * vsw_port (connected to a vnet device -
             * VSW_VNETPORT).
             */
            (void) vsw_portsend(port, mp);

            /*
             * Decrement use count in port and check if
             * we should wake the delete thread.
             */
            mutex_enter(&port->ref_lock);
            port->ref_cnt--;
            if (port->ref_cnt == 0)
                cv_signal(&port->ref_cv);
            mutex_exit(&port->ref_lock);
        } else {
            RW_EXIT(&plist->lockrw);
            /*
             * Destination not in FDB.
             *
             * If the destination is broadcast or
             * multicast forward the packet to all
             * (VNETPORTs, PHYSDEV, LOCALDEV),
             * except the caller.
             */
            if (IS_BROADCAST(ehp)) {
                D3(vswp, "%s: BROADCAST pkt", __func__);
                (void) vsw_forward_all(vswp, mp,
                    caller, arg);
            } else if (IS_MULTICAST(ehp)) {
                D3(vswp, "%s: MULTICAST pkt", __func__);
                (void) vsw_forward_grp(vswp, mp,
                    caller, arg);
            } else {
                /*
                 * If the destination is unicast, and came
                 * from either a logical network device or
                 * the switch itself when it is plumbed, then
                 * send it out on the physical device and also
                 * up the stack if the logical interface is
                 * in promiscuous mode.
                 *
                 * NOTE: The assumption here is that if we
                 * cannot find the destination in our fdb, and
                 * it is a unicast address which came from either
                 * a vnet or down the stack (when plumbed), then
                 * it must be destined for an ethernet device
                 * outside our ldoms.
                 */
                if (caller == VSW_VNETPORT) {
                    READ_ENTER(&vswp->if_lockrw);
                    if (VSW_U_P(vswp->if_state)) {
                        RW_EXIT(&vswp->if_lockrw);
                        nmp = copymsg(mp);
                        if (nmp)
                            mac_rx(vswp->if_mh,
                                mrh, nmp);
                    } else {
                        RW_EXIT(&vswp->if_lockrw);
                    }
                    if ((ret_m = vsw_tx_msg(vswp, mp))
                        != NULL) {
                        DERR(vswp, "%s: drop mblks to "
                            "phys dev", __func__);
                        freemsg(ret_m);
                    }

                } else if (caller == VSW_PHYSDEV) {
                    /*
                     * Pkt seen because card in promisc
                     * mode. Send up stack if plumbed in
                     * promisc mode, else drop it.
                     */
                    READ_ENTER(&vswp->if_lockrw);
                    if (VSW_U_P(vswp->if_state)) {
                        RW_EXIT(&vswp->if_lockrw);
                        mac_rx(vswp->if_mh, mrh, mp);
                    } else {
                        RW_EXIT(&vswp->if_lockrw);
                        freemsg(mp);
                    }

                } else if (caller == VSW_LOCALDEV) {
                    /*
                     * Pkt came down the stack, send out
                     * over physical device.
                     */
                    if ((ret_m = vsw_tx_msg(vswp, mp))
                        != NULL) {
                        DERR(vswp, "%s: drop mblks to "
                            "phys dev", __func__);
                        freemsg(ret_m);
                    }
                }
            }
        }
    }
    D1(vswp, "%s: exit\n", __func__);
}

/*
 * Switch ethernet frame when in layer 3 mode (i.e. using IP
 * layer to do the routing).
 *
 * There is a large amount of overlap between this function and
 * vsw_switch_l2_frame. At some stage we need to revisit and refactor
 * both these functions.
 */
void
vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *arg, mac_resource_handle_t mrh)
{
    struct ether_header *ehp;
    vsw_port_t          *port = NULL;
    mblk_t              *bp = NULL;
    vsw_port_list_t     *plist = &vswp->plist;

    D1(vswp, "%s: enter (caller %d)", __func__, caller);

    /*
     * In layer 3 mode we should only ever be switching packets
     * between the IP layer and vnet devices. So make sure that's
     * who is invoking us.
     */
    if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) {
        DERR(vswp, "%s: unexpected caller (%d)", __func__, caller);
        freemsgchain(mp);
        return;
    }

    /* process the chain of packets */
    bp = mp;
    while (bp) {
        mp = bp;
        bp = bp->b_next;
        mp->b_next = mp->b_prev = NULL;
        ehp = (struct ether_header *)mp->b_rptr;

        D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
            __func__, MBLKSIZE(mp), MBLKL(mp));

        READ_ENTER(&plist->lockrw);
        port = vsw_lookup_fdb(vswp, ehp);
        if (port) {
            /*
             * Mark port as in-use.
             */
            mutex_enter(&port->ref_lock);
            port->ref_cnt++;
            mutex_exit(&port->ref_lock);
            RW_EXIT(&plist->lockrw);

            D2(vswp, "%s: sending to target port", __func__);
            (void) vsw_portsend(port, mp);

            /*
             * Finished with port so decrement ref count and
             * check if we should wake the delete thread.
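             *
             * The port deletion path waits on ref_cv until
             * ref_cnt drops to zero, so the cv_signal() below
             * is what allows a pending vsw_port_delete() to
             * proceed once the last in-flight frame has been
             * handed off.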
             */
            mutex_enter(&port->ref_lock);
            port->ref_cnt--;
            if (port->ref_cnt == 0)
                cv_signal(&port->ref_cv);
            mutex_exit(&port->ref_lock);
        } else {
            RW_EXIT(&plist->lockrw);
            /*
             * Destination not in FDB.
             *
             * If the destination is broadcast or
             * multicast forward the packet to all
             * (VNETPORTs, PHYSDEV, LOCALDEV),
             * except the caller.
             */
            if (IS_BROADCAST(ehp)) {
                D2(vswp, "%s: BROADCAST pkt", __func__);
                (void) vsw_forward_all(vswp, mp,
                    caller, arg);
            } else if (IS_MULTICAST(ehp)) {
                D2(vswp, "%s: MULTICAST pkt", __func__);
                (void) vsw_forward_grp(vswp, mp,
                    caller, arg);
            } else {
                /*
                 * Unicast pkt from vnet that we don't have
                 * an FDB entry for, so it must be destined
                 * for the outside world. Attempt to send up
                 * to the IP layer to allow it to deal with it.
                 */
                if (caller == VSW_VNETPORT) {
                    READ_ENTER(&vswp->if_lockrw);
                    if (vswp->if_state & VSW_IF_UP) {
                        RW_EXIT(&vswp->if_lockrw);
                        D2(vswp, "%s: sending up",
                            __func__);
                        mac_rx(vswp->if_mh, mrh, mp);
                    } else {
                        RW_EXIT(&vswp->if_lockrw);
                        /* Interface down, drop pkt */
                        D2(vswp, "%s I/F down",
                            __func__);
                        freemsg(mp);
                    }
                }
            }
        }
    }

    D1(vswp, "%s: exit", __func__);
}

/*
 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
 * except the caller (port on which frame arrived).
 */
static int
vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
{
    vsw_port_list_t *plist = &vswp->plist;
    vsw_port_t      *portp;
    mblk_t          *nmp = NULL;
    mblk_t          *ret_m = NULL;
    int             skip_port = 0;

    D1(vswp, "vsw_forward_all: enter\n");

    /*
     * Broadcast message from inside ldoms so send to outside
     * world if in either of the layer 2 modes.
     */
    if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
        (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
        ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) {

        nmp = dupmsg(mp);
        if (nmp) {
            if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
                DERR(vswp, "%s: dropping pkt(s) "
                    "consisting of %ld bytes of data for"
                    " physical device", __func__, MBLKL(ret_m));
                freemsg(ret_m);
            }
        }
    }

    if (caller == VSW_VNETPORT)
        skip_port = 1;

    /*
     * Broadcast message from other vnet (layer 2 or 3) or outside
     * world (layer 2 only), send up stack if plumbed.
     */
    if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) {
        READ_ENTER(&vswp->if_lockrw);
        if (vswp->if_state & VSW_IF_UP) {
            RW_EXIT(&vswp->if_lockrw);
            nmp = copymsg(mp);
            if (nmp)
                mac_rx(vswp->if_mh, NULL, nmp);
        } else {
            RW_EXIT(&vswp->if_lockrw);
        }
    }

    /* send it to all VNETPORTs */
    READ_ENTER(&plist->lockrw);
    for (portp = plist->head; portp != NULL; portp = portp->p_next) {
        D2(vswp, "vsw_forward_all: port %d", portp->p_instance);
        /*
         * Caution! Don't reorder these two checks as arg
         * will be NULL if the caller is PHYSDEV. skip_port is
         * only set if the caller is VNETPORT.
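         *
         * Short-circuit evaluation guarantees that the
         * (portp == arg) comparison below is only made when
         * skip_port is set, i.e. when arg is a valid
         * vsw_port_t pointer rather than NULL.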
         */
        if ((skip_port) && (portp == arg))
            continue;
        else {
            nmp = dupmsg(mp);
            if (nmp) {
                (void) vsw_portsend(portp, nmp);
            } else {
                DERR(vswp, "vsw_forward_all: nmp NULL");
            }
        }
    }
    RW_EXIT(&plist->lockrw);

    freemsg(mp);

    D1(vswp, "vsw_forward_all: exit\n");
    return (0);
}

/*
 * Forward pkts to any devices or interfaces which have registered
 * an interest in them (i.e. multicast groups).
 */
static int
vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
{
    struct ether_header *ehp = (struct ether_header *)mp->b_rptr;
    mfdb_ent_t          *entp = NULL;
    mfdb_ent_t          *tpp = NULL;
    vsw_port_t          *port;
    uint64_t            key = 0;
    mblk_t              *nmp = NULL;
    mblk_t              *ret_m = NULL;
    boolean_t           check_if = B_TRUE;

    /*
     * Convert address to hash table key
     */
    KEY_HASH(key, ehp->ether_dhost);

    D1(vswp, "%s: key 0x%llx", __func__, key);

    /*
     * If the pkt came from either a vnet or down the stack (if we are
     * plumbed) and we are in layer 2 mode, then we send the pkt out
     * over the physical adapter, and then check to see if any other
     * vnets are interested in it.
     */
    if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
        (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
        ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) {
        nmp = dupmsg(mp);
        if (nmp) {
            if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
                DERR(vswp, "%s: dropping pkt(s) "
                    "consisting of %ld bytes of "
                    "data for physical device",
                    __func__, MBLKL(ret_m));
                freemsg(ret_m);
            }
        }
    }

    READ_ENTER(&vswp->mfdbrw);
    if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key,
        (mod_hash_val_t *)&entp) != 0) {
        D3(vswp, "%s: no table entry found for addr 0x%llx",
            __func__, key);
    } else {
        /*
         * Send to list of devices associated with this address...
         */
        for (tpp = entp; tpp != NULL; tpp = tpp->nextp) {

            /* don't send to ourselves */
            if ((caller == VSW_VNETPORT) &&
                (tpp->d_addr == (void *)arg)) {
                port = (vsw_port_t *)tpp->d_addr;
                D3(vswp, "%s: not sending to ourselves"
                    " : port %d", __func__,
                    port->p_instance);
                continue;

            } else if ((caller == VSW_LOCALDEV) &&
                (tpp->d_type == VSW_LOCALDEV)) {
                D3(vswp, "%s: not sending back up stack",
                    __func__);
                continue;
            }

            if (tpp->d_type == VSW_VNETPORT) {
                port = (vsw_port_t *)tpp->d_addr;
                D3(vswp, "%s: sending to port %ld for "
                    "addr 0x%llx", __func__,
                    port->p_instance, key);

                nmp = dupmsg(mp);
                if (nmp)
                    (void) vsw_portsend(port, nmp);
            } else {
                if (vswp->if_state & VSW_IF_UP) {
                    nmp = copymsg(mp);
                    if (nmp)
                        mac_rx(vswp->if_mh, NULL, nmp);
                    check_if = B_FALSE;
                    D3(vswp, "%s: sending up stack"
                        " for addr 0x%llx", __func__,
                        key);
                }
            }
        }
    }

    RW_EXIT(&vswp->mfdbrw);

    /*
     * If the pkt came from either a vnet or from the physical device,
     * and if we haven't already sent the pkt up the stack then we
     * check now if we can/should (i.e. the interface is plumbed
     * and in promisc mode).
     */
    if ((check_if) &&
        ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) {
        READ_ENTER(&vswp->if_lockrw);
        if (VSW_U_P(vswp->if_state)) {
            RW_EXIT(&vswp->if_lockrw);
            D3(vswp, "%s: (caller %d) finally sending up stack"
                " for addr 0x%llx", __func__, caller, key);
            nmp = copymsg(mp);
            if (nmp)
                mac_rx(vswp->if_mh, NULL, nmp);
        } else {
            RW_EXIT(&vswp->if_lockrw);
        }
    }

    freemsg(mp);

    D1(vswp, "%s: exit", __func__);

    return (0);
}

/* transmit the packet over the given port */
static int
vsw_portsend(vsw_port_t *port, mblk_t *mp)
{
    vsw_ldc_list_t  *ldcl = &port->p_ldclist;
    vsw_ldc_t       *ldcp;
    int             status = 0;

    READ_ENTER(&ldcl->lockrw);
    /*
     * Note: for now we have a single channel.
     */
    ldcp = ldcl->head;
    if (ldcp == NULL) {
        DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n");
        freemsg(mp);
        RW_EXIT(&ldcl->lockrw);
        return (1);
    }

    /*
     * Send the message out using the appropriate
     * transmit function which will free the mblock when it
     * is finished with it.
     */
    mutex_enter(&port->tx_lock);
    if (port->transmit != NULL) {
        status = (*port->transmit)(ldcp, mp);
    } else {
        freemsg(mp);
    }
    mutex_exit(&port->tx_lock);

    RW_EXIT(&ldcl->lockrw);

    return (status);
}

/*
 * Send packet out via descriptor ring to a logical device.
 */
static int
vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp)
{
    vio_dring_msg_t     dring_pkt;
    dring_info_t        *dp = NULL;
    vsw_private_desc_t  *priv_desc = NULL;
    vnet_public_desc_t  *pub = NULL;
    vsw_t               *vswp = ldcp->ldc_vswp;
    mblk_t              *bp;
    size_t              n, size;
    caddr_t             bufp;
    int                 idx;
    int                 status = LDC_TX_SUCCESS;

    D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id);

    /* TODO: make test a macro */
    if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
        (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
        DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping "
            "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status,
            ldcp->lane_out.lstate);
        freemsg(mp);
        return (LDC_TX_FAILURE);
    }

    /*
     * Note: using first ring only, this may change
     * in the future.
     */
    if ((dp = ldcp->lane_out.dringp) == NULL) {
        DERR(vswp, "%s(%lld): no dring for outbound lane on"
            " channel %lld", __func__, ldcp->ldc_id, ldcp->ldc_id);
        freemsg(mp);
        return (LDC_TX_FAILURE);
    }

    size = msgsize(mp);
    if (size > (size_t)ETHERMAX) {
        DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
            ldcp->ldc_id, size);
        freemsg(mp);
        return (LDC_TX_FAILURE);
    }

    /*
     * Find a free descriptor.
     *
     * Note: for the moment we are assuming that we will only
     * have one dring going from the switch to each of its
     * peers. This may change in the future.
     */
    if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
        D2(vswp, "%s(%lld): no descriptor available for ring "
            "at 0x%llx", __func__, ldcp->ldc_id, dp);

        /* nothing more we can do */
        status = LDC_TX_NORESOURCES;
        goto vsw_dringsend_free_exit;
    } else {
        D2(vswp, "%s(%lld): free private descriptor found at pos "
            "%ld addr 0x%llx\n", __func__, ldcp->ldc_id, idx,
            priv_desc);
    }

    /* copy data into the descriptor */
    bufp = priv_desc->datap;
    bufp += VNET_IPALIGN;
    for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
        n = MBLKL(bp);
        bcopy(bp->b_rptr, bufp, n);
        bufp += n;
    }

    priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;

    pub = priv_desc->descp;
    pub->nbytes = priv_desc->datalen;

    mutex_enter(&priv_desc->dstate_lock);
    pub->hdr.dstate = VIO_DESC_READY;
    mutex_exit(&priv_desc->dstate_lock);

    /*
     * Determine whether or not we need to send a message to our
     * peer prompting them to read our newly updated descriptor(s).
     */
    mutex_enter(&dp->restart_lock);
    if (dp->restart_reqd) {
        dp->restart_reqd = B_FALSE;
        mutex_exit(&dp->restart_lock);

        /*
         * Send a vio_dring_msg to peer to prompt them to read
         * the updated descriptor ring.
         */
        dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA;
        dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
        dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA;
        dring_pkt.tag.vio_sid = ldcp->local_session;

        /* Note: for now using first ring */
        dring_pkt.dring_ident = dp->ident;

        mutex_enter(&ldcp->lane_out.seq_lock);
        dring_pkt.seq_num = ldcp->lane_out.seq_num++;
        mutex_exit(&ldcp->lane_out.seq_lock);

        /*
         * If last_ack_recv is -1 then we know we've not
         * received any acks yet, so this must be the first
         * msg sent, so set the start to the beginning of the ring.
         */
        mutex_enter(&dp->dlock);
        if (dp->last_ack_recv == -1) {
            dring_pkt.start_idx = 0;
        } else {
            dring_pkt.start_idx = (dp->last_ack_recv + 1) %
                dp->num_descriptors;
        }
        dring_pkt.end_idx = -1;
        mutex_exit(&dp->dlock);

        D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__,
            ldcp->ldc_id, dp, dring_pkt.dring_ident);
        D3(vswp, "%s(%lld): start %lld : end %lld : seq %lld\n",
            __func__, ldcp->ldc_id, dring_pkt.start_idx,
            dring_pkt.end_idx, dring_pkt.seq_num);

        vsw_send_msg(ldcp, (void *)&dring_pkt,
            sizeof (vio_dring_msg_t));
    } else {
        mutex_exit(&dp->restart_lock);
        D2(vswp, "%s(%lld): updating descp %d", __func__,
            ldcp->ldc_id, idx);
    }

vsw_dringsend_free_exit:

    /* free the message block */
    freemsg(mp);

    D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
    return (status);
}

/*
 * Send an in-band descriptor message over ldc.
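 *
 * In this mode (VIO_DESC_MODE) the descriptor is not read from a
 * shared ring by the peer; instead the LDC memory cookies that
 * describe the data are embedded directly in the message, and the
 * peer pulls the data across with a copy-in (see the
 * VIO_SUBTYPE_INFO handling earlier in this file).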
 */
static int
vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp)
{
    vsw_t               *vswp = ldcp->ldc_vswp;
    vio_ibnd_desc_t     ibnd_msg;
    vsw_private_desc_t  *priv_desc = NULL;
    dring_info_t        *dp = NULL;
    size_t              n, size = 0;
    caddr_t             bufp;
    mblk_t              *bp;
    int                 idx, i;
    int                 status = LDC_TX_SUCCESS;
    static int          warn_msg = 1;

    D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

    ASSERT(mp != NULL);

    if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
        (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
        DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt",
            __func__, ldcp->ldc_id, ldcp->ldc_status,
            ldcp->lane_out.lstate);
        freemsg(mp);
        return (LDC_TX_FAILURE);
    }

    /*
     * We only expect a single dring to exist, which we use
     * as an internal buffer rather than a transfer channel.
     */
    if ((dp = ldcp->lane_out.dringp) == NULL) {
        DERR(vswp, "%s(%lld): no dring for outbound lane",
            __func__, ldcp->ldc_id);
        DERR(vswp, "%s(%lld) status(%d) state (0x%llx)",
            __func__, ldcp->ldc_id, ldcp->ldc_status,
            ldcp->lane_out.lstate);
        freemsg(mp);
        return (LDC_TX_FAILURE);
    }

    size = msgsize(mp);
    if (size > (size_t)ETHERMAX) {
        DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
            ldcp->ldc_id, size);
        freemsg(mp);
        return (LDC_TX_FAILURE);
    }

    /*
     * Find a free descriptor in our buffer ring
     */
    if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
        if (warn_msg) {
            DERR(vswp, "%s(%lld): no descriptor available for "
                "ring at 0x%llx", __func__, ldcp->ldc_id, dp);
            warn_msg = 0;
        }

        /* nothing more we can do */
        status = LDC_TX_NORESOURCES;
        goto vsw_descrsend_free_exit;
    } else {
        D2(vswp, "%s(%lld): free private descriptor found at pos "
            "%ld addr 0x%llx\n", __func__, ldcp->ldc_id, idx,
            priv_desc);
        warn_msg = 1;
    }

    /* copy data into the descriptor */
    bufp = priv_desc->datap;
    for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
        n = MBLKL(bp);
        bcopy(bp->b_rptr, bufp, n);
        bufp += n;
    }

    priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;

    /* create and send the in-band descp msg */
    ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA;
    ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO;
    ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA;
    ibnd_msg.hdr.tag.vio_sid = ldcp->local_session;

    mutex_enter(&ldcp->lane_out.seq_lock);
    ibnd_msg.hdr.seq_num = ldcp->lane_out.seq_num++;
    mutex_exit(&ldcp->lane_out.seq_lock);

    /*
     * Copy the mem cookies describing the data from the
     * private region of the descriptor ring into the inband
     * descriptor.
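     *
     * The peer uses these cookies (together with the ncookies
     * and nbytes fields set below) as the source of an
     * ldc_mem_copy(), so they must describe exactly the buffer
     * the data was copied into above.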
     */
    for (i = 0; i < priv_desc->ncookies; i++) {
        bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i],
            sizeof (ldc_mem_cookie_t));
    }

    ibnd_msg.hdr.desc_handle = idx;
    ibnd_msg.ncookies = priv_desc->ncookies;
    ibnd_msg.nbytes = size;

    vsw_send_msg(ldcp, (void *)&ibnd_msg, sizeof (vio_ibnd_desc_t));

vsw_descrsend_free_exit:

    /* free the allocated message blocks */
    freemsg(mp);

    D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
    return (status);
}

static void
vsw_send_ver(void *arg)
{
    vsw_ldc_t       *ldcp = (vsw_ldc_t *)arg;
    vsw_t           *vswp = ldcp->ldc_vswp;
    lane_t          *lp = &ldcp->lane_out;
    vio_ver_msg_t   ver_msg;

    D1(vswp, "%s enter", __func__);

    ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
    ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
    ver_msg.tag.vio_subtype_env = VIO_VER_INFO;
    ver_msg.tag.vio_sid = ldcp->local_session;

    ver_msg.ver_major = vsw_versions[0].ver_major;
    ver_msg.ver_minor = vsw_versions[0].ver_minor;
    ver_msg.dev_class = VDEV_NETWORK_SWITCH;

    lp->lstate |= VSW_VER_INFO_SENT;
    lp->ver_major = ver_msg.ver_major;
    lp->ver_minor = ver_msg.ver_minor;

    DUMP_TAG(ver_msg.tag);

    vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t));

    D1(vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
}

static void
vsw_send_attr(vsw_ldc_t *ldcp)
{
    vsw_t           *vswp = ldcp->ldc_vswp;
    lane_t          *lp = &ldcp->lane_out;
    vnet_attr_msg_t attr_msg;

    D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);

    /*
     * Subtype is set to INFO by default
     */
    attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
    attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
    attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO;
    attr_msg.tag.vio_sid = ldcp->local_session;

    /* payload copied from default settings for lane */
    attr_msg.mtu = lp->mtu;
    attr_msg.addr_type = lp->addr_type;
    attr_msg.xfer_mode = lp->xfer_mode;
    attr_msg.ack_freq = lp->ack_freq;

    READ_ENTER(&vswp->if_lockrw);
    bcopy(&(vswp->if_addr), &(attr_msg.addr), ETHERADDRL);
    RW_EXIT(&vswp->if_lockrw);

    ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT;

    DUMP_TAG(attr_msg.tag);

    vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t));

    D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
}

/*
 * Create dring info msg (which also results in the creation of
 * a dring).
 */
static vio_dring_reg_msg_t *
vsw_create_dring_info_pkt(vsw_ldc_t *ldcp)
{
    vio_dring_reg_msg_t *mp;
    dring_info_t        *dp;
    vsw_t               *vswp = ldcp->ldc_vswp;

    D1(vswp, "vsw_create_dring_info_pkt enter\n");

    /*
     * If we can't create a dring, obviously there is no point
     * in sending a message.
     */
    if ((dp = vsw_create_dring(ldcp)) == NULL)
        return (NULL);

    mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP);

    mp->tag.vio_msgtype = VIO_TYPE_CTRL;
    mp->tag.vio_subtype = VIO_SUBTYPE_INFO;
    mp->tag.vio_subtype_env = VIO_DRING_REG;
    mp->tag.vio_sid = ldcp->local_session;

    /* payload */
    mp->num_descriptors = dp->num_descriptors;
    mp->descriptor_size = dp->descriptor_size;
    mp->options = dp->options;
    mp->ncookies = dp->ncookies;
    bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t));

    mp->dring_ident = 0;

    D1(vswp, "vsw_create_dring_info_pkt exit\n");

    return (mp);
}

static void
vsw_send_dring_info(vsw_ldc_t *ldcp)
{
    vio_dring_reg_msg_t *dring_msg;
    vsw_t               *vswp = ldcp->ldc_vswp;

    D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id);

    dring_msg = vsw_create_dring_info_pkt(ldcp);
    if (dring_msg == NULL) {
        cmn_err(CE_WARN, "!vsw%d: %s: error creating msg",
            vswp->instance, __func__);
        return;
    }

    ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT;

    DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg);

    vsw_send_msg(ldcp, dring_msg,
        sizeof (vio_dring_reg_msg_t));

    kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t));

    D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id);
}

static void
vsw_send_rdx(vsw_ldc_t *ldcp)
{
    vsw_t           *vswp = ldcp->ldc_vswp;
    vio_rdx_msg_t   rdx_msg;

    D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);

    rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
    rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
    rdx_msg.tag.vio_subtype_env = VIO_RDX;
    rdx_msg.tag.vio_sid = ldcp->local_session;

    ldcp->lane_out.lstate |= VSW_RDX_INFO_SENT;

    DUMP_TAG(rdx_msg.tag);

    vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t));

    D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
}

/*
 * Generic routine to send message out over ldc channel.
 */
static void
vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size)
{
    int             rv;
    size_t          msglen = size;
    vio_msg_tag_t   *tag = (vio_msg_tag_t *)msgp;
    vsw_t           *vswp = ldcp->ldc_vswp;
    /* use a local retry count so the global limit isn't consumed */
    int             retries = vsw_wretries;

    D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes",
        ldcp->ldc_id, size);

    D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype);
    D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype);
    D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env);

    mutex_enter(&ldcp->ldc_txlock);
    do {
        msglen = size;
        rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen);
    } while (rv == EWOULDBLOCK && --retries > 0);

    if ((rv != 0) || (msglen != size)) {
        DERR(vswp, "vsw_send_msg: ldc_write failed: chan(%lld) "
            "rv(%d) size (%d) msglen(%d)\n", ldcp->ldc_id,
            rv, size, msglen);
    }
    mutex_exit(&ldcp->ldc_txlock);

    /* channel has been reset */
    if (rv == ECONNRESET) {
        vsw_handle_reset(ldcp);
    }

    D1(vswp, "vsw_send_msg (%lld) exit : sent %d bytes",
        ldcp->ldc_id, msglen);
}

/*
 * Add an entry into FDB, for the given mac address and port_id.
 * Returns 0 on success, 1 on failure.
 *
 * Lock protecting FDB must be held by calling process.
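 *
 * The 6 byte MAC address is folded into a single 64-bit hash key
 * by the KEY_HASH() macro. The same folding is used by
 * vsw_del_fdb() and vsw_lookup_fdb() below, so all three routines
 * always agree on the key for a given address.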
 */
static int
vsw_add_fdb(vsw_t *vswp, vsw_port_t *port)
{
    uint64_t    addr = 0;

    D1(vswp, "%s: enter", __func__);

    KEY_HASH(addr, port->p_macaddr);

    D2(vswp, "%s: key = 0x%llx", __func__, addr);

    /*
     * Note: duplicate keys will be rejected by mod_hash.
     */
    if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)addr,
        (mod_hash_val_t)port) != 0) {
        DERR(vswp, "%s: unable to add entry into fdb.", __func__);
        return (1);
    }

    D1(vswp, "%s: exit", __func__);
    return (0);
}

/*
 * Remove an entry from FDB.
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_del_fdb(vsw_t *vswp, vsw_port_t *port)
{
    uint64_t    addr = 0;

    D1(vswp, "%s: enter", __func__);

    KEY_HASH(addr, port->p_macaddr);

    D2(vswp, "%s: key = 0x%llx", __func__, addr);

    (void) mod_hash_destroy(vswp->fdb, (mod_hash_key_t)addr);

    D1(vswp, "%s: exit", __func__);

    return (0);
}

/*
 * Search fdb for a given mac address.
 * Returns pointer to the entry if found, else returns NULL.
 */
static vsw_port_t *
vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp)
{
    uint64_t    key = 0;
    vsw_port_t  *port = NULL;

    D1(vswp, "%s: enter", __func__);

    KEY_HASH(key, ehp->ether_dhost);

    D2(vswp, "%s: key = 0x%llx", __func__, key);

    if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key,
        (mod_hash_val_t *)&port) != 0) {
        D2(vswp, "%s: no port found", __func__);
        return (NULL);
    }

    D1(vswp, "%s: exit", __func__);

    return (port);
}

/*
 * Add or remove multicast address(es).
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
{
    mcst_addr_t *mcst_p = NULL;
    vsw_t       *vswp = port->p_vswp;
    uint64_t    addr = 0x0;
    int         i;

    D1(vswp, "%s: enter", __func__);

    D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);

    mutex_enter(&vswp->mac_lock);
    if (vswp->mh == NULL) {
        mutex_exit(&vswp->mac_lock);
        return (1);
    }
    mutex_exit(&vswp->mac_lock);

    for (i = 0; i < mcst_pkt->count; i++) {
        /*
         * Convert address into form that can be used
         * as hash table key.
         */
        KEY_HASH(addr, mcst_pkt->mca[i]);

        /*
         * Add or delete the specified address/port combination.
         */
        if (mcst_pkt->set == 0x1) {
            D3(vswp, "%s: adding multicast address 0x%llx for "
                "port %ld", __func__, addr, port->p_instance);
            if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
                /*
                 * Update the list of multicast
                 * addresses contained within the
                 * port structure to include this new
                 * one.
                 */
                mcst_p = kmem_alloc(sizeof (mcst_addr_t),
                    KM_NOSLEEP);
                if (mcst_p == NULL) {
                    DERR(vswp, "%s: unable to alloc mem",
                        __func__);
                    return (1);
                }

                mcst_p->nextp = NULL;
                mcst_p->addr = addr;

                mutex_enter(&port->mca_lock);
                mcst_p->nextp = port->mcap;
                port->mcap = mcst_p;
                mutex_exit(&port->mca_lock);

                /*
                 * Program the address into HW.
                 * If the addr
                 * has already been programmed then the MAC
                 * just increments a ref counter (which is
                 * used when the address is being deleted).
                 */
                mutex_enter(&vswp->mac_lock);
                if ((vswp->mh == NULL) ||
                    mac_multicst_add(vswp->mh,
                    (uchar_t *)&mcst_pkt->mca[i])) {
                    mutex_exit(&vswp->mac_lock);
                    cmn_err(CE_WARN, "!vsw%d: unable to "
                        "add multicast address",
                        vswp->instance);
                    (void) vsw_del_mcst(vswp, VSW_VNETPORT,
                        addr, port);
                    vsw_del_addr(VSW_VNETPORT, port, addr);
                    return (1);
                }
                mutex_exit(&vswp->mac_lock);

            } else {
                DERR(vswp, "%s: error adding multicast "
                    "address 0x%llx for port %ld",
                    __func__, addr, port->p_instance);
                return (1);
            }
        } else {
            /*
             * Delete an entry from the multicast hash
             * table and update the address list
             * appropriately.
             */
            if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
                D3(vswp, "%s: deleting multicast address "
                    "0x%llx for port %ld", __func__, addr,
                    port->p_instance);

                vsw_del_addr(VSW_VNETPORT, port, addr);

                /*
                 * Remove the address from HW. The address
                 * will actually only be removed once the ref
                 * count within the MAC layer has dropped to
                 * zero. I.e. we can safely call this fn even
                 * if other ports are interested in this
                 * address.
                 */
                mutex_enter(&vswp->mac_lock);
                if ((vswp->mh == NULL) ||
                    mac_multicst_remove(vswp->mh,
                    (uchar_t *)&mcst_pkt->mca[i])) {
                    mutex_exit(&vswp->mac_lock);
                    cmn_err(CE_WARN, "!vsw%d: unable to "
                        "remove multicast address",
                        vswp->instance);
                    return (1);
                }
                mutex_exit(&vswp->mac_lock);

            } else {
                DERR(vswp, "%s: error deleting multicast "
                    "addr 0x%llx for port %ld",
                    __func__, addr, port->p_instance);
                return (1);
            }
        }
    }
    D1(vswp, "%s: exit", __func__);
    return (0);
}

/*
 * Add a new multicast entry.
 *
 * Search hash table based on address. If match found then
 * update associated val (which is chain of ports), otherwise
 * create new key/val (addr/port) pair and insert into table.
 */
static int
vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
{
    int         dup = 0;
    int         rv = 0;
    mfdb_ent_t  *ment = NULL;
    mfdb_ent_t  *tmp_ent = NULL;
    mfdb_ent_t  *new_ent = NULL;
    void        *tgt = NULL;

    if (devtype == VSW_VNETPORT) {
        /*
         * Being invoked from a vnet.
         */
        ASSERT(arg != NULL);
        tgt = arg;
        D2(NULL, "%s: port %d : address 0x%llx", __func__,
            ((vsw_port_t *)arg)->p_instance, addr);
    } else {
        /*
         * We are being invoked via the m_multicst mac entry
         * point.
         */
        D2(NULL, "%s: address 0x%llx", __func__, addr);
        tgt = (void *)vswp;
    }

    WRITE_ENTER(&vswp->mfdbrw);
    if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
        (mod_hash_val_t *)&ment) != 0) {

        /* address not currently in table */
        ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
        ment->d_addr = (void *)tgt;
        ment->d_type = devtype;
        ment->nextp = NULL;

        if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr,
            (mod_hash_val_t)ment) != 0) {
            DERR(vswp, "%s: hash table insertion failed", __func__);
            kmem_free(ment, sizeof (mfdb_ent_t));
            rv = 1;
        } else {
            D2(vswp, "%s: added initial entry for 0x%llx to "
                "table", __func__, addr);
        }
    } else {
        /*
         * Address in table. Check to see if specified port
         * is already associated with the address. If not add
         * it now.
         */
        tmp_ent = ment;
        while (tmp_ent != NULL) {
            if (tmp_ent->d_addr == (void *)tgt) {
                if (devtype == VSW_VNETPORT) {
                    DERR(vswp, "%s: duplicate port entry "
                        "found for portid %ld and key "
                        "0x%llx", __func__,
                        ((vsw_port_t *)arg)->p_instance,
                        addr);
                } else {
                    DERR(vswp, "%s: duplicate entry found "
                        "for key 0x%llx",
                        __func__, addr);
                }
                rv = 1;
                dup = 1;
                break;
            }
            tmp_ent = tmp_ent->nextp;
        }

        /*
         * Port not on list so add it to the end now.
         */
        if (dup == 0) {
            D2(vswp, "%s: added entry for 0x%llx to table",
                __func__, addr);
            new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
            new_ent->d_addr = (void *)tgt;
            new_ent->d_type = devtype;
            new_ent->nextp = NULL;

            tmp_ent = ment;
            while (tmp_ent->nextp != NULL)
                tmp_ent = tmp_ent->nextp;

            tmp_ent->nextp = new_ent;
        }
    }

    RW_EXIT(&vswp->mfdbrw);
    return (rv);
}

/*
 * Remove a multicast entry from the hashtable.
 *
 * Search hash table based on address. If match found, scan
 * list of ports associated with address. If specified port
 * found remove it from list.
 */
static int
vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
{
    mfdb_ent_t  *ment = NULL;
    mfdb_ent_t  *curr_p, *prev_p;
    void        *tgt = NULL;

    D1(vswp, "%s: enter", __func__);

    if (devtype == VSW_VNETPORT) {
        tgt = (vsw_port_t *)arg;
        D2(vswp, "%s: removing port %d from mFDB for address"
            " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance,
            addr);
    } else {
        D2(vswp, "%s: removing entry", __func__);
        tgt = (void *)vswp;
    }

    WRITE_ENTER(&vswp->mfdbrw);
    if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
        (mod_hash_val_t *)&ment) != 0) {
        D2(vswp, "%s: address 0x%llx not in table", __func__, addr);
        RW_EXIT(&vswp->mfdbrw);
        return (1);
    }

    prev_p = curr_p = ment;

    while (curr_p != NULL) {
        if (curr_p->d_addr == (void *)tgt) {
            if (devtype == VSW_VNETPORT) {
                D2(vswp, "%s: port %d found", __func__,
                    ((vsw_port_t *)tgt)->p_instance);
            } else {
                D2(vswp, "%s: instance found", __func__);
            }

            if (prev_p == curr_p) {
                /*
                 * Head of list: if no other element is in
                 * the list then destroy this entry, otherwise
                 * just replace it with the updated value.
                 */
                ment = curr_p->nextp;
                kmem_free(curr_p, sizeof (mfdb_ent_t));
                if (ment == NULL) {
                    (void) mod_hash_destroy(vswp->mfdb,
                        (mod_hash_key_t)addr);
                } else {
                    (void) mod_hash_replace(vswp->mfdb,
                        (mod_hash_key_t)addr,
                        (mod_hash_val_t)ment);
                }
            } else {
                /*
                 * Not head of list, no need to do
                 * replacement, just adjust list pointers.
                 */
                prev_p->nextp = curr_p->nextp;
                kmem_free(curr_p, sizeof (mfdb_ent_t));
            }
            break;
        }

        prev_p = curr_p;
        curr_p = curr_p->nextp;
    }

    RW_EXIT(&vswp->mfdbrw);

    D1(vswp, "%s: exit", __func__);

    return (0);
}

/*
 * Port is being deleted, but has registered an interest in one
 * or more multicast groups. Using the list of addresses maintained
 * within the port structure find the appropriate entry in the hash
 * table and remove this port from the list of interested ports.
 */
static void
vsw_del_mcst_port(vsw_port_t *port)
{
    mcst_addr_t *mcst_p = NULL;
    vsw_t       *vswp = port->p_vswp;

    D1(vswp, "%s: enter", __func__);

    mutex_enter(&port->mca_lock);
    while (port->mcap != NULL) {
        (void) vsw_del_mcst(vswp, VSW_VNETPORT,
            port->mcap->addr, port);

        mcst_p = port->mcap->nextp;
        kmem_free(port->mcap, sizeof (mcst_addr_t));
        port->mcap = mcst_p;
    }
    mutex_exit(&port->mca_lock);

    D1(vswp, "%s: exit", __func__);
}

/*
 * This vsw instance is detaching, but has registered an interest in one
 * or more multicast groups. Using the list of addresses maintained
 * within the vsw structure find the appropriate entry in the hash
 * table and remove this instance from the list of interested ports.
 */
static void
vsw_del_mcst_vsw(vsw_t *vswp)
{
    mcst_addr_t *next_p = NULL;

    D1(vswp, "%s: enter", __func__);

    mutex_enter(&vswp->mca_lock);

    while (vswp->mcap != NULL) {
        DERR(vswp, "%s: deleting addr 0x%llx",
            __func__, vswp->mcap->addr);
        (void) vsw_del_mcst(vswp, VSW_LOCALDEV,
            vswp->mcap->addr, NULL);

        next_p = vswp->mcap->nextp;
        kmem_free(vswp->mcap, sizeof (mcst_addr_t));
        vswp->mcap = next_p;
    }

    vswp->mcap = NULL;
    mutex_exit(&vswp->mca_lock);

    D1(vswp, "%s: exit", __func__);
}

/*
 * Remove the specified address from the list of addresses maintained
 * in this port node.
 */
static void
vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr)
{
    vsw_t       *vswp = NULL;
    vsw_port_t  *port = NULL;
    mcst_addr_t *prev_p = NULL;
    mcst_addr_t *curr_p = NULL;

    D1(NULL, "%s: enter : devtype %d : addr 0x%llx",
        __func__, devtype, addr);

    if (devtype == VSW_VNETPORT) {
        port = (vsw_port_t *)arg;
        mutex_enter(&port->mca_lock);
        prev_p = curr_p = port->mcap;
    } else {
        vswp = (vsw_t *)arg;
        mutex_enter(&vswp->mca_lock);
        prev_p = curr_p = vswp->mcap;
    }

    while (curr_p != NULL) {
        if (curr_p->addr == addr) {
            D2(NULL, "%s: address found", __func__);
            /* match found */
            if (prev_p == curr_p) {
                /* list head */
                if (devtype == VSW_VNETPORT)
                    port->mcap = curr_p->nextp;
                else
                    vswp->mcap = curr_p->nextp;
            } else {
                prev_p->nextp = curr_p->nextp;
            }
            kmem_free(curr_p, sizeof (mcst_addr_t));
            break;
        } else {
            prev_p = curr_p;
            curr_p = curr_p->nextp;
        }
    }

    if (devtype == VSW_VNETPORT)
        mutex_exit(&port->mca_lock);
    else
        mutex_exit(&vswp->mca_lock);

    D1(NULL, "%s: exit", __func__);
}

/*
 * Creates a descriptor ring (dring) and links it into the
 * list of outbound drings for this channel.
 *
 * Returns NULL if creation failed.
 */
static dring_info_t *
vsw_create_dring(vsw_ldc_t *ldcp)
{
    vsw_private_desc_t  *priv_addr = NULL;
    vsw_t               *vswp = ldcp->ldc_vswp;
    ldc_mem_info_t      minfo;
    dring_info_t        *dp, *tp;
    int                 i;

    dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);

    mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);

    /* create public section of ring */
    if ((ldc_mem_dring_create(VSW_RING_NUM_EL,
        VSW_PUB_SIZE, &dp->handle)) != 0) {

        DERR(vswp, "vsw_create_dring(%lld): ldc dring create "
            "failed", ldcp->ldc_id);
        goto create_fail_exit;
    }

    ASSERT(dp->handle != NULL);

    /*
     * Get the base address of the public section of the ring.
     */
    if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
        DERR(vswp, "vsw_create_dring(%lld): dring info failed\n",
            ldcp->ldc_id);
        goto dring_fail_exit;
    } else {
        ASSERT(minfo.vaddr != 0);
        dp->pub_addr = minfo.vaddr;
    }

    dp->num_descriptors = VSW_RING_NUM_EL;
    dp->descriptor_size = VSW_PUB_SIZE;
    dp->options = VIO_TX_DRING;
    dp->ncookies = 1;   /* guaranteed by ldc */

    /*
     * Create private portion of ring.
     */
    dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc(
        (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP);

    if (vsw_setup_ring(ldcp, dp)) {
        DERR(vswp, "%s: unable to setup ring", __func__);
        goto dring_fail_exit;
    }

    /* haven't used any descriptors yet */
    dp->end_idx = 0;
    dp->last_ack_recv = -1;

    /* bind dring to the channel */
    if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle,
        LDC_SHADOW_MAP, LDC_MEM_RW,
        &dp->cookie[0], &dp->ncookies)) != 0) {
        DERR(vswp, "vsw_create_dring: unable to bind to channel "
            "%lld", ldcp->ldc_id);
        goto dring_fail_exit;
    }

    mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
    dp->restart_reqd = B_TRUE;

    /*
     * Only ever create rings for outgoing lane. Link it onto
     * end of list.
     */
    if (ldcp->lane_out.dringp == NULL) {
        D2(vswp, "vsw_create_dring: adding first outbound ring");
        ldcp->lane_out.dringp = dp;
    } else {
        tp = ldcp->lane_out.dringp;
        while (tp->next != NULL)
            tp = tp->next;

        tp->next = dp;
    }

    return (dp);

dring_fail_exit:
    (void) ldc_mem_dring_destroy(dp->handle);

create_fail_exit:
    if (dp->priv_addr != NULL) {
        priv_addr = dp->priv_addr;
        for (i = 0; i < VSW_RING_NUM_EL; i++) {
            if (priv_addr->memhandle != NULL)
                (void) ldc_mem_free_handle(
                    priv_addr->memhandle);
            priv_addr++;
        }
        kmem_free(dp->priv_addr,
            (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
    }
    mutex_destroy(&dp->dlock);

    kmem_free(dp, sizeof (dring_info_t));
    return (NULL);
}

/*
 * Create a ring consisting of just a private portion and link
 * it into the list of rings for the outbound lane.
 *
 * This type of ring is used primarily for temporary data
 * storage (i.e. as data buffers).
 */
void
vsw_create_privring(vsw_ldc_t *ldcp)
{
    dring_info_t    *dp, *tp;
    vsw_t           *vswp = ldcp->ldc_vswp;

    D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

    dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);

    mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);

    /* no public section */
    dp->pub_addr = NULL;

    dp->priv_addr = kmem_zalloc((sizeof (vsw_private_desc_t) *
        VSW_RING_NUM_EL), KM_SLEEP);

    dp->num_descriptors = VSW_RING_NUM_EL;

    if (vsw_setup_ring(ldcp, dp)) {
        DERR(vswp, "%s: setup of ring failed", __func__);
        kmem_free(dp->priv_addr,
            (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
        mutex_destroy(&dp->dlock);
        kmem_free(dp, sizeof (dring_info_t));
        return;
    }

    /* haven't used any descriptors yet */
    dp->end_idx = 0;

    mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
    dp->restart_reqd = B_TRUE;

    /*
     * Only ever create rings for outgoing lane. Link it onto
     * end of list.
     */
    if (ldcp->lane_out.dringp == NULL) {
        D2(vswp, "%s: adding first outbound privring", __func__);
        ldcp->lane_out.dringp = dp;
    } else {
        tp = ldcp->lane_out.dringp;
        while (tp->next != NULL)
            tp = tp->next;

        tp->next = dp;
    }

    D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * Setup the descriptors in the dring. Returns 0 on success, 1 on
 * failure.
 */
int
vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp)
{
    vnet_public_desc_t  *pub_addr = NULL;
    vsw_private_desc_t  *priv_addr = NULL;
    vsw_t               *vswp = ldcp->ldc_vswp;
    uint64_t            *tmpp;
    uint64_t            offset = 0;
    uint32_t            ncookies = 0;
    static char         *name = "vsw_setup_ring";
    int                 i, j, nc, rv;

    priv_addr = dp->priv_addr;
    pub_addr = dp->pub_addr;

    /* public section may be null but private should never be */
    ASSERT(priv_addr != NULL);

    /*
     * Allocate the region of memory which will be used to hold
     * the data the descriptors will refer to.
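     *
     * The region is carved into VSW_RING_NUM_EL fixed-size slots
     * of VSW_RING_EL_DATA_SZ bytes each; descriptor i's datap
     * simply points at slot i, so no per-packet allocation is
     * needed on the transmit path.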
     */
    dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ);
    dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP);

    D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name,
        dp->data_sz, dp->data_addr);

    tmpp = (uint64_t *)dp->data_addr;
    offset = VSW_RING_EL_DATA_SZ / sizeof (*tmpp);

    /*
     * Initialise some of the private and public (if they exist)
     * descriptor fields.
     */
    for (i = 0; i < VSW_RING_NUM_EL; i++) {
        mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL);

        if ((ldc_mem_alloc_handle(ldcp->ldc_handle,
            &priv_addr->memhandle)) != 0) {
            DERR(vswp, "%s: alloc mem handle failed", name);
            goto setup_ring_cleanup;
        }

        priv_addr->datap = (void *)tmpp;

        rv = ldc_mem_bind_handle(priv_addr->memhandle,
            (caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ,
            LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W,
            &(priv_addr->memcookie[0]), &ncookies);
        if (rv != 0) {
            DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed "
                "(rv %d)", name, ldcp->ldc_id, rv);
            goto setup_ring_cleanup;
        }
        priv_addr->bound = 1;

        D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx",
            name, i, priv_addr->memcookie[0].addr,
            priv_addr->memcookie[0].size);

        if (ncookies > (uint32_t)VSW_MAX_COOKIES) {
            DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned "
                "invalid num of cookies (%d) for size 0x%llx",
                name, ldcp->ldc_id, ncookies,
                VSW_RING_EL_DATA_SZ);

            goto setup_ring_cleanup;
        } else {
            for (j = 1; j < ncookies; j++) {
                rv = ldc_mem_nextcookie(priv_addr->memhandle,
                    &(priv_addr->memcookie[j]));
                if (rv != 0) {
                    DERR(vswp, "%s: ldc_mem_nextcookie "
                        "failed rv (%d)", name, rv);
                    goto setup_ring_cleanup;
                }
                D3(vswp, "%s: memcookie %d : addr 0x%llx : "
                    "size 0x%llx", name, j,
                    priv_addr->memcookie[j].addr,
                    priv_addr->memcookie[j].size);
            }
        }
        priv_addr->ncookies = ncookies;
        priv_addr->dstate = VIO_DESC_FREE;

        if (pub_addr != NULL) {

            /* link pub and private sides */
            priv_addr->descp = pub_addr;

            pub_addr->ncookies = priv_addr->ncookies;

            for (nc = 0; nc < pub_addr->ncookies; nc++) {
                bcopy(&priv_addr->memcookie[nc],
                    &pub_addr->memcookie[nc],
                    sizeof (ldc_mem_cookie_t));
            }

            pub_addr->hdr.dstate = VIO_DESC_FREE;
            pub_addr++;
        }

        /*
         * Move to the next element in the dring and the next
         * position in the data buffer.
         */
        priv_addr++;
        tmpp += offset;
    }

    return (0);

setup_ring_cleanup:
    priv_addr = dp->priv_addr;

    for (j = 0; j < i; j++) {
        (void) ldc_mem_unbind_handle(priv_addr->memhandle);
        (void) ldc_mem_free_handle(priv_addr->memhandle);

        mutex_destroy(&priv_addr->dstate_lock);

        priv_addr++;
    }
    kmem_free(dp->data_addr, dp->data_sz);

    return (1);
}

/*
 * Searches the private section of a ring for a free descriptor,
 * starting at the location of the last free descriptor found
 * previously.
 *
 * Returns 0 if a free descriptor is available, and updates the state
 * of the private descriptor to VIO_DESC_READY, otherwise returns 1.
 *
 * FUTURE: might need to return contiguous range of descriptors
 * as dring info msg assumes all will be contiguous.
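 *
 * Note that only the slot at end_idx is examined: descriptors are
 * recycled in ring order by the ACK processing, so if that slot is
 * still in use the ring can be treated as full.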
 */
static int
vsw_dring_find_free_desc(dring_info_t *dringp,
    vsw_private_desc_t **priv_p, int *idx)
{
    vsw_private_desc_t  *addr = NULL;
    int                 num = VSW_RING_NUM_EL;
    int                 ret = 1;

    D1(NULL, "%s enter\n", __func__);

    ASSERT(dringp->priv_addr != NULL);

    D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld",
        __func__, dringp, dringp->end_idx);

    addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx;

    mutex_enter(&addr->dstate_lock);
    if (addr->dstate == VIO_DESC_FREE) {
        addr->dstate = VIO_DESC_READY;
        *priv_p = addr;
        *idx = dringp->end_idx;
        dringp->end_idx = (dringp->end_idx + 1) % num;
        ret = 0;
    }
    mutex_exit(&addr->dstate_lock);

    /* ring full */
    if (ret == 1) {
        D2(NULL, "%s: no descriptors free: started at %d", __func__,
            dringp->end_idx);
    }

    D1(NULL, "%s: exit\n", __func__);

    return (ret);
}

/*
 * Map from a dring identifier to the ring itself. Returns
 * pointer to ring or NULL if no match found.
 */
static dring_info_t *
vsw_ident2dring(lane_t *lane, uint64_t ident)
{
    dring_info_t    *dp = NULL;

    if ((dp = lane->dringp) == NULL) {
        return (NULL);
    } else {
        if (dp->ident == ident)
            return (dp);

        while (dp != NULL) {
            if (dp->ident == ident)
                break;
            dp = dp->next;
        }
    }

    return (dp);
}

/*
 * Set the default lane attributes. These are copied into
 * the attr msg we send to our peer. If they are not acceptable
 * then (currently) the handshake ends.
 */
static void
vsw_set_lane_attr(vsw_t *vswp, lane_t *lp)
{
    bzero(lp, sizeof (lane_t));

    READ_ENTER(&vswp->if_lockrw);
    ether_copy(&(vswp->if_addr), &(lp->addr));
    RW_EXIT(&vswp->if_lockrw);

    lp->mtu = VSW_MTU;
    lp->addr_type = ADDR_TYPE_MAC;
    lp->xfer_mode = VIO_DRING_MODE;
    lp->ack_freq = 0;   /* for shared mode */

    mutex_enter(&lp->seq_lock);
    lp->seq_num = VNET_ISS;
    mutex_exit(&lp->seq_lock);
}

/*
 * Verify that the attributes are acceptable.
 *
 * FUTURE: If some attributes are not acceptable, change them
 * to our desired values.
 */
static int
vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port)
{
    int ret = 0;

    D1(NULL, "vsw_check_attr enter\n");

    /*
     * Note we currently only support in-band descriptors
     * and descriptor rings, not packet based transfer (VIO_PKT_MODE).
     */
    if ((pkt->xfer_mode != VIO_DESC_MODE) &&
        (pkt->xfer_mode != VIO_DRING_MODE)) {
        D2(NULL, "vsw_check_attr: unknown mode %x\n",
            pkt->xfer_mode);
        ret = 1;
    }

    /* Only support MAC addresses at moment. */
    if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) {
        D2(NULL, "vsw_check_attr: invalid addr_type %x, "
            "or address 0x%llx\n", pkt->addr_type,
            pkt->addr);
        ret = 1;
    }

    /*
     * MAC address supplied by device should match that stored
     * in the vsw-port OBP node. Need to decide what to do if they
     * don't match, for the moment just warn but don't fail.
     */
    if (bcmp(&pkt->addr, &port->p_macaddr, ETHERADDRL) != 0) {
        DERR(NULL, "vsw_check_attr: device supplied address "
            "0x%llx doesn't match node address 0x%llx\n",
            pkt->addr, port->p_macaddr);
    }

    /*
     * Ack freq only makes sense in pkt mode, in shared
     * mode the ring descriptors say whether or not to
     * send back an ACK.
     */
    if ((pkt->xfer_mode == VIO_DRING_MODE) &&
        (pkt->ack_freq > 0)) {
        D2(NULL, "vsw_check_attr: non zero ack freq "
            "in SHM mode\n");
        ret = 1;
    }

    /*
     * Note: for the moment we only support ETHER
     * frames. This may change in the future.
     */
    if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) {
        D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n",
            pkt->mtu);
        ret = 1;
    }

    D1(NULL, "vsw_check_attr exit\n");

    return (ret);
}

/*
 * Returns 1 if there is a problem, 0 otherwise.
 */
static int
vsw_check_dring_info(vio_dring_reg_msg_t *pkt)
{
    int ret = 0;

    D1(NULL, "vsw_check_dring_info enter\n");

    if ((pkt->num_descriptors == 0) ||
        (pkt->descriptor_size == 0) ||
        (pkt->ncookies != 1)) {
        DERR(NULL, "vsw_check_dring_info: invalid dring msg");
        ret = 1;
    }

    D1(NULL, "vsw_check_dring_info exit\n");

    return (ret);
}

/*
 * Returns 1 if two memory cookies match. Otherwise returns 0.
 */
static int
vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2)
{
    if ((m1->addr != m2->addr) ||
        (m1->size != m2->size)) {
        return (0);
    } else {
        return (1);
    }
}

/*
 * Returns 1 if ring described in reg message matches that
 * described by dring_info structure. Otherwise returns 0.
 */
static int
vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg)
{
    if ((msg->descriptor_size != dp->descriptor_size) ||
        (msg->num_descriptors != dp->num_descriptors) ||
        (msg->ncookies != dp->ncookies) ||
        !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) {
        return (0);
    } else {
        return (1);
    }
}

static caddr_t
vsw_print_ethaddr(uint8_t *a, char *ebuf)
{
    (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
        a[0], a[1], a[2], a[3], a[4], a[5]);
    return (ebuf);
}

/*
 * Reset and free all the resources associated with
 * the channel.
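 *
 * The two lanes are torn down differently: an INBOUND lane's
 * drings were imported from the peer and just need unmapping,
 * whereas the OUTBOUND lane's drings were created and exported
 * by us and must be unbound and destroyed via vsw_free_ring().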
 */
static void
vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir)
{
    dring_info_t    *dp, *dpp;
    lane_t          *lp = NULL;
    int             rv = 0;

    ASSERT(ldcp != NULL);

    D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id);

    if (dir == INBOUND) {
        D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane"
            " of channel %lld", __func__, ldcp->ldc_id);
        lp = &ldcp->lane_in;
    } else {
        D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane"
            " of channel %lld", __func__, ldcp->ldc_id);
        lp = &ldcp->lane_out;
    }

    lp->lstate = VSW_LANE_INACTIV;
    mutex_enter(&lp->seq_lock);
    lp->seq_num = VNET_ISS;
    mutex_exit(&lp->seq_lock);
    if (lp->dringp) {
        if (dir == INBOUND) {
            dp = lp->dringp;
            while (dp != NULL) {
                dpp = dp->next;
                if (dp->handle != NULL)
                    (void) ldc_mem_dring_unmap(dp->handle);
                kmem_free(dp, sizeof (dring_info_t));
                dp = dpp;
            }
        } else {
            /*
             * unbind, destroy exported dring, free dring struct
             */
            dp = lp->dringp;
            rv = vsw_free_ring(dp);
        }
        if (rv == 0) {
            lp->dringp = NULL;
        }
    }

    D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * Free ring and all associated resources.
 */
static int
vsw_free_ring(dring_info_t *dp)
{
    vsw_private_desc_t  *paddr = NULL;
    dring_info_t        *dpp;
    int                 i, rv = 1;

    while (dp != NULL) {
        mutex_enter(&dp->dlock);
        dpp = dp->next;
        if (dp->priv_addr != NULL) {
            /*
             * First unbind and free the memory handles
             * stored in each descriptor within the ring.
             */
            for (i = 0; i < VSW_RING_NUM_EL; i++) {
                paddr = (vsw_private_desc_t *)
                    dp->priv_addr + i;
                if (paddr->memhandle != NULL) {
                    if (paddr->bound == 1) {
                        rv = ldc_mem_unbind_handle(
                            paddr->memhandle);

                        if (rv != 0) {
                            DERR(NULL, "error "
                                "unbinding handle for "
                                "ring 0x%llx at pos %d",
                                dp, i);
                            mutex_exit(&dp->dlock);
                            return (rv);
                        }
                        paddr->bound = 0;
                    }

                    rv = ldc_mem_free_handle(
                        paddr->memhandle);
                    if (rv != 0) {
                        DERR(NULL, "error freeing "
                            "handle for ring "
                            "0x%llx at pos %d",
                            dp, i);
                        mutex_exit(&dp->dlock);
                        return (rv);
                    }
                    paddr->memhandle = NULL;
                }
                mutex_destroy(&paddr->dstate_lock);
            }
            kmem_free(dp->priv_addr, (sizeof (vsw_private_desc_t)
                * VSW_RING_NUM_EL));
        }

        /*
         * Now unbind and destroy the ring itself.

/*
 * Free ring and all associated resources.
 */
static int
vsw_free_ring(dring_info_t *dp)
{
	vsw_private_desc_t	*paddr = NULL;
	dring_info_t		*dpp;
	int			i, rv = 1;

	while (dp != NULL) {
		mutex_enter(&dp->dlock);
		dpp = dp->next;
		if (dp->priv_addr != NULL) {
			/*
			 * First unbind and free the memory handles
			 * stored in each descriptor within the ring.
			 */
			for (i = 0; i < VSW_RING_NUM_EL; i++) {
				paddr = (vsw_private_desc_t *)
				    dp->priv_addr + i;
				if (paddr->memhandle != NULL) {
					if (paddr->bound == 1) {
						rv = ldc_mem_unbind_handle(
						    paddr->memhandle);

						if (rv != 0) {
							DERR(NULL, "error "
							    "unbinding handle "
							    "for ring 0x%llx "
							    "at pos %d",
							    dp, i);
							mutex_exit(&dp->dlock);
							return (rv);
						}
						paddr->bound = 0;
					}

					rv = ldc_mem_free_handle(
					    paddr->memhandle);
					if (rv != 0) {
						DERR(NULL, "error freeing "
						    "handle for ring "
						    "0x%llx at pos %d",
						    dp, i);
						mutex_exit(&dp->dlock);
						return (rv);
					}
					paddr->memhandle = NULL;
				}
				mutex_destroy(&paddr->dstate_lock);
			}
			kmem_free(dp->priv_addr, (sizeof (vsw_private_desc_t)
			    * VSW_RING_NUM_EL));
		}

		/*
		 * Now unbind and destroy the ring itself.
		 */
		if (dp->handle != NULL) {
			(void) ldc_mem_dring_unbind(dp->handle);
			(void) ldc_mem_dring_destroy(dp->handle);
		}

		if (dp->data_addr != NULL) {
			kmem_free(dp->data_addr, dp->data_sz);
		}

		mutex_exit(&dp->dlock);
		mutex_destroy(&dp->dlock);
		mutex_destroy(&dp->restart_lock);
		kmem_free(dp, sizeof (dring_info_t));

		dp = dpp;
	}
	return (0);
}

/*
 * Debugging routines
 */
static void
display_state(void)
{
	vsw_t		*vswp;
	vsw_port_list_t	*plist;
	vsw_port_t	*port;
	vsw_ldc_list_t	*ldcl;
	vsw_ldc_t	*ldcp;

	cmn_err(CE_NOTE, "***** system state *****");

	for (vswp = vsw_head; vswp; vswp = vswp->next) {
		plist = &vswp->plist;
		READ_ENTER(&plist->lockrw);
		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
		    vswp->instance, plist->num_ports);

		for (port = plist->head; port != NULL; port = port->p_next) {
			ldcl = &port->p_ldclist;
			cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
			    port->p_instance, ldcl->num_ldcs);
			READ_ENTER(&ldcl->lockrw);
			ldcp = ldcl->head;
			for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
				cmn_err(CE_CONT, "chan %lu : dev %d : "
				    "status %d : phase %u\n",
				    ldcp->ldc_id, ldcp->dev_class,
				    ldcp->ldc_status, ldcp->hphase);
				cmn_err(CE_CONT, "chan %lu : lsession %lu : "
				    "psession %lu\n",
				    ldcp->ldc_id,
				    ldcp->local_session,
				    ldcp->peer_session);

				cmn_err(CE_CONT, "Inbound lane:\n");
				display_lane(&ldcp->lane_in);
				cmn_err(CE_CONT, "Outbound lane:\n");
				display_lane(&ldcp->lane_out);
			}
			RW_EXIT(&ldcl->lockrw);
		}
		RW_EXIT(&plist->lockrw);
	}
	cmn_err(CE_NOTE, "***** system state *****");
}

static void
display_lane(lane_t *lp)
{
	dring_info_t	*drp;

	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
	    lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
	    lp->addr_type, lp->addr, lp->xfer_mode);
	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);

	cmn_err(CE_CONT, "Dring info:\n");
	for (drp = lp->dringp; drp != NULL; drp = drp->next) {
		cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
		    drp->num_descriptors, drp->descriptor_size);
		cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
		cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
		    (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
		cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
		    drp->ident, drp->end_idx);
		display_ring(drp);
	}
}

static void
display_ring(dring_info_t *dringp)
{
	uint64_t		i;
	uint64_t		priv_count = 0;
	uint64_t		pub_count = 0;
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;

	for (i = 0; i < VSW_RING_NUM_EL; i++) {
		if (dringp->pub_addr != NULL) {
			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;

			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
				pub_count++;
		}

		if (dringp->priv_addr != NULL) {
			priv_addr =
			    (vsw_private_desc_t *)dringp->priv_addr + i;

			if (priv_addr->dstate == VIO_DESC_FREE)
				priv_count++;
		}
	}
	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
	    i, priv_count, pub_count);
}
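
/*
 * Example (illustrative only): the display routines above are
 * self-contained debug aids. Dumping the state of every vsw
 * instance, port and channel is a single argument-less call:
 *
 *	display_state();
 *
 * All output goes through cmn_err(), so it appears on the console
 * and in the system log.
 */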

static void
dump_flags(uint64_t state)
{
	int	i;

	typedef struct flag_name {
		int	flag_val;
		char	*flag_name;
	} flag_name_t;

	flag_name_t	flags[] = {
		VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT",
		VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV",
		VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV",
		VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT",
		VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV",
		VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT",
		VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT",
		VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV",
		VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT",
		VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV",
		VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT",
		VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV",
		VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT",
		VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV",
		VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT",
		VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV",
		VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT",
		VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV",
		VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT",
		VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV",
		VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT",
		VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV",
		VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT",
		VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV",
		VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT",
		VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV",
		VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT",
		VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV",
		VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT",
		VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV",
		VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"};

	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
	for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
		if (state & flags[i].flag_val)
			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
	}
}
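
/*
 * Example (illustrative only): dump_flags() decodes a lane state word
 * one bit at a time, so a handshake problem on the outbound lane
 * might be inspected with:
 *
 *	dump_flags(ldcp->lane_out.lstate);
 *
 * which prints one DUMP_FLAGS line for each VSW_* flag set in lstate.
 */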