/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>

/*
 * Function prototypes.
 */
static int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
static int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
static int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static void vsw_get_md_properties(vsw_t *vswp);
static int vsw_get_physaddr(vsw_t *);
static int vsw_setup_layer2(vsw_t *);
static int vsw_setup_layer3(vsw_t *);
/* MAC Ring table functions. */
static void vsw_mac_ring_tbl_init(vsw_t *vswp);
static void vsw_mac_ring_tbl_destroy(vsw_t *vswp);
static void vsw_queue_worker(vsw_mac_ring_t *rrp);
static void vsw_queue_stop(vsw_queue_t *vqp);
static vsw_queue_t *vsw_queue_create(void);
static void vsw_queue_destroy(vsw_queue_t *vqp);

/* MAC layer routines */
static mac_resource_handle_t vsw_mac_ring_add_cb(void *arg,
    mac_resource_t *mrp);
static int vsw_get_hw_maddr(vsw_t *);
static int vsw_set_hw(vsw_t *, vsw_port_t *);
static int vsw_set_hw_promisc(vsw_t *, vsw_port_t *);
static int vsw_unset_hw(vsw_t *, vsw_port_t *);
static int vsw_unset_hw_promisc(vsw_t *, vsw_port_t *);
static int vsw_reconfig_hw(vsw_t *);
static int vsw_mac_attach(vsw_t *vswp);
static void vsw_mac_detach(vsw_t *vswp);

static void vsw_rx_queue_cb(void *, mac_resource_handle_t, mblk_t *);
static void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *);
static mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
static int vsw_mac_register(vsw_t *);
static int vsw_mac_unregister(vsw_t *);
static int vsw_m_stat(void *, uint_t, uint64_t *);
static void vsw_m_stop(void *arg);
static int vsw_m_start(void *arg);
static int vsw_m_unicst(void *arg, const uint8_t *);
static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
static int vsw_m_promisc(void *arg, boolean_t);
static mblk_t *vsw_m_tx(void *arg, mblk_t *);

/* MDEG routines */
static void vsw_mdeg_register(vsw_t *vswp);
static void vsw_mdeg_unregister(vsw_t *vswp);
static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);

/* Port add/deletion routines */
static int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
static int vsw_port_attach(vsw_t *vswp, int p_instance,
    uint64_t *ldcids, int nids, struct ether_addr *macaddr);
static int vsw_detach_ports(vsw_t *vswp);
static int vsw_port_detach(vsw_t *vswp, int p_instance);
static int vsw_port_delete(vsw_port_t *port);
static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_init_ldcs(vsw_port_t *port);
static int vsw_uninit_ldcs(vsw_port_t *port);
static int vsw_ldc_init(vsw_ldc_t *ldcp);
static int vsw_ldc_uninit(vsw_ldc_t *ldcp);
static int vsw_drain_ldcs(vsw_port_t *port);
static int vsw_drain_port_taskq(vsw_port_t *port);
static void vsw_marker_task(void *);
static vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
static int vsw_plist_del_node(vsw_t *, vsw_port_t *port);

/* Interrupt routines */
static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);

/* Handshake routines */
static void vsw_restart_handshake(vsw_ldc_t *);
static int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static void vsw_next_milestone(vsw_ldc_t *);
static int vsw_supported_version(vio_ver_msg_t *);

/* Data processing routines */
static void vsw_process_pkt(void *);
static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t);
static void vsw_process_ctrl_pkt(void *);
static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_raw_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);

/* Switching/data transmit routines */
static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port, mac_resource_handle_t);
static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port, mac_resource_handle_t);
static int vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port);
static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port);
static int vsw_portsend(vsw_port_t *, mblk_t *);
static int vsw_dringsend(vsw_ldc_t *, mblk_t *);
static int vsw_descrsend(vsw_ldc_t *, mblk_t *);

/* Packet creation routines */
static void vsw_send_ver(vsw_ldc_t *);
static void vsw_send_attr(vsw_ldc_t *);
static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);

static void vsw_send_msg(vsw_ldc_t *, void *, int);

/* Forwarding database (FDB) routines */
static int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
static int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
static vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *);
static int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
static int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
static int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
static void vsw_del_addr(uint8_t, void *, uint64_t);
static void vsw_del_mcst_port(vsw_port_t *);
static void vsw_del_mcst_vsw(vsw_t *);

/* Dring routines */
static dring_info_t *vsw_create_dring(vsw_ldc_t *);
static void vsw_create_privring(vsw_ldc_t *);
static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
    int *);
static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);

static void vsw_set_lane_attr(vsw_t *, lane_t *);
static int vsw_check_attr(vnet_attr_msg_t *, vsw_port_t *);
static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
static int vsw_check_dring_info(vio_dring_reg_msg_t *);

/* Misc support routines */
static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static int vsw_free_ring(dring_info_t *);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);

int	vsw_num_handshakes = 3;		/* # of handshake attempts */
int	vsw_wretries = 100;		/* # of write attempts */
int	vsw_chain_len = 150;		/* max # of mblks in msg chain */
int	vsw_desc_delay = 0;		/* delay in us */
int	vsw_read_attempts = 5;		/* # of reads of descriptor */

uint32_t	vsw_mblk_size = VSW_MBLK_SIZE;
uint32_t	vsw_num_mblks = VSW_NUM_MBLKS;
/*
 * Mode-specific frame switching function.
 */
void	(*vsw_switch_frame)(vsw_t *, mblk_t *, int, vsw_port_t *,
	    mac_resource_handle_t);

static mac_callbacks_t vsw_m_callbacks = {
	0,
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx,
	NULL,
	NULL,
	NULL
};

static struct cb_ops vsw_cb_ops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_MP,			/* cb_flag */
	CB_REV,			/* rev */
	nodev,			/* int (*cb_aread)() */
	nodev			/* int (*cb_awrite)() */
};

static struct dev_ops vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	vsw_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};

extern struct mod_ops mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch Driver %I%",
	&vsw_ops,
};

#define	LDC_ENTER_LOCK(ldcp)	\
	mutex_enter(&((ldcp)->ldc_cblock));\
	mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
	mutex_exit(&((ldcp)->ldc_txlock));\
	mutex_exit(&((ldcp)->ldc_cblock));

/* Driver soft state ptr */
static void	*vsw_state;

/*
 * Linked list of "vsw_t" structures - one per instance.
 */
vsw_t		*vsw_head = NULL;
krwlock_t	vsw_rw;

/*
 * Property names
 */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";

/* supported versions */
static ver_sup_t vsw_versions[] = { {1, 0} };

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device-port' nodes identified by their
 * 'id' property.
 */
static md_prop_match_t vport_prop_match[] = {
	{ MDET_PROP_VAL,	"id" },
	{ MDET_LIST_END,	NULL }
};

static mdeg_node_match_t vport_match = { "virtual-device-port",
					    vport_prop_match };

/*
 * Specification of an MD node passed to the MDEG to filter any
 * 'vport' nodes that do not belong to the specified node. This
 * template is copied for each vsw instance and filled in with
 * the appropriate 'cfg-handle' value before being passed to the MDEG.
 */
static mdeg_prop_spec_t vsw_prop_template[] = {
	{ MDET_PROP_STR,	"name",		vsw_propname },
	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
	{ MDET_LIST_END,	NULL,		NULL }
};

#define	VSW_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val);

/*
 * From /etc/system, enable/disable thread per ring. This is a mode
 * selection that is done at vsw driver attach time.
 */
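/*
 * For example, a (hypothetical) /etc/system entry to turn the per-ring
 * worker threads on would be:
 *
 *	set vsw:vsw_multi_ring_enable = 1
 */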
boolean_t	vsw_multi_ring_enable = B_FALSE;
int		vsw_mac_rx_rings = VSW_MAC_RX_RINGS;

/*
 * Print debug messages - set to 0x1f to enable all msgs
 * or 0x0 to turn all off.
 */
int vswdbg = 0x0;

/*
 * debug levels:
 * 0x01:	Function entry/exit tracing
 * 0x02:	Internal function messages
 * 0x04:	Verbose internal messages
 * 0x08:	Warning messages
 * 0x10:	Error messages
 */

static void
vswdebug(vsw_t *vswp, const char *fmt, ...)
{
	char buf[512];
	va_list ap;

	va_start(ap, fmt);
	(void) vsprintf(buf, fmt, ap);
	va_end(ap);

	if (vswp == NULL)
		cmn_err(CE_CONT, "%s\n", buf);
	else
		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
}

/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

#define	DUMP_TAG(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \
}

#define	DUMP_TAG_PTR(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \
}

#define	DUMP_FLAGS(flags)	dump_flags(flags);
#define	DISPLAY_STATE()		display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */

#ifdef DEBUG

#define	D1		\
if (vswdbg & 0x01)	\
	vswdebug

#define	D2		\
if (vswdbg & 0x02)	\
	vswdebug

#define	D3		\
if (vswdbg & 0x04)	\
	vswdebug

#define	DWARN		\
if (vswdbg & 0x08)	\
	vswdebug

#define	DERR		\
if (vswdbg & 0x10)	\
	vswdebug

#else

#define	DERR		if (0)	vswdebug
#define	DWARN		if (0)	vswdebug
#define	D1		if (0)	vswdebug
#define	D2		if (0)	vswdebug
#define	D3		if (0)	vswdebug

#endif	/* DEBUG */

static struct modlinkage modlinkage = {
	MODREV_1,
	&vswmodldrv,
	NULL
};

int
_init(void)
{
	int status;

	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);

	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
	if (status != 0) {
		return (status);
	}

	mac_init_ops(&vsw_ops, "vsw");
	status = mod_install(&modlinkage);
	if (status != 0) {
		ddi_soft_state_fini(&vsw_state);
	}
	return (status);
}

int
_fini(void)
{
	int status;

	status = mod_remove(&modlinkage);
	if (status != 0)
		return (status);
	mac_fini_ops(&vsw_ops);
	ddi_soft_state_fini(&vsw_state);

	rw_destroy(&vsw_rw);

	return (status);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	vsw_t		*vswp;
	int		instance, i;
	char		hashname[MAXNAMELEN];
	char		qname[TASKQ_NAMELEN];
	int		rv = 1;
	enum		{ PROG_init = 0x00,
			    PROG_if_lock = 0x01,
			    PROG_fdb = 0x02,
			    PROG_mfdb = 0x04,
			    PROG_report_dev = 0x08,
			    PROG_plist = 0x10,
			    PROG_taskq = 0x20 }
			progress;
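	/*
	 * Each PROG_* bit records a setup step that completed, so the
	 * vsw_attach_fail path below can unwind exactly what was done.
	 */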
	progress = PROG_init;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	case DDI_PM_RESUME:
	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
		return (DDI_FAILURE);
	}
	vswp = ddi_get_soft_state(vsw_state, instance);

	if (vswp == NULL) {
		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
		goto vsw_attach_fail;
	}

	vswp->dip = dip;
	vswp->instance = instance;
	ddi_set_driver_private(dip, (caddr_t)vswp);

	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
	progress |= PROG_if_lock;

	/*
	 * Get the various properties such as physical device name
	 * (vsw-phys-dev), switch mode etc. from the MD.
	 */
	vsw_get_md_properties(vswp);

	/* setup the unicast forwarding database */
	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
	    vswp->instance);
	D2(vswp, "creating unicast hash table (%s)...", hashname);
	vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
	    mod_hash_null_valdtor, sizeof (void *));

	progress |= PROG_fdb;

	/* setup the multicast forwarding database */
	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
	    vswp->instance);
	D2(vswp, "creating multicast hash table (%s)...", hashname);
	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
	vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
	    mod_hash_null_valdtor, sizeof (void *));

	progress |= PROG_mfdb;

	/*
	 * Create lock protecting the list of multicast addresses
	 * which could come via the m_multicst() entry point when plumbed.
	 */
	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
	vswp->mcap = NULL;

	ddi_report_dev(vswp->dip);

	progress |= PROG_report_dev;

	WRITE_ENTER(&vsw_rw);
	vswp->next = vsw_head;
	vsw_head = vswp;
	RW_EXIT(&vsw_rw);

	/* setup the port list */
	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
	vswp->plist.head = NULL;

	progress |= PROG_plist;

	/*
	 * Create the taskq which will process all the VIO
	 * control messages.
	 */
	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
	    TASKQ_DEFAULTPRI, 0)) == NULL) {
		cmn_err(CE_WARN, "Unable to create task queue");
		goto vsw_attach_fail;
	}

	progress |= PROG_taskq;

	/* select best switching mode */
	for (i = 0; i < vswp->smode_num; i++) {
		vswp->smode_idx = i;
		switch (vswp->smode[i]) {
		case VSW_LAYER2:
		case VSW_LAYER2_PROMISC:
			rv = vsw_setup_layer2(vswp);
			break;

		case VSW_LAYER3:
			rv = vsw_setup_layer3(vswp);
			break;

		default:
			DERR(vswp, "unknown switch mode");
			rv = 1;
			break;
		}

		if (rv == 0)
			break;
	}

	if (rv == 1) {
		cmn_err(CE_WARN, "Unable to setup switching mode");
		goto vsw_attach_fail;
	}

	D2(vswp, "Operating in mode %d", vswp->smode[vswp->smode_idx]);
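	/*
	 * Note that smode[] was filled in preference order by
	 * vsw_get_md_properties(); the loop above settles on the first
	 * mode that initializes successfully and records it in smode_idx.
	 */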
	/*
	 * Register with the MAC layer as a network device so
	 * we can be plumbed if desired.
	 *
	 * Do this in both layer 2 and layer 3 mode.
	 */
	vswp->if_state &= ~VSW_IF_UP;
	if (vswp->mdprops & (VSW_MD_MACADDR | VSW_DEV_MACADDR)) {
		if (vsw_mac_register(vswp) != 0) {
			cmn_err(CE_WARN, "Unable to register as provider "
			    "with MAC layer, continuing with attach");
		}
	}

	/* prevent auto-detaching */
	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "Unable to set \"%s\" property for "
		    "instance %u", DDI_NO_AUTODETACH, instance);
	}

	/*
	 * Now we have everything setup, register for MD change
	 * events.
	 */
	vsw_mdeg_register(vswp);

	return (DDI_SUCCESS);

vsw_attach_fail:
	DERR(NULL, "vsw_attach: failed");

	if (progress & PROG_taskq)
		ddi_taskq_destroy(vswp->taskq_p);

	if (progress & PROG_plist)
		rw_destroy(&vswp->plist.lockrw);

	if (progress & PROG_report_dev) {
		ddi_remove_minor_node(dip, NULL);
		mutex_destroy(&vswp->mca_lock);
	}

	if (progress & PROG_mfdb) {
		mod_hash_destroy_hash(vswp->mfdb);
		vswp->mfdb = NULL;
		rw_destroy(&vswp->mfdbrw);
	}

	if (progress & PROG_fdb) {
		mod_hash_destroy_hash(vswp->fdb);
		vswp->fdb = NULL;
	}

	if (progress & PROG_if_lock)
		rw_destroy(&vswp->if_lockrw);

	ddi_soft_state_free(vsw_state, instance);
	return (DDI_FAILURE);
}

static int
vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vio_mblk_pool_t	*poolp, *npoolp;
	vsw_t		**vswpp, *vswp;
	int		instance;

	instance = ddi_get_instance(dip);
	vswp = ddi_get_soft_state(vsw_state, instance);

	if (vswp == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
	case DDI_PM_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	D2(vswp, "detaching instance %d", instance);

	if (vswp->mdprops & (VSW_MD_MACADDR | VSW_DEV_MACADDR)) {
		if (vsw_mac_unregister(vswp) != 0) {
			cmn_err(CE_WARN, "Unable to detach from MAC layer");
			return (DDI_FAILURE);
		}
		rw_destroy(&vswp->if_lockrw);
	}

	vsw_mdeg_unregister(vswp);

	/* remove mac layer callback */
	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
		mac_rx_remove(vswp->mh, vswp->mrh);
		vswp->mrh = NULL;
	}

	if (vsw_detach_ports(vswp) != 0) {
		cmn_err(CE_WARN, "Unable to detach ports");
		return (DDI_FAILURE);
	}

	/*
	 * Now that the ports have been deleted, stop and close
	 * the physical device.
	 */
	if (vswp->mh != NULL) {
		if (vswp->mstarted)
			mac_stop(vswp->mh);
		if (vswp->mresources)
			mac_resource_set(vswp->mh, NULL, NULL);
		mac_close(vswp->mh);

		vswp->mh = NULL;
		vswp->txinfo = NULL;
	}

	/*
	 * Destroy any free pools that may still exist.
	 */
	poolp = vswp->rxh;
	while (poolp != NULL) {
		npoolp = vswp->rxh = poolp->nextp;
		if (vio_destroy_mblks(poolp) != 0) {
			vswp->rxh = poolp;
			return (DDI_FAILURE);
		}
		poolp = npoolp;
	}

	/*
	 * Remove this instance from any entries it may be on in
	 * the hash table by using the list of addresses maintained
	 * in the vsw_t structure.
	 */
	vsw_del_mcst_vsw(vswp);

	vswp->mcap = NULL;
	mutex_destroy(&vswp->mca_lock);

	/*
	 * By now any pending tasks have finished and the underlying
	 * ldc's have been destroyed, so it's safe to delete the control
	 * message taskq.
	 */
	if (vswp->taskq_p != NULL)
		ddi_taskq_destroy(vswp->taskq_p);

	/*
	 * At this stage all the data pointers in the hash table
	 * should be NULL, as all the ports have been removed and will
	 * have deleted themselves from the port lists which the data
	 * pointers point to. Hence we can destroy the table using the
	 * default destructors.
	 */
	D2(vswp, "vsw_detach: destroying hash tables..");
	mod_hash_destroy_hash(vswp->fdb);
	vswp->fdb = NULL;

	WRITE_ENTER(&vswp->mfdbrw);
	mod_hash_destroy_hash(vswp->mfdb);
	vswp->mfdb = NULL;
	RW_EXIT(&vswp->mfdbrw);
	rw_destroy(&vswp->mfdbrw);

	ddi_remove_minor_node(dip, NULL);

	rw_destroy(&vswp->plist.lockrw);
	WRITE_ENTER(&vsw_rw);
	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
		if (*vswpp == vswp) {
			*vswpp = vswp->next;
			break;
		}
	}
	RW_EXIT(&vsw_rw);
	ddi_soft_state_free(vsw_state, instance);

	return (DDI_SUCCESS);
}

static int
vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	_NOTE(ARGUNUSED(dip))

	vsw_t	*vswp = NULL;
	dev_t	dev = (dev_t)arg;
	int	instance;

	instance = getminor(dev);

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
			*result = NULL;
			return (DDI_FAILURE);
		}
		*result = vswp->dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);

	default:
		*result = NULL;
		return (DDI_FAILURE);
	}
}
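/*
 * Sketch of the MD layout consumed below (property names are the
 * *_propname strings defined above; the values shown are examples only):
 *
 *	virtual-device {
 *		name = "virtual-network-switch"
 *		cfg-handle = <matches the OBP 'reg' value>
 *		vsw-phys-dev = "e1000g0"
 *		vsw-switch-mode = "switched", "promiscuous", "routed"
 *		local-mac-address = <64-bit value, low 48 bits used>
 *	}
 */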
/*
 * Get the properties from our MD node.
 */
static void
vsw_get_md_properties(vsw_t *vswp)
{
	md_t		*mdp = NULL;
	int		num_nodes = 0;
	int		len = 0, listsz = 0;
	int		num_vdev = 0;
	int		i, idx;
	boolean_t	found_node = B_FALSE;
	char		*smode = NULL;
	char		*curr_mode = NULL;
	char		*physname = NULL;
	char		*node_name = NULL;
	char		*dev;
	uint64_t	macaddr = 0;
	uint64_t	md_inst, obp_inst;
	mde_cookie_t	*listp = NULL;
	mde_cookie_t	rootnode;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Further down we compare the obp 'reg' property to the
	 * 'cfg-handle' property in the vsw MD node to determine
	 * if the node refers to this particular instance. So if
	 * we can't read the obp value then there is no point
	 * in proceeding further.
	 */
	if (ddi_prop_exists(DDI_DEV_T_ANY, vswp->dip,
	    DDI_PROP_DONTPASS, reg_propname) != 1) {
		cmn_err(CE_WARN, "Unable to read %s property "
		    "from OBP device node", reg_propname);
		return;
	}

	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
	    DDI_PROP_DONTPASS, reg_propname, 0);

	D2(vswp, "%s: obp_inst 0x%llx", __func__, obp_inst);

	if ((mdp = md_get_handle()) == NULL) {
		DERR(vswp, "%s: unable to init MD", __func__);
		return;
	}

	if ((num_nodes = md_node_count(mdp)) <= 0) {
		DERR(vswp, "%s: invalid number of nodes found %d",
		    __func__, num_nodes);
		(void) md_fini_handle(mdp);
		return;
	}

	D2(vswp, "%s: %d nodes in total in MD", __func__, num_nodes);

	/* allocate enough space for node list */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

	rootnode = md_root_node(mdp);

	/* Get the list of virtual devices */
	num_vdev = md_scan_dag(mdp, rootnode,
	    md_find_name(mdp, vdev_propname),
	    md_find_name(mdp, "fwd"), listp);

	if (num_vdev <= 0) {
		DERR(vswp, "%s: didn't find any virtual-device nodes in MD",
		    __func__);
		goto md_prop_exit;
	}

	D2(vswp, "%s: %d virtual-device nodes found", __func__, num_vdev);

	/* Look for the virtual switch nodes in the list */
	for (idx = 0; idx < num_vdev; idx++) {
		if (md_get_prop_str(mdp, listp[idx],
		    "name", &node_name) != 0) {
			DERR(vswp, "%s: unable to get node name", __func__);
			continue;
		}

		if (strcmp(node_name, vsw_propname) == 0) {
			/* Virtual switch node */
			if (md_get_prop_val(mdp, listp[idx],
			    "cfg-handle", &md_inst) != 0) {
				DERR(vswp, "%s: unable to get cfg-handle from"
				    " node %d", __func__, idx);
				goto md_prop_exit;
			} else if (md_inst == obp_inst) {
				D2(vswp, "%s: found matching node (%d)"
				    " 0x%llx == 0x%llx", __func__, idx,
				    md_inst, obp_inst);
				found_node = B_TRUE;
				break;
			}
		}
	}

	if (!found_node) {
		DWARN(vswp, "%s: couldn't find correct vsw node", __func__);
		goto md_prop_exit;
	}

	/*
	 * Now, having found the correct node, get the various properties.
	 */

	if (md_get_prop_data(mdp, listp[idx], physdev_propname,
	    (uint8_t **)(&physname), &len) != 0) {
		cmn_err(CE_WARN, "%s: unable to get name(s) of physical "
		    "device(s) from MD", __func__);
	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
		cmn_err(CE_WARN, "%s is too long a device name", physname);
	} else {
		(void) strncpy(vswp->physname, physname, strlen(physname) + 1);
		vswp->mdprops |= VSW_MD_PHYSNAME;
		D2(vswp, "%s: using first device specified (%s)",
		    __func__, vswp->physname);
	}

#ifdef DEBUG
	/*
	 * As a temporary measure to aid testing we check to see if there
	 * is a vsw.conf file present. If there is, we use the value of the
	 * vsw_physname property in the file as the name of the physical
	 * device, overriding the value from the MD.
	 *
	 * There may be multiple devices listed, but for the moment
	 * we just use the first one.
	 */
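	/*
	 * e.g. a vsw.conf containing the single line (device name is
	 * hypothetical):
	 *
	 *	vsw_physname="e1000g0";
	 */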
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
		if ((strlen(dev) + 1) > LIFNAMSIZ) {
			cmn_err(CE_WARN, "%s is too long a device name", dev);
		} else {
			cmn_err(CE_NOTE, "%s: using device name (%s) from "
			    "config file", __func__, dev);

			(void) strncpy(vswp->physname, dev, strlen(dev) + 1);
			vswp->mdprops |= VSW_MD_PHYSNAME;
		}

		ddi_prop_free(dev);
	}
#endif

	/* mac address for vswitch device itself */
	if (md_get_prop_val(mdp, listp[idx],
	    macaddr_propname, &macaddr) != 0) {
		cmn_err(CE_WARN, "!Unable to get MAC address from MD");

		/*
		 * Fall back to using the mac address of the physical
		 * device.
		 */
		if (vsw_get_physaddr(vswp) == 0) {
			cmn_err(CE_NOTE, "!Using MAC address from physical "
			    "device (%s)", vswp->physname);
		}
	} else {
		READ_ENTER(&vswp->if_lockrw);
		for (i = ETHERADDRL - 1; i >= 0; i--) {
			vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
			macaddr >>= 8;
		}
		RW_EXIT(&vswp->if_lockrw);
		vswp->mdprops |= VSW_MD_MACADDR;
	}

	/*
	 * Get the switch-mode property. The modes are listed in
	 * decreasing order of preference, i.e. the preferred mode is
	 * the first item in the list.
	 */
	len = 0;
	vswp->smode_num = 0;
	if (md_get_prop_data(mdp, listp[idx], smode_propname,
	    (uint8_t **)(&smode), &len) != 0) {
		/*
		 * Unable to get switch-mode property from MD, nothing
		 * more we can do.
		 */
		cmn_err(CE_WARN, "!unable to get switch mode property");
		goto md_prop_exit;
	}

	curr_mode = smode;
	/*
	 * Modes of operation:
	 * 'switched'	 - layer 2 switching, underlying HW in
	 *		   programmed mode.
	 * 'promiscuous' - layer 2 switching, underlying HW in
	 *		   promiscuous mode.
	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
	 *		   in non-promiscuous mode.
	 */
	while ((curr_mode < (smode + len)) && (vswp->smode_num < NUM_SMODES)) {
		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
		if (strcmp(curr_mode, "switched") == 0) {
			vswp->smode[vswp->smode_num++] = VSW_LAYER2;
		} else if (strcmp(curr_mode, "promiscuous") == 0) {
			vswp->smode[vswp->smode_num++] = VSW_LAYER2_PROMISC;
		} else if (strcmp(curr_mode, "routed") == 0) {
			vswp->smode[vswp->smode_num++] = VSW_LAYER3;
		} else {
			cmn_err(CE_WARN, "Unknown switch mode %s, setting to"
			    " default switched mode", curr_mode);
			vswp->smode[vswp->smode_num++] = VSW_LAYER2;
		}
		curr_mode += strlen(curr_mode) + 1;
	}

	D2(vswp, "%d switching modes specified", vswp->smode_num);

	if (vswp->smode_num > 0)
		vswp->mdprops |= VSW_MD_SMODE;

md_prop_exit:
	(void) md_fini_handle(mdp);

	kmem_free(listp, listsz);

	D1(vswp, "%s: exit", __func__);
}
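/*
 * Summary of the flags accumulated above: vswp->mdprops collects
 * VSW_MD_PHYSNAME, VSW_MD_MACADDR, VSW_DEV_MACADDR and VSW_MD_SMODE to
 * record which properties were obtained; later code keys off these
 * (e.g. VSW_MD_PHYSNAME gates layer 2 setup, and the MACADDR flags gate
 * registration with the MAC layer).
 */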
/*
 * Get the mac address of the physical device.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_get_physaddr(vsw_t *vswp)
{
	mac_handle_t	mh;
	char		drv[LIFNAMSIZ];
	uint_t		ddi_instance;

	D1(vswp, "%s: enter", __func__);

	if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS)
		return (1);

	if (mac_open(vswp->physname, ddi_instance, &mh) != 0) {
		cmn_err(CE_WARN, "!mac_open %s failed", vswp->physname);
		return (1);
	}

	READ_ENTER(&vswp->if_lockrw);
	mac_unicst_get(mh, vswp->if_addr.ether_addr_octet);
	RW_EXIT(&vswp->if_lockrw);

	mac_close(mh);

	vswp->mdprops |= VSW_DEV_MACADDR;

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Check to see if the card supports the setting of multiple unicast
 * addresses.
 *
 * Returns 0 if the card supports the programming of multiple unicast
 * addresses and there are free address slots available, otherwise
 * returns 1.
 */
static int
vsw_get_hw_maddr(vsw_t *vswp)
{
	D1(vswp, "%s: enter", __func__);

	if (vswp->mh == NULL) {
		return (1);
	}

	if (!mac_capab_get(vswp->mh, MAC_CAPAB_MULTIADDRESS, &vswp->maddr)) {
		DWARN(vswp, "Unable to get capabilities of"
		    " underlying device (%s)", vswp->physname);
		return (1);
	}

	if (vswp->maddr.maddr_naddrfree == 0) {
		cmn_err(CE_WARN,
		    "!device %s has no free unicast address slots",
		    vswp->physname);
		return (1);
	}

	D2(vswp, "%s: %d addrs : %d free", __func__,
	    vswp->maddr.maddr_naddr, vswp->maddr.maddr_naddrfree);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Setup for layer 2 switching.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_setup_layer2(vsw_t *vswp)
{
	D1(vswp, "%s: enter", __func__);

	vsw_switch_frame = vsw_switch_l2_frame;

	/*
	 * Attempt to link into the MAC layer so we can get
	 * and send packets out over the physical adapter.
	 */
	if (vswp->mdprops & VSW_MD_PHYSNAME) {
		if (vsw_mac_attach(vswp) != 0) {
			/*
			 * Registration with the MAC layer has failed,
			 * so return 1 so that we can fall back to the
			 * next preferred switching method.
			 */
			cmn_err(CE_WARN, "!Unable to join as MAC layer "
			    "client");
			return (1);
		}

		if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) {
			/*
			 * Verify that underlying device can support multiple
			 * unicast mac addresses, and has free capacity.
			 */
			if (vsw_get_hw_maddr(vswp) != 0) {
				cmn_err(CE_WARN, "!unable to setup switching");
				vsw_mac_detach(vswp);
				return (1);
			}
		}
	} else {
		/*
		 * No physical device name found in MD, which is
		 * required for layer 2.
		 */
		cmn_err(CE_WARN, "!no physical device name specified");
		return (1);
	}

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static int
vsw_setup_layer3(vsw_t *vswp)
{
	D1(vswp, "%s: enter", __func__);

	D2(vswp, "%s: operating in layer 3 mode", __func__);
	vsw_switch_frame = vsw_switch_l3_frame;

	D1(vswp, "%s: exit", __func__);

	return (0);
}
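/*
 * Whichever setup routine wins, vsw_switch_frame ends up as the single
 * switching entry point: the receive callbacks (vsw_rx_cb(),
 * vsw_queue_worker()) and the MAC tx entry point (vsw_m_tx()) all
 * funnel frames through it.
 */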
/*
 * Link into the MAC layer to gain access to the services provided by
 * the underlying physical device driver (which should also have
 * registered with the MAC layer).
 *
 * Only when in layer 2 mode.
 */
static int
vsw_mac_attach(vsw_t *vswp)
{
	char	drv[LIFNAMSIZ];
	uint_t	ddi_instance;

	D1(vswp, "%s: enter", __func__);

	vswp->mh = NULL;
	vswp->mrh = NULL;
	vswp->mstarted = B_FALSE;
	vswp->mresources = B_FALSE;

	ASSERT(vswp->mdprops & VSW_MD_PHYSNAME);

	if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "invalid device name: %s", vswp->physname);
		goto mac_fail_exit;
	}
	if (mac_open(vswp->physname, ddi_instance, &vswp->mh) != 0) {
		cmn_err(CE_WARN, "mac_open %s failed", vswp->physname);
		goto mac_fail_exit;
	}

	ASSERT(vswp->mh != NULL);

	D2(vswp, "vsw_mac_attach: using device %s", vswp->physname);

	if (vsw_multi_ring_enable) {
		vsw_mac_ring_tbl_init(vswp);

		/*
		 * Register our receive callback.
		 */
		vswp->mrh = mac_rx_add(vswp->mh,
		    vsw_rx_queue_cb, (void *)vswp);

		/*
		 * Register our mac resource callback.
		 */
		mac_resource_set(vswp->mh, vsw_mac_ring_add_cb, (void *)vswp);
		vswp->mresources = B_TRUE;

		/*
		 * Get the ring resources available to us from
		 * the mac below us.
		 */
		mac_resources(vswp->mh);
	} else {
		/*
		 * Just register our rx callback function.
		 */
		vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp);
	}

	ASSERT(vswp->mrh != NULL);

	/* Get the MAC tx fn */
	vswp->txinfo = mac_tx_get(vswp->mh);

	/* start the interface */
	if (mac_start(vswp->mh) != 0) {
		cmn_err(CE_WARN, "could not start mac interface");
		goto mac_fail_exit;
	}

	vswp->mstarted = B_TRUE;

	D1(vswp, "%s: exit", __func__);
	return (0);

mac_fail_exit:
	vsw_mac_detach(vswp);

	D1(vswp, "%s: exit", __func__);
	return (1);
}

static void
vsw_mac_detach(vsw_t *vswp)
{
	D1(vswp, "vsw_mac_detach: enter");

	ASSERT(vswp != NULL);

	/*
	 * Guard against being called from the vsw_mac_attach() failure
	 * path before the ring table (or the mac handle below) was set up.
	 */
	if (vsw_multi_ring_enable && (vswp->mac_ring_tbl != NULL)) {
		vsw_mac_ring_tbl_destroy(vswp);
	}

	if (vswp->mh != NULL) {
		if (vswp->mstarted)
			mac_stop(vswp->mh);
		if (vswp->mrh != NULL)
			mac_rx_remove(vswp->mh, vswp->mrh);
		if (vswp->mresources)
			mac_resource_set(vswp->mh, NULL, NULL);
		mac_close(vswp->mh);
	}

	vswp->mrh = NULL;
	vswp->mh = NULL;
	vswp->txinfo = NULL;
	vswp->mstarted = B_FALSE;

	D1(vswp, "vsw_mac_detach: exit");
}
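/*
 * port->addr_set (used throughout the routines below) tracks how a
 * port's unicast address is currently realized in the hardware:
 * VSW_ADDR_UNSET (not programmed), VSW_ADDR_HW (occupies a unicast
 * address slot) or VSW_ADDR_PROMISC (relies on the device being in
 * promiscuous mode).
 */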
/*
 * Set up the physical device, depending on the mode specified and on
 * the capabilities and capacity of the underlying device.
 *
 * If in layer 3 mode, then do nothing.
 *
 * If in layer 2 programmed mode, attempt to program the unicast address
 * associated with the port into the physical device. If this is not
 * possible due to resource exhaustion, or simply because the device does
 * not support multiple unicast addresses, then, if required, fall back
 * to putting the card into promiscuous mode.
 *
 * If in promiscuous mode, then simply set the card into promiscuous mode.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_set_hw(vsw_t *vswp, vsw_port_t *port)
{
	mac_multi_addr_t	mac_addr;
	void			*mah;
	int			err;

	D1(vswp, "%s: enter", __func__);

	if (vswp->smode[vswp->smode_idx] == VSW_LAYER3)
		return (0);

	if (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) {
		return (vsw_set_hw_promisc(vswp, port));
	}

	if (vswp->maddr.maddr_handle == NULL)
		return (1);

	mah = vswp->maddr.maddr_handle;

	/*
	 * Attempt to program the unicast address into the HW.
	 */
	mac_addr.mma_addrlen = ETHERADDRL;
	ether_copy(&port->p_macaddr, &mac_addr.mma_addr);

	err = vswp->maddr.maddr_add(mah, &mac_addr);
	if (err != 0) {
		cmn_err(CE_WARN, "!failed to program addr "
		    "%x:%x:%x:%x:%x:%x for port %d into device %s "
		    ": err %d", port->p_macaddr.ether_addr_octet[0],
		    port->p_macaddr.ether_addr_octet[1],
		    port->p_macaddr.ether_addr_octet[2],
		    port->p_macaddr.ether_addr_octet[3],
		    port->p_macaddr.ether_addr_octet[4],
		    port->p_macaddr.ether_addr_octet[5],
		    port->p_instance, vswp->physname, err);

		/*
		 * Mark that an attempt should be made to re-config
		 * sometime in future if a port is deleted.
		 */
		vswp->recfg_reqd = B_TRUE;

		/*
		 * Only 1 mode specified, nothing more to do.
		 */
		if (vswp->smode_num == 1)
			return (err);

		/*
		 * If promiscuous was the next mode specified, try to
		 * set the card into that mode.
		 */
		if ((vswp->smode_idx <= (vswp->smode_num - 2)) &&
		    (vswp->smode[vswp->smode_idx + 1] ==
		    VSW_LAYER2_PROMISC)) {
			vswp->smode_idx += 1;
			return (vsw_set_hw_promisc(vswp, port));
		}
		return (err);
	}

	port->addr_slot = mac_addr.mma_slot;
	port->addr_set = VSW_ADDR_HW;

	D2(vswp, "programmed addr %x:%x:%x:%x:%x:%x for port %d "
	    "into slot %d of device %s",
	    port->p_macaddr.ether_addr_octet[0],
	    port->p_macaddr.ether_addr_octet[1],
	    port->p_macaddr.ether_addr_octet[2],
	    port->p_macaddr.ether_addr_octet[3],
	    port->p_macaddr.ether_addr_octet[4],
	    port->p_macaddr.ether_addr_octet[5],
	    port->p_instance, port->addr_slot, vswp->physname);

	D1(vswp, "%s: exit", __func__);

	return (0);
}
/*
 * If in layer 3 mode do nothing.
 *
 * If in layer 2 switched mode remove the address from the physical
 * device.
 *
 * If in layer 2 promiscuous mode disable promisc mode.
 *
 * Returns 0 on success.
 */
static int
vsw_unset_hw(vsw_t *vswp, vsw_port_t *port)
{
	int	err;
	void	*mah;

	D1(vswp, "%s: enter", __func__);

	if (vswp->smode[vswp->smode_idx] == VSW_LAYER3)
		return (0);

	if (port->addr_set == VSW_ADDR_PROMISC) {
		return (vsw_unset_hw_promisc(vswp, port));
	}

	if (port->addr_set == VSW_ADDR_HW) {
		if (vswp->mh == NULL)
			return (1);

		if (vswp->maddr.maddr_handle == NULL)
			return (1);

		mah = vswp->maddr.maddr_handle;

		err = vswp->maddr.maddr_remove(mah, port->addr_slot);
		if (err != 0) {
			cmn_err(CE_WARN, "!Unable to remove addr "
			    "%x:%x:%x:%x:%x:%x for port %d from device %s"
			    " : (err %d)",
			    port->p_macaddr.ether_addr_octet[0],
			    port->p_macaddr.ether_addr_octet[1],
			    port->p_macaddr.ether_addr_octet[2],
			    port->p_macaddr.ether_addr_octet[3],
			    port->p_macaddr.ether_addr_octet[4],
			    port->p_macaddr.ether_addr_octet[5],
			    port->p_instance, vswp->physname, err);
			return (err);
		}

		port->addr_set = VSW_ADDR_UNSET;

		D2(vswp, "removed addr %x:%x:%x:%x:%x:%x for "
		    "port %d from device %s",
		    port->p_macaddr.ether_addr_octet[0],
		    port->p_macaddr.ether_addr_octet[1],
		    port->p_macaddr.ether_addr_octet[2],
		    port->p_macaddr.ether_addr_octet[3],
		    port->p_macaddr.ether_addr_octet[4],
		    port->p_macaddr.ether_addr_octet[5],
		    port->p_instance, vswp->physname);
	}

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Set network card into promisc mode.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_set_hw_promisc(vsw_t *vswp, vsw_port_t *port)
{
	D1(vswp, "%s: enter", __func__);

	if (vswp->mh == NULL)
		return (1);

	if (vswp->promisc_cnt++ == 0) {
		if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) {
			vswp->promisc_cnt--;
			return (1);
		}
		cmn_err(CE_NOTE, "!switching device %s into promiscuous mode",
		    vswp->physname);
	}
	port->addr_set = VSW_ADDR_PROMISC;

	D1(vswp, "%s: exit", __func__);

	return (0);
}
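/*
 * promisc_cnt is a reference count of the ports relying on promiscuous
 * mode: the device is switched into promisc mode when the first such
 * port arrives (above) and out again only when the last one is released
 * (below).
 */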
/*
 * Turn off promiscuous mode on network card.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_unset_hw_promisc(vsw_t *vswp, vsw_port_t *port)
{
	vsw_port_list_t	*plist = &vswp->plist;

	D1(vswp, "%s: enter", __func__);

	if (vswp->mh == NULL)
		return (1);

	ASSERT(port->addr_set == VSW_ADDR_PROMISC);

	if (--vswp->promisc_cnt == 0) {
		if (mac_promisc_set(vswp->mh, B_FALSE, MAC_DEVPROMISC) != 0) {
			vswp->promisc_cnt++;
			return (1);
		}

		/*
		 * We are exiting promisc mode either because we were
		 * only in promisc mode because we had failed over from
		 * switched mode due to HW resource issues, or the user
		 * wanted the card in promisc mode for all the ports and
		 * the last port is now being deleted. Tweak the message
		 * accordingly.
		 */
		if (plist->num_ports != 0) {
			cmn_err(CE_NOTE, "!switching device %s back to "
			    "programmed mode", vswp->physname);
		} else {
			cmn_err(CE_NOTE, "!switching device %s out of "
			    "promiscuous mode", vswp->physname);
		}
	}
	port->addr_set = VSW_ADDR_UNSET;

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Determine whether or not we are operating in our preferred
 * mode and, if not, whether the physical resources now allow us
 * to operate in it.
 *
 * Should only be invoked after the port which is being deleted has been
 * removed from the port list.
 */
static int
vsw_reconfig_hw(vsw_t *vswp)
{
	vsw_port_list_t		*plist = &vswp->plist;
	mac_multi_addr_t	mac_addr;
	vsw_port_t		*tp;
	void			*mah;
	int			rv = 0;
	int			s_idx;

	D1(vswp, "%s: enter", __func__);

	if (vswp->maddr.maddr_handle == NULL)
		return (1);

	/*
	 * Check if there are now sufficient HW resources to
	 * attempt a re-config.
	 */
	if (plist->num_ports > vswp->maddr.maddr_naddrfree)
		return (1);

	/*
	 * If we are in layer 2 (i.e. switched) or would like to be
	 * in layer 2, then check if any ports need to be programmed
	 * into the HW.
	 *
	 * This can happen in two cases - switched was specified as
	 * the preferred mode of operation but we exhausted the HW
	 * resources and so failed over to the next specified mode,
	 * or switched was the only mode specified so after HW
	 * resources were exhausted there was nothing more we
	 * could do.
	 */
	if (vswp->smode_idx > 0)
		s_idx = vswp->smode_idx - 1;
	else
		s_idx = vswp->smode_idx;

	if (vswp->smode[s_idx] == VSW_LAYER2) {
		mah = vswp->maddr.maddr_handle;

		D2(vswp, "%s: attempting reconfig..", __func__);

		/*
		 * Scan the port list for any port whose address has not
		 * been programmed into HW - there should be a max of one.
		 */
		for (tp = plist->head; tp != NULL; tp = tp->p_next) {
			if (tp->addr_set != VSW_ADDR_HW) {
				mac_addr.mma_addrlen = ETHERADDRL;
				ether_copy(&tp->p_macaddr, &mac_addr.mma_addr);

				rv = vswp->maddr.maddr_add(mah, &mac_addr);
				if (rv != 0) {
					DWARN(vswp, "Error setting addr in "
					    "HW for port %d err %d",
					    tp->p_instance, rv);
					goto reconfig_err_exit;
				}
				tp->addr_slot = mac_addr.mma_slot;

				D2(vswp, "re-programmed port %d "
				    "addr %x:%x:%x:%x:%x:%x into slot %d"
				    " of device %s", tp->p_instance,
				    tp->p_macaddr.ether_addr_octet[0],
				    tp->p_macaddr.ether_addr_octet[1],
				    tp->p_macaddr.ether_addr_octet[2],
				    tp->p_macaddr.ether_addr_octet[3],
				    tp->p_macaddr.ether_addr_octet[4],
				    tp->p_macaddr.ether_addr_octet[5],
				    tp->addr_slot, vswp->physname);

				/*
				 * If up to now we had to put the card into
				 * promisc mode to see this address, we
				 * can now safely disable promisc mode.
				 */
				if (tp->addr_set == VSW_ADDR_PROMISC)
					(void) vsw_unset_hw_promisc(vswp, tp);

				tp->addr_set = VSW_ADDR_HW;
			}
		}

		/* no further re-config needed */
		vswp->recfg_reqd = B_FALSE;

		vswp->smode_idx = s_idx;

		return (0);
	}

reconfig_err_exit:
	return (rv);
}

static void
vsw_mac_ring_tbl_entry_init(vsw_t *vswp, vsw_mac_ring_t *ringp)
{
	ringp->ring_state = VSW_MAC_RING_FREE;
	ringp->ring_arg = NULL;
	ringp->ring_blank = NULL;
	ringp->ring_vqp = NULL;
	ringp->ring_vswp = vswp;
}

static void
vsw_mac_ring_tbl_init(vsw_t *vswp)
{
	int	i;

	mutex_init(&vswp->mac_ring_lock, NULL, MUTEX_DRIVER, NULL);

	vswp->mac_ring_tbl_sz = vsw_mac_rx_rings;
	vswp->mac_ring_tbl =
	    kmem_alloc(vsw_mac_rx_rings * sizeof (vsw_mac_ring_t), KM_SLEEP);

	for (i = 0; i < vswp->mac_ring_tbl_sz; i++)
		vsw_mac_ring_tbl_entry_init(vswp, &vswp->mac_ring_tbl[i]);
}

static void
vsw_mac_ring_tbl_destroy(vsw_t *vswp)
{
	int	i;

	mutex_enter(&vswp->mac_ring_lock);
	for (i = 0; i < vswp->mac_ring_tbl_sz; i++) {
		if (vswp->mac_ring_tbl[i].ring_state != VSW_MAC_RING_FREE) {
			/*
			 * Destroy the queue.
			 */
			vsw_queue_stop(vswp->mac_ring_tbl[i].ring_vqp);
			vsw_queue_destroy(vswp->mac_ring_tbl[i].ring_vqp);

			/*
			 * Re-initialize the structure.
			 */
			vsw_mac_ring_tbl_entry_init(vswp,
			    &vswp->mac_ring_tbl[i]);
		}
	}
	mutex_exit(&vswp->mac_ring_lock);

	mutex_destroy(&vswp->mac_ring_lock);
	kmem_free(vswp->mac_ring_tbl,
	    vswp->mac_ring_tbl_sz * sizeof (vsw_mac_ring_t));
	vswp->mac_ring_tbl = NULL;
	vswp->mac_ring_tbl_sz = 0;
}
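/*
 * mac_ring_lock serializes the resource-add callback below against
 * vsw_mac_ring_tbl_destroy() above, so a late callback from the
 * underlying driver cannot race with teardown of the ring table.
 */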
/*
 * Handle resource add callbacks from the driver below.
 */
static mac_resource_handle_t
vsw_mac_ring_add_cb(void *arg, mac_resource_t *mrp)
{
	vsw_t		*vswp = (vsw_t *)arg;
	mac_rx_fifo_t	*mrfp = (mac_rx_fifo_t *)mrp;
	vsw_mac_ring_t	*ringp;
	vsw_queue_t	*vqp;
	int		i;

	ASSERT(vswp != NULL);
	ASSERT(mrp != NULL);
	ASSERT(vswp->mac_ring_tbl != NULL);

	D1(vswp, "%s: enter", __func__);

	/*
	 * Check to make sure we have the correct resource type.
	 */
	if (mrp->mr_type != MAC_RX_FIFO)
		return (NULL);

	/*
	 * Find an open entry in the ring table.
	 */
	mutex_enter(&vswp->mac_ring_lock);
	for (i = 0; i < vswp->mac_ring_tbl_sz; i++) {
		ringp = &vswp->mac_ring_tbl[i];

		/*
		 * Check for an empty slot; if found, then setup queue
		 * and thread.
		 */
		if (ringp->ring_state == VSW_MAC_RING_FREE) {
			/*
			 * Create the queue for this ring.
			 */
			vqp = vsw_queue_create();

			/*
			 * Initialize the ring data structure.
			 */
			ringp->ring_vqp = vqp;
			ringp->ring_arg = mrfp->mrf_arg;
			ringp->ring_blank = mrfp->mrf_blank;
			ringp->ring_state = VSW_MAC_RING_INUSE;

			/*
			 * Create the worker thread.
			 */
			vqp->vq_worker = thread_create(NULL, 0,
			    vsw_queue_worker, ringp, 0, &p0,
			    TS_RUN, minclsyspri);
			if (vqp->vq_worker == NULL) {
				vsw_queue_destroy(vqp);
				vsw_mac_ring_tbl_entry_init(vswp, ringp);
				ringp = NULL;
			}

			mutex_exit(&vswp->mac_ring_lock);
			D1(vswp, "%s: exit", __func__);
			return ((mac_resource_handle_t)ringp);
		}
	}
	mutex_exit(&vswp->mac_ring_lock);

	/*
	 * No slots in the ring table available.
	 */
	D1(vswp, "%s: exit", __func__);
	return (NULL);
}

static void
vsw_queue_stop(vsw_queue_t *vqp)
{
	mutex_enter(&vqp->vq_lock);

	if (vqp->vq_state == VSW_QUEUE_RUNNING) {
		vqp->vq_state = VSW_QUEUE_STOP;
		cv_signal(&vqp->vq_cv);

		while (vqp->vq_state != VSW_QUEUE_DRAINED)
			cv_wait(&vqp->vq_cv, &vqp->vq_lock);
	}

	mutex_exit(&vqp->vq_lock);
}

static vsw_queue_t *
vsw_queue_create(void)
{
	vsw_queue_t *vqp;

	vqp = kmem_zalloc(sizeof (vsw_queue_t), KM_SLEEP);

	mutex_init(&vqp->vq_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vqp->vq_cv, NULL, CV_DRIVER, NULL);
	vqp->vq_first = NULL;
	vqp->vq_last = NULL;
	vqp->vq_state = VSW_QUEUE_STOP;

	return (vqp);
}

static void
vsw_queue_destroy(vsw_queue_t *vqp)
{
	cv_destroy(&vqp->vq_cv);
	mutex_destroy(&vqp->vq_lock);
	kmem_free(vqp, sizeof (vsw_queue_t));
}

static void
vsw_queue_worker(vsw_mac_ring_t *rrp)
{
	mblk_t		*mp;
	vsw_queue_t	*vqp = rrp->ring_vqp;
	vsw_t		*vswp = rrp->ring_vswp;

	mutex_enter(&vqp->vq_lock);

	ASSERT(vqp->vq_state == VSW_QUEUE_STOP);

	/*
	 * Set the state to running, since the thread is now active.
	 */
	vqp->vq_state = VSW_QUEUE_RUNNING;

	while (vqp->vq_state == VSW_QUEUE_RUNNING) {
		/*
		 * Wait for work to do, or for the state to change
		 * to not running.
		 */
		while ((vqp->vq_state == VSW_QUEUE_RUNNING) &&
		    (vqp->vq_first == NULL)) {
			cv_wait(&vqp->vq_cv, &vqp->vq_lock);
		}

		/*
		 * Process packets that we received from the interface.
		 */
		if (vqp->vq_first != NULL) {
			mp = vqp->vq_first;

			vqp->vq_first = NULL;
			vqp->vq_last = NULL;

			mutex_exit(&vqp->vq_lock);

			/* switch the chain of packets received */
			vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL);

			mutex_enter(&vqp->vq_lock);
		}
	}

	/*
	 * We are drained; signal that we are done.
	 */
	vqp->vq_state = VSW_QUEUE_DRAINED;
	cv_signal(&vqp->vq_cv);

	/*
	 * Drop the lock.
	 */
	mutex_exit(&vqp->vq_lock);

	/*
	 * Exit the thread.
	 */
	thread_exit();
}
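/*
 * Receive path when vsw_multi_ring_enable is set:
 *
 *	MAC layer -> vsw_rx_queue_cb() -> per-ring vsw_queue_t
 *		  -> vsw_queue_worker() -> vsw_switch_frame()
 */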
/*
 * vsw_rx_queue_cb() - receive callback routine used when
 * vsw_multi_ring_enable is non-zero. Queue the packets
 * on a packet queue for a worker thread to process.
 */
static void
vsw_rx_queue_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
{
	vsw_mac_ring_t	*ringp = (vsw_mac_ring_t *)mrh;
	vsw_t		*vswp = (vsw_t *)arg;
	vsw_queue_t	*vqp;
	mblk_t		*bp, *last;

	ASSERT(mrh != NULL);
	ASSERT(vswp != NULL);
	ASSERT(mp != NULL);

	D1(vswp, "%s: enter", __func__);

	/*
	 * Find the last element in the mblk chain.
	 */
	bp = mp;
	do {
		last = bp;
		bp = bp->b_next;
	} while (bp != NULL);

	/* Get the queue for the packets */
	vqp = ringp->ring_vqp;

	/*
	 * Grab the lock so that we can queue the packets.
	 */
	mutex_enter(&vqp->vq_lock);

	if (vqp->vq_state != VSW_QUEUE_RUNNING) {
		freemsg(mp);
		goto vsw_rx_queue_cb_exit;
	}

	/*
	 * Add the mblk chain to the queue. If there
	 * are already mblks in the queue, then add the new
	 * chain to the end.
	 */
	if (vqp->vq_first == NULL)
		vqp->vq_first = mp;
	else
		vqp->vq_last->b_next = mp;

	vqp->vq_last = last;

	/*
	 * Signal the worker thread that there is work to do.
	 */
	cv_signal(&vqp->vq_cv);

	/*
	 * Let go of the lock and exit.
	 */
vsw_rx_queue_cb_exit:
	mutex_exit(&vqp->vq_lock);
	D1(vswp, "%s: exit", __func__);
}

/*
 * Receive callback routine. Invoked by MAC layer when there
 * are pkts being passed up from physical device.
 *
 * PERF: It may be more efficient when the card is in promisc
 * mode to check the dest address of the pkts here (against
 * the FDB) rather than checking later. Needs to be investigated.
 */
static void
vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
{
	_NOTE(ARGUNUSED(mrh))

	vsw_t	*vswp = (vsw_t *)arg;

	ASSERT(vswp != NULL);

	D1(vswp, "vsw_rx_cb: enter");

	/* switch the chain of packets received */
	vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL);

	D1(vswp, "vsw_rx_cb: exit");
}

/*
 * Send a message out over the physical device via the MAC layer.
 *
 * Returns any mblks that it was unable to transmit.
 */
static mblk_t *
vsw_tx_msg(vsw_t *vswp, mblk_t *mp)
{
	const mac_txinfo_t	*mtp;
	mblk_t			*nextp;

	if (vswp->mh == NULL) {
		DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail");
		return (mp);
	}

	for (;;) {
		nextp = mp->b_next;
		mp->b_next = NULL;

		mtp = vswp->txinfo;
		if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) {
			mp->b_next = nextp;
			break;
		}

		if ((mp = nextp) == NULL)
			break;
	}

	return (mp);
}
/*
 * Register with the MAC layer as a network device, so we
 * can be plumbed if necessary.
 */
static int
vsw_mac_register(vsw_t *vswp)
{
	mac_register_t	*macp;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vswp;
	macp->m_dip = vswp->dip;
	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
	macp->m_callbacks = &vsw_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;
	rv = mac_register(macp, &vswp->if_mh);
	mac_free(macp);
	if (rv == 0)
		vswp->if_state |= VSW_IF_REG;

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

static int
vsw_mac_unregister(vsw_t *vswp)
{
	int	rv = 0;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);

	if (vswp->if_state & VSW_IF_REG) {
		rv = mac_unregister(vswp->if_mh);
		if (rv != 0) {
			DWARN(vswp, "%s: unable to unregister from MAC "
			    "framework", __func__);

			RW_EXIT(&vswp->if_lockrw);
			D1(vswp, "%s: fail exit", __func__);
			return (rv);
		}

		/* mark i/f as down and unregistered */
		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
	}
	RW_EXIT(&vswp->if_lockrw);

	vswp->mdprops &= ~(VSW_MD_MACADDR | VSW_DEV_MACADDR);

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

static int
vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vsw_t	*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	if (vswp->mh == NULL)
		return (EINVAL);

	/* return stats from underlying device */
	*val = mac_stat_get(vswp->mh, stat);
	return (0);
}

static void
vsw_m_stop(void *arg)
{
	vsw_t	*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	vswp->if_state &= ~VSW_IF_UP;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
}

static int
vsw_m_start(void *arg)
{
	vsw_t	*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	vswp->if_state |= VSW_IF_UP;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
	return (0);
}

/*
 * Change the local interface address.
 */
static int
vsw_m_unicst(void *arg, const uint8_t *macaddr)
{
	vsw_t	*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	ether_copy(macaddr, &vswp->if_addr);
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}
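/*
 * vsw_m_multicst() below keeps three views of the multicast state in
 * step: the multicast hash table (vsw_add_mcst()/vsw_del_mcst()), the
 * mcap list hanging off the vsw_t (protected by mca_lock), and the
 * underlying device itself (mac_multicst_add()/_remove()).
 */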
			mcst_p = kmem_zalloc(sizeof (mcst_addr_t),
			    KM_NOSLEEP);
			if (mcst_p == NULL) {
				DERR(vswp, "%s unable to alloc mem",
				    __func__);
				return (1);
			}
			mcst_p->addr = addr;

			mutex_enter(&vswp->mca_lock);
			mcst_p->nextp = vswp->mcap;
			vswp->mcap = mcst_p;
			mutex_exit(&vswp->mca_lock);

			/*
			 * Call into the underlying driver to program the
			 * address into HW.
			 */
			if (vswp->mh != NULL) {
				ret = mac_multicst_add(vswp->mh, mca);
				if (ret != 0) {
					cmn_err(CE_WARN, "!unable to add "
					    "multicast address");
					goto vsw_remove_addr;
				}
			}
		} else {
			cmn_err(CE_WARN, "!unable to add multicast address");
		}
		return (ret);
	}

vsw_remove_addr:

	D2(vswp, "%s: removing multicast", __func__);
	/*
	 * Remove the address from the hash table..
	 */
	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {

		/*
		 * ..and then from the list maintained in the
		 * vsw_t structure.
		 */
		vsw_del_addr(VSW_LOCALDEV, vswp, addr);

		if (vswp->mh != NULL)
			(void) mac_multicst_remove(vswp->mh, mca);
	}

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static int
vsw_m_promisc(void *arg, boolean_t on)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	if (on)
		vswp->if_state |= VSW_IF_PROMISC;
	else
		vswp->if_state &= ~VSW_IF_PROMISC;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static mblk_t *
vsw_m_tx(void *arg, mblk_t *mp)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);

	D1(vswp, "%s: exit", __func__);

	return (NULL);
}

/*
 * Register for machine description (MD) updates.
 */
static void
vsw_mdeg_register(vsw_t *vswp)
{
	mdeg_prop_spec_t	*pspecp;
	mdeg_node_spec_t	*inst_specp;
	mdeg_handle_t		mdeg_hdl;
	size_t			templatesz;
	int			inst, rv;

	D1(vswp, "%s: enter", __func__);

	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
	    DDI_PROP_DONTPASS, reg_propname, -1);
	if (inst == -1) {
		DERR(vswp, "%s: unable to get %s property",
		    __func__, reg_propname);
		return;
	}

	D2(vswp, "%s: instance %d registering with mdeg", __func__, inst);

	/*
	 * Allocate and initialize a per-instance copy
	 * of the global property spec array that will
	 * uniquely identify this vsw instance.
	 */
	templatesz = sizeof (vsw_prop_template);
	pspecp = kmem_zalloc(templatesz, KM_SLEEP);

	bcopy(vsw_prop_template, pspecp, templatesz);

	VSW_SET_MDEG_PROP_INST(pspecp, inst);

	/* initialize the complete prop spec structure */
	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
	inst_specp->namep = "virtual-device";
	inst_specp->specp = pspecp;

	/* perform the registration */
	rv = mdeg_register(inst_specp, &vport_match, vsw_mdeg_cb,
	    (void *)vswp, &mdeg_hdl);

	if (rv != MDEG_SUCCESS) {
		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
		kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
		kmem_free(pspecp, templatesz);
		return;
	}

	/* save off data that will be needed later */
	vswp->inst_spec = inst_specp;
	vswp->mdeg_hdl = mdeg_hdl;

	D1(vswp, "%s: exit", __func__);
}

static void
vsw_mdeg_unregister(vsw_t *vswp)
{
	D1(vswp, "vsw_mdeg_unregister: enter");

	(void) mdeg_unregister(vswp->mdeg_hdl);

	/*
	 * Check inst_spec itself before dereferencing it for
	 * the embedded property spec.
	 */
	if (vswp->inst_spec != NULL) {
		if (vswp->inst_spec->specp != NULL) {
			kmem_free(vswp->inst_spec->specp,
			    sizeof (vsw_prop_template));
			vswp->inst_spec->specp = NULL;
		}

		kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
		vswp->inst_spec = NULL;
	}

	D1(vswp, "vsw_mdeg_unregister: exit");
}

static int
vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
{
	vsw_t		*vswp;
	int		idx;
	md_t		*mdp;
	mde_cookie_t	node;
	uint64_t	inst;

	if (resp == NULL)
		return (MDEG_FAILURE);

	vswp = (vsw_t *)cb_argp;

	D1(vswp, "%s: added %d : removed %d : matched %d",
	    __func__, resp->added.nelem, resp->removed.nelem,
	    resp->match_prev.nelem);

	/* process added ports */
	for (idx = 0; idx < resp->added.nelem; idx++) {
		mdp = resp->added.mdp;
		node = resp->added.mdep[idx];

		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);

		if (vsw_port_add(vswp, mdp, &node) != 0) {
			cmn_err(CE_WARN, "Unable to add new port (0x%lx)",
			    node);
		}
	}

	/* process removed ports */
	for (idx = 0; idx < resp->removed.nelem; idx++) {
		mdp = resp->removed.mdp;
		node = resp->removed.mdep[idx];

		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
			DERR(vswp, "%s: prop(%s) not found port(%d)",
			    __func__, id_propname, idx);
			continue;
		}

		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);

		if (vsw_port_detach(vswp, inst) != 0) {
			cmn_err(CE_WARN, "Unable to remove port %ld", inst);
		}
	}

	/*
	 * Currently no support for updating already active ports.
	 * So, ignore the match_curr and match_prev arrays for now.
	 */

	D1(vswp, "%s: exit", __func__);

	return (MDEG_SUCCESS);
}
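/*
 * For reference, the per-instance registration built above amounts to
 * a property spec along the following lines (an illustrative sketch
 * only; the real vsw_prop_template definition lives elsewhere in this
 * driver and may differ in detail):
 *
 *	static mdeg_prop_spec_t vsw_prop_template[] = {
 *		{ MDET_PROP_STR,  "name",	"virtual-network-switch" },
 *		{ MDET_PROP_VAL,  "cfg-handle",	NULL },
 *		{ MDET_LIST_END,  NULL,		NULL }
 *	};
 *
 * VSW_SET_MDEG_PROP_INST() patches the "cfg-handle" slot with the
 * instance number read from the "reg" property, so vsw_mdeg_cb()
 * fires only for MD nodes belonging to this vsw instance.
 */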
/*
 * Add a new port to the system.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
{
	uint64_t		ldc_id;
	uint8_t			*addrp;
	int			i, addrsz;
	int			num_nodes = 0, nchan = 0;
	int			listsz = 0;
	mde_cookie_t		*listp = NULL;
	struct ether_addr	ea;
	uint64_t		macaddr;
	uint64_t		inst = 0;
	vsw_port_t		*port;

	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
		DWARN(vswp, "%s: prop(%s) not found", __func__,
		    id_propname);
		return (1);
	}

	/*
	 * Find the channel endpoint node(s) (which should be under this
	 * port node) which contain the channel id(s).
	 */
	if ((num_nodes = md_node_count(mdp)) <= 0) {
		DERR(vswp, "%s: invalid number of nodes found (%d)",
		    __func__, num_nodes);
		return (1);
	}

	/* allocate enough space for node list */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

	nchan = md_scan_dag(mdp, *node,
	    md_find_name(mdp, chan_propname),
	    md_find_name(mdp, "fwd"), listp);

	if (nchan <= 0) {
		DWARN(vswp, "%s: no %s nodes found", __func__,
		    chan_propname);
		kmem_free(listp, listsz);
		return (1);
	}

	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);

	/* use property from first node found */
	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
		    id_propname);
		kmem_free(listp, listsz);
		return (1);
	}

	/* don't need the list any more */
	kmem_free(listp, listsz);

	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);

	/* read mac-address property */
	if (md_get_prop_data(mdp, *node, remaddr_propname,
	    &addrp, &addrsz)) {
		DWARN(vswp, "%s: prop(%s) not found",
		    __func__, remaddr_propname);
		return (1);
	}

	if (addrsz < ETHERADDRL) {
		DWARN(vswp, "%s: invalid address size", __func__);
		return (1);
	}

	macaddr = *((uint64_t *)addrp);
	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);

	for (i = ETHERADDRL - 1; i >= 0; i--) {
		ea.ether_addr_octet[i] = macaddr & 0xFF;
		macaddr >>= 8;
	}

	if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) {
		DERR(vswp, "%s: failed to attach port", __func__);
		return (1);
	}

	port = vsw_lookup_port(vswp, (int)inst);

	/* just successfully created the port, so it should exist */
	ASSERT(port != NULL);

	return (0);
}

/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids,
    struct ether_addr *macaddr)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port, **prev_port;
	int		i;

	D1(vswp, "%s: enter : port %d", __func__, p_instance);

	/* port already exists? */
	READ_ENTER(&plist->lockrw);
	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			DWARN(vswp, "%s: port instance %d already attached",
			    __func__, p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}
	}
	RW_EXIT(&plist->lockrw);

	port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
	port->p_vswp = vswp;
	port->p_instance = p_instance;
	port->p_ldclist.num_ldcs = 0;
	port->p_ldclist.head = NULL;
	port->addr_set = VSW_ADDR_UNSET;

	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);

	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&port->ref_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->ref_cv, NULL, CV_DRIVER, NULL);

	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
	port->state = VSW_PORT_INIT;

	if (nids > VSW_PORT_MAX_LDCS) {
		D2(vswp, "%s: using first of %d ldc ids",
		    __func__, nids);
		nids = VSW_PORT_MAX_LDCS;
	}

	D2(vswp, "%s: %d nids", __func__, nids);
	for (i = 0; i < nids; i++) {
		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
			DERR(vswp, "%s: ldc_attach failed", __func__);

			rw_destroy(&port->p_ldclist.lockrw);

			cv_destroy(&port->ref_cv);
			mutex_destroy(&port->ref_lock);

			cv_destroy(&port->state_cv);
			mutex_destroy(&port->state_lock);

			mutex_destroy(&port->tx_lock);
			mutex_destroy(&port->mca_lock);
			kmem_free(port, sizeof (vsw_port_t));
			return (1);
		}
	}

	ether_copy(macaddr, &port->p_macaddr);

	WRITE_ENTER(&plist->lockrw);

	/* create the fdb entry for this port/mac address */
	(void) vsw_add_fdb(vswp, port);

	(void) vsw_set_hw(vswp, port);

	/* link it into the list of ports for this vsw instance */
	prev_port = (vsw_port_t **)(&plist->head);
	port->p_next = *prev_port;
	*prev_port = port;
	plist->num_ports++;
	RW_EXIT(&plist->lockrw);

	/*
	 * Initialise the port and any ldcs under it.
	 */
	(void) vsw_init_ldcs(port);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
	vsw_port_t	*port = NULL;
	vsw_port_list_t	*plist = &vswp->plist;

	D1(vswp, "%s: enter: port id %d", __func__, p_instance);

	WRITE_ENTER(&plist->lockrw);

	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	if (vsw_plist_del_node(vswp, port)) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	/* Remove the address if it was programmed into HW. */
	(void) vsw_unset_hw(vswp, port);

	/* Remove the fdb entry for this port/mac address */
	(void) vsw_del_fdb(vswp, port);

	/* Remove any multicast addresses.. */
	vsw_del_mcst_port(port);

	/*
	 * No longer need to hold the writer lock on the port list now
	 * that we have unlinked the target port from the list.
	 */
	RW_EXIT(&plist->lockrw);

	READ_ENTER(&plist->lockrw);

	if (vswp->recfg_reqd)
		(void) vsw_reconfig_hw(vswp);

	RW_EXIT(&plist->lockrw);

	if (vsw_port_delete(port)) {
		return (1);
	}

	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
	return (0);
}

/*
 * Detach all active ports.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_detach_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port = NULL;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&plist->lockrw);

	while ((port = plist->head) != NULL) {
		if (vsw_plist_del_node(vswp, port)) {
			DERR(vswp, "%s: Error deleting port %d"
			    " from port list", __func__,
			    port->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}

		/* Remove the address if it was programmed into HW. */
		(void) vsw_unset_hw(vswp, port);

		/* Remove the fdb entry for this port/mac address */
		(void) vsw_del_fdb(vswp, port);

		/* Remove any multicast addresses.. */
		vsw_del_mcst_port(port);

		/*
		 * No longer need to hold the lock on the port list
		 * now that we have unlinked the target port from the
		 * list.
		 */
		RW_EXIT(&plist->lockrw);
		if (vsw_port_delete(port)) {
			DERR(vswp, "%s: Error deleting port %d",
			    __func__, port->p_instance);
			return (1);
		}
		WRITE_ENTER(&plist->lockrw);
	}
	RW_EXIT(&plist->lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Delete the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_delete(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

	(void) vsw_uninit_ldcs(port);

	/*
	 * Wait for any pending ctrl msg tasks which reference this
	 * port to finish.
	 */
	if (vsw_drain_port_taskq(port))
		return (1);

	/*
	 * Wait for the port reference count to hit zero.
	 */
	mutex_enter(&port->ref_lock);
	while (port->ref_cnt != 0)
		cv_wait(&port->ref_cv, &port->ref_lock);
	mutex_exit(&port->ref_lock);

	/*
	 * Wait for any active callbacks to finish.
	 */
	if (vsw_drain_ldcs(port))
		return (1);

	ldcl = &port->p_ldclist;
	WRITE_ENTER(&ldcl->lockrw);
	while (ldcl->num_ldcs > 0) {
		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {
			cmn_err(CE_WARN, "unable to detach ldc %ld",
			    ldcl->head->ldc_id);
			RW_EXIT(&ldcl->lockrw);
			return (1);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	rw_destroy(&port->p_ldclist.lockrw);

	mutex_destroy(&port->mca_lock);
	mutex_destroy(&port->tx_lock);
	cv_destroy(&port->ref_cv);
	mutex_destroy(&port->ref_lock);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	kmem_free(port, sizeof (vsw_port_t));

	D1(vswp, "%s: exit", __func__);

	return (0);
}
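/*
 * The teardown above depends on a conventional reference-count
 * barrier. In outline, code which takes a reference on a port is
 * expected to follow this protocol (an illustrative sketch of the
 * idiom, not a verbatim excerpt from this file):
 *
 *	mutex_enter(&port->ref_lock);
 *	port->ref_cnt++;
 *	mutex_exit(&port->ref_lock);
 *
 *	... use the port ...
 *
 *	mutex_enter(&port->ref_lock);
 *	if (--port->ref_cnt == 0)
 *		cv_signal(&port->ref_cv);
 *	mutex_exit(&port->ref_lock);
 *
 * This pairs with the cv_wait() loop in vsw_port_delete() above,
 * which cannot return until the last reference is dropped.
 */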
/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp = NULL;
	ldc_attr_t	attr;
	ldc_status_t	istatus;
	int		status = DDI_FAILURE;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		DERR(vswp, "%s: kmem_zalloc failed", __func__);
		return (1);
	}
	ldcp->ldc_id = ldc_id;

	/* allocate pool of receive mblks */
	rv = vio_create_mblks(vsw_num_mblks, vsw_mblk_size, &(ldcp->rxh));
	if (rv) {
		DWARN(vswp, "%s: unable to create free mblk pool for"
		    " channel %ld (rv %d)", __func__, ldc_id, rv);
		kmem_free(ldcp, sizeof (vsw_ldc_t));
		return (1);
	}

	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);

	/* required for handshake with peer */
	ldcp->local_session = (uint64_t)ddi_get_lbolt();
	ldcp->peer_session = 0;
	ldcp->session_status = 0;

	mutex_init(&ldcp->hss_lock, NULL, MUTEX_DRIVER, NULL);
	ldcp->hss_id = 1;	/* Initial handshake session id */

	/* only set for outbound lane, inbound set by peer */
	mutex_init(&ldcp->lane_in.seq_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->lane_out.seq_lock, NULL, MUTEX_DRIVER, NULL);
	vsw_set_lane_attr(vswp, &ldcp->lane_out);

	attr.devclass = LDC_DEV_NT_SVC;
	attr.instance = ddi_get_instance(vswp->dip);
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = VSW_LDC_MTU;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
		    __func__, ldc_id, status);
		goto ldc_attach_fail;
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb,
	    (caddr_t)ldcp);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
		    __func__, ldc_id, status);
		(void) ldc_fini(ldcp->ldc_handle);
		goto ldc_attach_fail;
	}

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: ldc_status failed", __func__);
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		(void) ldc_fini(ldcp->ldc_handle);
		goto ldc_attach_fail;
	}

	ldcp->ldc_status = istatus;
	ldcp->ldc_port = port;
	ldcp->ldc_vswp = vswp;

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldcl->lockrw);
	ldcp->ldc_next = ldcl->head;
	ldcl->head = ldcp;
	ldcl->num_ldcs++;
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);

ldc_attach_fail:
	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_cblock);

	cv_destroy(&ldcp->drain_cv);

	if (ldcp->rxh != NULL) {
		if (vio_destroy_mblks(ldcp->rxh) != 0) {
			/*
			 * Something odd has happened, as the destroy
			 * will only fail if some mblks have been allocated
			 * from the pool already (which shouldn't happen)
			 * and have not been returned.
			 *
			 * Add the pool pointer to a list maintained in
			 * the device instance. Another attempt will be made
			 * to free the pool when the device itself detaches.
			 */
			cmn_err(CE_WARN, "Creation of ldc channel %ld failed"
			    " and cannot destroy associated mblk pool",
			    ldc_id);
			ldcp->rxh->nextp = vswp->rxh;
			vswp->rxh = ldcp->rxh;
		}
	}
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->hss_lock);

	mutex_destroy(&ldcp->lane_in.seq_lock);
	mutex_destroy(&ldcp->lane_out.seq_lock);
	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (1);
}

/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_t	*ldcp, *prev_ldcp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	int		rv;

	prev_ldcp = NULL;
	for (ldcp = ldcl->head; ldcp != NULL;
	    prev_ldcp = ldcp, ldcp = ldcp->ldc_next) {
		if (ldcp->ldc_id == ldc_id) {
			break;
		}
	}

	/* specified ldc id not found */
	if (ldcp == NULL) {
		DERR(vswp, "%s: ldcp = NULL", __func__);
		return (1);
	}

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
	 */
	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	/*
	 * If the close fails we are in serious trouble, as we won't
	 * be able to delete the parent port.
	 */
	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
		DERR(vswp, "%s: error %d closing channel %lld",
		    __func__, rv, ldcp->ldc_id);
		return (1);
	}

	(void) ldc_fini(ldcp->ldc_handle);

	ldcp->ldc_status = LDC_INIT;
	ldcp->ldc_handle = NULL;
	ldcp->ldc_vswp = NULL;

	if (ldcp->rxh != NULL) {
		if (vio_destroy_mblks(ldcp->rxh)) {
			/*
			 * Most likely some mblks are still in use and
			 * have not been returned to the pool. Add the pool
			 * to the list maintained in the device instance.
			 * Another attempt will be made to destroy the pool
			 * when the device detaches.
			 */
			ldcp->rxh->nextp = vswp->rxh;
			vswp->rxh = ldcp->rxh;
		}
	}

	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_cblock);
	cv_destroy(&ldcp->drain_cv);
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->hss_lock);
	mutex_destroy(&ldcp->lane_in.seq_lock);
	mutex_destroy(&ldcp->lane_out.seq_lock);

	/* unlink it from the list and free it */
	if (prev_ldcp == NULL)
		ldcl->head = ldcp->ldc_next;
	else
		prev_ldcp->ldc_next = ldcp->ldc_next;
	ldcl->num_ldcs--;
	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (0);
}
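/*
 * A sketch of the deferred clean-up both failure paths above rely on:
 * at device detach time the driver can walk the list of pools whose
 * destruction failed and retry each one. The helper below is
 * hypothetical (the detach path is not shown in this part of the
 * file), but it illustrates the intent of the vswp->rxh list:
 *
 *	static void
 *	vsw_destroy_rxpools(vsw_t *vswp)
 *	{
 *		vio_mblk_pool_t	*poolp;
 *
 *		while ((poolp = vswp->rxh) != NULL) {
 *			vswp->rxh = poolp->nextp;
 *			(void) vio_destroy_mblks(poolp);
 *		}
 *	}
 */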
/*
 * Open and attempt to bring up the channel. Note that the channel
 * can only be brought up if the peer has also opened the channel.
 *
 * Returns 0 if able to open and bring up the channel, otherwise
 * returns 1.
 */
static int
vsw_ldc_init(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	ldc_status_t	istatus = 0;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	LDC_ENTER_LOCK(ldcp);

	/* don't start at 0 in case clients don't like that */
	ldcp->next_ident = 1;

	rv = ldc_open(ldcp->ldc_handle);
	if (rv != 0) {
		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
		    __func__, ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
		    __func__, ldcp->ldc_id, istatus);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	ldcp->ldc_status = istatus;
	rv = ldc_up(ldcp->ldc_handle);
	if (rv != 0) {
		/*
		 * Not a fatal error for ldc_up() to fail, as the peer
		 * end point may simply not be ready yet.
		 */
		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
		    ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	/*
	 * The ldc_up() call is non-blocking, so we need to explicitly
	 * check the channel status to see if the channel is in fact UP.
	 */
	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	} else if (istatus != LDC_UP) {
		DERR(vswp, "%s: id(%lld) status(%d) is not UP",
		    __func__, ldcp->ldc_id, istatus);
	} else {
		ldcp->ldc_status = istatus;
	}

	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/* disable callbacks on the channel */
static int
vsw_ldc_uninit(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	int	rv;

	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);

	LDC_ENTER_LOCK(ldcp);

	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		DERR(vswp, "vsw_ldc_uninit(%lld): error disabling "
		    "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	ldcp->ldc_status = LDC_INIT;

	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);

	return (0);
}

static int
vsw_init_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_init(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	return (0);
}

static int
vsw_uninit_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	D1(NULL, "vsw_uninit_ldcs: enter\n");

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_uninit(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	D1(NULL, "vsw_uninit_ldcs: exit\n");

	return (0);
}
3152 * 3153 * A short explaination of what we are doing below.. 3154 * 3155 * The simplest approach would be to have a reference counter in 3156 * the ldc structure which is increment/decremented by the callbacks as 3157 * they use the channel. The drain function could then simply disable any 3158 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately 3159 * there is a tiny window here - before the callback is able to get the lock 3160 * on the channel it is interrupted and this function gets to execute. It 3161 * sees that the ref count is zero and believes its free to delete the 3162 * associated data structures. 3163 * 3164 * We get around this by taking advantage of the fact that before the ldc 3165 * framework invokes a callback it sets a flag to indicate that there is a 3166 * callback active (or about to become active). If when we attempt to 3167 * unregister a callback when this active flag is set then the unregister 3168 * will fail with EWOULDBLOCK. 3169 * 3170 * If the unregister fails we do a cv_timedwait. We will either be signaled 3171 * by the callback as it is exiting (note we have to wait a short period to 3172 * allow the callback to return fully to the ldc framework and it to clear 3173 * the active flag), or by the timer expiring. In either case we again attempt 3174 * the unregister. We repeat this until we can succesfully unregister the 3175 * callback. 3176 * 3177 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch 3178 * the case where the callback has finished but the ldc framework has not yet 3179 * cleared the active flag. In this case we would never get a cv_signal. 3180 */ 3181 static int 3182 vsw_drain_ldcs(vsw_port_t *port) 3183 { 3184 vsw_ldc_list_t *ldcl = &port->p_ldclist; 3185 vsw_ldc_t *ldcp; 3186 vsw_t *vswp = port->p_vswp; 3187 3188 D1(vswp, "%s: enter", __func__); 3189 3190 READ_ENTER(&ldcl->lockrw); 3191 3192 ldcp = ldcl->head; 3193 3194 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 3195 /* 3196 * If we can unregister the channel callback then we 3197 * know that there is no callback either running or 3198 * scheduled to run for this channel so move on to next 3199 * channel in the list. 3200 */ 3201 mutex_enter(&ldcp->drain_cv_lock); 3202 3203 /* prompt active callbacks to quit */ 3204 ldcp->drain_state = VSW_LDC_DRAINING; 3205 3206 if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) { 3207 D2(vswp, "%s: unreg callback for chan %ld", __func__, 3208 ldcp->ldc_id); 3209 mutex_exit(&ldcp->drain_cv_lock); 3210 continue; 3211 } else { 3212 /* 3213 * If we end up here we know that either 1) a callback 3214 * is currently executing, 2) is about to start (i.e. 3215 * the ldc framework has set the active flag but 3216 * has not actually invoked the callback yet, or 3) 3217 * has finished and has returned to the ldc framework 3218 * but the ldc framework has not yet cleared the 3219 * active bit. 3220 * 3221 * Wait for it to finish. 3222 */ 3223 while (ldc_unreg_callback(ldcp->ldc_handle) 3224 == EWOULDBLOCK) 3225 (void) cv_timedwait(&ldcp->drain_cv, 3226 &ldcp->drain_cv_lock, lbolt + hz); 3227 3228 mutex_exit(&ldcp->drain_cv_lock); 3229 D2(vswp, "%s: unreg callback for chan %ld after " 3230 "timeout", __func__, ldcp->ldc_id); 3231 } 3232 } 3233 RW_EXIT(&ldcl->lockrw); 3234 3235 D1(vswp, "%s: exit", __func__); 3236 return (0); 3237 } 3238 3239 /* 3240 * Wait until all tasks which reference this port have completed. 
3241 * 3242 * Prior to this function being invoked each channel under this port 3243 * should have been quiesced via ldc_set_cb_mode(DISABLE). 3244 */ 3245 static int 3246 vsw_drain_port_taskq(vsw_port_t *port) 3247 { 3248 vsw_t *vswp = port->p_vswp; 3249 3250 D1(vswp, "%s: enter", __func__); 3251 3252 /* 3253 * Mark the port as in the process of being detached, and 3254 * dispatch a marker task to the queue so we know when all 3255 * relevant tasks have completed. 3256 */ 3257 mutex_enter(&port->state_lock); 3258 port->state = VSW_PORT_DETACHING; 3259 3260 if ((vswp->taskq_p == NULL) || 3261 (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task, 3262 port, DDI_NOSLEEP) != DDI_SUCCESS)) { 3263 DERR(vswp, "%s: unable to dispatch marker task", 3264 __func__); 3265 mutex_exit(&port->state_lock); 3266 return (1); 3267 } 3268 3269 /* 3270 * Wait for the marker task to finish. 3271 */ 3272 while (port->state != VSW_PORT_DETACHABLE) 3273 cv_wait(&port->state_cv, &port->state_lock); 3274 3275 mutex_exit(&port->state_lock); 3276 3277 D1(vswp, "%s: exit", __func__); 3278 3279 return (0); 3280 } 3281 3282 static void 3283 vsw_marker_task(void *arg) 3284 { 3285 vsw_port_t *port = arg; 3286 vsw_t *vswp = port->p_vswp; 3287 3288 D1(vswp, "%s: enter", __func__); 3289 3290 mutex_enter(&port->state_lock); 3291 3292 /* 3293 * No further tasks should be dispatched which reference 3294 * this port so ok to mark it as safe to detach. 3295 */ 3296 port->state = VSW_PORT_DETACHABLE; 3297 3298 cv_signal(&port->state_cv); 3299 3300 mutex_exit(&port->state_lock); 3301 3302 D1(vswp, "%s: exit", __func__); 3303 } 3304 3305 static vsw_port_t * 3306 vsw_lookup_port(vsw_t *vswp, int p_instance) 3307 { 3308 vsw_port_list_t *plist = &vswp->plist; 3309 vsw_port_t *port; 3310 3311 for (port = plist->head; port != NULL; port = port->p_next) { 3312 if (port->p_instance == p_instance) { 3313 D2(vswp, "vsw_lookup_port: found p_instance\n"); 3314 return (port); 3315 } 3316 } 3317 3318 return (NULL); 3319 } 3320 3321 /* 3322 * Search for and remove the specified port from the port 3323 * list. Returns 0 if able to locate and remove port, otherwise 3324 * returns 1. 3325 */ 3326 static int 3327 vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port) 3328 { 3329 vsw_port_list_t *plist = &vswp->plist; 3330 vsw_port_t *curr_p, *prev_p; 3331 3332 if (plist->head == NULL) 3333 return (1); 3334 3335 curr_p = prev_p = plist->head; 3336 3337 while (curr_p != NULL) { 3338 if (curr_p == port) { 3339 if (prev_p == curr_p) { 3340 plist->head = curr_p->p_next; 3341 } else { 3342 prev_p->p_next = curr_p->p_next; 3343 } 3344 plist->num_ports--; 3345 break; 3346 } else { 3347 prev_p = curr_p; 3348 curr_p = curr_p->p_next; 3349 } 3350 } 3351 return (0); 3352 } 3353 3354 /* 3355 * Interrupt handler for ldc messages. 3356 */ 3357 static uint_t 3358 vsw_ldc_cb(uint64_t event, caddr_t arg) 3359 { 3360 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 3361 vsw_t *vswp = ldcp->ldc_vswp; 3362 ldc_status_t lstatus; 3363 int rv; 3364 3365 D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 3366 3367 mutex_enter(&ldcp->ldc_cblock); 3368 3369 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 3370 mutex_exit(&ldcp->ldc_cblock); 3371 return (LDC_SUCCESS); 3372 } 3373 3374 if (event & LDC_EVT_UP) { 3375 /* 3376 * Channel has come up, get the state and then start 3377 * the handshake. 
3378 */ 3379 rv = ldc_status(ldcp->ldc_handle, &lstatus); 3380 if (rv != 0) { 3381 cmn_err(CE_WARN, "Unable to read channel state"); 3382 } 3383 ldcp->ldc_status = lstatus; 3384 3385 D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)", 3386 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 3387 3388 vsw_restart_handshake(ldcp); 3389 3390 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 3391 } 3392 3393 if (event & LDC_EVT_READ) { 3394 /* 3395 * Data available for reading. 3396 */ 3397 D2(vswp, "%s: id(ld) event(%llx) data READ", 3398 __func__, ldcp->ldc_id, event); 3399 3400 vsw_process_pkt(ldcp); 3401 3402 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 3403 3404 goto vsw_cb_exit; 3405 } 3406 3407 if (event & LDC_EVT_RESET) { 3408 rv = ldc_status(ldcp->ldc_handle, &lstatus); 3409 if (rv != 0) { 3410 cmn_err(CE_WARN, "Unable to read channel state"); 3411 } else { 3412 ldcp->ldc_status = lstatus; 3413 } 3414 D2(vswp, "%s: id(%ld) event(%llx) RESET: status (%ld)", 3415 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 3416 } 3417 3418 if (event & LDC_EVT_DOWN) { 3419 rv = ldc_status(ldcp->ldc_handle, &lstatus); 3420 if (rv != 0) { 3421 cmn_err(CE_WARN, "Unable to read channel state"); 3422 } else { 3423 ldcp->ldc_status = lstatus; 3424 } 3425 3426 D2(vswp, "%s: id(%ld) event(%llx) DOWN: status (%ld)", 3427 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 3428 3429 } 3430 3431 /* 3432 * Catch either LDC_EVT_WRITE which we don't support or any 3433 * unknown event. 3434 */ 3435 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET 3436 | LDC_EVT_DOWN | LDC_EVT_READ)) { 3437 3438 DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 3439 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 3440 } 3441 3442 vsw_cb_exit: 3443 mutex_exit(&ldcp->ldc_cblock); 3444 3445 /* 3446 * Let the drain function know we are finishing if it 3447 * is waiting. 3448 */ 3449 mutex_enter(&ldcp->drain_cv_lock); 3450 if (ldcp->drain_state == VSW_LDC_DRAINING) 3451 cv_signal(&ldcp->drain_cv); 3452 mutex_exit(&ldcp->drain_cv_lock); 3453 3454 return (LDC_SUCCESS); 3455 } 3456 3457 /* 3458 * (Re)start a handshake with our peer by sending them 3459 * our version info. 3460 */ 3461 static void 3462 vsw_restart_handshake(vsw_ldc_t *ldcp) 3463 { 3464 vsw_t *vswp = ldcp->ldc_vswp; 3465 vsw_port_t *port; 3466 vsw_ldc_list_t *ldcl; 3467 3468 D1(vswp, "vsw_restart_handshake: enter"); 3469 3470 port = ldcp->ldc_port; 3471 ldcl = &port->p_ldclist; 3472 3473 WRITE_ENTER(&ldcl->lockrw); 3474 3475 D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 3476 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3477 3478 vsw_free_lane_resources(ldcp, INBOUND); 3479 vsw_free_lane_resources(ldcp, OUTBOUND); 3480 RW_EXIT(&ldcl->lockrw); 3481 3482 ldcp->lane_in.lstate = 0; 3483 ldcp->lane_out.lstate = 0; 3484 3485 /* 3486 * Remove parent port from any multicast groups 3487 * it may have registered with. Client must resend 3488 * multicast add command after handshake completes. 3489 */ 3490 (void) vsw_del_fdb(vswp, port); 3491 3492 vsw_del_mcst_port(port); 3493 3494 ldcp->hphase = VSW_MILESTONE0; 3495 3496 ldcp->peer_session = 0; 3497 ldcp->session_status = 0; 3498 3499 /* 3500 * We now increment the transaction group id. This allows 3501 * us to identify and disard any tasks which are still pending 3502 * on the taskq and refer to the handshake session we are about 3503 * to restart. These stale messages no longer have any real 3504 * meaning. 
3505 */ 3506 mutex_enter(&ldcp->hss_lock); 3507 ldcp->hss_id++; 3508 mutex_exit(&ldcp->hss_lock); 3509 3510 if (ldcp->hcnt++ > vsw_num_handshakes) { 3511 cmn_err(CE_WARN, "exceeded number of permitted " 3512 "handshake attempts (%d) on channel %ld", 3513 ldcp->hcnt, ldcp->ldc_id); 3514 return; 3515 } 3516 3517 vsw_send_ver(ldcp); 3518 3519 D1(vswp, "vsw_restart_handshake: exit"); 3520 } 3521 3522 /* 3523 * returns 0 if legal for event signified by flag to have 3524 * occured at the time it did. Otherwise returns 1. 3525 */ 3526 int 3527 vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag) 3528 { 3529 vsw_t *vswp = ldcp->ldc_vswp; 3530 uint64_t state; 3531 uint64_t phase; 3532 3533 if (dir == INBOUND) 3534 state = ldcp->lane_in.lstate; 3535 else 3536 state = ldcp->lane_out.lstate; 3537 3538 phase = ldcp->hphase; 3539 3540 switch (flag) { 3541 case VSW_VER_INFO_RECV: 3542 if (phase > VSW_MILESTONE0) { 3543 DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV" 3544 " when in state %d\n", ldcp->ldc_id, phase); 3545 vsw_restart_handshake(ldcp); 3546 return (1); 3547 } 3548 break; 3549 3550 case VSW_VER_ACK_RECV: 3551 case VSW_VER_NACK_RECV: 3552 if (!(state & VSW_VER_INFO_SENT)) { 3553 DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK" 3554 " or VER_NACK when in state %d\n", 3555 ldcp->ldc_id, phase); 3556 vsw_restart_handshake(ldcp); 3557 return (1); 3558 } else 3559 state &= ~VSW_VER_INFO_SENT; 3560 break; 3561 3562 case VSW_ATTR_INFO_RECV: 3563 if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) { 3564 DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV" 3565 " when in state %d\n", ldcp->ldc_id, phase); 3566 vsw_restart_handshake(ldcp); 3567 return (1); 3568 } 3569 break; 3570 3571 case VSW_ATTR_ACK_RECV: 3572 case VSW_ATTR_NACK_RECV: 3573 if (!(state & VSW_ATTR_INFO_SENT)) { 3574 DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK" 3575 " or ATTR_NACK when in state %d\n", 3576 ldcp->ldc_id, phase); 3577 vsw_restart_handshake(ldcp); 3578 return (1); 3579 } else 3580 state &= ~VSW_ATTR_INFO_SENT; 3581 break; 3582 3583 case VSW_DRING_INFO_RECV: 3584 if (phase < VSW_MILESTONE1) { 3585 DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV" 3586 " when in state %d\n", ldcp->ldc_id, phase); 3587 vsw_restart_handshake(ldcp); 3588 return (1); 3589 } 3590 break; 3591 3592 case VSW_DRING_ACK_RECV: 3593 case VSW_DRING_NACK_RECV: 3594 if (!(state & VSW_DRING_INFO_SENT)) { 3595 DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK" 3596 " or DRING_NACK when in state %d\n", 3597 ldcp->ldc_id, phase); 3598 vsw_restart_handshake(ldcp); 3599 return (1); 3600 } else 3601 state &= ~VSW_DRING_INFO_SENT; 3602 break; 3603 3604 case VSW_RDX_INFO_RECV: 3605 if (phase < VSW_MILESTONE3) { 3606 DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV" 3607 " when in state %d\n", ldcp->ldc_id, phase); 3608 vsw_restart_handshake(ldcp); 3609 return (1); 3610 } 3611 break; 3612 3613 case VSW_RDX_ACK_RECV: 3614 case VSW_RDX_NACK_RECV: 3615 if (!(state & VSW_RDX_INFO_SENT)) { 3616 DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK" 3617 " or RDX_NACK when in state %d\n", 3618 ldcp->ldc_id, phase); 3619 vsw_restart_handshake(ldcp); 3620 return (1); 3621 } else 3622 state &= ~VSW_RDX_INFO_SENT; 3623 break; 3624 3625 case VSW_MCST_INFO_RECV: 3626 if (phase < VSW_MILESTONE3) { 3627 DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV" 3628 " when in state %d\n", ldcp->ldc_id, phase); 3629 vsw_restart_handshake(ldcp); 3630 return (1); 3631 } 3632 break; 3633 3634 default: 3635 DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)", 3636 
	if (dir == INBOUND)
		ldcp->lane_in.lstate = state;
	else
		ldcp->lane_out.lstate = state;

	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);

	return (0);
}

void
vsw_next_milestone(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
	    ldcp->ldc_id, ldcp->hphase);

	DUMP_FLAGS(ldcp->lane_in.lstate);
	DUMP_FLAGS(ldcp->lane_out.lstate);

	switch (ldcp->hphase) {

	case VSW_MILESTONE0:
		/*
		 * If we haven't started to handshake with our peer,
		 * start to do so now.
		 */
		if (ldcp->lane_out.lstate == 0) {
			D2(vswp, "%s: (chan %lld) starting handshake "
			    "with peer", __func__, ldcp->ldc_id);
			vsw_restart_handshake(ldcp);
		}

		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated version info.
		 */
		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
		    (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 0",
			    __func__, ldcp->ldc_id);

			/*
			 * Next milestone is passed when attribute
			 * information has been successfully exchanged.
			 */
			ldcp->hphase = VSW_MILESTONE1;
			vsw_send_attr(ldcp);
		}
		break;

	case VSW_MILESTONE1:
		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated attribute information.
		 */
		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {

			ldcp->hphase = VSW_MILESTONE2;

			/*
			 * If the peer device has said it wishes to
			 * use descriptor rings then we send it our ring
			 * info, otherwise we just set up a private ring
			 * which uses an internal buffer.
			 */
			if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE)
				vsw_send_dring_info(ldcp);
		}
		break;

	case VSW_MILESTONE2:
		/*
		 * If the peer has indicated in its attribute message that
		 * it wishes to use descriptor rings then the only way
		 * to pass this milestone is for us to have received
		 * valid dring info.
		 *
		 * If the peer is not using descriptor rings then just fall
		 * through.
		 */
		if ((ldcp->lane_in.xfer_mode == VIO_DRING_MODE) &&
		    (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT)))
			break;

		D2(vswp, "%s: (chan %lld) leaving milestone 2",
		    __func__, ldcp->ldc_id);

		ldcp->hphase = VSW_MILESTONE3;
		vsw_send_rdx(ldcp);
		break;

	case VSW_MILESTONE3:
		/*
		 * Pass this milestone when all parameters have been
		 * successfully exchanged and RDX sent in both directions.
		 *
		 * Mark the outbound lane as available to transmit data.
		 */
3742 */ 3743 if ((ldcp->lane_in.lstate & VSW_RDX_ACK_SENT) && 3744 (ldcp->lane_out.lstate & VSW_RDX_ACK_RECV)) { 3745 3746 D2(vswp, "%s: (chan %lld) leaving milestone 3", 3747 __func__, ldcp->ldc_id); 3748 D2(vswp, "%s: ** handshake complete **", __func__); 3749 ldcp->lane_out.lstate |= VSW_LANE_ACTIVE; 3750 ldcp->hphase = VSW_MILESTONE4; 3751 ldcp->hcnt = 0; 3752 DISPLAY_STATE(); 3753 } 3754 break; 3755 3756 case VSW_MILESTONE4: 3757 D2(vswp, "%s: (chan %lld) in milestone 4", __func__, 3758 ldcp->ldc_id); 3759 break; 3760 3761 default: 3762 DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__, 3763 ldcp->ldc_id, ldcp->hphase); 3764 } 3765 3766 D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id, 3767 ldcp->hphase); 3768 } 3769 3770 /* 3771 * Check if major version is supported. 3772 * 3773 * Returns 0 if finds supported major number, and if necessary 3774 * adjusts the minor field. 3775 * 3776 * Returns 1 if can't match major number exactly. Sets mjor/minor 3777 * to next lowest support values, or to zero if no other values possible. 3778 */ 3779 static int 3780 vsw_supported_version(vio_ver_msg_t *vp) 3781 { 3782 int i; 3783 3784 D1(NULL, "vsw_supported_version: enter"); 3785 3786 for (i = 0; i < VSW_NUM_VER; i++) { 3787 if (vsw_versions[i].ver_major == vp->ver_major) { 3788 /* 3789 * Matching or lower major version found. Update 3790 * minor number if necessary. 3791 */ 3792 if (vp->ver_minor > vsw_versions[i].ver_minor) { 3793 D2(NULL, "%s: adjusting minor value" 3794 " from %d to %d", __func__, 3795 vp->ver_minor, 3796 vsw_versions[i].ver_minor); 3797 vp->ver_minor = vsw_versions[i].ver_minor; 3798 } 3799 3800 return (0); 3801 } 3802 3803 if (vsw_versions[i].ver_major < vp->ver_major) { 3804 if (vp->ver_minor > vsw_versions[i].ver_minor) { 3805 D2(NULL, "%s: adjusting minor value" 3806 " from %d to %d", __func__, 3807 vp->ver_minor, 3808 vsw_versions[i].ver_minor); 3809 vp->ver_minor = vsw_versions[i].ver_minor; 3810 } 3811 return (1); 3812 } 3813 } 3814 3815 /* No match was possible, zero out fields */ 3816 vp->ver_major = 0; 3817 vp->ver_minor = 0; 3818 3819 D1(NULL, "vsw_supported_version: exit"); 3820 3821 return (1); 3822 } 3823 3824 /* 3825 * Main routine for processing messages received over LDC. 3826 */ 3827 static void 3828 vsw_process_pkt(void *arg) 3829 { 3830 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 3831 vsw_t *vswp = ldcp->ldc_vswp; 3832 size_t msglen; 3833 vio_msg_tag_t tag; 3834 def_msg_t dmsg; 3835 int rv = 0; 3836 3837 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 3838 3839 /* 3840 * If channel is up read messages until channel is empty. 3841 */ 3842 do { 3843 msglen = sizeof (dmsg); 3844 rv = ldc_read(ldcp->ldc_handle, (caddr_t)&dmsg, &msglen); 3845 3846 if (rv != 0) { 3847 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) " 3848 "len(%d)\n", __func__, ldcp->ldc_id, 3849 rv, msglen); 3850 break; 3851 } 3852 3853 if (msglen == 0) { 3854 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 3855 ldcp->ldc_id); 3856 break; 3857 } 3858 3859 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 3860 ldcp->ldc_id, msglen); 3861 3862 /* 3863 * Figure out what sort of packet we have gotten by 3864 * examining the msg tag, and then switch it appropriately. 
3865 */ 3866 bcopy(&dmsg, &tag, sizeof (vio_msg_tag_t)); 3867 3868 switch (tag.vio_msgtype) { 3869 case VIO_TYPE_CTRL: 3870 vsw_dispatch_ctrl_task(ldcp, &dmsg, tag); 3871 break; 3872 case VIO_TYPE_DATA: 3873 vsw_process_data_pkt(ldcp, &dmsg, tag); 3874 break; 3875 case VIO_TYPE_ERR: 3876 vsw_process_err_pkt(ldcp, &dmsg, tag); 3877 break; 3878 default: 3879 DERR(vswp, "%s: Unknown tag(%lx) ", __func__, 3880 "id(%lx)\n", tag.vio_msgtype, ldcp->ldc_id); 3881 break; 3882 } 3883 } while (msglen); 3884 3885 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 3886 } 3887 3888 /* 3889 * Dispatch a task to process a VIO control message. 3890 */ 3891 static void 3892 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t tag) 3893 { 3894 vsw_ctrl_task_t *ctaskp = NULL; 3895 vsw_port_t *port = ldcp->ldc_port; 3896 vsw_t *vswp = port->p_vswp; 3897 3898 D1(vswp, "%s: enter", __func__); 3899 3900 /* 3901 * We need to handle RDX ACK messages in-band as once they 3902 * are exchanged it is possible that we will get an 3903 * immediate (legitimate) data packet. 3904 */ 3905 if ((tag.vio_subtype_env == VIO_RDX) && 3906 (tag.vio_subtype == VIO_SUBTYPE_ACK)) { 3907 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_ACK_RECV)) 3908 return; 3909 3910 ldcp->lane_out.lstate |= VSW_RDX_ACK_RECV; 3911 vsw_next_milestone(ldcp); 3912 D2(vswp, "%s (%ld) handling RDX_ACK in place", __func__, 3913 ldcp->ldc_id); 3914 return; 3915 } 3916 3917 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 3918 3919 if (ctaskp == NULL) { 3920 DERR(vswp, "%s: unable to alloc space for ctrl" 3921 " msg", __func__); 3922 vsw_restart_handshake(ldcp); 3923 return; 3924 } 3925 3926 ctaskp->ldcp = ldcp; 3927 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 3928 mutex_enter(&ldcp->hss_lock); 3929 ctaskp->hss_id = ldcp->hss_id; 3930 mutex_exit(&ldcp->hss_lock); 3931 3932 /* 3933 * Dispatch task to processing taskq if port is not in 3934 * the process of being detached. 3935 */ 3936 mutex_enter(&port->state_lock); 3937 if (port->state == VSW_PORT_INIT) { 3938 if ((vswp->taskq_p == NULL) || 3939 (ddi_taskq_dispatch(vswp->taskq_p, 3940 vsw_process_ctrl_pkt, ctaskp, DDI_NOSLEEP) 3941 != DDI_SUCCESS)) { 3942 DERR(vswp, "%s: unable to dispatch task to taskq", 3943 __func__); 3944 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 3945 mutex_exit(&port->state_lock); 3946 vsw_restart_handshake(ldcp); 3947 return; 3948 } 3949 } else { 3950 DWARN(vswp, "%s: port %d detaching, not dispatching " 3951 "task", __func__, port->p_instance); 3952 } 3953 3954 mutex_exit(&port->state_lock); 3955 3956 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 3957 ldcp->ldc_id); 3958 D1(vswp, "%s: exit", __func__); 3959 } 3960 3961 /* 3962 * Process a VIO ctrl message. Invoked from taskq. 
3963 */ 3964 static void 3965 vsw_process_ctrl_pkt(void *arg) 3966 { 3967 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 3968 vsw_ldc_t *ldcp = ctaskp->ldcp; 3969 vsw_t *vswp = ldcp->ldc_vswp; 3970 vio_msg_tag_t tag; 3971 uint16_t env; 3972 3973 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3974 3975 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 3976 env = tag.vio_subtype_env; 3977 3978 /* stale pkt check */ 3979 mutex_enter(&ldcp->hss_lock); 3980 if (ctaskp->hss_id < ldcp->hss_id) { 3981 DWARN(vswp, "%s: discarding stale packet belonging to" 3982 " earlier (%ld) handshake session", __func__, 3983 ctaskp->hss_id); 3984 mutex_exit(&ldcp->hss_lock); 3985 return; 3986 } 3987 mutex_exit(&ldcp->hss_lock); 3988 3989 /* session id check */ 3990 if (ldcp->session_status & VSW_PEER_SESSION) { 3991 if (ldcp->peer_session != tag.vio_sid) { 3992 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3993 __func__, ldcp->ldc_id, tag.vio_sid); 3994 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 3995 vsw_restart_handshake(ldcp); 3996 return; 3997 } 3998 } 3999 4000 /* 4001 * Switch on vio_subtype envelope, then let lower routines 4002 * decide if its an INFO, ACK or NACK packet. 4003 */ 4004 switch (env) { 4005 case VIO_VER_INFO: 4006 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 4007 break; 4008 case VIO_DRING_REG: 4009 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 4010 break; 4011 case VIO_DRING_UNREG: 4012 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 4013 break; 4014 case VIO_ATTR_INFO: 4015 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 4016 break; 4017 case VNET_MCAST_INFO: 4018 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 4019 break; 4020 case VIO_RDX: 4021 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 4022 break; 4023 default: 4024 DERR(vswp, "%s : unknown vio_subtype_env (%x)\n", 4025 __func__, env); 4026 } 4027 4028 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 4029 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4030 } 4031 4032 /* 4033 * Version negotiation. We can end up here either because our peer 4034 * has responded to a handshake message we have sent it, or our peer 4035 * has initiated a handshake with us. If its the former then can only 4036 * be ACK or NACK, if its the later can only be INFO. 4037 * 4038 * If its an ACK we move to the next stage of the handshake, namely 4039 * attribute exchange. If its a NACK we see if we can specify another 4040 * version, if we can't we stop. 4041 * 4042 * If it is an INFO we reset all params associated with communication 4043 * in that direction over this channel (remember connection is 4044 * essentially 2 independent simplex channels). 4045 */ 4046 void 4047 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 4048 { 4049 vio_ver_msg_t *ver_pkt; 4050 vsw_t *vswp = ldcp->ldc_vswp; 4051 4052 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4053 4054 /* 4055 * We know this is a ctrl/version packet so 4056 * cast it into the correct structure. 4057 */ 4058 ver_pkt = (vio_ver_msg_t *)pkt; 4059 4060 switch (ver_pkt->tag.vio_subtype) { 4061 case VIO_SUBTYPE_INFO: 4062 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 4063 4064 /* 4065 * Record the session id, which we will use from now 4066 * until we see another VER_INFO msg. Even then the 4067 * session id in most cases will be unchanged, execpt 4068 * if channel was reset. 
4069 */ 4070 if ((ldcp->session_status & VSW_PEER_SESSION) && 4071 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 4072 DERR(vswp, "%s: updating session id for chan %lld " 4073 "from %llx to %llx", __func__, ldcp->ldc_id, 4074 ldcp->peer_session, ver_pkt->tag.vio_sid); 4075 } 4076 4077 ldcp->peer_session = ver_pkt->tag.vio_sid; 4078 ldcp->session_status |= VSW_PEER_SESSION; 4079 4080 /* Legal message at this time ? */ 4081 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 4082 return; 4083 4084 /* 4085 * First check the device class. Currently only expect 4086 * to be talking to a network device. In the future may 4087 * also talk to another switch. 4088 */ 4089 if (ver_pkt->dev_class != VDEV_NETWORK) { 4090 DERR(vswp, "%s: illegal device class %d", __func__, 4091 ver_pkt->dev_class); 4092 4093 ver_pkt->tag.vio_sid = ldcp->local_session; 4094 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 4095 4096 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 4097 4098 vsw_send_msg(ldcp, (void *)ver_pkt, 4099 sizeof (vio_ver_msg_t)); 4100 4101 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 4102 vsw_next_milestone(ldcp); 4103 return; 4104 } else { 4105 ldcp->dev_class = ver_pkt->dev_class; 4106 } 4107 4108 /* 4109 * Now check the version. 4110 */ 4111 if (vsw_supported_version(ver_pkt) == 0) { 4112 /* 4113 * Support this major version and possibly 4114 * adjusted minor version. 4115 */ 4116 4117 D2(vswp, "%s: accepted ver %d:%d", __func__, 4118 ver_pkt->ver_major, ver_pkt->ver_minor); 4119 4120 /* Store accepted values */ 4121 ldcp->lane_in.ver_major = ver_pkt->ver_major; 4122 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 4123 4124 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 4125 4126 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 4127 } else { 4128 /* 4129 * NACK back with the next lower major/minor 4130 * pairing we support (if don't suuport any more 4131 * versions then they will be set to zero. 4132 */ 4133 4134 D2(vswp, "%s: replying with ver %d:%d", __func__, 4135 ver_pkt->ver_major, ver_pkt->ver_minor); 4136 4137 /* Store updated values */ 4138 ldcp->lane_in.ver_major = ver_pkt->ver_major; 4139 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 4140 4141 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 4142 4143 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 4144 } 4145 4146 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 4147 ver_pkt->tag.vio_sid = ldcp->local_session; 4148 vsw_send_msg(ldcp, (void *)ver_pkt, sizeof (vio_ver_msg_t)); 4149 4150 vsw_next_milestone(ldcp); 4151 break; 4152 4153 case VIO_SUBTYPE_ACK: 4154 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 4155 4156 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 4157 return; 4158 4159 /* Store updated values */ 4160 ldcp->lane_in.ver_major = ver_pkt->ver_major; 4161 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 4162 4163 4164 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 4165 vsw_next_milestone(ldcp); 4166 4167 break; 4168 4169 case VIO_SUBTYPE_NACK: 4170 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 4171 4172 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 4173 return; 4174 4175 /* 4176 * If our peer sent us a NACK with the ver fields set to 4177 * zero then there is nothing more we can do. Otherwise see 4178 * if we support either the version suggested, or a lesser 4179 * one. 
4180 */ 4181 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 4182 DERR(vswp, "%s: peer unable to negotiate any " 4183 "further.", __func__); 4184 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 4185 vsw_next_milestone(ldcp); 4186 return; 4187 } 4188 4189 /* 4190 * Check to see if we support this major version or 4191 * a lower one. If we don't then maj/min will be set 4192 * to zero. 4193 */ 4194 (void) vsw_supported_version(ver_pkt); 4195 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 4196 /* Nothing more we can do */ 4197 DERR(vswp, "%s: version negotiation failed.\n", 4198 __func__); 4199 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 4200 vsw_next_milestone(ldcp); 4201 } else { 4202 /* found a supported major version */ 4203 ldcp->lane_out.ver_major = ver_pkt->ver_major; 4204 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 4205 4206 D2(vswp, "%s: resending with updated values (%x, %x)", 4207 __func__, ver_pkt->ver_major, 4208 ver_pkt->ver_minor); 4209 4210 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 4211 ver_pkt->tag.vio_sid = ldcp->local_session; 4212 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 4213 4214 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 4215 4216 vsw_send_msg(ldcp, (void *)ver_pkt, 4217 sizeof (vio_ver_msg_t)); 4218 4219 vsw_next_milestone(ldcp); 4220 4221 } 4222 break; 4223 4224 default: 4225 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 4226 ver_pkt->tag.vio_subtype); 4227 } 4228 4229 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 4230 } 4231 4232 /* 4233 * Process an attribute packet. We can end up here either because our peer 4234 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 4235 * peer has sent us an attribute INFO message 4236 * 4237 * If its an ACK we then move to the next stage of the handshake which 4238 * is to send our descriptor ring info to our peer. If its a NACK then 4239 * there is nothing more we can (currently) do. 4240 * 4241 * If we get a valid/acceptable INFO packet (and we have already negotiated 4242 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 4243 * NACK back and reset channel state to INACTIV. 4244 * 4245 * FUTURE: in time we will probably negotiate over attributes, but for 4246 * the moment unacceptable attributes are regarded as a fatal error. 4247 * 4248 */ 4249 void 4250 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 4251 { 4252 vnet_attr_msg_t *attr_pkt; 4253 vsw_t *vswp = ldcp->ldc_vswp; 4254 vsw_port_t *port = ldcp->ldc_port; 4255 uint64_t macaddr = 0; 4256 int i; 4257 4258 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 4259 4260 /* 4261 * We know this is a ctrl/attr packet so 4262 * cast it into the correct structure. 4263 */ 4264 attr_pkt = (vnet_attr_msg_t *)pkt; 4265 4266 switch (attr_pkt->tag.vio_subtype) { 4267 case VIO_SUBTYPE_INFO: 4268 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 4269 4270 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 4271 return; 4272 4273 /* 4274 * If the attributes are unacceptable then we NACK back. 
4275 */ 4276 if (vsw_check_attr(attr_pkt, ldcp->ldc_port)) { 4277 4278 DERR(vswp, "%s (chan %d): invalid attributes", 4279 __func__, ldcp->ldc_id); 4280 4281 vsw_free_lane_resources(ldcp, INBOUND); 4282 4283 attr_pkt->tag.vio_sid = ldcp->local_session; 4284 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 4285 4286 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 4287 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 4288 vsw_send_msg(ldcp, (void *)attr_pkt, 4289 sizeof (vnet_attr_msg_t)); 4290 4291 vsw_next_milestone(ldcp); 4292 return; 4293 } 4294 4295 /* 4296 * Otherwise store attributes for this lane and update 4297 * lane state. 4298 */ 4299 ldcp->lane_in.mtu = attr_pkt->mtu; 4300 ldcp->lane_in.addr = attr_pkt->addr; 4301 ldcp->lane_in.addr_type = attr_pkt->addr_type; 4302 ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode; 4303 ldcp->lane_in.ack_freq = attr_pkt->ack_freq; 4304 4305 macaddr = ldcp->lane_in.addr; 4306 for (i = ETHERADDRL - 1; i >= 0; i--) { 4307 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 4308 macaddr >>= 8; 4309 } 4310 4311 /* create the fdb entry for this port/mac address */ 4312 (void) vsw_add_fdb(vswp, port); 4313 4314 /* set up device specific xmit routines */ 4315 mutex_enter(&port->tx_lock); 4316 if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) { 4317 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 4318 port->transmit = vsw_dringsend; 4319 } else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) { 4320 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 4321 vsw_create_privring(ldcp); 4322 port->transmit = vsw_descrsend; 4323 } 4324 mutex_exit(&port->tx_lock); 4325 4326 attr_pkt->tag.vio_sid = ldcp->local_session; 4327 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 4328 4329 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 4330 4331 ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT; 4332 4333 vsw_send_msg(ldcp, (void *)attr_pkt, 4334 sizeof (vnet_attr_msg_t)); 4335 4336 vsw_next_milestone(ldcp); 4337 break; 4338 4339 case VIO_SUBTYPE_ACK: 4340 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 4341 4342 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 4343 return; 4344 4345 ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV; 4346 vsw_next_milestone(ldcp); 4347 break; 4348 4349 case VIO_SUBTYPE_NACK: 4350 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4351 4352 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 4353 return; 4354 4355 ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV; 4356 vsw_next_milestone(ldcp); 4357 break; 4358 4359 default: 4360 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 4361 attr_pkt->tag.vio_subtype); 4362 } 4363 4364 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4365 } 4366 4367 /* 4368 * Process a dring info packet. We can end up here either because our peer 4369 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 4370 * peer has sent us a dring INFO message. 4371 * 4372 * If we get a valid/acceptable INFO packet (and we have already negotiated 4373 * a version) we ACK back and update the lane state, otherwise we NACK back. 4374 * 4375 * FUTURE: nothing to stop the client from sending us info on multiple drings, 4376 * but for the moment we will just use the first one we are given. 4377 * 4378 */ 4379 void 4380 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 4381 { 4382 vio_dring_reg_msg_t *dring_pkt; 4383 vsw_t *vswp = ldcp->ldc_vswp; 4384 ldc_mem_info_t minfo; 4385 dring_info_t *dp, *dbp; 4386 int dring_found = 0; 4387 4388 /* 4389 * We know this is a ctrl/dring packet so 4390 * cast it into the correct structure.
4391 */ 4392 dring_pkt = (vio_dring_reg_msg_t *)pkt; 4393 4394 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 4395 4396 switch (dring_pkt->tag.vio_subtype) { 4397 case VIO_SUBTYPE_INFO: 4398 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 4399 4400 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 4401 return; 4402 4403 /* 4404 * If the dring params are unacceptable then we NACK back. 4405 */ 4406 if (vsw_check_dring_info(dring_pkt)) { 4407 4408 DERR(vswp, "%s (%lld): invalid dring info", 4409 __func__, ldcp->ldc_id); 4410 4411 vsw_free_lane_resources(ldcp, INBOUND); 4412 4413 dring_pkt->tag.vio_sid = ldcp->local_session; 4414 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 4415 4416 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 4417 4418 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 4419 4420 vsw_send_msg(ldcp, (void *)dring_pkt, 4421 sizeof (vio_dring_reg_msg_t)); 4422 4423 vsw_next_milestone(ldcp); 4424 return; 4425 } 4426 4427 /* 4428 * Otherwise, attempt to map in the dring using the 4429 * cookie. If that succeeds we send back a unique dring 4430 * identifier that the sending side will use in future 4431 * to refer to this descriptor ring. 4432 */ 4433 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4434 4435 dp->num_descriptors = dring_pkt->num_descriptors; 4436 dp->descriptor_size = dring_pkt->descriptor_size; 4437 dp->options = dring_pkt->options; 4438 dp->ncookies = dring_pkt->ncookies; 4439 4440 /* 4441 * Note: should only get one cookie. Enforced in 4442 * the ldc layer. 4443 */ 4444 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 4445 sizeof (ldc_mem_cookie_t)); 4446 4447 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 4448 dp->num_descriptors, dp->descriptor_size); 4449 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 4450 dp->options, dp->ncookies); 4451 4452 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 4453 dp->ncookies, dp->num_descriptors, 4454 dp->descriptor_size, LDC_SHADOW_MAP, 4455 &(dp->handle))) != 0) { 4456 4457 DERR(vswp, "%s: dring_map failed\n", __func__); 4458 4459 kmem_free(dp, sizeof (dring_info_t)); 4460 vsw_free_lane_resources(ldcp, INBOUND); 4461 4462 dring_pkt->tag.vio_sid = ldcp->local_session; 4463 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 4464 4465 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 4466 4467 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 4468 vsw_send_msg(ldcp, (void *)dring_pkt, 4469 sizeof (vio_dring_reg_msg_t)); 4470 4471 vsw_next_milestone(ldcp); 4472 return; 4473 } 4474 4475 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 4476 4477 DERR(vswp, "%s: dring_addr failed\n", __func__); 4478 4479 kmem_free(dp, sizeof (dring_info_t)); 4480 vsw_free_lane_resources(ldcp, INBOUND); 4481 4482 dring_pkt->tag.vio_sid = ldcp->local_session; 4483 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 4484 4485 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 4486 4487 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 4488 vsw_send_msg(ldcp, (void *)dring_pkt, 4489 sizeof (vio_dring_reg_msg_t)); 4490 4491 vsw_next_milestone(ldcp); 4492 return; 4493 } else { 4494 /* store the address of the pub part of ring */ 4495 dp->pub_addr = minfo.vaddr; 4496 } 4497 4498 /* no private section as we are importing */ 4499 dp->priv_addr = NULL; 4500 4501 /* 4502 * Using simple mono increasing int for ident at 4503 * the moment. 4504 */ 4505 dp->ident = ldcp->next_ident; 4506 ldcp->next_ident++; 4507 4508 dp->end_idx = 0; 4509 dp->next = NULL; 4510 4511 /* 4512 * Link it onto the end of the list of drings 4513 * for this lane. 
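 *
 * The drings for a lane form a simple singly-linked list, so
 * after two successful registrations it would look like (sketch):
 *
 *	lane_in.dringp -> dp (ident 0) -> dp (ident 1) -> NULL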
4514 */ 4515 if (ldcp->lane_in.dringp == NULL) { 4516 D2(vswp, "%s: adding first INBOUND dring", __func__); 4517 ldcp->lane_in.dringp = dp; 4518 } else { 4519 dbp = ldcp->lane_in.dringp; 4520 4521 while (dbp->next != NULL) 4522 dbp = dbp->next; 4523 4524 dbp->next = dp; 4525 } 4526 4527 /* acknowledge it */ 4528 dring_pkt->tag.vio_sid = ldcp->local_session; 4529 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 4530 dring_pkt->dring_ident = dp->ident; 4531 4532 vsw_send_msg(ldcp, (void *)dring_pkt, 4533 sizeof (vio_dring_reg_msg_t)); 4534 4535 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 4536 vsw_next_milestone(ldcp); 4537 break; 4538 4539 case VIO_SUBTYPE_ACK: 4540 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 4541 4542 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 4543 return; 4544 4545 /* 4546 * Peer is acknowledging our dring info and will have 4547 * sent us a dring identifier which we will use to 4548 * refer to this ring w.r.t. our peer. 4549 */ 4550 dp = ldcp->lane_out.dringp; 4551 if (dp != NULL) { 4552 /* 4553 * Find the ring this ident should be associated 4554 * with. 4555 */ 4556 if (vsw_dring_match(dp, dring_pkt)) { 4557 dring_found = 1; 4558 4559 } else while (dp != NULL) { 4560 if (vsw_dring_match(dp, dring_pkt)) { 4561 dring_found = 1; 4562 break; 4563 } 4564 dp = dp->next; 4565 } 4566 4567 if (dring_found == 0) { 4568 DERR(NULL, "%s: unrecognised ring cookie", 4569 __func__); 4570 vsw_restart_handshake(ldcp); 4571 return; 4572 } 4573 4574 } else { 4575 DERR(vswp, "%s: DRING ACK received but no drings " 4576 "allocated", __func__); 4577 vsw_restart_handshake(ldcp); 4578 return; 4579 } 4580 4581 /* store ident */ 4582 dp->ident = dring_pkt->dring_ident; 4583 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 4584 vsw_next_milestone(ldcp); 4585 break; 4586 4587 case VIO_SUBTYPE_NACK: 4588 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4589 4590 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 4591 return; 4592 4593 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 4594 vsw_next_milestone(ldcp); 4595 break; 4596 4597 default: 4598 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 4599 dring_pkt->tag.vio_subtype); 4600 } 4601 4602 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4603 } 4604 4605 /* 4606 * Process a request from peer to unregister a dring. 4607 * 4608 * For the moment we just restart the handshake if our 4609 * peer endpoint attempts to unregister a dring. 4610 */ 4611 void 4612 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 4613 { 4614 vsw_t *vswp = ldcp->ldc_vswp; 4615 vio_dring_unreg_msg_t *dring_pkt; 4616 4617 /* 4618 * We know this is a ctrl/dring packet so 4619 * cast it into the correct structure. 
4620 */ 4621 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 4622 4623 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4624 4625 switch (dring_pkt->tag.vio_subtype) { 4626 case VIO_SUBTYPE_INFO: 4627 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 4628 4629 DWARN(vswp, "%s: restarting handshake..", __func__); 4630 vsw_restart_handshake(ldcp); 4631 break; 4632 4633 case VIO_SUBTYPE_ACK: 4634 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 4635 4636 DWARN(vswp, "%s: restarting handshake..", __func__); 4637 vsw_restart_handshake(ldcp); 4638 break; 4639 4640 case VIO_SUBTYPE_NACK: 4641 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4642 4643 DWARN(vswp, "%s: restarting handshake..", __func__); 4644 vsw_restart_handshake(ldcp); 4645 break; 4646 4647 default: 4648 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 4649 dring_pkt->tag.vio_subtype); 4650 vsw_restart_handshake(ldcp); 4651 } 4652 4653 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4654 } 4655 4656 #define SND_MCST_NACK(ldcp, pkt) \ 4657 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 4658 pkt->tag.vio_sid = ldcp->local_session; \ 4659 vsw_send_msg(ldcp, (void *)pkt, sizeof (vnet_mcast_msg_t)); 4660 4661 /* 4662 * Process a multicast request from a vnet. 4663 * 4664 * Vnets specify a multicast address that they are interested in. This 4665 * address is used as a key into the hash table which forms the multicast 4666 * forwarding database (mFDB). 4667 * 4668 * The table keys are the multicast addresses, while the table entries 4669 * are pointers to lists of ports which wish to receive packets for the 4670 * specified multicast address. 4671 * 4672 * When a multicast packet is being switched we use the address as a key 4673 * into the hash table, and then walk the appropriate port list forwarding 4674 * the pkt to each port in turn. 4675 * 4676 * If a vnet is no longer interested in a particular multicast grouping 4677 * we simply find the correct location in the hash table and then delete 4678 * the relevant port from the port list. 4679 * 4680 * To deal with the case whereby a port is being deleted without first 4681 * removing itself from the lists in the hash table, we maintain a list 4682 * of multicast addresses the port has registered an interest in, within 4683 * the port structure itself. We then simply walk that list of addresses 4684 * using them as keys into the hash table and remove the port from the 4685 * appropriate lists. 4686 */ 4687 static void 4688 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 4689 { 4690 vnet_mcast_msg_t *mcst_pkt; 4691 vsw_port_t *port = ldcp->ldc_port; 4692 vsw_t *vswp = ldcp->ldc_vswp; 4693 int i; 4694 4695 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4696 4697 /* 4698 * We know this is a ctrl/mcast packet so 4699 * cast it into the correct structure. 4700 */ 4701 mcst_pkt = (vnet_mcast_msg_t *)pkt; 4702 4703 switch (mcst_pkt->tag.vio_subtype) { 4704 case VIO_SUBTYPE_INFO: 4705 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 4706 4707 /* 4708 * Check if in correct state to receive a multicast 4709 * message (i.e. handshake complete). If not reset 4710 * the handshake. 4711 */ 4712 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 4713 return; 4714 4715 /* 4716 * Before attempting to add or remove the addresses, check 4717 * that they are valid multicast addresses. 4718 * If not, NACK back.
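 *
 * A valid multicast address has the least significant bit of its
 * first octet set: e.g. 01:00:5E:00:00:01 (IPv4 all-hosts) passes
 * the check below, while a unicast address such as
 * 00:14:4F:00:00:01 is NACK'd.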
4719 */ 4720 for (i = 0; i < mcst_pkt->count; i++) { 4721 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 4722 DERR(vswp, "%s: invalid multicast address", 4723 __func__); 4724 SND_MCST_NACK(ldcp, mcst_pkt); 4725 return; 4726 } 4727 } 4728 4729 /* 4730 * Now add/remove the addresses. If this fails we 4731 * NACK back. 4732 */ 4733 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 4734 SND_MCST_NACK(ldcp, mcst_pkt); 4735 return; 4736 } 4737 4738 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 4739 mcst_pkt->tag.vio_sid = ldcp->local_session; 4740 4741 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 4742 4743 vsw_send_msg(ldcp, (void *)mcst_pkt, 4744 sizeof (vnet_mcast_msg_t)); 4745 break; 4746 4747 case VIO_SUBTYPE_ACK: 4748 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 4749 4750 /* 4751 * We shouldn't ever get a multicast ACK message as 4752 * at the moment we never request multicast addresses 4753 * to be set on some other device. This may change in 4754 * the future if we have cascading switches. 4755 */ 4756 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 4757 return; 4758 4759 /* Do nothing */ 4760 break; 4761 4762 case VIO_SUBTYPE_NACK: 4763 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4764 4765 /* 4766 * We shouldn't get a multicast NACK packet for the 4767 * same reasons as we shouldn't get an ACK packet. 4768 */ 4769 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 4770 return; 4771 4772 /* Do nothing */ 4773 break; 4774 4775 default: 4776 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 4777 mcst_pkt->tag.vio_subtype); 4778 } 4779 4780 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4781 } 4782 4783 static void 4784 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 4785 { 4786 vio_rdx_msg_t *rdx_pkt; 4787 vsw_t *vswp = ldcp->ldc_vswp; 4788 4789 /* 4790 * We know this is a ctrl/rdx packet so 4791 * cast it into the correct structure. 4792 */ 4793 rdx_pkt = (vio_rdx_msg_t *)pkt; 4794 4795 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 4796 4797 switch (rdx_pkt->tag.vio_subtype) { 4798 case VIO_SUBTYPE_INFO: 4799 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 4800 4801 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_INFO_RECV)) 4802 return; 4803 4804 rdx_pkt->tag.vio_sid = ldcp->local_session; 4805 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 4806 4807 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 4808 4809 ldcp->lane_in.lstate |= VSW_RDX_ACK_SENT; 4810 4811 vsw_send_msg(ldcp, (void *)rdx_pkt, 4812 sizeof (vio_rdx_msg_t)); 4813 4814 vsw_next_milestone(ldcp); 4815 break; 4816 4817 case VIO_SUBTYPE_ACK: 4818 /* 4819 * Should be handled in-band by callback handler.
4820 */ 4821 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 4822 vsw_restart_handshake(ldcp); 4823 break; 4824 4825 case VIO_SUBTYPE_NACK: 4826 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4827 4828 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_NACK_RECV)) 4829 return; 4830 4831 ldcp->lane_out.lstate |= VSW_RDX_NACK_RECV; 4832 vsw_next_milestone(ldcp); 4833 break; 4834 4835 default: 4836 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 4837 rdx_pkt->tag.vio_subtype); 4838 } 4839 4840 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4841 } 4842 4843 static void 4844 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t tag) 4845 { 4846 uint16_t env = tag.vio_subtype_env; 4847 vsw_t *vswp = ldcp->ldc_vswp; 4848 4849 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4850 4851 /* session id check */ 4852 if (ldcp->session_status & VSW_PEER_SESSION) { 4853 if (ldcp->peer_session != tag.vio_sid) { 4854 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 4855 __func__, ldcp->ldc_id, tag.vio_sid); 4856 vsw_restart_handshake(ldcp); 4857 return; 4858 } 4859 } 4860 4861 /* 4862 * It is an error for us to be getting data packets 4863 * before the handshake has completed. 4864 */ 4865 if (ldcp->hphase != VSW_MILESTONE4) { 4866 DERR(vswp, "%s: got data packet before handshake complete " 4867 "hphase %d (%x: %x)", __func__, ldcp->hphase, 4868 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 4869 DUMP_FLAGS(ldcp->lane_in.lstate); 4870 DUMP_FLAGS(ldcp->lane_out.lstate); 4871 vsw_restart_handshake(ldcp); 4872 return; 4873 } 4874 4875 /* 4876 * Switch on vio_subtype envelope, then let lower routines 4877 * decide if its an INFO, ACK or NACK packet. 4878 */ 4879 if (env == VIO_DRING_DATA) { 4880 vsw_process_data_dring_pkt(ldcp, dpkt); 4881 } else if (env == VIO_PKT_DATA) { 4882 vsw_process_data_raw_pkt(ldcp, dpkt); 4883 } else if (env == VIO_DESC_DATA) { 4884 vsw_process_data_ibnd_pkt(ldcp, dpkt); 4885 } else { 4886 DERR(vswp, "%s : unknown vio_subtype_env (%x)\n", 4887 __func__, env); 4888 } 4889 4890 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4891 } 4892 4893 #define SND_DRING_NACK(ldcp, pkt) \ 4894 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 4895 pkt->tag.vio_sid = ldcp->local_session; \ 4896 vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_dring_msg_t)); 4897 4898 static void 4899 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 4900 { 4901 vio_dring_msg_t *dring_pkt; 4902 vnet_public_desc_t *pub_addr = NULL; 4903 vsw_private_desc_t *priv_addr = NULL; 4904 dring_info_t *dp = NULL; 4905 vsw_t *vswp = ldcp->ldc_vswp; 4906 mblk_t *mp = NULL; 4907 mblk_t *bp = NULL; 4908 mblk_t *bpt = NULL; 4909 size_t nbytes = 0; 4910 size_t off = 0; 4911 uint64_t ncookies = 0; 4912 uint64_t chain = 0; 4913 uint64_t j, len; 4914 uint32_t pos, start, datalen; 4915 uint32_t range_start, range_end; 4916 int32_t end, num, cnt = 0; 4917 int i, rv; 4918 boolean_t ack_needed = B_FALSE; 4919 boolean_t prev_desc_ack = B_FALSE; 4920 int read_attempts = 0; 4921 4922 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4923 4924 /* 4925 * We know this is a data/dring packet so 4926 * cast it into the correct structure. 4927 */ 4928 dring_pkt = (vio_dring_msg_t *)dpkt; 4929 4930 /* 4931 * Switch on the vio_subtype. If its INFO then we need to 4932 * process the data. If its an ACK we need to make sure 4933 * it makes sense (i.e did we send an earlier data/info), 4934 * and if its a NACK then we maybe attempt a retry. 
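 *
 * For illustration (index values hypothetical), an INFO message
 * covering a bounded range of two descriptors might carry:
 *
 *	dring_pkt->dring_ident = ident we returned at registration;
 *	dring_pkt->start_idx = 5;
 *	dring_pkt->end_idx = 6;		an end_idx of -1 = unbounded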
4935 */ 4936 switch (dring_pkt->tag.vio_subtype) { 4937 case VIO_SUBTYPE_INFO: 4938 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 4939 4940 if ((dp = vsw_ident2dring(&ldcp->lane_in, 4941 dring_pkt->dring_ident)) == NULL) { 4942 4943 DERR(vswp, "%s(%lld): unable to find dring from " 4944 "ident 0x%llx", __func__, ldcp->ldc_id, 4945 dring_pkt->dring_ident); 4946 4947 SND_DRING_NACK(ldcp, dring_pkt); 4948 return; 4949 } 4950 4951 start = pos = dring_pkt->start_idx; 4952 end = dring_pkt->end_idx; 4953 len = dp->num_descriptors; 4954 4955 range_start = range_end = pos; 4956 4957 D2(vswp, "%s(%lld): start index %ld : end %ld\n", 4958 __func__, ldcp->ldc_id, start, end); 4959 4960 if (end == -1) { 4961 num = -1; 4962 } else if (end >= 0) { 4963 num = end >= pos ? 4964 end - pos + 1: (len - pos + 1) + end; 4965 4966 /* basic sanity check */ 4967 if (end > len) { 4968 DERR(vswp, "%s(%lld): endpoint %lld outside " 4969 "ring length %lld", __func__, 4970 ldcp->ldc_id, end, len); 4971 4972 SND_DRING_NACK(ldcp, dring_pkt); 4973 return; 4974 } 4975 } else { 4976 DERR(vswp, "%s(%lld): invalid endpoint %lld", 4977 __func__, ldcp->ldc_id, end); 4978 SND_DRING_NACK(ldcp, dring_pkt); 4979 return; 4980 } 4981 4982 while (cnt != num) { 4983 vsw_recheck_desc: 4984 if ((rv = ldc_mem_dring_acquire(dp->handle, 4985 pos, pos)) != 0) { 4986 DERR(vswp, "%s(%lld): unable to acquire " 4987 "descriptor at pos %d: err %d", 4988 __func__, ldcp->ldc_id, pos, rv); 4989 SND_DRING_NACK(ldcp, dring_pkt); 4990 return; 4991 } 4992 4993 pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos; 4994 4995 /* 4996 * When given a bounded range of descriptors 4997 * to process, it's an error to hit a descriptor 4998 * which is not ready. In the non-bounded case 4999 * (end_idx == -1) this simply indicates we have 5000 * reached the end of the current active range. 5001 */ 5002 if (pub_addr->hdr.dstate != VIO_DESC_READY) { 5003 /* unbound - no error */ 5004 if (end == -1) { 5005 if (read_attempts == vsw_read_attempts) 5006 break; 5007 5008 delay(drv_usectohz(vsw_desc_delay)); 5009 read_attempts++; 5010 goto vsw_recheck_desc; 5011 } 5012 5013 /* bounded - error - so NACK back */ 5014 DERR(vswp, "%s(%lld): descriptor not READY " 5015 "(%d)", __func__, ldcp->ldc_id, 5016 pub_addr->hdr.dstate); 5017 SND_DRING_NACK(ldcp, dring_pkt); 5018 return; 5019 } 5020 5021 DTRACE_PROBE1(read_attempts, int, read_attempts); 5022 5023 range_end = pos; 5024 5025 /* 5026 * If we ACK'd the previous descriptor then now 5027 * record the new range start position for later 5028 * ACK's. 5029 */ 5030 if (prev_desc_ack) { 5031 range_start = pos; 5032 5033 D2(vswp, "%s(%lld): updating range start " 5034 "to be %d", __func__, ldcp->ldc_id, 5035 range_start); 5036 5037 prev_desc_ack = B_FALSE; 5038 } 5039 5040 /* 5041 * Data is padded to align on an 8 byte boundary, 5042 * datalen is actual data length, i.e. minus that 5043 * padding. 5044 */ 5045 datalen = pub_addr->nbytes; 5046 5047 /* 5048 * Does peer wish us to ACK when we have finished 5049 * with this descriptor? 5050 */ 5051 if (pub_addr->hdr.ack) 5052 ack_needed = B_TRUE; 5053 5054 D2(vswp, "%s(%lld): processing desc %lld at" 5055 " 0x%llx : dstate 0x%lx : datalen 0x%lx", 5056 __func__, ldcp->ldc_id, pos, pub_addr, 5057 pub_addr->hdr.dstate, datalen); 5058 5059 /* 5060 * Mark that we are starting to process descriptor.
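 *
 * On this (importing) side a descriptor therefore moves through
 * the following states (sketch):
 *
 *	VIO_DESC_READY -> VIO_DESC_ACCEPTED	(copy starting)
 *	VIO_DESC_ACCEPTED -> VIO_DESC_DONE	(copy done or failed)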
5061 */ 5062 pub_addr->hdr.dstate = VIO_DESC_ACCEPTED; 5063 5064 mp = vio_allocb(ldcp->rxh); 5065 if (mp == NULL) { 5066 /* 5067 * No free receive buffers available, so 5068 * fall back onto allocb(9F). Make sure that 5069 * we get a data buffer which is a multiple 5070 * of 8 as this is required by ldc_mem_copy. 5071 */ 5072 DTRACE_PROBE(allocb); 5073 mp = allocb(datalen + VNET_IPALIGN + 8, 5074 BPRI_MED); 5075 } if (mp == NULL) { /* even allocb(9F) failed - mark the descriptor done and give up on this range */ DERR(vswp, "%s(%lld): allocb failed", __func__, ldcp->ldc_id); pub_addr->hdr.dstate = VIO_DESC_DONE; (void) ldc_mem_dring_release(dp->handle, pos, pos); break; } 5076 5077 /* 5078 * Ensure that we ask ldc for an aligned 5079 * number of bytes. 5080 */ 5081 nbytes = datalen + VNET_IPALIGN; 5082 if (nbytes & 0x7) { 5083 off = 8 - (nbytes & 0x7); 5084 nbytes += off; 5085 } 5086 5087 ncookies = pub_addr->ncookies; 5088 rv = ldc_mem_copy(ldcp->ldc_handle, 5089 (caddr_t)mp->b_rptr, 0, &nbytes, 5090 pub_addr->memcookie, ncookies, 5091 LDC_COPY_IN); 5092 5093 if (rv != 0) { 5094 DERR(vswp, "%s(%d): unable to copy in " 5095 "data from %d cookies in desc %d" 5096 " (rv %d)", __func__, ldcp->ldc_id, 5097 ncookies, pos, rv); 5098 freemsg(mp); 5099 5100 pub_addr->hdr.dstate = VIO_DESC_DONE; 5101 (void) ldc_mem_dring_release(dp->handle, 5102 pos, pos); 5103 break; 5104 } else { 5105 D2(vswp, "%s(%d): copied in %ld bytes" 5106 " using %d cookies", __func__, 5107 ldcp->ldc_id, nbytes, ncookies); 5108 } 5109 5110 /* adjust the read pointer to skip over the padding */ 5111 mp->b_rptr += VNET_IPALIGN; 5112 5113 /* point to the actual end of data */ 5114 mp->b_wptr = mp->b_rptr + datalen; 5115 5116 /* build a chain of received packets */ 5117 if (bp == NULL) { 5118 /* first pkt */ 5119 bp = mp; 5120 bp->b_next = bp->b_prev = NULL; 5121 bpt = bp; 5122 chain = 1; 5123 } else { 5124 mp->b_next = NULL; 5125 mp->b_prev = bpt; 5126 bpt->b_next = mp; 5127 bpt = mp; 5128 chain++; 5129 } 5130 5131 /* mark we are finished with this descriptor */ 5132 pub_addr->hdr.dstate = VIO_DESC_DONE; 5133 5134 (void) ldc_mem_dring_release(dp->handle, pos, pos); 5135 5136 /* 5137 * Send an ACK back to peer if requested. 5138 */ 5139 if (ack_needed) { 5140 ack_needed = B_FALSE; 5141 5142 dring_pkt->start_idx = range_start; 5143 dring_pkt->end_idx = range_end; 5144 5145 DERR(vswp, "%s(%lld): processed %d %d, ACK" 5146 " requested", __func__, ldcp->ldc_id, 5147 dring_pkt->start_idx, 5148 dring_pkt->end_idx); 5149 5150 dring_pkt->dring_process_state = VIO_DP_ACTIVE; 5151 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 5152 dring_pkt->tag.vio_sid = ldcp->local_session; 5153 vsw_send_msg(ldcp, (void *)dring_pkt, 5154 sizeof (vio_dring_msg_t)); 5155 5156 prev_desc_ack = B_TRUE; 5157 range_start = pos; 5158 } 5159 5160 /* next descriptor */ 5161 pos = (pos + 1) % len; 5162 cnt++; 5163 5164 /* 5165 * Break out of loop here and stop processing to 5166 * allow some other network device (or disk) to 5167 * get access to the cpu.
5168 */ 5169 /* send the chain of packets to be switched */ 5170 if (chain > vsw_chain_len) { 5171 D3(vswp, "%s(%lld): switching chain of %d " 5172 "msgs", __func__, ldcp->ldc_id, chain); 5173 vsw_switch_frame(vswp, bp, VSW_VNETPORT, 5174 ldcp->ldc_port, NULL); 5175 bp = NULL; 5176 break; 5177 } 5178 } 5179 5180 /* send the chain of packets to be switched */ 5181 if (bp != NULL) { 5182 D3(vswp, "%s(%lld): switching chain of %d msgs", 5183 __func__, ldcp->ldc_id, chain); 5184 vsw_switch_frame(vswp, bp, VSW_VNETPORT, 5185 ldcp->ldc_port, NULL); 5186 } 5187 5188 DTRACE_PROBE1(msg_cnt, int, cnt); 5189 5190 /* 5191 * We are now finished so ACK back with the state 5192 * set to STOPPING so our peer knows we are finished 5193 */ 5194 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 5195 dring_pkt->tag.vio_sid = ldcp->local_session; 5196 5197 dring_pkt->dring_process_state = VIO_DP_STOPPED; 5198 5199 DTRACE_PROBE(stop_process_sent); 5200 5201 /* 5202 * We have not processed any more descriptors beyond 5203 * the last one we ACK'd. 5204 */ 5205 if (prev_desc_ack) 5206 range_start = range_end; 5207 5208 dring_pkt->start_idx = range_start; 5209 dring_pkt->end_idx = range_end; 5210 5211 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 5212 __func__, ldcp->ldc_id, dring_pkt->start_idx, 5213 dring_pkt->end_idx); 5214 5215 vsw_send_msg(ldcp, (void *)dring_pkt, 5216 sizeof (vio_dring_msg_t)); 5217 break; 5218 5219 case VIO_SUBTYPE_ACK: 5220 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 5221 /* 5222 * Verify that the relevant descriptors are all 5223 * marked as DONE 5224 */ 5225 if ((dp = vsw_ident2dring(&ldcp->lane_out, 5226 dring_pkt->dring_ident)) == NULL) { 5227 DERR(vswp, "%s: unknown ident in ACK", __func__); 5228 return; 5229 } 5230 5231 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 5232 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 5233 5234 start = end = 0; 5235 start = dring_pkt->start_idx; 5236 end = dring_pkt->end_idx; 5237 len = dp->num_descriptors; 5238 5239 j = num = 0; 5240 /* calculate # descriptors taking into a/c wrap around */ 5241 num = end >= start ? end - start + 1: (len - start + 1) + end; 5242 5243 D2(vswp, "%s(%lld): start index %ld : end %ld : num %ld\n", 5244 __func__, ldcp->ldc_id, start, end, num); 5245 5246 mutex_enter(&dp->dlock); 5247 dp->last_ack_recv = end; 5248 mutex_exit(&dp->dlock); 5249 5250 for (i = start; j < num; i = (i + 1) % len, j++) { 5251 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 5252 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 5253 5254 /* 5255 * If the last descriptor in a range has the ACK 5256 * bit set then we will get two messages from our 5257 * peer relating to it. The normal ACK msg and then 5258 * a subsequent STOP msg. The first message will have 5259 * resulted in the descriptor being reclaimed and 5260 * its state set to FREE so when we encounter a non 5261 * DONE descriptor we need to check to see if its 5262 * because we have just reclaimed it. 
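 *
 * As a worked example of the wrap-around arithmetic above: with
 * len = 8, start = 6 and end = 1, num = (8 - 6 + 1) + 1 = 4,
 * i.e. descriptors 6, 7, 0 and 1 are examined.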
5263 */ 5264 mutex_enter(&priv_addr->dstate_lock); 5265 if (pub_addr->hdr.dstate == VIO_DESC_DONE) { 5266 /* clear all the fields */ 5267 bzero(priv_addr->datap, priv_addr->datalen); 5268 priv_addr->datalen = 0; 5269 5270 pub_addr->hdr.dstate = VIO_DESC_FREE; 5271 pub_addr->hdr.ack = 0; 5272 5273 priv_addr->dstate = VIO_DESC_FREE; 5274 mutex_exit(&priv_addr->dstate_lock); 5275 5276 D3(vswp, "clearing descp %d : pub state " 5277 "0x%llx : priv state 0x%llx", i, 5278 pub_addr->hdr.dstate, 5279 priv_addr->dstate); 5280 5281 } else { 5282 mutex_exit(&priv_addr->dstate_lock); 5283 5284 if (dring_pkt->dring_process_state != 5285 VIO_DP_STOPPED) { 5286 DERR(vswp, "%s: descriptor %lld at pos " 5287 " 0x%llx not DONE (0x%lx)\n", 5288 __func__, i, pub_addr, 5289 pub_addr->hdr.dstate); 5290 return; 5291 } 5292 } 5293 } 5294 5295 /* 5296 * If our peer is stopping processing descriptors then 5297 * we check to make sure it has processed all the descriptors 5298 * we have updated. If not then we send it a new message 5299 * to prompt it to restart. 5300 */ 5301 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) { 5302 DTRACE_PROBE(stop_process_recv); 5303 D2(vswp, "%s(%lld): got stopping msg : %d : %d", 5304 __func__, ldcp->ldc_id, dring_pkt->start_idx, 5305 dring_pkt->end_idx); 5306 5307 /* 5308 * Check next descriptor in public section of ring. 5309 * If its marked as READY then we need to prompt our 5310 * peer to start processing the ring again. 5311 */ 5312 i = (end + 1) % len; 5313 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 5314 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 5315 5316 /* 5317 * Hold the restart lock across all of this to 5318 * make sure that its not possible for us to 5319 * decide that a msg needs to be sent in the future 5320 * but the sending code having already checked is 5321 * about to exit. 5322 */ 5323 mutex_enter(&dp->restart_lock); 5324 mutex_enter(&priv_addr->dstate_lock); 5325 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 5326 5327 mutex_exit(&priv_addr->dstate_lock); 5328 5329 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 5330 dring_pkt->tag.vio_sid = ldcp->local_session; 5331 5332 mutex_enter(&ldcp->lane_out.seq_lock); 5333 dring_pkt->seq_num = ldcp->lane_out.seq_num++; 5334 mutex_exit(&ldcp->lane_out.seq_lock); 5335 5336 dring_pkt->start_idx = (end + 1) % len; 5337 dring_pkt->end_idx = -1; 5338 5339 D2(vswp, "%s(%lld) : sending restart msg:" 5340 " %d : %d", __func__, ldcp->ldc_id, 5341 dring_pkt->start_idx, 5342 dring_pkt->end_idx); 5343 5344 vsw_send_msg(ldcp, (void *)dring_pkt, 5345 sizeof (vio_dring_msg_t)); 5346 } else { 5347 mutex_exit(&priv_addr->dstate_lock); 5348 dp->restart_reqd = B_TRUE; 5349 } 5350 mutex_exit(&dp->restart_lock); 5351 } 5352 break; 5353 5354 case VIO_SUBTYPE_NACK: 5355 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 5356 __func__, ldcp->ldc_id); 5357 /* 5358 * Something is badly wrong if we are getting NACK's 5359 * for our data pkts. So reset the channel. 5360 */ 5361 vsw_restart_handshake(ldcp); 5362 5363 break; 5364 5365 default: 5366 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 5367 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 5368 } 5369 5370 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 5371 } 5372 5373 /* 5374 * VIO_PKT_DATA (a.k.a raw data mode ) 5375 * 5376 * Note - currently not supported. Do nothing. 
5377 */ 5378 static void 5379 vsw_process_data_raw_pkt(vsw_ldc_t *ldcp, void *dpkt) 5380 { 5381 _NOTE(ARGUNUSED(dpkt)) 5382 5383 D1(NULL, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 5384 5385 DERR(NULL, "%s (%lld): currently not supported", 5386 __func__, ldcp->ldc_id); 5387 5388 D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 5389 } 5390 5391 #define SND_IBND_DESC_NACK(ldcp, pkt) \ 5392 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 5393 pkt->tag.vio_sid = ldcp->local_session; \ 5394 vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_ibnd_desc_t)); 5395 5396 /* 5397 * Process an in-band descriptor message (most likely from 5398 * OBP). 5399 */ 5400 static void 5401 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 5402 { 5403 vio_ibnd_desc_t *ibnd_desc; 5404 dring_info_t *dp = NULL; 5405 vsw_private_desc_t *priv_addr = NULL; 5406 vsw_t *vswp = ldcp->ldc_vswp; 5407 mblk_t *mp = NULL; 5408 size_t nbytes = 0; 5409 size_t off = 0; 5410 uint64_t idx = 0; 5411 uint32_t num = 1, len, datalen = 0; 5412 uint64_t ncookies = 0; 5413 int i, rv; 5414 int j = 0; 5415 5416 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5417 5418 ibnd_desc = (vio_ibnd_desc_t *)pkt; 5419 5420 switch (ibnd_desc->hdr.tag.vio_subtype) { 5421 case VIO_SUBTYPE_INFO: 5422 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 5423 5424 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 5425 return; 5426 5427 /* 5428 * Data is padded to align on an 8 byte boundary; 5429 * datalen is the actual data length, i.e. minus that 5430 * padding. 5431 */ 5432 datalen = ibnd_desc->nbytes; 5433 5434 D2(vswp, "%s(%lld): processing inband desc : " 5435 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 5436 5437 ncookies = ibnd_desc->ncookies; 5438 5439 /* 5440 * allocb(9F) returns an aligned data block. We 5441 * need to ensure that we ask ldc for an aligned 5442 * number of bytes also.
5443 */ 5444 nbytes = datalen; 5445 if (nbytes & 0x7) { 5446 off = 8 - (nbytes & 0x7); 5447 nbytes += off; 5448 } 5449 5450 mp = allocb(nbytes, BPRI_MED); 5451 if (mp == NULL) { 5452 DERR(vswp, "%s(%lld): allocb failed", 5453 __func__, ldcp->ldc_id); 5454 return; 5455 } 5456 5457 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 5458 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 5459 LDC_COPY_IN); 5460 5461 if (rv != 0) { 5462 DERR(vswp, "%s(%d): unable to copy in data from " 5463 "%d cookie(s)", __func__, 5464 ldcp->ldc_id, ncookies); 5465 freemsg(mp); 5466 return; 5467 } else { 5468 D2(vswp, "%s(%d): copied in %ld bytes using %d " 5469 "cookies", __func__, ldcp->ldc_id, nbytes, 5470 ncookies); 5471 } 5472 5473 /* point to the actual end of data */ 5474 mp->b_wptr = mp->b_rptr + datalen; 5475 5476 /* 5477 * We ACK back every in-band descriptor message we process 5478 */ 5479 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 5480 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 5481 vsw_send_msg(ldcp, (void *)ibnd_desc, 5482 sizeof (vio_ibnd_desc_t)); 5483 5484 /* send the packet to be switched */ 5485 vsw_switch_frame(vswp, mp, VSW_VNETPORT, 5486 ldcp->ldc_port, NULL); 5487 5488 break; 5489 5490 case VIO_SUBTYPE_ACK: 5491 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 5492 5493 /* Verify the ACK is valid */ 5494 idx = ibnd_desc->hdr.desc_handle; 5495 5496 if (idx >= VSW_RING_NUM_EL) { 5497 cmn_err(CE_WARN, "%s: corrupted ACK received " 5498 "(idx %ld)", __func__, idx); 5499 return; 5500 } 5501 5502 if ((dp = ldcp->lane_out.dringp) == NULL) { 5503 DERR(vswp, "%s: no dring found", __func__); 5504 return; 5505 } 5506 5507 len = dp->num_descriptors; 5508 /* 5509 * If the descriptor we are being ACK'ed for is not the 5510 * one we expected, then pkts were lost somewhere, either 5511 * when we tried to send a msg, or a previous ACK msg from 5512 * our peer. In either case we now reclaim the descriptors 5513 * in the range from the last ACK we received up to the 5514 * current ACK. 5515 */ 5516 if (idx != dp->last_ack_recv) { 5517 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 5518 __func__, dp->last_ack_recv, idx); 5519 num = idx >= dp->last_ack_recv ? 5520 idx - dp->last_ack_recv + 1: 5521 (len - dp->last_ack_recv + 1) + idx; 5522 } 5523 5524 /* 5525 * When we sent the in-band message to our peer we 5526 * marked the copy in our private ring as READY. We now 5527 * check that the descriptor we are being ACK'ed for is in 5528 * fact READY, i.e. it is one we have shared with our peer. 5529 * 5530 * If it's not, we flag an error but still reset the 5531 * descriptor back to FREE.
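 *
 * As a worked example of the reclaim arithmetic above: with a
 * ring of 512 descriptors, last_ack_recv = 500 and an ACK for
 * idx = 3, num = (512 - 500 + 1) + 3 = 16, i.e. descriptors
 * 500 .. 511 and 0 .. 3 are reclaimed below.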
5532 */ 5533 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 5534 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 5535 mutex_enter(&priv_addr->dstate_lock); 5536 if (priv_addr->dstate != VIO_DESC_READY) { 5537 DERR(vswp, "%s: (%ld) desc at index %ld not " 5538 "READY (0x%lx)", __func__, 5539 ldcp->ldc_id, idx, priv_addr->dstate); 5540 DERR(vswp, "%s: bound %d: ncookies %ld : " 5541 "datalen %ld", __func__, 5542 priv_addr->bound, priv_addr->ncookies, 5543 priv_addr->datalen); 5544 } 5545 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 5546 ldcp->ldc_id, idx); 5547 /* release resources associated with sent msg */ 5548 bzero(priv_addr->datap, priv_addr->datalen); 5549 priv_addr->datalen = 0; 5550 priv_addr->dstate = VIO_DESC_FREE; 5551 mutex_exit(&priv_addr->dstate_lock); 5552 } 5553 /* update to next expected value */ 5554 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 5555 5556 break; 5557 5558 case VIO_SUBTYPE_NACK: 5559 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 5560 5561 /* 5562 * We should only get a NACK if our peer doesn't like 5563 * something about a message we have sent it. If this 5564 * happens we just release the resources associated with 5565 * the message. (We are relying on higher layers to decide 5566 * whether or not to resend.) 5567 */ 5568 5569 /* limit check */ 5570 idx = ibnd_desc->hdr.desc_handle; 5571 5572 if (idx >= VSW_RING_NUM_EL) { 5573 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 5574 __func__, idx); 5575 return; 5576 } 5577 5578 if ((dp = ldcp->lane_out.dringp) == NULL) { 5579 DERR(vswp, "%s: no dring found", __func__); 5580 return; 5581 } 5582 5583 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 5584 5585 /* move to correct location in ring */ 5586 priv_addr += idx; 5587 5588 /* release resources associated with sent msg */ 5589 mutex_enter(&priv_addr->dstate_lock); 5590 bzero(priv_addr->datap, priv_addr->datalen); 5591 priv_addr->datalen = 0; 5592 priv_addr->dstate = VIO_DESC_FREE; 5593 mutex_exit(&priv_addr->dstate_lock); 5594 5595 break; 5596 5597 default: 5598 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 5599 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 5600 } 5601 5602 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 5603 } 5604 5605 static void 5606 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t tag) 5607 { 5608 _NOTE(ARGUNUSED(epkt)) 5609 5610 vsw_t *vswp = ldcp->ldc_vswp; 5611 uint16_t env = tag.vio_subtype_env; 5612 5613 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 5614 5615 /* 5616 * Error vio_subtypes have yet to be defined. So for 5617 * the moment we can't do anything. 5618 */ 5619 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 5620 5621 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 5622 } 5623 5624 /* 5625 * Switch the given ethernet frame when operating in layer 2 mode. 5626 * 5627 * vswp: pointer to the vsw instance 5628 * mp: pointer to chain of ethernet frame(s) to be switched 5629 * caller: identifies the source of this frame as: 5630 * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 5631 * 2. VSW_PHYSDEV - the physical ethernet device 5632 * 3. VSW_LOCALDEV - vsw configured as a virtual interface 5633 * arg: argument provided by the caller. 5634 * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 5635 * 2. for PHYSDEV - NULL 5636 * 3.
for LOCALDEV - pointer to this vsw_t (self) 5637 */ 5638 void 5639 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 5640 vsw_port_t *arg, mac_resource_handle_t mrh) 5641 { 5642 struct ether_header *ehp; 5643 vsw_port_t *port = NULL; 5644 mblk_t *bp, *ret_m; 5645 mblk_t *nmp = NULL; 5646 vsw_port_list_t *plist = &vswp->plist; 5647 5648 D1(vswp, "%s: enter (caller %d)", __func__, caller); 5649 5650 /* 5651 * PERF: rather than breaking up the chain here, scan it 5652 * to find all mblks heading to same destination and then 5653 * pass that sub-chain to the lower transmit functions. 5654 */ 5655 5656 /* process the chain of packets */ 5657 bp = mp; 5658 while (bp) { 5659 mp = bp; 5660 bp = bp->b_next; 5661 mp->b_next = mp->b_prev = NULL; 5662 ehp = (struct ether_header *)mp->b_rptr; 5663 5664 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 5665 __func__, MBLKSIZE(mp), MBLKL(mp)); 5666 5667 READ_ENTER(&vswp->if_lockrw); 5668 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 5669 /* 5670 * If destination is VSW_LOCALDEV (vsw as an eth 5671 * interface) and if the device is up & running, 5672 * send the packet up the stack on this host. 5673 * If the virtual interface is down, drop the packet. 5674 */ 5675 if (caller != VSW_LOCALDEV) { 5676 if (vswp->if_state & VSW_IF_UP) { 5677 RW_EXIT(&vswp->if_lockrw); 5678 mac_rx(vswp->if_mh, mrh, mp); 5679 } else { 5680 RW_EXIT(&vswp->if_lockrw); 5681 /* Interface down, drop pkt */ 5682 freemsg(mp); 5683 } 5684 } else { 5685 RW_EXIT(&vswp->if_lockrw); 5686 freemsg(mp); 5687 } 5688 continue; 5689 } 5690 RW_EXIT(&vswp->if_lockrw); 5691 5692 READ_ENTER(&plist->lockrw); 5693 port = vsw_lookup_fdb(vswp, ehp); 5694 if (port) { 5695 /* 5696 * Mark the port as in-use. 5697 */ 5698 mutex_enter(&port->ref_lock); 5699 port->ref_cnt++; 5700 mutex_exit(&port->ref_lock); 5701 RW_EXIT(&plist->lockrw); 5702 5703 /* 5704 * If plumbed and in promisc mode then copy msg 5705 * and send up the stack. 5706 */ 5707 READ_ENTER(&vswp->if_lockrw); 5708 if (VSW_U_P(vswp->if_state)) { 5709 RW_EXIT(&vswp->if_lockrw); 5710 nmp = copymsg(mp); 5711 if (nmp) 5712 mac_rx(vswp->if_mh, mrh, nmp); 5713 } else { 5714 RW_EXIT(&vswp->if_lockrw); 5715 } 5716 5717 /* 5718 * If the destination is in FDB, the packet 5719 * should be forwarded to the corresponding 5720 * vsw_port (connected to a vnet device - 5721 * VSW_VNETPORT) 5722 */ 5723 (void) vsw_portsend(port, mp); 5724 5725 /* 5726 * Decrement use count in port and check if 5727 * should wake delete thread. 5728 */ 5729 mutex_enter(&port->ref_lock); 5730 port->ref_cnt--; 5731 if (port->ref_cnt == 0) 5732 cv_signal(&port->ref_cv); 5733 mutex_exit(&port->ref_lock); 5734 } else { 5735 RW_EXIT(&plist->lockrw); 5736 /* 5737 * Destination not in FDB. 5738 * 5739 * If the destination is broadcast or 5740 * multicast forward the packet to all 5741 * (VNETPORTs, PHYSDEV, LOCALDEV), 5742 * except the caller. 5743 */ 5744 if (IS_BROADCAST(ehp)) { 5745 D3(vswp, "%s: BROADCAST pkt", __func__); 5746 (void) vsw_forward_all(vswp, mp, 5747 caller, arg); 5748 } else if (IS_MULTICAST(ehp)) { 5749 D3(vswp, "%s: MULTICAST pkt", __func__); 5750 (void) vsw_forward_grp(vswp, mp, 5751 caller, arg); 5752 } else { 5753 /* 5754 * If the destination is unicast, and came 5755 * from either a logical network device or 5756 * the switch itself when it is plumbed, then 5757 * send it out on the physical device and also 5758 * up the stack if the logical interface is 5759 * in promiscuous mode.
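 *
 * In summary, a unicast miss in the FDB is handled as
 * follows (layer 2 mode):
 *
 *	caller		action
 *	VSW_VNETPORT	copy up stack if promisc, then out phys dev
 *	VSW_PHYSDEV	up stack if promisc, else drop
 *	VSW_LOCALDEV	out phys dev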
5760 * 5761 * NOTE: The assumption here is that if we 5762 * cannot find the destination in our fdb, it's 5763 * a unicast address that came from either a 5764 * vnet or down the stack (when plumbed), and 5765 * so must be destined for an ethernet device 5766 * outside our ldoms. 5767 */ 5768 if (caller == VSW_VNETPORT) { 5769 READ_ENTER(&vswp->if_lockrw); 5770 if (VSW_U_P(vswp->if_state)) { 5771 RW_EXIT(&vswp->if_lockrw); 5772 nmp = copymsg(mp); 5773 if (nmp) 5774 mac_rx(vswp->if_mh, 5775 mrh, nmp); 5776 } else { 5777 RW_EXIT(&vswp->if_lockrw); 5778 } 5779 if ((ret_m = vsw_tx_msg(vswp, mp)) 5780 != NULL) { 5781 DERR(vswp, "%s: drop mblks to " 5782 "phys dev", __func__); 5783 freemsg(ret_m); 5784 } 5785 5786 } else if (caller == VSW_PHYSDEV) { 5787 /* 5788 * Pkt seen because card in promisc 5789 * mode. Send up stack if plumbed in 5790 * promisc mode, else drop it. 5791 */ 5792 READ_ENTER(&vswp->if_lockrw); 5793 if (VSW_U_P(vswp->if_state)) { 5794 RW_EXIT(&vswp->if_lockrw); 5795 mac_rx(vswp->if_mh, mrh, mp); 5796 } else { 5797 RW_EXIT(&vswp->if_lockrw); 5798 freemsg(mp); 5799 } 5800 5801 } else if (caller == VSW_LOCALDEV) { 5802 /* 5803 * Pkt came down the stack, send out 5804 * over physical device. 5805 */ 5806 if ((ret_m = vsw_tx_msg(vswp, mp)) 5807 != NULL) { 5808 DERR(vswp, "%s: drop mblks to " 5809 "phys dev", __func__); 5810 freemsg(ret_m); 5811 } 5812 } 5813 } 5814 } 5815 } 5816 D1(vswp, "%s: exit\n", __func__); 5817 } 5818 5819 /* 5820 * Switch ethernet frame when in layer 3 mode (i.e. using IP 5821 * layer to do the routing). 5822 * 5823 * There is a large amount of overlap between this function and 5824 * vsw_switch_l2_frame. At some stage we need to revisit and refactor 5825 * both these functions. 5826 */ 5827 void 5828 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 5829 vsw_port_t *arg, mac_resource_handle_t mrh) 5830 { 5831 struct ether_header *ehp; 5832 vsw_port_t *port = NULL; 5833 mblk_t *bp = NULL; 5834 vsw_port_list_t *plist = &vswp->plist; 5835 5836 D1(vswp, "%s: enter (caller %d)", __func__, caller); 5837 5838 /* 5839 * In layer 3 mode we should only ever be switching packets 5840 * between the IP layer and vnet devices. So make sure that's 5841 * who is invoking us. 5842 */ 5843 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) { 5844 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller); 5845 freemsgchain(mp); 5846 return; 5847 } 5848 5849 /* process the chain of packets */ 5850 bp = mp; 5851 while (bp) { 5852 mp = bp; 5853 bp = bp->b_next; 5854 mp->b_next = mp->b_prev = NULL; 5855 ehp = (struct ether_header *)mp->b_rptr; 5856 5857 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 5858 __func__, MBLKSIZE(mp), MBLKL(mp)); 5859 5860 READ_ENTER(&plist->lockrw); 5861 port = vsw_lookup_fdb(vswp, ehp); 5862 if (port) { 5863 /* 5864 * Mark port as in-use. 5865 */ 5866 mutex_enter(&port->ref_lock); 5867 port->ref_cnt++; 5868 mutex_exit(&port->ref_lock); 5869 RW_EXIT(&plist->lockrw); 5870 5871 D2(vswp, "%s: sending to target port", __func__); 5872 (void) vsw_portsend(port, mp); 5873 5874 /* 5875 * Finished with port so decrement ref count and 5876 * check if should wake delete thread.
5877 */ 5878 mutex_enter(&port->ref_lock); 5879 port->ref_cnt--; 5880 if (port->ref_cnt == 0) 5881 cv_signal(&port->ref_cv); 5882 mutex_exit(&port->ref_lock); 5883 } else { 5884 RW_EXIT(&plist->lockrw); 5885 /* 5886 * Destination not in FDB. 5887 * 5888 * If the destination is broadcast or 5889 * multicast forward the packet to all 5890 * (VNETPORTs, PHYSDEV, LOCALDEV), 5891 * except the caller. 5892 */ 5893 if (IS_BROADCAST(ehp)) { 5894 D2(vswp, "%s: BROADCAST pkt", __func__); 5895 (void) vsw_forward_all(vswp, mp, 5896 caller, arg); 5897 } else if (IS_MULTICAST(ehp)) { 5898 D2(vswp, "%s: MULTICAST pkt", __func__); 5899 (void) vsw_forward_grp(vswp, mp, 5900 caller, arg); 5901 } else { 5902 /* 5903 * Unicast pkt from vnet that we don't have 5904 * an FDB entry for, so must be destined for 5905 * the outside world. Attempt to send up to the 5906 * IP layer to allow it to deal with it. 5907 */ 5908 if (caller == VSW_VNETPORT) { 5909 READ_ENTER(&vswp->if_lockrw); 5910 if (vswp->if_state & VSW_IF_UP) { 5911 RW_EXIT(&vswp->if_lockrw); 5912 D2(vswp, "%s: sending up", 5913 __func__); 5914 mac_rx(vswp->if_mh, mrh, mp); 5915 } else { 5916 RW_EXIT(&vswp->if_lockrw); 5917 /* Interface down, drop pkt */ 5918 D2(vswp, "%s I/F down", 5919 __func__); 5920 freemsg(mp); 5921 } 5922 } 5923 } 5924 } 5925 } 5926 5927 D1(vswp, "%s: exit", __func__); 5928 } 5929 5930 /* 5931 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV), 5932 * except the caller (port on which frame arrived). 5933 */ 5934 static int 5935 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 5936 { 5937 vsw_port_list_t *plist = &vswp->plist; 5938 vsw_port_t *portp; 5939 mblk_t *nmp = NULL; 5940 mblk_t *ret_m = NULL; 5941 int skip_port = 0; 5942 5943 D1(vswp, "vsw_forward_all: enter\n"); 5944 5945 /* 5946 * Broadcast message from inside ldoms so send to outside 5947 * world if in either of layer 2 modes. 5948 */ 5949 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 5950 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 5951 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 5952 5953 nmp = dupmsg(mp); 5954 if (nmp) { 5955 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 5956 DERR(vswp, "%s: dropping pkt(s) " 5957 "consisting of %ld bytes of data for" 5958 " physical device", __func__, MBLKL(ret_m)); 5959 freemsg(ret_m); 5960 } 5961 } 5962 } 5963 5964 if (caller == VSW_VNETPORT) 5965 skip_port = 1; 5966 5967 /* 5968 * Broadcast message from other vnet (layer 2 or 3) or outside 5969 * world (layer 2 only), send up stack if plumbed. 5970 */ 5971 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 5972 READ_ENTER(&vswp->if_lockrw); 5973 if (vswp->if_state & VSW_IF_UP) { 5974 RW_EXIT(&vswp->if_lockrw); 5975 nmp = copymsg(mp); 5976 if (nmp) 5977 mac_rx(vswp->if_mh, NULL, nmp); 5978 } else { 5979 RW_EXIT(&vswp->if_lockrw); 5980 } 5981 } 5982 5983 /* send it to all VNETPORTs */ 5984 READ_ENTER(&plist->lockrw); 5985 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 5986 D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 5987 /* 5988 * Caution! - don't reorder these two checks as arg 5989 * will be NULL if the caller is PHYSDEV. skip_port is 5990 * only set if caller is VNETPORT.
5991 */ 5992 if ((skip_port) && (portp == arg)) 5993 continue; 5994 else { 5995 nmp = dupmsg(mp); 5996 if (nmp) { 5997 (void) vsw_portsend(portp, nmp); 5998 } else { 5999 DERR(vswp, "vsw_forward_all: nmp NULL"); 6000 } 6001 } 6002 } 6003 RW_EXIT(&plist->lockrw); 6004 6005 freemsg(mp); 6006 6007 D1(vswp, "vsw_forward_all: exit\n"); 6008 return (0); 6009 } 6010 6011 /* 6012 * Forward pkts to any devices or interfaces which have registered 6013 * an interest in them (i.e. multicast groups). 6014 */ 6015 static int 6016 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 6017 { 6018 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 6019 mfdb_ent_t *entp = NULL; 6020 mfdb_ent_t *tpp = NULL; 6021 vsw_port_t *port; 6022 uint64_t key = 0; 6023 mblk_t *nmp = NULL; 6024 mblk_t *ret_m = NULL; 6025 boolean_t check_if = B_TRUE; 6026 6027 /* 6028 * Convert address to hash table key 6029 */ 6030 KEY_HASH(key, ehp->ether_dhost); 6031 6032 D1(vswp, "%s: key 0x%llx", __func__, key); 6033 6034 /* 6035 * If pkt came from either a vnet or down the stack (if we are 6036 * plumbed) and we are in layer 2 mode, then we send the pkt out 6037 * over the physical adapter, and then check to see if any other 6038 * vnets are interested in it. 6039 */ 6040 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 6041 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 6042 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 6043 nmp = dupmsg(mp); 6044 if (nmp) { 6045 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 6046 DERR(vswp, "%s: dropping pkt(s) " 6047 "consisting of %ld bytes of " 6048 "data for physical device", 6049 __func__, MBLKL(ret_m)); 6050 freemsg(ret_m); 6051 } 6052 } 6053 } 6054 6055 READ_ENTER(&vswp->mfdbrw); 6056 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 6057 (mod_hash_val_t *)&entp) != 0) { 6058 D3(vswp, "%s: no table entry found for addr 0x%llx", 6059 __func__, key); 6060 } else { 6061 /* 6062 * Send to list of devices associated with this address... 6063 */ 6064 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 6065 6066 /* don't send to ourselves */ 6067 if ((caller == VSW_VNETPORT) && 6068 (tpp->d_addr == (void *)arg)) { 6069 port = (vsw_port_t *)tpp->d_addr; 6070 D3(vswp, "%s: not sending to ourselves" 6071 " : port %d", __func__, 6072 port->p_instance); 6073 continue; 6074 6075 } else if ((caller == VSW_LOCALDEV) && 6076 (tpp->d_type == VSW_LOCALDEV)) { 6077 D3(vswp, "%s: not sending back up stack", 6078 __func__); 6079 continue; 6080 } 6081 6082 if (tpp->d_type == VSW_VNETPORT) { 6083 port = (vsw_port_t *)tpp->d_addr; 6084 D3(vswp, "%s: sending to port %ld for " 6085 " addr 0x%llx", __func__, 6086 port->p_instance, key); 6087 6088 nmp = dupmsg(mp); 6089 if (nmp) 6090 (void) vsw_portsend(port, nmp); 6091 } else { 6092 if (vswp->if_state & VSW_IF_UP) { 6093 nmp = copymsg(mp); 6094 if (nmp) 6095 mac_rx(vswp->if_mh, NULL, nmp); 6096 check_if = B_FALSE; 6097 D3(vswp, "%s: sending up stack" 6098 " for addr 0x%llx", __func__, 6099 key); 6100 } 6101 } 6102 } 6103 } 6104 6105 RW_EXIT(&vswp->mfdbrw); 6106 6107 /* 6108 * If the pkt came from either a vnet or from the physical device, 6109 * and if we haven't already sent the pkt up the stack then we 6110 * check now if we can/should (i.e. the interface is plumbed 6111 * and in promisc mode).
6112 */ 6113 if ((check_if) && 6114 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 6115 READ_ENTER(&vswp->if_lockrw); 6116 if (VSW_U_P(vswp->if_state)) { 6117 RW_EXIT(&vswp->if_lockrw); 6118 D3(vswp, "%s: (caller %d) finally sending up stack" 6119 " for addr 0x%llx", __func__, caller, key); 6120 nmp = copymsg(mp); 6121 if (nmp) 6122 mac_rx(vswp->if_mh, NULL, nmp); 6123 } else { 6124 RW_EXIT(&vswp->if_lockrw); 6125 } 6126 } 6127 6128 freemsg(mp); 6129 6130 D1(vswp, "%s: exit", __func__); 6131 6132 return (0); 6133 } 6134 6135 /* transmit the packet over the given port */ 6136 static int 6137 vsw_portsend(vsw_port_t *port, mblk_t *mp) 6138 { 6139 vsw_ldc_list_t *ldcl = &port->p_ldclist; 6140 vsw_ldc_t *ldcp; 6141 int status = 0; 6142 6143 6144 READ_ENTER(&ldcl->lockrw); 6145 /* 6146 * Note for now, we have a single channel. 6147 */ 6148 ldcp = ldcl->head; 6149 if (ldcp == NULL) { 6150 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 6151 freemsg(mp); 6152 RW_EXIT(&ldcl->lockrw); 6153 return (1); 6154 } 6155 6156 /* 6157 * Send the message out using the appropriate 6158 * transmit function which will free mblock when it 6159 * is finished with it. 6160 */ 6161 mutex_enter(&port->tx_lock); 6162 if (port->transmit != NULL) 6163 status = (*port->transmit)(ldcp, mp); 6164 else { 6165 freemsg(mp); 6166 } 6167 mutex_exit(&port->tx_lock); 6168 6169 RW_EXIT(&ldcl->lockrw); 6170 6171 return (status); 6172 } 6173 6174 /* 6175 * Send packet out via descriptor ring to a logical device. 6176 */ 6177 static int 6178 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 6179 { 6180 vio_dring_msg_t dring_pkt; 6181 dring_info_t *dp = NULL; 6182 vsw_private_desc_t *priv_desc = NULL; 6183 vnet_public_desc_t *pub = NULL; 6184 vsw_t *vswp = ldcp->ldc_vswp; 6185 mblk_t *bp; 6186 size_t n, size; 6187 caddr_t bufp; 6188 int idx; 6189 int status = LDC_TX_SUCCESS; 6190 6191 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 6192 6193 /* TODO: make test a macro */ 6194 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 6195 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 6196 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 6197 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 6198 ldcp->lane_out.lstate); 6199 freemsg(mp); 6200 return (LDC_TX_FAILURE); 6201 } 6202 6203 /* 6204 * Note - using first ring only, this may change 6205 * in the future. 6206 */ 6207 if ((dp = ldcp->lane_out.dringp) == NULL) { 6208 DERR(vswp, "%s(%lld): no dring for outbound lane on" 6209 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 6210 freemsg(mp); 6211 return (LDC_TX_FAILURE); 6212 } 6213 6214 size = msgsize(mp); 6215 if (size > (size_t)ETHERMAX) { 6216 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 6217 ldcp->ldc_id, size); 6218 freemsg(mp); 6219 return (LDC_TX_FAILURE); 6220 } 6221 6222 /* 6223 * Find a free descriptor 6224 * 6225 * Note: for the moment we are assuming that we will only 6226 * have one dring going from the switch to each of its 6227 * peers. This may change in the future. 
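 *
 * The private descriptors below form the producer side of the
 * ring; each one cycles through (sketch):
 *
 *	VIO_DESC_FREE -> VIO_DESC_READY		(filled here)
 *	VIO_DESC_READY -> VIO_DESC_DONE		(consumed by peer)
 *	VIO_DESC_DONE -> VIO_DESC_FREE		(reclaimed on ACK)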
6228 */ 6229 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 6230 D2(vswp, "%s(%lld): no descriptor available for ring " 6231 "at 0x%llx", __func__, ldcp->ldc_id, dp); 6232 6233 /* nothing more we can do */ 6234 status = LDC_TX_NORESOURCES; 6235 goto vsw_dringsend_free_exit; 6236 } else { 6237 D2(vswp, "%s(%lld): free private descriptor found at pos " 6238 "%ld addr 0x%llx\n", __func__, ldcp->ldc_id, idx, 6239 priv_desc); 6240 } 6241 6242 /* copy data into the descriptor */ 6243 bufp = priv_desc->datap; 6244 bufp += VNET_IPALIGN; 6245 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 6246 n = MBLKL(bp); 6247 bcopy(bp->b_rptr, bufp, n); 6248 bufp += n; 6249 } 6250 6251 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 6252 6253 pub = priv_desc->descp; 6254 pub->nbytes = priv_desc->datalen; 6255 6256 mutex_enter(&priv_desc->dstate_lock); 6257 pub->hdr.dstate = VIO_DESC_READY; 6258 mutex_exit(&priv_desc->dstate_lock); 6259 6260 /* 6261 * Determine whether or not we need to send a message to our 6262 * peer prompting them to read our newly updated descriptor(s). 6263 */ 6264 mutex_enter(&dp->restart_lock); 6265 if (dp->restart_reqd) { 6266 dp->restart_reqd = B_FALSE; 6267 mutex_exit(&dp->restart_lock); 6268 6269 /* 6270 * Send a vio_dring_msg to peer to prompt them to read 6271 * the updated descriptor ring. 6272 */ 6273 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 6274 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 6275 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 6276 dring_pkt.tag.vio_sid = ldcp->local_session; 6277 6278 /* Note - for now using first ring */ 6279 dring_pkt.dring_ident = dp->ident; 6280 6281 mutex_enter(&ldcp->lane_out.seq_lock); 6282 dring_pkt.seq_num = ldcp->lane_out.seq_num++; 6283 mutex_exit(&ldcp->lane_out.seq_lock); 6284 6285 /* 6286 * If last_ack_recv is -1 then we know we've not 6287 * received any ack's yet, so this must be the first 6288 * msg sent, so set the start to the beginning of the ring. 6289 */ 6290 mutex_enter(&dp->dlock); 6291 if (dp->last_ack_recv == -1) { 6292 dring_pkt.start_idx = 0; 6293 } else { 6294 dring_pkt.start_idx = (dp->last_ack_recv + 1) % 6295 dp->num_descriptors; 6296 } 6297 dring_pkt.end_idx = -1; 6298 mutex_exit(&dp->dlock); 6299 6300 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 6301 ldcp->ldc_id, dp, dring_pkt.dring_ident); 6302 D3(vswp, "%s(%lld): start %lld : end %lld : seq %lld\n", 6303 __func__, ldcp->ldc_id, dring_pkt.start_idx, 6304 dring_pkt.end_idx, dring_pkt.seq_num); 6305 6306 vsw_send_msg(ldcp, (void *)&dring_pkt, 6307 sizeof (vio_dring_msg_t)); 6308 } else { 6309 mutex_exit(&dp->restart_lock); 6310 D2(vswp, "%s(%lld): updating descp %d", __func__, 6311 ldcp->ldc_id, idx); 6312 } 6313 6314 vsw_dringsend_free_exit: 6315 6316 /* free the message block */ 6317 freemsg(mp); 6318 6319 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 6320 return (status); 6321 } 6322 6323 /* 6324 * Send an in-band descriptor message over ldc.
/*
 * Send an in-band descriptor message over ldc.
 */
static int
vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp)
{
	vsw_t			*vswp = ldcp->ldc_vswp;
	vio_ibnd_desc_t		ibnd_msg;
	vsw_private_desc_t	*priv_desc = NULL;
	dring_info_t		*dp = NULL;
	size_t			n, size = 0;
	caddr_t			bufp;
	mblk_t			*bp;
	int			idx, i;
	int			status = LDC_TX_SUCCESS;
	static int		warn_msg = 1;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	ASSERT(mp != NULL);

	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
		DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt",
		    __func__, ldcp->ldc_id, ldcp->ldc_status,
		    ldcp->lane_out.lstate);
		freemsg(mp);
		return (LDC_TX_FAILURE);
	}

	/*
	 * Only expect a single dring to exist, which we use
	 * as an internal buffer, rather than a transfer channel.
	 */
	if ((dp = ldcp->lane_out.dringp) == NULL) {
		DERR(vswp, "%s(%lld): no dring for outbound lane",
		    __func__, ldcp->ldc_id);
		DERR(vswp, "%s(%lld) status(%d) state (0x%llx)",
		    __func__, ldcp->ldc_id, ldcp->ldc_status,
		    ldcp->lane_out.lstate);
		freemsg(mp);
		return (LDC_TX_FAILURE);
	}

	size = msgsize(mp);
	if (size > (size_t)ETHERMAX) {
		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
		    ldcp->ldc_id, size);
		freemsg(mp);
		return (LDC_TX_FAILURE);
	}

	/*
	 * Find a free descriptor in our buffer ring.
	 */
	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
		if (warn_msg) {
			DERR(vswp, "%s(%lld): no descriptor available "
			    "for ring at 0x%llx", __func__,
			    ldcp->ldc_id, dp);
			warn_msg = 0;
		}

		/* nothing more we can do */
		status = LDC_TX_NORESOURCES;
		goto vsw_descrsend_free_exit;
	} else {
		D2(vswp, "%s(%lld): free private descriptor found at pos "
		    "%ld addr 0x%llx\n", __func__, ldcp->ldc_id, idx,
		    priv_desc);
		warn_msg = 1;
	}

	/* copy data into the descriptor */
	bufp = priv_desc->datap;
	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
		n = MBLKL(bp);
		bcopy(bp->b_rptr, bufp, n);
		bufp += n;
	}

	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;

	/* create and send the in-band descp msg */
	ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA;
	ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO;
	ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA;
	ibnd_msg.hdr.tag.vio_sid = ldcp->local_session;

	mutex_enter(&ldcp->lane_out.seq_lock);
	ibnd_msg.hdr.seq_num = ldcp->lane_out.seq_num++;
	mutex_exit(&ldcp->lane_out.seq_lock);

	/*
	 * Copy the mem cookies describing the data from the
	 * private region of the descriptor ring into the inband
	 * descriptor.
	 */
	for (i = 0; i < priv_desc->ncookies; i++) {
		bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i],
		    sizeof (ldc_mem_cookie_t));
	}

	ibnd_msg.hdr.desc_handle = idx;
	ibnd_msg.ncookies = priv_desc->ncookies;
	ibnd_msg.nbytes = size;

	vsw_send_msg(ldcp, (void *)&ibnd_msg, sizeof (vio_ibnd_desc_t));

vsw_descrsend_free_exit:

	/* free the allocated message blocks */
	freemsg(mp);

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
	return (status);
}
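/*
 * Handshake message helpers. As reflected in the lane state flags set
 * below, the exchange with a peer proceeds in order through version
 * negotiation (vsw_send_ver), attribute exchange (vsw_send_attr),
 * dring registration (vsw_send_dring_info) and finally RDX
 * (vsw_send_rdx); each helper records the matching VSW_*_INFO_SENT
 * flag in the outbound lane so the handshake code can track which
 * milestone has been reached.
 */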
static void
vsw_send_ver(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	lane_t		*lp = &ldcp->lane_out;
	vio_ver_msg_t	ver_msg;

	D1(vswp, "%s enter", __func__);

	ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	ver_msg.tag.vio_subtype_env = VIO_VER_INFO;
	ver_msg.tag.vio_sid = ldcp->local_session;

	ver_msg.ver_major = vsw_versions[0].ver_major;
	ver_msg.ver_minor = vsw_versions[0].ver_minor;
	ver_msg.dev_class = VDEV_NETWORK_SWITCH;

	lp->lstate |= VSW_VER_INFO_SENT;
	lp->ver_major = ver_msg.ver_major;
	lp->ver_minor = ver_msg.ver_minor;

	DUMP_TAG(ver_msg.tag);

	vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t));

	D1(vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
}

static void
vsw_send_attr(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	lane_t		*lp = &ldcp->lane_out;
	vnet_attr_msg_t	attr_msg;

	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);

	/*
	 * Subtype is set to INFO by default.
	 */
	attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO;
	attr_msg.tag.vio_sid = ldcp->local_session;

	/* payload copied from default settings for lane */
	attr_msg.mtu = lp->mtu;
	attr_msg.addr_type = lp->addr_type;
	attr_msg.xfer_mode = lp->xfer_mode;
	attr_msg.ack_freq = lp->ack_freq;

	READ_ENTER(&vswp->if_lockrw);
	bcopy(&(vswp->if_addr), &(attr_msg.addr), ETHERADDRL);
	RW_EXIT(&vswp->if_lockrw);

	ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT;

	DUMP_TAG(attr_msg.tag);

	vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t));

	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
}
/*
 * Create dring info msg (which also results in the creation of
 * a dring).
 */
static vio_dring_reg_msg_t *
vsw_create_dring_info_pkt(vsw_ldc_t *ldcp)
{
	vio_dring_reg_msg_t	*mp;
	dring_info_t		*dp;
	vsw_t			*vswp = ldcp->ldc_vswp;

	D1(vswp, "vsw_create_dring_info_pkt enter\n");

	/*
	 * If we can't create a dring, obviously no point sending
	 * a message.
	 */
	if ((dp = vsw_create_dring(ldcp)) == NULL)
		return (NULL);

	mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP);

	mp->tag.vio_msgtype = VIO_TYPE_CTRL;
	mp->tag.vio_subtype = VIO_SUBTYPE_INFO;
	mp->tag.vio_subtype_env = VIO_DRING_REG;
	mp->tag.vio_sid = ldcp->local_session;

	/* payload */
	mp->num_descriptors = dp->num_descriptors;
	mp->descriptor_size = dp->descriptor_size;
	mp->options = dp->options;
	mp->ncookies = dp->ncookies;
	bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t));

	mp->dring_ident = 0;

	D1(vswp, "vsw_create_dring_info_pkt exit\n");

	return (mp);
}

static void
vsw_send_dring_info(vsw_ldc_t *ldcp)
{
	vio_dring_reg_msg_t	*dring_msg;
	vsw_t			*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id);

	dring_msg = vsw_create_dring_info_pkt(ldcp);
	if (dring_msg == NULL) {
		cmn_err(CE_WARN, "vsw_send_dring_info: error creating msg");
		return;
	}

	ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT;

	DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg);

	vsw_send_msg(ldcp, dring_msg, sizeof (vio_dring_reg_msg_t));

	kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t));

	D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id);
}

static void
vsw_send_rdx(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vio_rdx_msg_t	rdx_msg;

	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);

	rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	rdx_msg.tag.vio_subtype_env = VIO_RDX;
	rdx_msg.tag.vio_sid = ldcp->local_session;

	ldcp->lane_out.lstate |= VSW_RDX_INFO_SENT;

	DUMP_TAG(rdx_msg.tag);

	vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t));

	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
}

/*
 * Generic routine to send message out over ldc channel.
 */
static void
vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size)
{
	int		rv;
	int		retries = vsw_wretries;	/* local copy of tunable */
	size_t		msglen = size;
	vio_msg_tag_t	*tag = (vio_msg_tag_t *)msgp;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes",
	    ldcp->ldc_id, size);

	D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype);
	D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype);
	D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env);

	/*
	 * Retry on EWOULDBLOCK, decrementing a local copy of the
	 * retry count so the module-wide vsw_wretries tunable is
	 * not permanently consumed.
	 */
	mutex_enter(&ldcp->ldc_txlock);
	do {
		msglen = size;
		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen);
	} while (rv == EWOULDBLOCK && --retries > 0);
	mutex_exit(&ldcp->ldc_txlock);

	if ((rv != 0) || (msglen != size)) {
		DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) "
		    "rv(%d) size (%d) msglen(%d)\n", ldcp->ldc_id,
		    rv, size, msglen);
	}

	D1(vswp, "vsw_send_msg (%lld) exit : sent %d bytes",
	    ldcp->ldc_id, msglen);
}
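/*
 * Forwarding database (FDB) routines. The FDB is a mod_hash table
 * keyed by a 64-bit value derived from the 48-bit MAC address via
 * KEY_HASH(); the value stored against each key is the vsw_port_t
 * that owns the address.
 */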
/*
 * Add an entry into FDB, for the given mac address and port_id.
 * Returns 0 on success, 1 on failure.
 *
 * Lock protecting FDB must be held by calling process.
 */
static int
vsw_add_fdb(vsw_t *vswp, vsw_port_t *port)
{
	uint64_t	addr = 0;

	D1(vswp, "%s: enter", __func__);

	KEY_HASH(addr, port->p_macaddr);

	D2(vswp, "%s: key = 0x%llx", __func__, addr);

	/*
	 * Note: duplicate keys will be rejected by mod_hash.
	 */
	if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)addr,
	    (mod_hash_val_t)port) != 0) {
		DERR(vswp, "%s: unable to add entry into fdb.", __func__);
		return (1);
	}

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Remove an entry from FDB.
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_del_fdb(vsw_t *vswp, vsw_port_t *port)
{
	uint64_t	addr = 0;

	D1(vswp, "%s: enter", __func__);

	KEY_HASH(addr, port->p_macaddr);

	D2(vswp, "%s: key = 0x%llx", __func__, addr);

	(void) mod_hash_destroy(vswp->fdb, (mod_hash_key_t)addr);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Search fdb for a given mac address.
 * Returns pointer to the entry if found, else returns NULL.
 */
static vsw_port_t *
vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp)
{
	uint64_t	key = 0;
	vsw_port_t	*port = NULL;

	D1(vswp, "%s: enter", __func__);

	KEY_HASH(key, ehp->ether_dhost);

	D2(vswp, "%s: key = 0x%llx", __func__, key);

	if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&port) != 0) {
		return (NULL);
	}

	D1(vswp, "%s: exit", __func__);

	return (port);
}
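/*
 * Multicast handling. A port (or the vsw instance itself) can
 * register interest in one or more multicast groups; membership is
 * tracked both in the multicast FDB (mFDB, see vsw_add_mcst() below)
 * and, for vnet ports, in a per-port list protected by mca_lock so
 * that all registrations can be undone when the port is deleted.
 */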
/*
 * Add or remove multicast address(es).
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
{
	mcst_addr_t	*mcst_p = NULL;
	vsw_t		*vswp = port->p_vswp;
	uint64_t	addr = 0x0;
	int		i, ret;

	D1(vswp, "%s: enter", __func__);

	D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);

	if (vswp->mh == NULL)
		return (1);

	for (i = 0; i < mcst_pkt->count; i++) {
		/*
		 * Convert address into form that can be used
		 * as hash table key.
		 */
		KEY_HASH(addr, mcst_pkt->mca[i]);

		/*
		 * Add or delete the specified address/port combination.
		 */
		if (mcst_pkt->set == 0x1) {
			D3(vswp, "%s: adding multicast address 0x%llx for "
			    "port %ld", __func__, addr, port->p_instance);
			if (vsw_add_mcst(vswp, VSW_VNETPORT, addr,
			    port) == 0) {
				/*
				 * Update the list of multicast addresses
				 * contained within the port structure to
				 * include this new one.
				 */
				mcst_p = kmem_alloc(sizeof (mcst_addr_t),
				    KM_NOSLEEP);
				if (mcst_p == NULL) {
					DERR(vswp, "%s: unable to alloc mem",
					    __func__);
					return (1);
				}

				mcst_p->nextp = NULL;
				mcst_p->addr = addr;

				mutex_enter(&port->mca_lock);
				mcst_p->nextp = port->mcap;
				port->mcap = mcst_p;
				mutex_exit(&port->mca_lock);

				/*
				 * Program the address into HW. If the addr
				 * has already been programmed then the MAC
				 * just increments a ref counter (which is
				 * used when the address is being deleted).
				 */
				ret = mac_multicst_add(vswp->mh,
				    (uchar_t *)&mcst_pkt->mca[i]);
				if (ret) {
					cmn_err(CE_WARN, "!unable to add "
					    "multicast address");
					(void) vsw_del_mcst(vswp, VSW_VNETPORT,
					    addr, port);
					vsw_del_addr(VSW_VNETPORT, port, addr);
					return (ret);
				}

			} else {
				DERR(vswp, "%s: error adding multicast "
				    "address 0x%llx for port %ld",
				    __func__, addr, port->p_instance);
				return (1);
			}
		} else {
			/*
			 * Delete an entry from the multicast hash
			 * table and update the address list
			 * appropriately.
			 */
			if (vsw_del_mcst(vswp, VSW_VNETPORT, addr,
			    port) == 0) {
				D3(vswp, "%s: deleting multicast address "
				    "0x%llx for port %ld", __func__, addr,
				    port->p_instance);

				vsw_del_addr(VSW_VNETPORT, port, addr);

				/*
				 * Remove the address from HW. The address
				 * will actually only be removed once the ref
				 * count within the MAC layer has dropped to
				 * zero. I.e. we can safely call this fn even
				 * if other ports are interested in this
				 * address.
				 */
				(void) mac_multicst_remove(vswp->mh,
				    (uchar_t *)&mcst_pkt->mca[i]);

			} else {
				DERR(vswp, "%s: error deleting multicast "
				    "addr 0x%llx for port %ld",
				    __func__, addr, port->p_instance);
				return (1);
			}
		}
	}
	D1(vswp, "%s: exit", __func__);
	return (0);
}
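/*
 * The mFDB maps a hashed multicast address to a singly linked chain
 * of mfdb_ent_t entries, one per interested port (or the local
 * device). Roughly:
 *
 *	addr key -> mfdb_ent_t -> mfdb_ent_t -> NULL
 *	            (d_addr,      (d_addr,
 *	             d_type)       d_type)
 *
 * All additions and removals are done under the mfdbrw write lock.
 */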
/*
 * Add a new multicast entry.
 *
 * Search hash table based on address. If match found then
 * update associated val (which is chain of ports), otherwise
 * create new key/val (addr/port) pair and insert into table.
 */
static int
vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
{
	int		dup = 0;
	int		rv = 0;
	mfdb_ent_t	*ment = NULL;
	mfdb_ent_t	*tmp_ent = NULL;
	mfdb_ent_t	*new_ent = NULL;
	void		*tgt = NULL;

	if (devtype == VSW_VNETPORT) {
		/*
		 * Being invoked from a vnet.
		 */
		ASSERT(arg != NULL);
		tgt = arg;
		D2(NULL, "%s: port %d : address 0x%llx", __func__,
		    ((vsw_port_t *)arg)->p_instance, addr);
	} else {
		/*
		 * We are being invoked via the m_multicst mac entry
		 * point.
		 */
		D2(NULL, "%s: address 0x%llx", __func__, addr);
		tgt = (void *)vswp;
	}

	WRITE_ENTER(&vswp->mfdbrw);
	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
	    (mod_hash_val_t *)&ment) != 0) {

		/* address not currently in table */
		ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
		ment->d_addr = (void *)tgt;
		ment->d_type = devtype;
		ment->nextp = NULL;

		if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr,
		    (mod_hash_val_t)ment) != 0) {
			DERR(vswp, "%s: hash table insertion failed",
			    __func__);
			kmem_free(ment, sizeof (mfdb_ent_t));
			rv = 1;
		} else {
			D2(vswp, "%s: added initial entry for 0x%llx to "
			    "table", __func__, addr);
		}
	} else {
		/*
		 * Address in table. Check to see if specified port
		 * is already associated with the address. If not add
		 * it now.
		 */
		tmp_ent = ment;
		while (tmp_ent != NULL) {
			if (tmp_ent->d_addr == (void *)tgt) {
				if (devtype == VSW_VNETPORT) {
					DERR(vswp, "%s: duplicate port entry "
					    "found for portid %ld and key "
					    "0x%llx", __func__,
					    ((vsw_port_t *)arg)->p_instance,
					    addr);
				} else {
					DERR(vswp, "%s: duplicate entry "
					    "found for key 0x%llx",
					    __func__, addr);
				}
				rv = 1;
				dup = 1;
				break;
			}
			tmp_ent = tmp_ent->nextp;
		}

		/*
		 * Port not on list so add it to end now.
		 */
		if (0 == dup) {
			D2(vswp, "%s: added entry for 0x%llx to table",
			    __func__, addr);
			new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
			new_ent->d_addr = (void *)tgt;
			new_ent->d_type = devtype;
			new_ent->nextp = NULL;

			tmp_ent = ment;
			while (tmp_ent->nextp != NULL)
				tmp_ent = tmp_ent->nextp;

			tmp_ent->nextp = new_ent;
		}
	}

	RW_EXIT(&vswp->mfdbrw);
	return (rv);
}

/*
 * Remove a multicast entry from the hashtable.
 *
 * Search hash table based on address. If match found, scan
 * list of ports associated with address. If specified port
 * found remove it from list.
 */
static int
vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
{
	mfdb_ent_t	*ment = NULL;
	mfdb_ent_t	*curr_p, *prev_p;
	void		*tgt = NULL;

	D1(vswp, "%s: enter", __func__);

	if (devtype == VSW_VNETPORT) {
		tgt = (vsw_port_t *)arg;
		D2(vswp, "%s: removing port %d from mFDB for address"
		    " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance,
		    addr);
	} else {
		D2(vswp, "%s: removing entry", __func__);
		tgt = (void *)vswp;
	}

	WRITE_ENTER(&vswp->mfdbrw);
	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
	    (mod_hash_val_t *)&ment) != 0) {
		D2(vswp, "%s: address 0x%llx not in table", __func__, addr);
		RW_EXIT(&vswp->mfdbrw);
		return (1);
	}

	prev_p = curr_p = ment;

	while (curr_p != NULL) {
		if (curr_p->d_addr == (void *)tgt) {
			if (devtype == VSW_VNETPORT) {
				D2(vswp, "%s: port %d found", __func__,
				    ((vsw_port_t *)tgt)->p_instance);
			} else {
				D2(vswp, "%s: instance found", __func__);
			}

			if (prev_p == curr_p) {
				/*
				 * Head of list; if no other element is in
				 * the list then destroy this entry, otherwise
				 * just replace it with the updated value.
				 */
				ment = curr_p->nextp;
				kmem_free(curr_p, sizeof (mfdb_ent_t));
				if (ment == NULL) {
					(void) mod_hash_destroy(vswp->mfdb,
					    (mod_hash_key_t)addr);
				} else {
					(void) mod_hash_replace(vswp->mfdb,
					    (mod_hash_key_t)addr,
					    (mod_hash_val_t)ment);
				}
			} else {
				/*
				 * Not head of list, no need to do
				 * replacement, just adjust list pointers.
				 */
				prev_p->nextp = curr_p->nextp;
				kmem_free(curr_p, sizeof (mfdb_ent_t));
			}
			break;
		}

		prev_p = curr_p;
		curr_p = curr_p->nextp;
	}

	RW_EXIT(&vswp->mfdbrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}
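/*
 * Note on the head-of-list case above: because the chain head is the
 * value stored in the hash table itself, removing the first entry
 * means either destroying the key (chain now empty) or replacing the
 * stored value with the new head via mod_hash_replace().
 */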
/*
 * Port is being deleted, but has registered an interest in one
 * or more multicast groups. Using the list of addresses maintained
 * within the port structure find the appropriate entry in the hash
 * table and remove this port from the list of interested ports.
 */
static void
vsw_del_mcst_port(vsw_port_t *port)
{
	mcst_addr_t	*mcst_p = NULL;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&port->mca_lock);
	while (port->mcap != NULL) {
		(void) vsw_del_mcst(vswp, VSW_VNETPORT,
		    port->mcap->addr, port);

		mcst_p = port->mcap->nextp;
		kmem_free(port->mcap, sizeof (mcst_addr_t));
		port->mcap = mcst_p;
	}
	mutex_exit(&port->mca_lock);

	D1(vswp, "%s: exit", __func__);
}

/*
 * This vsw instance is detaching, but has registered an interest in one
 * or more multicast groups. Using the list of addresses maintained
 * within the vsw structure find the appropriate entry in the hash
 * table and remove this instance from the list of interested ports.
 */
static void
vsw_del_mcst_vsw(vsw_t *vswp)
{
	mcst_addr_t	*next_p = NULL;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&vswp->mca_lock);

	while (vswp->mcap != NULL) {
		DERR(vswp, "%s: deleting addr 0x%llx",
		    __func__, vswp->mcap->addr);
		(void) vsw_del_mcst(vswp, VSW_LOCALDEV,
		    vswp->mcap->addr, NULL);

		next_p = vswp->mcap->nextp;
		kmem_free(vswp->mcap, sizeof (mcst_addr_t));
		vswp->mcap = next_p;
	}

	mutex_exit(&vswp->mca_lock);

	D1(vswp, "%s: exit", __func__);
}


/*
 * Remove the specified address from the list of addresses maintained
 * in this port node.
 */
static void
vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr)
{
	vsw_t		*vswp = NULL;
	vsw_port_t	*port = NULL;
	mcst_addr_t	*prev_p = NULL;
	mcst_addr_t	*curr_p = NULL;

	D1(NULL, "%s: enter : devtype %d : addr 0x%llx",
	    __func__, devtype, addr);

	if (devtype == VSW_VNETPORT) {
		port = (vsw_port_t *)arg;
		mutex_enter(&port->mca_lock);
		prev_p = curr_p = port->mcap;
	} else {
		vswp = (vsw_t *)arg;
		mutex_enter(&vswp->mca_lock);
		prev_p = curr_p = vswp->mcap;
	}

	while (curr_p != NULL) {
		if (curr_p->addr == addr) {
			D2(NULL, "%s: address found", __func__);
			/* match found */
			if (prev_p == curr_p) {
				/* list head */
				if (devtype == VSW_VNETPORT)
					port->mcap = curr_p->nextp;
				else
					vswp->mcap = curr_p->nextp;
			} else {
				prev_p->nextp = curr_p->nextp;
			}
			kmem_free(curr_p, sizeof (mcst_addr_t));
			break;
		} else {
			prev_p = curr_p;
			curr_p = curr_p->nextp;
		}
	}

	if (devtype == VSW_VNETPORT)
		mutex_exit(&port->mca_lock);
	else
		mutex_exit(&vswp->mca_lock);

	D1(NULL, "%s: exit", __func__);
}
/*
 * Creates a descriptor ring (dring) and links it into the
 * list of outbound drings for this channel.
 *
 * Returns NULL if creation failed.
 */
static dring_info_t *
vsw_create_dring(vsw_ldc_t *ldcp)
{
	vsw_private_desc_t	*priv_addr = NULL;
	vsw_t			*vswp = ldcp->ldc_vswp;
	ldc_mem_info_t		minfo;
	dring_info_t		*dp, *tp;
	int			i;

	dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);

	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);

	/* create public section of ring */
	if ((ldc_mem_dring_create(VSW_RING_NUM_EL,
	    VSW_PUB_SIZE, &dp->handle)) != 0) {

		DERR(vswp, "vsw_create_dring(%lld): ldc dring create "
		    "failed", ldcp->ldc_id);
		goto create_fail_exit;
	}

	ASSERT(dp->handle != NULL);

	/*
	 * Get the base address of the public section of the ring.
	 */
	if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
		DERR(vswp, "vsw_create_dring(%lld): dring info failed\n",
		    ldcp->ldc_id);
		goto dring_fail_exit;
	} else {
		ASSERT(minfo.vaddr != 0);
		dp->pub_addr = minfo.vaddr;
	}

	dp->num_descriptors = VSW_RING_NUM_EL;
	dp->descriptor_size = VSW_PUB_SIZE;
	dp->options = VIO_TX_DRING;
	dp->ncookies = 1;	/* guaranteed by ldc */

	/*
	 * Create private portion of ring.
	 */
	dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc(
	    (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP);

	if (vsw_setup_ring(ldcp, dp)) {
		DERR(vswp, "%s: unable to setup ring", __func__);
		goto dring_fail_exit;
	}

	/* haven't used any descriptors yet */
	dp->end_idx = 0;
	dp->last_ack_recv = -1;

	/* bind dring to the channel */
	if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle,
	    LDC_SHADOW_MAP, LDC_MEM_RW,
	    &dp->cookie[0], &dp->ncookies)) != 0) {
		DERR(vswp, "vsw_create_dring: unable to bind to channel "
		    "%lld", ldcp->ldc_id);
		goto dring_fail_exit;
	}

	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
	dp->restart_reqd = B_TRUE;

	/*
	 * Only ever create rings for outgoing lane. Link it onto
	 * end of list.
	 */
	if (ldcp->lane_out.dringp == NULL) {
		D2(vswp, "vsw_create_dring: adding first outbound ring");
		ldcp->lane_out.dringp = dp;
	} else {
		tp = ldcp->lane_out.dringp;
		while (tp->next != NULL)
			tp = tp->next;

		tp->next = dp;
	}

	return (dp);

dring_fail_exit:
	(void) ldc_mem_dring_destroy(dp->handle);

create_fail_exit:
	if (dp->priv_addr != NULL) {
		priv_addr = dp->priv_addr;
		for (i = 0; i < VSW_RING_NUM_EL; i++) {
			if (priv_addr->memhandle != NULL)
				(void) ldc_mem_free_handle(
				    priv_addr->memhandle);
			priv_addr++;
		}
		kmem_free(dp->priv_addr,
		    (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
	}
	mutex_destroy(&dp->dlock);

	kmem_free(dp, sizeof (dring_info_t));
	return (NULL);
}
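/*
 * For reference, the geometry set up above: VSW_RING_NUM_EL exported
 * (public) descriptors of VSW_PUB_SIZE bytes each, shadowed one-to-one
 * by private descriptors holding the local memory handles, cookies
 * and data buffers that vsw_setup_ring() initialises.
 */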
/*
 * Create a ring consisting of just a private portion and link
 * it into the list of rings for the outbound lane.
 *
 * These types of rings are used primarily for temporary data
 * storage (i.e. as data buffers).
 */
void
vsw_create_privring(vsw_ldc_t *ldcp)
{
	dring_info_t	*dp, *tp;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);

	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);

	/* no public section */
	dp->pub_addr = NULL;

	dp->priv_addr = kmem_zalloc((sizeof (vsw_private_desc_t) *
	    VSW_RING_NUM_EL), KM_SLEEP);

	dp->num_descriptors = VSW_RING_NUM_EL;

	if (vsw_setup_ring(ldcp, dp)) {
		DERR(vswp, "%s: setup of ring failed", __func__);
		kmem_free(dp->priv_addr,
		    (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
		mutex_destroy(&dp->dlock);
		kmem_free(dp, sizeof (dring_info_t));
		return;
	}

	/* haven't used any descriptors yet */
	dp->end_idx = 0;

	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
	dp->restart_reqd = B_TRUE;

	/*
	 * Only ever create rings for outgoing lane. Link it onto
	 * end of list.
	 */
	if (ldcp->lane_out.dringp == NULL) {
		D2(vswp, "%s: adding first outbound privring", __func__);
		ldcp->lane_out.dringp = dp;
	} else {
		tp = ldcp->lane_out.dringp;
		while (tp->next != NULL)
			tp = tp->next;

		tp->next = dp;
	}

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}
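/*
 * A privring created above is intended as the buffer pool for in-band
 * (VIO_DESC_MODE) transfers driven by vsw_descrsend(): its descriptors
 * are never exported to the peer, only their memory cookies are
 * shipped inside vio_ibnd_desc_t messages.
 */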
/*
 * Setup the descriptors in the dring. Returns 0 on success, 1 on
 * failure.
 */
int
vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp)
{
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;
	vsw_t			*vswp = ldcp->ldc_vswp;
	uint64_t		*tmpp;
	uint64_t		offset = 0;
	uint32_t		ncookies = 0;
	static char		*name = "vsw_setup_ring";
	int			i, j, nc, rv;

	priv_addr = dp->priv_addr;
	pub_addr = dp->pub_addr;

	/* public section may be null but private should never be */
	ASSERT(priv_addr != NULL);

	/*
	 * Allocate the region of memory which will be used to hold
	 * the data the descriptors will refer to.
	 */
	dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ);
	dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP);

	D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name,
	    dp->data_sz, dp->data_addr);

	tmpp = (uint64_t *)dp->data_addr;
	offset = VSW_RING_EL_DATA_SZ / sizeof (uint64_t);

	/*
	 * Initialise some of the private and public (if they exist)
	 * descriptor fields.
	 */
	for (i = 0; i < VSW_RING_NUM_EL; i++) {
		mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL);

		if ((ldc_mem_alloc_handle(ldcp->ldc_handle,
		    &priv_addr->memhandle)) != 0) {
			DERR(vswp, "%s: alloc mem handle failed", name);
			goto setup_ring_cleanup;
		}

		priv_addr->datap = (void *)tmpp;

		rv = ldc_mem_bind_handle(priv_addr->memhandle,
		    (caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ,
		    LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W,
		    &(priv_addr->memcookie[0]), &ncookies);
		if (rv != 0) {
			DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed "
			    "(rv %d)", name, ldcp->ldc_id, rv);
			goto setup_ring_cleanup;
		}
		priv_addr->bound = 1;

		D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx",
		    name, i, priv_addr->memcookie[0].addr,
		    priv_addr->memcookie[0].size);

		if (ncookies > (uint32_t)VSW_MAX_COOKIES) {
			DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned "
			    "invalid num of cookies (%d) for size 0x%llx",
			    name, ldcp->ldc_id, ncookies,
			    VSW_RING_EL_DATA_SZ);

			goto setup_ring_cleanup;
		} else {
			for (j = 1; j < ncookies; j++) {
				rv = ldc_mem_nextcookie(priv_addr->memhandle,
				    &(priv_addr->memcookie[j]));
				if (rv != 0) {
					DERR(vswp, "%s: ldc_mem_nextcookie "
					    "failed rv (%d)", name, rv);
					goto setup_ring_cleanup;
				}
				D3(vswp, "%s: memcookie %d : addr 0x%llx : "
				    "size 0x%llx", name, j,
				    priv_addr->memcookie[j].addr,
				    priv_addr->memcookie[j].size);
			}
		}
		priv_addr->ncookies = ncookies;
		priv_addr->dstate = VIO_DESC_FREE;

		if (pub_addr != NULL) {

			/* link pub and private sides */
			priv_addr->descp = pub_addr;

			pub_addr->ncookies = priv_addr->ncookies;

			for (nc = 0; nc < pub_addr->ncookies; nc++) {
				bcopy(&priv_addr->memcookie[nc],
				    &pub_addr->memcookie[nc],
				    sizeof (ldc_mem_cookie_t));
			}

			pub_addr->hdr.dstate = VIO_DESC_FREE;
			pub_addr++;
		}

		/*
		 * Move to next element in the dring and the next
		 * position in the data buffer.
		 */
		priv_addr++;
		tmpp += offset;
	}

	return (0);

setup_ring_cleanup:
	priv_addr = dp->priv_addr;

	for (j = 0; j < i; j++) {
		(void) ldc_mem_unbind_handle(priv_addr->memhandle);
		(void) ldc_mem_free_handle(priv_addr->memhandle);

		mutex_destroy(&priv_addr->dstate_lock);

		priv_addr++;
	}
	kmem_free(dp->data_addr, dp->data_sz);

	return (1);
}
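/*
 * Illustrative sketch only: both transmit paths claim a slot from a
 * ring set up above with
 *
 *	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0)
 *		return (LDC_TX_NORESOURCES);
 *
 * and the descriptor is returned to VIO_DESC_FREE elsewhere, once the
 * peer has finished with it.
 */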
/*
 * Searches the private section of a ring for a free descriptor,
 * starting at the location of the last free descriptor found
 * previously.
 *
 * Returns 0 if a free descriptor is available, and updates the state
 * of the private descriptor to VIO_DESC_READY; otherwise returns 1.
 *
 * FUTURE: might need to return contiguous range of descriptors
 * as dring info msg assumes all will be contiguous.
 */
static int
vsw_dring_find_free_desc(dring_info_t *dringp,
    vsw_private_desc_t **priv_p, int *idx)
{
	vsw_private_desc_t	*addr = NULL;
	int			num = VSW_RING_NUM_EL;
	int			ret = 1;

	D1(NULL, "%s enter\n", __func__);

	ASSERT(dringp->priv_addr != NULL);

	D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld",
	    __func__, dringp, dringp->end_idx);

	addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx;

	mutex_enter(&addr->dstate_lock);
	if (addr->dstate == VIO_DESC_FREE) {
		addr->dstate = VIO_DESC_READY;
		*priv_p = addr;
		*idx = dringp->end_idx;
		dringp->end_idx = (dringp->end_idx + 1) % num;
		ret = 0;
	}
	mutex_exit(&addr->dstate_lock);

	/* ring full */
	if (ret == 1) {
		D2(NULL, "%s: no descriptors free: started at %d", __func__,
		    dringp->end_idx);
	}

	D1(NULL, "%s: exit\n", __func__);

	return (ret);
}

/*
 * Map from a dring identifier to the ring itself. Returns
 * pointer to ring or NULL if no match found.
 */
static dring_info_t *
vsw_ident2dring(lane_t *lane, uint64_t ident)
{
	dring_info_t	*dp;

	for (dp = lane->dringp; dp != NULL; dp = dp->next) {
		if (dp->ident == ident)
			break;
	}

	return (dp);
}

/*
 * Set the default lane attributes. These are copied into
 * the attr msg we send to our peer. If they are not acceptable
 * then (currently) the handshake ends.
 */
static void
vsw_set_lane_attr(vsw_t *vswp, lane_t *lp)
{
	bzero(lp, sizeof (lane_t));

	READ_ENTER(&vswp->if_lockrw);
	ether_copy(&(vswp->if_addr), &(lp->addr));
	RW_EXIT(&vswp->if_lockrw);

	lp->mtu = VSW_MTU;
	lp->addr_type = ADDR_TYPE_MAC;
	lp->xfer_mode = VIO_DRING_MODE;
	lp->ack_freq = 0;	/* for shared mode */

	mutex_enter(&lp->seq_lock);
	lp->seq_num = VNET_ISS;
	mutex_exit(&lp->seq_lock);
}
/*
 * Verify that the attributes are acceptable.
 *
 * FUTURE: If some attributes are not acceptable, change them
 * to our desired values.
 */
static int
vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port)
{
	int	ret = 0;

	D1(NULL, "vsw_check_attr enter\n");

	/*
	 * Note we currently only support in-band descriptors
	 * and descriptor rings, not packet based transfer (VIO_PKT_MODE).
	 */
	if ((pkt->xfer_mode != VIO_DESC_MODE) &&
	    (pkt->xfer_mode != VIO_DRING_MODE)) {
		D2(NULL, "vsw_check_attr: unknown mode %x\n",
		    pkt->xfer_mode);
		ret = 1;
	}

	/* Only support MAC addresses at moment. */
	if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) {
		D2(NULL, "vsw_check_attr: invalid addr_type %x, "
		    "or address 0x%llx\n", pkt->addr_type,
		    pkt->addr);
		ret = 1;
	}

	/*
	 * MAC address supplied by device should match that stored
	 * in the vsw-port OBP node. Need to decide what to do if they
	 * don't match, for the moment just warn but don't fail.
	 */
	if (bcmp(&pkt->addr, &port->p_macaddr, ETHERADDRL) != 0) {
		DERR(NULL, "vsw_check_attr: device supplied address "
		    "0x%llx doesn't match node address 0x%llx\n",
		    pkt->addr, port->p_macaddr);
	}

	/*
	 * Ack freq only makes sense in pkt mode; in shared
	 * mode the ring descriptors say whether or not to
	 * send back an ACK.
	 */
	if ((pkt->xfer_mode == VIO_DRING_MODE) &&
	    (pkt->ack_freq > 0)) {
		D2(NULL, "vsw_check_attr: non zero ack freq "
		    "in SHM mode\n");
		ret = 1;
	}

	/*
	 * Note: for the moment we only support ETHER
	 * frames. This may change in the future.
	 */
	if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) {
		D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n",
		    pkt->mtu);
		ret = 1;
	}

	D1(NULL, "vsw_check_attr exit\n");

	return (ret);
}

/*
 * Returns 1 if there is a problem, 0 otherwise.
 */
static int
vsw_check_dring_info(vio_dring_reg_msg_t *pkt)
{
	int	ret = 0;

	D1(NULL, "vsw_check_dring_info enter\n");

	if ((pkt->num_descriptors == 0) ||
	    (pkt->descriptor_size == 0) ||
	    (pkt->ncookies != 1)) {
		DERR(NULL, "vsw_check_dring_info: invalid dring msg");
		ret = 1;
	}

	D1(NULL, "vsw_check_dring_info exit\n");

	return (ret);
}

/*
 * Returns 1 if the two memory cookies match. Otherwise returns 0.
 */
static int
vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2)
{
	if ((m1->addr != m2->addr) ||
	    (m1->size != m2->size)) {
		return (0);
	} else {
		return (1);
	}
}

/*
 * Returns 1 if ring described in reg message matches that
 * described by dring_info structure. Otherwise returns 0.
 */
static int
vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg)
{
	if ((msg->descriptor_size != dp->descriptor_size) ||
	    (msg->num_descriptors != dp->num_descriptors) ||
	    (msg->ncookies != dp->ncookies) ||
	    !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) {
		return (0);
	} else {
		return (1);
	}
}
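/*
 * Format a MAC address into the caller-supplied buffer, which must be
 * able to hold at least 18 bytes ("xx:xx:xx:xx:xx:xx" plus the
 * terminating NUL).
 */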
static caddr_t
vsw_print_ethaddr(uint8_t *a, char *ebuf)
{
	(void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
	    a[0], a[1], a[2], a[3], a[4], a[5]);
	return (ebuf);
}

/*
 * Reset and free all the resources associated with
 * the channel.
 */
static void
vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir)
{
	dring_info_t	*dp, *dpp;
	lane_t		*lp = NULL;
	int		rv = 0;

	ASSERT(ldcp != NULL);

	D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id);

	if (dir == INBOUND) {
		D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane"
		    " of channel %lld", __func__, ldcp->ldc_id);
		lp = &ldcp->lane_in;
	} else {
		D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane"
		    " of channel %lld", __func__, ldcp->ldc_id);
		lp = &ldcp->lane_out;
	}

	lp->lstate = VSW_LANE_INACTIV;
	mutex_enter(&lp->seq_lock);
	lp->seq_num = VNET_ISS;
	mutex_exit(&lp->seq_lock);
	if (lp->dringp) {
		if (dir == INBOUND) {
			dp = lp->dringp;
			while (dp != NULL) {
				dpp = dp->next;
				if (dp->handle != NULL)
					(void) ldc_mem_dring_unmap(dp->handle);
				kmem_free(dp, sizeof (dring_info_t));
				dp = dpp;
			}
		} else {
			/*
			 * Unbind, destroy exported dring, free dring struct.
			 */
			dp = lp->dringp;
			rv = vsw_free_ring(dp);
		}
		if (rv == 0) {
			lp->dringp = NULL;
		}
	}

	D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
}
/*
 * Free ring and all associated resources.
 */
static int
vsw_free_ring(dring_info_t *dp)
{
	vsw_private_desc_t	*paddr = NULL;
	dring_info_t		*dpp;
	int			i, rv = 1;

	while (dp != NULL) {
		mutex_enter(&dp->dlock);
		dpp = dp->next;
		if (dp->priv_addr != NULL) {
			/*
			 * First unbind and free the memory handles
			 * stored in each descriptor within the ring.
			 */
			for (i = 0; i < VSW_RING_NUM_EL; i++) {
				paddr = (vsw_private_desc_t *)
				    dp->priv_addr + i;
				if (paddr->memhandle != NULL) {
					if (paddr->bound == 1) {
						rv = ldc_mem_unbind_handle(
						    paddr->memhandle);

						if (rv != 0) {
							DERR(NULL, "error "
							    "unbinding handle "
							    "for ring 0x%llx "
							    "at pos %d",
							    dp, i);
							mutex_exit(&dp->dlock);
							return (rv);
						}
						paddr->bound = 0;
					}

					rv = ldc_mem_free_handle(
					    paddr->memhandle);
					if (rv != 0) {
						DERR(NULL, "error freeing "
						    "handle for ring "
						    "0x%llx at pos %d",
						    dp, i);
						mutex_exit(&dp->dlock);
						return (rv);
					}
					paddr->memhandle = NULL;
				}
				mutex_destroy(&paddr->dstate_lock);
			}
			kmem_free(dp->priv_addr, (sizeof (vsw_private_desc_t)
			    * VSW_RING_NUM_EL));
		}

		/*
		 * Now unbind and destroy the ring itself.
		 */
		if (dp->handle != NULL) {
			(void) ldc_mem_dring_unbind(dp->handle);
			(void) ldc_mem_dring_destroy(dp->handle);
		}

		if (dp->data_addr != NULL) {
			kmem_free(dp->data_addr, dp->data_sz);
		}

		mutex_exit(&dp->dlock);
		mutex_destroy(&dp->dlock);
		mutex_destroy(&dp->restart_lock);
		kmem_free(dp, sizeof (dring_info_t));

		dp = dpp;
	}
	return (0);
}

/*
 * Debugging routines.
 */
static void
display_state(void)
{
	vsw_t		*vswp;
	vsw_port_list_t	*plist;
	vsw_port_t	*port;
	vsw_ldc_list_t	*ldcl;
	vsw_ldc_t	*ldcp;

	cmn_err(CE_NOTE, "***** system state *****");

	for (vswp = vsw_head; vswp; vswp = vswp->next) {
		plist = &vswp->plist;
		READ_ENTER(&plist->lockrw);
		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
		    vswp->instance, plist->num_ports);

		for (port = plist->head; port != NULL; port = port->p_next) {
			ldcl = &port->p_ldclist;
			cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
			    port->p_instance, ldcl->num_ldcs);
			READ_ENTER(&ldcl->lockrw);
			ldcp = ldcl->head;
			for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
				cmn_err(CE_CONT, "chan %lu : dev %d : "
				    "status %d : phase %u\n",
				    ldcp->ldc_id, ldcp->dev_class,
				    ldcp->ldc_status, ldcp->hphase);
				cmn_err(CE_CONT, "chan %lu : lsession %lu : "
				    "psession %lu\n",
				    ldcp->ldc_id,
				    ldcp->local_session,
				    ldcp->peer_session);

				cmn_err(CE_CONT, "Inbound lane:\n");
				display_lane(&ldcp->lane_in);
				cmn_err(CE_CONT, "Outbound lane:\n");
				display_lane(&ldcp->lane_out);
			}
			RW_EXIT(&ldcl->lockrw);
		}
		RW_EXIT(&plist->lockrw);
	}
	cmn_err(CE_NOTE, "***** system state *****");
}

static void
display_lane(lane_t *lp)
{
	dring_info_t	*drp;

	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
	    lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
	    lp->addr_type, lp->addr, lp->xfer_mode);
	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);

	cmn_err(CE_CONT, "Dring info:\n");
	for (drp = lp->dringp; drp != NULL; drp = drp->next) {
		cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
		    drp->num_descriptors, drp->descriptor_size);
		cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
		cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
		    (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
		cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
		    drp->ident, drp->end_idx);
		display_ring(drp);
	}
}

static void
display_ring(dring_info_t *dringp)
{
	uint64_t		i;
	uint64_t		priv_count = 0;
	uint64_t		pub_count = 0;
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;

	for (i = 0; i < VSW_RING_NUM_EL; i++) {
		if (dringp->pub_addr != NULL) {
			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;

			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
				pub_count++;
		}

		if (dringp->priv_addr != NULL) {
			priv_addr =
			    (vsw_private_desc_t *)dringp->priv_addr + i;

			if (priv_addr->dstate == VIO_DESC_FREE)
				priv_count++;
		}
	}
	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
	    i, priv_count, pub_count);
}
static void
dump_flags(uint64_t state)
{
	int	i;

	typedef struct flag_name {
		uint64_t	flag_val;
		char		*flag_name;
	} flag_name_t;

	flag_name_t	flags[] = {
		{ VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT" },
		{ VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV" },
		{ VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV" },
		{ VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT" },
		{ VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV" },
		{ VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT" },
		{ VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT" },
		{ VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV" },
		{ VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT" },
		{ VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV" },
		{ VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT" },
		{ VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV" },
		{ VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT" },
		{ VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV" },
		{ VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT" },
		{ VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV" },
		{ VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT" },
		{ VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV" },
		{ VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT" },
		{ VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV" },
		{ VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT" },
		{ VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV" },
		{ VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT" },
		{ VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV" },
		{ VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT" },
		{ VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV" },
		{ VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT" },
		{ VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV" },
		{ VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT" },
		{ VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV" },
		{ VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE" }
	};

	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
	for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
		if (state & flags[i].flag_val)
			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
	}
}