1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/ldc.h> 66 #include <sys/vsw_fdb.h> 67 #include <sys/vsw.h> 68 #include <sys/vio_mailbox.h> 69 #include <sys/vnet_mailbox.h> 70 #include <sys/vnet_common.h> 71 #include <sys/vio_util.h> 72 #include <sys/sdt.h> 73 #include <sys/atomic.h> 74 #include <sys/callb.h> 75 76 /* Port add/deletion/etc routines */ 77 static int vsw_port_delete(vsw_port_t *port); 78 static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id); 79 static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id); 80 static int vsw_init_ldcs(vsw_port_t *port); 81 static int vsw_uninit_ldcs(vsw_port_t *port); 82 static int vsw_ldc_init(vsw_ldc_t *ldcp); 83 static int vsw_ldc_uninit(vsw_ldc_t *ldcp); 84 static int vsw_drain_ldcs(vsw_port_t *port); 85 static int vsw_drain_port_taskq(vsw_port_t *port); 86 static void vsw_marker_task(void *); 87 static int vsw_plist_del_node(vsw_t *, vsw_port_t *port); 88 int vsw_detach_ports(vsw_t *vswp); 89 int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 90 mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 91 int vsw_port_detach(vsw_t *vswp, int p_instance); 92 int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt); 93 int vsw_port_attach(vsw_t *vswp, int p_instance, 94 uint64_t *ldcids, int nids, struct ether_addr *macaddr); 95 vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 96 97 98 /* Interrupt routines */ 99 static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg); 100 101 /* Handshake routines */ 102 static void vsw_ldc_reinit(vsw_ldc_t *); 103 static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t); 104 static void vsw_conn_task(void *); 105 static int vsw_check_flag(vsw_ldc_t *, int, uint64_t); 106 static void vsw_next_milestone(vsw_ldc_t *); 107 static int vsw_supported_version(vio_ver_msg_t *); 108 109 /* Data processing routines */ 110 static void vsw_process_pkt(void *); 111 static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t); 112 static void vsw_process_ctrl_pkt(void *); 113 static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *); 114 static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *); 115 static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *); 116 static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *); 117 static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *); 118 static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *); 119 static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t); 120 static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *); 121 static void vsw_process_data_raw_pkt(vsw_ldc_t *, void *); 122 static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *); 123 static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t); 124 125 /* Switching/data transmit routines */ 126 static int vsw_dringsend(vsw_ldc_t *, mblk_t *); 127 static int vsw_descrsend(vsw_ldc_t *, mblk_t *); 128 static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, int retries); 129 130 /* Packet creation routines */ 131 static void vsw_send_ver(void *); 132 static void vsw_send_attr(vsw_ldc_t *); 133 static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *); 134 static void vsw_send_dring_info(vsw_ldc_t *); 135 static void vsw_send_rdx(vsw_ldc_t *); 136 static int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t); 137 138 /* Dring routines */ 139 static dring_info_t *vsw_create_dring(vsw_ldc_t *); 140 static void vsw_create_privring(vsw_ldc_t *); 141 static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp); 142 static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **, 143 int *); 144 static dring_info_t *vsw_ident2dring(lane_t *, uint64_t); 145 static int vsw_reclaim_dring(dring_info_t *dp, int start); 146 147 static void vsw_set_lane_attr(vsw_t *, lane_t *); 148 static int vsw_check_attr(vnet_attr_msg_t *, vsw_port_t *); 149 static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg); 150 static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *); 151 static int vsw_check_dring_info(vio_dring_reg_msg_t *); 152 153 /* Rcv/Tx thread routines */ 154 static void vsw_stop_tx_thread(vsw_ldc_t *ldcp); 155 static void vsw_ldc_tx_worker(void *arg); 156 static uint_t vsw_rx_softintr(caddr_t arg1, caddr_t arg2); 157 static void vsw_stop_rx_thread(vsw_ldc_t *ldcp); 158 static void vsw_ldc_rx_worker(void *arg); 159 160 /* Misc support routines */ 161 static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf); 162 static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t); 163 static int vsw_free_ring(dring_info_t *); 164 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 165 static int vsw_get_same_dest_list(struct ether_header *ehp, 166 mblk_t **rhead, mblk_t **rtail, mblk_t **mpp); 167 static mblk_t *vsw_dupmsgchain(mblk_t *mp); 168 static void vsw_mac_rx(vsw_t *vswp, int caller, mac_resource_handle_t mrh, 169 mblk_t *mp, mblk_t *mpt, vsw_macrx_flags_t flags); 170 171 /* Debugging routines */ 172 static void dump_flags(uint64_t); 173 static void display_state(void); 174 static void display_lane(lane_t *); 175 static void display_ring(dring_info_t *); 176 177 /* 178 * Functions imported from other files. 179 */ 180 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 181 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 182 extern void vsw_reconfig_hw(vsw_t *); 183 extern int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port); 184 extern int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port); 185 extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port); 186 extern void vsw_del_mcst_port(vsw_port_t *port); 187 extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 188 extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 189 190 #define VSW_NUM_VMPOOLS 3 /* number of vio mblk pools */ 191 #define VSW_PORT_REF_DELAY 30 /* delay for port ref_cnt to become 0 */ 192 193 /* 194 * Tunables used in this file. 195 */ 196 extern int vsw_num_handshakes; 197 extern int vsw_wretries; 198 extern int vsw_desc_delay; 199 extern int vsw_read_attempts; 200 extern int vsw_ldc_tx_delay; 201 extern int vsw_ldc_tx_retries; 202 extern int vsw_ldc_tx_max_failures; 203 extern boolean_t vsw_ldc_rxthr_enabled; 204 extern boolean_t vsw_ldc_txthr_enabled; 205 extern uint32_t vsw_chain_len; 206 extern uint32_t vsw_mblk_size1; 207 extern uint32_t vsw_mblk_size2; 208 extern uint32_t vsw_mblk_size3; 209 extern uint32_t vsw_num_mblks1; 210 extern uint32_t vsw_num_mblks2; 211 extern uint32_t vsw_num_mblks3; 212 213 214 #define LDC_ENTER_LOCK(ldcp) \ 215 mutex_enter(&((ldcp)->ldc_cblock));\ 216 mutex_enter(&((ldcp)->ldc_rxlock));\ 217 mutex_enter(&((ldcp)->ldc_txlock)); 218 #define LDC_EXIT_LOCK(ldcp) \ 219 mutex_exit(&((ldcp)->ldc_txlock));\ 220 mutex_exit(&((ldcp)->ldc_rxlock));\ 221 mutex_exit(&((ldcp)->ldc_cblock)); 222 223 224 /* supported versions */ 225 static ver_sup_t vsw_versions[] = { {1, 0} }; 226 227 /* 228 * For the moment the state dump routines have their own 229 * private flag. 230 */ 231 #define DUMP_STATE 0 232 233 #if DUMP_STATE 234 235 #define DUMP_TAG(tag) \ 236 { \ 237 D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \ 238 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \ 239 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \ 240 } 241 242 #define DUMP_TAG_PTR(tag) \ 243 { \ 244 D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \ 245 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \ 246 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \ 247 } 248 249 #define DUMP_FLAGS(flags) dump_flags(flags); 250 #define DISPLAY_STATE() display_state() 251 252 #else 253 254 #define DUMP_TAG(tag) 255 #define DUMP_TAG_PTR(tag) 256 #define DUMP_FLAGS(state) 257 #define DISPLAY_STATE() 258 259 #endif /* DUMP_STATE */ 260 261 /* 262 * Attach the specified port. 263 * 264 * Returns 0 on success, 1 on failure. 265 */ 266 int 267 vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids, 268 struct ether_addr *macaddr) 269 { 270 vsw_port_list_t *plist = &vswp->plist; 271 vsw_port_t *port, **prev_port; 272 int i; 273 274 D1(vswp, "%s: enter : port %d", __func__, p_instance); 275 276 /* port already exists? */ 277 READ_ENTER(&plist->lockrw); 278 for (port = plist->head; port != NULL; port = port->p_next) { 279 if (port->p_instance == p_instance) { 280 DWARN(vswp, "%s: port instance %d already attached", 281 __func__, p_instance); 282 RW_EXIT(&plist->lockrw); 283 return (1); 284 } 285 } 286 RW_EXIT(&plist->lockrw); 287 288 port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 289 port->p_vswp = vswp; 290 port->p_instance = p_instance; 291 port->p_ldclist.num_ldcs = 0; 292 port->p_ldclist.head = NULL; 293 port->addr_set = VSW_ADDR_UNSET; 294 295 rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL); 296 297 mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL); 298 mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL); 299 300 mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL); 301 cv_init(&port->state_cv, NULL, CV_DRIVER, NULL); 302 port->state = VSW_PORT_INIT; 303 304 if (nids > VSW_PORT_MAX_LDCS) { 305 D2(vswp, "%s: using first of %d ldc ids", 306 __func__, nids); 307 nids = VSW_PORT_MAX_LDCS; 308 } 309 310 D2(vswp, "%s: %d nids", __func__, nids); 311 for (i = 0; i < nids; i++) { 312 D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]); 313 if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) { 314 DERR(vswp, "%s: ldc_attach failed", __func__); 315 316 rw_destroy(&port->p_ldclist.lockrw); 317 318 cv_destroy(&port->state_cv); 319 mutex_destroy(&port->state_lock); 320 321 mutex_destroy(&port->tx_lock); 322 mutex_destroy(&port->mca_lock); 323 kmem_free(port, sizeof (vsw_port_t)); 324 return (1); 325 } 326 } 327 328 ether_copy(macaddr, &port->p_macaddr); 329 330 if (vswp->switching_setup_done == B_TRUE) { 331 /* 332 * If the underlying physical device has been setup, 333 * program the mac address of this port in it. 334 * Otherwise, port macaddr will be set after the physical 335 * device is successfully setup by the timeout handler. 336 */ 337 mutex_enter(&vswp->hw_lock); 338 (void) vsw_set_hw(vswp, port, VSW_VNETPORT); 339 mutex_exit(&vswp->hw_lock); 340 } 341 342 WRITE_ENTER(&plist->lockrw); 343 344 /* create the fdb entry for this port/mac address */ 345 (void) vsw_add_fdb(vswp, port); 346 347 /* link it into the list of ports for this vsw instance */ 348 prev_port = (vsw_port_t **)(&plist->head); 349 port->p_next = *prev_port; 350 *prev_port = port; 351 plist->num_ports++; 352 353 RW_EXIT(&plist->lockrw); 354 355 /* 356 * Initialise the port and any ldc's under it. 357 */ 358 (void) vsw_init_ldcs(port); 359 360 D1(vswp, "%s: exit", __func__); 361 return (0); 362 } 363 364 /* 365 * Detach the specified port. 366 * 367 * Returns 0 on success, 1 on failure. 368 */ 369 int 370 vsw_port_detach(vsw_t *vswp, int p_instance) 371 { 372 vsw_port_t *port = NULL; 373 vsw_port_list_t *plist = &vswp->plist; 374 375 D1(vswp, "%s: enter: port id %d", __func__, p_instance); 376 377 WRITE_ENTER(&plist->lockrw); 378 379 if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) { 380 RW_EXIT(&plist->lockrw); 381 return (1); 382 } 383 384 if (vsw_plist_del_node(vswp, port)) { 385 RW_EXIT(&plist->lockrw); 386 return (1); 387 } 388 389 /* Remove the fdb entry for this port/mac address */ 390 (void) vsw_del_fdb(vswp, port); 391 392 /* Remove any multicast addresses.. */ 393 vsw_del_mcst_port(port); 394 395 /* 396 * No longer need to hold writer lock on port list now 397 * that we have unlinked the target port from the list. 398 */ 399 RW_EXIT(&plist->lockrw); 400 401 /* Remove address if was programmed into HW. */ 402 mutex_enter(&vswp->hw_lock); 403 404 /* 405 * Port's address may not have been set in hardware. This could 406 * happen if the underlying physical device is not yet available and 407 * vsw_setup_switching_timeout() may be in progress. 408 * We remove its addr from hardware only if it has been set before. 409 */ 410 if (port->addr_set != VSW_ADDR_UNSET) 411 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 412 413 if (vswp->recfg_reqd) 414 vsw_reconfig_hw(vswp); 415 416 mutex_exit(&vswp->hw_lock); 417 418 if (vsw_port_delete(port)) { 419 return (1); 420 } 421 422 D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance); 423 return (0); 424 } 425 426 /* 427 * Detach all active ports. 428 * 429 * Returns 0 on success, 1 on failure. 430 */ 431 int 432 vsw_detach_ports(vsw_t *vswp) 433 { 434 vsw_port_list_t *plist = &vswp->plist; 435 vsw_port_t *port = NULL; 436 437 D1(vswp, "%s: enter", __func__); 438 439 WRITE_ENTER(&plist->lockrw); 440 441 while ((port = plist->head) != NULL) { 442 if (vsw_plist_del_node(vswp, port)) { 443 DERR(vswp, "%s: Error deleting port %d" 444 " from port list", __func__, port->p_instance); 445 RW_EXIT(&plist->lockrw); 446 return (1); 447 } 448 449 /* Remove address if was programmed into HW. */ 450 mutex_enter(&vswp->hw_lock); 451 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 452 mutex_exit(&vswp->hw_lock); 453 454 /* Remove the fdb entry for this port/mac address */ 455 (void) vsw_del_fdb(vswp, port); 456 457 /* Remove any multicast addresses.. */ 458 vsw_del_mcst_port(port); 459 460 /* 461 * No longer need to hold the lock on the port list 462 * now that we have unlinked the target port from the 463 * list. 464 */ 465 RW_EXIT(&plist->lockrw); 466 if (vsw_port_delete(port)) { 467 DERR(vswp, "%s: Error deleting port %d", 468 __func__, port->p_instance); 469 return (1); 470 } 471 WRITE_ENTER(&plist->lockrw); 472 } 473 RW_EXIT(&plist->lockrw); 474 475 D1(vswp, "%s: exit", __func__); 476 477 return (0); 478 } 479 480 /* 481 * Delete the specified port. 482 * 483 * Returns 0 on success, 1 on failure. 484 */ 485 static int 486 vsw_port_delete(vsw_port_t *port) 487 { 488 vsw_ldc_list_t *ldcl; 489 vsw_t *vswp = port->p_vswp; 490 491 D1(vswp, "%s: enter : port id %d", __func__, port->p_instance); 492 493 (void) vsw_uninit_ldcs(port); 494 495 /* 496 * Wait for any pending ctrl msg tasks which reference this 497 * port to finish. 498 */ 499 if (vsw_drain_port_taskq(port)) 500 return (1); 501 502 /* 503 * Wait for port reference count to hit zero. 504 */ 505 while (port->ref_cnt != 0) { 506 delay(drv_usectohz(VSW_PORT_REF_DELAY)); 507 } 508 509 /* 510 * Wait for any active callbacks to finish 511 */ 512 if (vsw_drain_ldcs(port)) 513 return (1); 514 515 ldcl = &port->p_ldclist; 516 WRITE_ENTER(&ldcl->lockrw); 517 while (ldcl->num_ldcs > 0) { 518 if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) { 519 cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld", 520 vswp->instance, ldcl->head->ldc_id); 521 RW_EXIT(&ldcl->lockrw); 522 return (1); 523 } 524 } 525 RW_EXIT(&ldcl->lockrw); 526 527 rw_destroy(&port->p_ldclist.lockrw); 528 529 mutex_destroy(&port->mca_lock); 530 mutex_destroy(&port->tx_lock); 531 cv_destroy(&port->state_cv); 532 mutex_destroy(&port->state_lock); 533 534 kmem_free(port, sizeof (vsw_port_t)); 535 536 D1(vswp, "%s: exit", __func__); 537 538 return (0); 539 } 540 541 /* 542 * Attach a logical domain channel (ldc) under a specified port. 543 * 544 * Returns 0 on success, 1 on failure. 545 */ 546 static int 547 vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id) 548 { 549 vsw_t *vswp = port->p_vswp; 550 vsw_ldc_list_t *ldcl = &port->p_ldclist; 551 vsw_ldc_t *ldcp = NULL; 552 ldc_attr_t attr; 553 ldc_status_t istatus; 554 int status = DDI_FAILURE; 555 int rv; 556 char kname[MAXNAMELEN]; 557 enum { PROG_init = 0x0, PROG_mblks = 0x1, 558 PROG_callback = 0x2, PROG_rx_thread = 0x4, 559 PROG_tx_thread = 0x8} 560 progress; 561 562 progress = PROG_init; 563 564 D1(vswp, "%s: enter", __func__); 565 566 ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP); 567 if (ldcp == NULL) { 568 DERR(vswp, "%s: kmem_zalloc failed", __func__); 569 return (1); 570 } 571 ldcp->ldc_id = ldc_id; 572 573 /* Allocate pools of receive mblks */ 574 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS, 575 vsw_mblk_size1, vsw_mblk_size2, vsw_mblk_size3, 576 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3); 577 if (rv) { 578 DWARN(vswp, "%s: unable to create free mblk pools for" 579 " channel %ld (rv %d)", __func__, ldc_id, rv); 580 kmem_free(ldcp, sizeof (vsw_ldc_t)); 581 return (1); 582 } 583 584 progress |= PROG_mblks; 585 586 mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL); 587 mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL); 588 mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL); 589 mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL); 590 cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL); 591 rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL); 592 rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL); 593 594 /* required for handshake with peer */ 595 ldcp->local_session = (uint64_t)ddi_get_lbolt(); 596 ldcp->peer_session = 0; 597 ldcp->session_status = 0; 598 ldcp->hss_id = 1; /* Initial handshake session id */ 599 600 /* only set for outbound lane, inbound set by peer */ 601 vsw_set_lane_attr(vswp, &ldcp->lane_out); 602 603 attr.devclass = LDC_DEV_NT_SVC; 604 attr.instance = ddi_get_instance(vswp->dip); 605 attr.mode = LDC_MODE_UNRELIABLE; 606 attr.mtu = VSW_LDC_MTU; 607 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); 608 if (status != 0) { 609 DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)", 610 __func__, ldc_id, status); 611 goto ldc_attach_fail; 612 } 613 614 if (vsw_ldc_rxthr_enabled) { 615 ldcp->rx_thr_flags = 0; 616 617 mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL); 618 cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL); 619 ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 620 vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 621 622 progress |= PROG_rx_thread; 623 if (ldcp->rx_thread == NULL) { 624 DWARN(vswp, "%s(%lld): Failed to create worker thread", 625 __func__, ldc_id); 626 goto ldc_attach_fail; 627 } 628 } 629 630 if (vsw_ldc_txthr_enabled) { 631 ldcp->tx_thr_flags = 0; 632 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 633 634 mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL); 635 cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL); 636 ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 637 vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 638 639 progress |= PROG_tx_thread; 640 if (ldcp->tx_thread == NULL) { 641 DWARN(vswp, "%s(%lld): Failed to create worker thread", 642 __func__, ldc_id); 643 goto ldc_attach_fail; 644 } 645 } 646 647 status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp); 648 if (status != 0) { 649 DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)", 650 __func__, ldc_id, status); 651 (void) ldc_fini(ldcp->ldc_handle); 652 goto ldc_attach_fail; 653 } 654 655 progress |= PROG_callback; 656 657 mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL); 658 659 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 660 DERR(vswp, "%s: ldc_status failed", __func__); 661 mutex_destroy(&ldcp->status_lock); 662 goto ldc_attach_fail; 663 } 664 665 ldcp->ldc_status = istatus; 666 ldcp->ldc_port = port; 667 ldcp->ldc_vswp = vswp; 668 669 (void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id); 670 ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance, 671 kname, &ldcp->ldc_stats); 672 if (ldcp->ksp == NULL) { 673 DERR(vswp, "%s: kstats setup failed", __func__); 674 goto ldc_attach_fail; 675 } 676 677 /* link it into the list of channels for this port */ 678 WRITE_ENTER(&ldcl->lockrw); 679 ldcp->ldc_next = ldcl->head; 680 ldcl->head = ldcp; 681 ldcl->num_ldcs++; 682 RW_EXIT(&ldcl->lockrw); 683 684 D1(vswp, "%s: exit", __func__); 685 return (0); 686 687 ldc_attach_fail: 688 689 if (progress & PROG_callback) { 690 (void) ldc_unreg_callback(ldcp->ldc_handle); 691 } 692 693 if (progress & PROG_rx_thread) { 694 if (ldcp->rx_thread != NULL) { 695 vsw_stop_rx_thread(ldcp); 696 } 697 mutex_destroy(&ldcp->rx_thr_lock); 698 cv_destroy(&ldcp->rx_thr_cv); 699 } 700 701 if (progress & PROG_tx_thread) { 702 if (ldcp->tx_thread != NULL) { 703 vsw_stop_tx_thread(ldcp); 704 } 705 mutex_destroy(&ldcp->tx_thr_lock); 706 cv_destroy(&ldcp->tx_thr_cv); 707 } 708 if (ldcp->ksp != NULL) { 709 vgen_destroy_kstats(ldcp->ksp); 710 } 711 mutex_destroy(&ldcp->ldc_txlock); 712 mutex_destroy(&ldcp->ldc_rxlock); 713 mutex_destroy(&ldcp->ldc_cblock); 714 mutex_destroy(&ldcp->drain_cv_lock); 715 716 cv_destroy(&ldcp->drain_cv); 717 718 rw_destroy(&ldcp->lane_in.dlistrw); 719 rw_destroy(&ldcp->lane_out.dlistrw); 720 721 if (progress & PROG_mblks) { 722 vio_destroy_multipools(&ldcp->vmp, &vswp->rxh); 723 } 724 kmem_free(ldcp, sizeof (vsw_ldc_t)); 725 726 return (1); 727 } 728 729 /* 730 * Detach a logical domain channel (ldc) belonging to a 731 * particular port. 732 * 733 * Returns 0 on success, 1 on failure. 734 */ 735 static int 736 vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id) 737 { 738 vsw_t *vswp = port->p_vswp; 739 vsw_ldc_t *ldcp, *prev_ldcp; 740 vsw_ldc_list_t *ldcl = &port->p_ldclist; 741 int rv; 742 743 prev_ldcp = ldcl->head; 744 for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) { 745 if (ldcp->ldc_id == ldc_id) { 746 break; 747 } 748 } 749 750 /* specified ldc id not found */ 751 if (ldcp == NULL) { 752 DERR(vswp, "%s: ldcp = NULL", __func__); 753 return (1); 754 } 755 756 D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id); 757 758 /* Stop the receive thread */ 759 if (ldcp->rx_thread != NULL) { 760 vsw_stop_rx_thread(ldcp); 761 mutex_destroy(&ldcp->rx_thr_lock); 762 cv_destroy(&ldcp->rx_thr_cv); 763 } 764 765 /* Stop the tx thread */ 766 if (ldcp->tx_thread != NULL) { 767 vsw_stop_tx_thread(ldcp); 768 mutex_destroy(&ldcp->tx_thr_lock); 769 cv_destroy(&ldcp->tx_thr_cv); 770 if (ldcp->tx_mhead != NULL) { 771 freemsgchain(ldcp->tx_mhead); 772 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 773 } 774 } 775 776 /* Destory kstats */ 777 vgen_destroy_kstats(ldcp->ksp); 778 779 /* 780 * Before we can close the channel we must release any mapped 781 * resources (e.g. drings). 782 */ 783 vsw_free_lane_resources(ldcp, INBOUND); 784 vsw_free_lane_resources(ldcp, OUTBOUND); 785 786 /* 787 * If the close fails we are in serious trouble, as won't 788 * be able to delete the parent port. 789 */ 790 if ((rv = ldc_close(ldcp->ldc_handle)) != 0) { 791 DERR(vswp, "%s: error %d closing channel %lld", 792 __func__, rv, ldcp->ldc_id); 793 return (1); 794 } 795 796 (void) ldc_fini(ldcp->ldc_handle); 797 798 ldcp->ldc_status = LDC_INIT; 799 ldcp->ldc_handle = NULL; 800 ldcp->ldc_vswp = NULL; 801 802 803 /* 804 * Most likely some mblks are still in use and 805 * have not been returned to the pool. These mblks are 806 * added to the pool that is maintained in the device instance. 807 * Another attempt will be made to destroy the pool 808 * when the device detaches. 809 */ 810 vio_destroy_multipools(&ldcp->vmp, &vswp->rxh); 811 812 /* unlink it from the list */ 813 prev_ldcp = ldcp->ldc_next; 814 ldcl->num_ldcs--; 815 816 mutex_destroy(&ldcp->ldc_txlock); 817 mutex_destroy(&ldcp->ldc_rxlock); 818 mutex_destroy(&ldcp->ldc_cblock); 819 cv_destroy(&ldcp->drain_cv); 820 mutex_destroy(&ldcp->drain_cv_lock); 821 mutex_destroy(&ldcp->status_lock); 822 rw_destroy(&ldcp->lane_in.dlistrw); 823 rw_destroy(&ldcp->lane_out.dlistrw); 824 825 kmem_free(ldcp, sizeof (vsw_ldc_t)); 826 827 return (0); 828 } 829 830 /* 831 * Open and attempt to bring up the channel. Note that channel 832 * can only be brought up if peer has also opened channel. 833 * 834 * Returns 0 if can open and bring up channel, otherwise 835 * returns 1. 836 */ 837 static int 838 vsw_ldc_init(vsw_ldc_t *ldcp) 839 { 840 vsw_t *vswp = ldcp->ldc_vswp; 841 ldc_status_t istatus = 0; 842 int rv; 843 844 D1(vswp, "%s: enter", __func__); 845 846 LDC_ENTER_LOCK(ldcp); 847 848 /* don't start at 0 in case clients don't like that */ 849 ldcp->next_ident = 1; 850 851 rv = ldc_open(ldcp->ldc_handle); 852 if (rv != 0) { 853 DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)", 854 __func__, ldcp->ldc_id, rv); 855 LDC_EXIT_LOCK(ldcp); 856 return (1); 857 } 858 859 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 860 DERR(vswp, "%s: unable to get status", __func__); 861 LDC_EXIT_LOCK(ldcp); 862 return (1); 863 864 } else if (istatus != LDC_OPEN && istatus != LDC_READY) { 865 DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY", 866 __func__, ldcp->ldc_id, istatus); 867 LDC_EXIT_LOCK(ldcp); 868 return (1); 869 } 870 871 mutex_enter(&ldcp->status_lock); 872 ldcp->ldc_status = istatus; 873 mutex_exit(&ldcp->status_lock); 874 875 rv = ldc_up(ldcp->ldc_handle); 876 if (rv != 0) { 877 /* 878 * Not a fatal error for ldc_up() to fail, as peer 879 * end point may simply not be ready yet. 880 */ 881 D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__, 882 ldcp->ldc_id, rv); 883 LDC_EXIT_LOCK(ldcp); 884 return (1); 885 } 886 887 /* 888 * ldc_up() call is non-blocking so need to explicitly 889 * check channel status to see if in fact the channel 890 * is UP. 891 */ 892 mutex_enter(&ldcp->status_lock); 893 if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) { 894 DERR(vswp, "%s: unable to get status", __func__); 895 mutex_exit(&ldcp->status_lock); 896 LDC_EXIT_LOCK(ldcp); 897 return (1); 898 899 } 900 901 if (ldcp->ldc_status == LDC_UP) { 902 D2(vswp, "%s: channel %ld now UP (%ld)", __func__, 903 ldcp->ldc_id, istatus); 904 mutex_exit(&ldcp->status_lock); 905 LDC_EXIT_LOCK(ldcp); 906 907 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 908 return (0); 909 } 910 911 mutex_exit(&ldcp->status_lock); 912 LDC_EXIT_LOCK(ldcp); 913 914 D1(vswp, "%s: exit", __func__); 915 return (0); 916 } 917 918 /* disable callbacks on the channel */ 919 static int 920 vsw_ldc_uninit(vsw_ldc_t *ldcp) 921 { 922 vsw_t *vswp = ldcp->ldc_vswp; 923 int rv; 924 925 D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id); 926 927 LDC_ENTER_LOCK(ldcp); 928 929 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 930 if (rv != 0) { 931 DERR(vswp, "vsw_ldc_uninit(%lld): error disabling " 932 "interrupts (rv = %d)\n", ldcp->ldc_id, rv); 933 LDC_EXIT_LOCK(ldcp); 934 return (1); 935 } 936 937 mutex_enter(&ldcp->status_lock); 938 ldcp->ldc_status = LDC_INIT; 939 mutex_exit(&ldcp->status_lock); 940 941 LDC_EXIT_LOCK(ldcp); 942 943 D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id); 944 945 return (0); 946 } 947 948 static int 949 vsw_init_ldcs(vsw_port_t *port) 950 { 951 vsw_ldc_list_t *ldcl = &port->p_ldclist; 952 vsw_ldc_t *ldcp; 953 954 READ_ENTER(&ldcl->lockrw); 955 ldcp = ldcl->head; 956 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 957 (void) vsw_ldc_init(ldcp); 958 } 959 RW_EXIT(&ldcl->lockrw); 960 961 return (0); 962 } 963 964 static int 965 vsw_uninit_ldcs(vsw_port_t *port) 966 { 967 vsw_ldc_list_t *ldcl = &port->p_ldclist; 968 vsw_ldc_t *ldcp; 969 970 D1(NULL, "vsw_uninit_ldcs: enter\n"); 971 972 READ_ENTER(&ldcl->lockrw); 973 ldcp = ldcl->head; 974 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 975 (void) vsw_ldc_uninit(ldcp); 976 } 977 RW_EXIT(&ldcl->lockrw); 978 979 D1(NULL, "vsw_uninit_ldcs: exit\n"); 980 981 return (0); 982 } 983 984 /* 985 * Wait until the callback(s) associated with the ldcs under the specified 986 * port have completed. 987 * 988 * Prior to this function being invoked each channel under this port 989 * should have been quiesced via ldc_set_cb_mode(DISABLE). 990 * 991 * A short explaination of what we are doing below.. 992 * 993 * The simplest approach would be to have a reference counter in 994 * the ldc structure which is increment/decremented by the callbacks as 995 * they use the channel. The drain function could then simply disable any 996 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately 997 * there is a tiny window here - before the callback is able to get the lock 998 * on the channel it is interrupted and this function gets to execute. It 999 * sees that the ref count is zero and believes its free to delete the 1000 * associated data structures. 1001 * 1002 * We get around this by taking advantage of the fact that before the ldc 1003 * framework invokes a callback it sets a flag to indicate that there is a 1004 * callback active (or about to become active). If when we attempt to 1005 * unregister a callback when this active flag is set then the unregister 1006 * will fail with EWOULDBLOCK. 1007 * 1008 * If the unregister fails we do a cv_timedwait. We will either be signaled 1009 * by the callback as it is exiting (note we have to wait a short period to 1010 * allow the callback to return fully to the ldc framework and it to clear 1011 * the active flag), or by the timer expiring. In either case we again attempt 1012 * the unregister. We repeat this until we can succesfully unregister the 1013 * callback. 1014 * 1015 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch 1016 * the case where the callback has finished but the ldc framework has not yet 1017 * cleared the active flag. In this case we would never get a cv_signal. 1018 */ 1019 static int 1020 vsw_drain_ldcs(vsw_port_t *port) 1021 { 1022 vsw_ldc_list_t *ldcl = &port->p_ldclist; 1023 vsw_ldc_t *ldcp; 1024 vsw_t *vswp = port->p_vswp; 1025 1026 D1(vswp, "%s: enter", __func__); 1027 1028 READ_ENTER(&ldcl->lockrw); 1029 1030 ldcp = ldcl->head; 1031 1032 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 1033 /* 1034 * If we can unregister the channel callback then we 1035 * know that there is no callback either running or 1036 * scheduled to run for this channel so move on to next 1037 * channel in the list. 1038 */ 1039 mutex_enter(&ldcp->drain_cv_lock); 1040 1041 /* prompt active callbacks to quit */ 1042 ldcp->drain_state = VSW_LDC_DRAINING; 1043 1044 if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) { 1045 D2(vswp, "%s: unreg callback for chan %ld", __func__, 1046 ldcp->ldc_id); 1047 mutex_exit(&ldcp->drain_cv_lock); 1048 continue; 1049 } else { 1050 /* 1051 * If we end up here we know that either 1) a callback 1052 * is currently executing, 2) is about to start (i.e. 1053 * the ldc framework has set the active flag but 1054 * has not actually invoked the callback yet, or 3) 1055 * has finished and has returned to the ldc framework 1056 * but the ldc framework has not yet cleared the 1057 * active bit. 1058 * 1059 * Wait for it to finish. 1060 */ 1061 while (ldc_unreg_callback(ldcp->ldc_handle) 1062 == EWOULDBLOCK) 1063 (void) cv_timedwait(&ldcp->drain_cv, 1064 &ldcp->drain_cv_lock, lbolt + hz); 1065 1066 mutex_exit(&ldcp->drain_cv_lock); 1067 D2(vswp, "%s: unreg callback for chan %ld after " 1068 "timeout", __func__, ldcp->ldc_id); 1069 } 1070 } 1071 RW_EXIT(&ldcl->lockrw); 1072 1073 D1(vswp, "%s: exit", __func__); 1074 return (0); 1075 } 1076 1077 /* 1078 * Wait until all tasks which reference this port have completed. 1079 * 1080 * Prior to this function being invoked each channel under this port 1081 * should have been quiesced via ldc_set_cb_mode(DISABLE). 1082 */ 1083 static int 1084 vsw_drain_port_taskq(vsw_port_t *port) 1085 { 1086 vsw_t *vswp = port->p_vswp; 1087 1088 D1(vswp, "%s: enter", __func__); 1089 1090 /* 1091 * Mark the port as in the process of being detached, and 1092 * dispatch a marker task to the queue so we know when all 1093 * relevant tasks have completed. 1094 */ 1095 mutex_enter(&port->state_lock); 1096 port->state = VSW_PORT_DETACHING; 1097 1098 if ((vswp->taskq_p == NULL) || 1099 (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task, 1100 port, DDI_NOSLEEP) != DDI_SUCCESS)) { 1101 DERR(vswp, "%s: unable to dispatch marker task", 1102 __func__); 1103 mutex_exit(&port->state_lock); 1104 return (1); 1105 } 1106 1107 /* 1108 * Wait for the marker task to finish. 1109 */ 1110 while (port->state != VSW_PORT_DETACHABLE) 1111 cv_wait(&port->state_cv, &port->state_lock); 1112 1113 mutex_exit(&port->state_lock); 1114 1115 D1(vswp, "%s: exit", __func__); 1116 1117 return (0); 1118 } 1119 1120 static void 1121 vsw_marker_task(void *arg) 1122 { 1123 vsw_port_t *port = arg; 1124 vsw_t *vswp = port->p_vswp; 1125 1126 D1(vswp, "%s: enter", __func__); 1127 1128 mutex_enter(&port->state_lock); 1129 1130 /* 1131 * No further tasks should be dispatched which reference 1132 * this port so ok to mark it as safe to detach. 1133 */ 1134 port->state = VSW_PORT_DETACHABLE; 1135 1136 cv_signal(&port->state_cv); 1137 1138 mutex_exit(&port->state_lock); 1139 1140 D1(vswp, "%s: exit", __func__); 1141 } 1142 1143 vsw_port_t * 1144 vsw_lookup_port(vsw_t *vswp, int p_instance) 1145 { 1146 vsw_port_list_t *plist = &vswp->plist; 1147 vsw_port_t *port; 1148 1149 for (port = plist->head; port != NULL; port = port->p_next) { 1150 if (port->p_instance == p_instance) { 1151 D2(vswp, "vsw_lookup_port: found p_instance\n"); 1152 return (port); 1153 } 1154 } 1155 1156 return (NULL); 1157 } 1158 1159 /* 1160 * Search for and remove the specified port from the port 1161 * list. Returns 0 if able to locate and remove port, otherwise 1162 * returns 1. 1163 */ 1164 static int 1165 vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port) 1166 { 1167 vsw_port_list_t *plist = &vswp->plist; 1168 vsw_port_t *curr_p, *prev_p; 1169 1170 if (plist->head == NULL) 1171 return (1); 1172 1173 curr_p = prev_p = plist->head; 1174 1175 while (curr_p != NULL) { 1176 if (curr_p == port) { 1177 if (prev_p == curr_p) { 1178 plist->head = curr_p->p_next; 1179 } else { 1180 prev_p->p_next = curr_p->p_next; 1181 } 1182 plist->num_ports--; 1183 break; 1184 } else { 1185 prev_p = curr_p; 1186 curr_p = curr_p->p_next; 1187 } 1188 } 1189 return (0); 1190 } 1191 1192 /* 1193 * Interrupt handler for ldc messages. 1194 */ 1195 static uint_t 1196 vsw_ldc_cb(uint64_t event, caddr_t arg) 1197 { 1198 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 1199 vsw_t *vswp = ldcp->ldc_vswp; 1200 1201 D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 1202 1203 mutex_enter(&ldcp->ldc_cblock); 1204 ldcp->ldc_stats.callbacks++; 1205 1206 mutex_enter(&ldcp->status_lock); 1207 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 1208 mutex_exit(&ldcp->status_lock); 1209 mutex_exit(&ldcp->ldc_cblock); 1210 return (LDC_SUCCESS); 1211 } 1212 mutex_exit(&ldcp->status_lock); 1213 1214 if (event & LDC_EVT_UP) { 1215 /* 1216 * Channel has come up. 1217 */ 1218 D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)", 1219 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1220 1221 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1222 1223 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1224 } 1225 1226 if (event & LDC_EVT_READ) { 1227 /* 1228 * Data available for reading. 1229 */ 1230 D2(vswp, "%s: id(ld) event(%llx) data READ", 1231 __func__, ldcp->ldc_id, event); 1232 1233 if (ldcp->rx_thread != NULL) { 1234 /* 1235 * If the receive thread is enabled, then 1236 * wakeup the receive thread to process the 1237 * LDC messages. 1238 */ 1239 mutex_exit(&ldcp->ldc_cblock); 1240 mutex_enter(&ldcp->rx_thr_lock); 1241 if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) { 1242 ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD; 1243 cv_signal(&ldcp->rx_thr_cv); 1244 } 1245 mutex_exit(&ldcp->rx_thr_lock); 1246 mutex_enter(&ldcp->ldc_cblock); 1247 } else { 1248 vsw_process_pkt(ldcp); 1249 } 1250 1251 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1252 1253 goto vsw_cb_exit; 1254 } 1255 1256 if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) { 1257 D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)", 1258 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1259 1260 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 1261 } 1262 1263 /* 1264 * Catch either LDC_EVT_WRITE which we don't support or any 1265 * unknown event. 1266 */ 1267 if (event & 1268 ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) { 1269 DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 1270 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1271 } 1272 1273 vsw_cb_exit: 1274 mutex_exit(&ldcp->ldc_cblock); 1275 1276 /* 1277 * Let the drain function know we are finishing if it 1278 * is waiting. 1279 */ 1280 mutex_enter(&ldcp->drain_cv_lock); 1281 if (ldcp->drain_state == VSW_LDC_DRAINING) 1282 cv_signal(&ldcp->drain_cv); 1283 mutex_exit(&ldcp->drain_cv_lock); 1284 1285 return (LDC_SUCCESS); 1286 } 1287 1288 /* 1289 * Reinitialise data structures associated with the channel. 1290 */ 1291 static void 1292 vsw_ldc_reinit(vsw_ldc_t *ldcp) 1293 { 1294 vsw_t *vswp = ldcp->ldc_vswp; 1295 vsw_port_t *port; 1296 vsw_ldc_list_t *ldcl; 1297 1298 D1(vswp, "%s: enter", __func__); 1299 1300 port = ldcp->ldc_port; 1301 ldcl = &port->p_ldclist; 1302 1303 READ_ENTER(&ldcl->lockrw); 1304 1305 D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 1306 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 1307 1308 vsw_free_lane_resources(ldcp, INBOUND); 1309 vsw_free_lane_resources(ldcp, OUTBOUND); 1310 RW_EXIT(&ldcl->lockrw); 1311 1312 ldcp->lane_in.lstate = 0; 1313 ldcp->lane_out.lstate = 0; 1314 1315 /* 1316 * Remove parent port from any multicast groups 1317 * it may have registered with. Client must resend 1318 * multicast add command after handshake completes. 1319 */ 1320 (void) vsw_del_fdb(vswp, port); 1321 1322 vsw_del_mcst_port(port); 1323 1324 ldcp->peer_session = 0; 1325 ldcp->session_status = 0; 1326 ldcp->hcnt = 0; 1327 ldcp->hphase = VSW_MILESTONE0; 1328 ldcp->tx_failures = 0; 1329 1330 D1(vswp, "%s: exit", __func__); 1331 } 1332 1333 /* 1334 * Process a connection event. 1335 * 1336 * Note - care must be taken to ensure that this function is 1337 * not called with the dlistrw lock held. 1338 */ 1339 static void 1340 vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt) 1341 { 1342 vsw_t *vswp = ldcp->ldc_vswp; 1343 vsw_conn_evt_t *conn = NULL; 1344 1345 D1(vswp, "%s: enter", __func__); 1346 1347 /* 1348 * Check if either a reset or restart event is pending 1349 * or in progress. If so just return. 1350 * 1351 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT 1352 * being received by the callback handler, or a ECONNRESET error 1353 * code being returned from a ldc_read() or ldc_write() call. 1354 * 1355 * A VSW_CONN_RESTART event occurs when some error checking code 1356 * decides that there is a problem with data from the channel, 1357 * and that the handshake should be restarted. 1358 */ 1359 if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) && 1360 (ldstub((uint8_t *)&ldcp->reset_active))) 1361 return; 1362 1363 /* 1364 * If it is an LDC_UP event we first check the recorded 1365 * state of the channel. If this is UP then we know that 1366 * the channel moving to the UP state has already been dealt 1367 * with and don't need to dispatch a new task. 1368 * 1369 * The reason for this check is that when we do a ldc_up(), 1370 * depending on the state of the peer, we may or may not get 1371 * a LDC_UP event. As we can't depend on getting a LDC_UP evt 1372 * every time we do ldc_up() we explicitly check the channel 1373 * status to see has it come up (ldc_up() is asynch and will 1374 * complete at some undefined time), and take the appropriate 1375 * action. 1376 * 1377 * The flip side of this is that we may get a LDC_UP event 1378 * when we have already seen that the channel is up and have 1379 * dealt with that. 1380 */ 1381 mutex_enter(&ldcp->status_lock); 1382 if (evt == VSW_CONN_UP) { 1383 if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) { 1384 mutex_exit(&ldcp->status_lock); 1385 return; 1386 } 1387 } 1388 mutex_exit(&ldcp->status_lock); 1389 1390 /* 1391 * The transaction group id allows us to identify and discard 1392 * any tasks which are still pending on the taskq and refer 1393 * to the handshake session we are about to restart or reset. 1394 * These stale messages no longer have any real meaning. 1395 */ 1396 (void) atomic_inc_32(&ldcp->hss_id); 1397 1398 ASSERT(vswp->taskq_p != NULL); 1399 1400 if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) { 1401 cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for" 1402 " connection event", vswp->instance); 1403 goto err_exit; 1404 } 1405 1406 conn->evt = evt; 1407 conn->ldcp = ldcp; 1408 1409 if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn, 1410 DDI_NOSLEEP) != DDI_SUCCESS) { 1411 cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task", 1412 vswp->instance); 1413 1414 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1415 goto err_exit; 1416 } 1417 1418 D1(vswp, "%s: exit", __func__); 1419 return; 1420 1421 err_exit: 1422 /* 1423 * Have mostly likely failed due to memory shortage. Clear the flag so 1424 * that future requests will at least be attempted and will hopefully 1425 * succeed. 1426 */ 1427 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1428 ldcp->reset_active = 0; 1429 } 1430 1431 /* 1432 * Deal with events relating to a connection. Invoked from a taskq. 1433 */ 1434 static void 1435 vsw_conn_task(void *arg) 1436 { 1437 vsw_conn_evt_t *conn = (vsw_conn_evt_t *)arg; 1438 vsw_ldc_t *ldcp = NULL; 1439 vsw_t *vswp = NULL; 1440 uint16_t evt; 1441 ldc_status_t curr_status; 1442 1443 ldcp = conn->ldcp; 1444 evt = conn->evt; 1445 vswp = ldcp->ldc_vswp; 1446 1447 D1(vswp, "%s: enter", __func__); 1448 1449 /* can safely free now have copied out data */ 1450 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1451 1452 mutex_enter(&ldcp->status_lock); 1453 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1454 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1455 "channel %ld", vswp->instance, ldcp->ldc_id); 1456 mutex_exit(&ldcp->status_lock); 1457 return; 1458 } 1459 1460 /* 1461 * If we wish to restart the handshake on this channel, then if 1462 * the channel is UP we bring it DOWN to flush the underlying 1463 * ldc queue. 1464 */ 1465 if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP)) 1466 (void) ldc_down(ldcp->ldc_handle); 1467 1468 /* 1469 * re-init all the associated data structures. 1470 */ 1471 vsw_ldc_reinit(ldcp); 1472 1473 /* 1474 * Bring the channel back up (note it does no harm to 1475 * do this even if the channel is already UP, Just 1476 * becomes effectively a no-op). 1477 */ 1478 (void) ldc_up(ldcp->ldc_handle); 1479 1480 /* 1481 * Check if channel is now UP. This will only happen if 1482 * peer has also done a ldc_up(). 1483 */ 1484 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1485 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1486 "channel %ld", vswp->instance, ldcp->ldc_id); 1487 mutex_exit(&ldcp->status_lock); 1488 return; 1489 } 1490 1491 ldcp->ldc_status = curr_status; 1492 1493 /* channel UP so restart handshake by sending version info */ 1494 if (curr_status == LDC_UP) { 1495 if (ldcp->hcnt++ > vsw_num_handshakes) { 1496 cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted" 1497 " handshake attempts (%d) on channel %ld", 1498 vswp->instance, ldcp->hcnt, ldcp->ldc_id); 1499 mutex_exit(&ldcp->status_lock); 1500 return; 1501 } 1502 1503 if (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp, 1504 DDI_NOSLEEP) != DDI_SUCCESS) { 1505 cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task", 1506 vswp->instance); 1507 1508 /* 1509 * Don't count as valid restart attempt if couldn't 1510 * send version msg. 1511 */ 1512 if (ldcp->hcnt > 0) 1513 ldcp->hcnt--; 1514 } 1515 } 1516 1517 /* 1518 * Mark that the process is complete by clearing the flag. 1519 * 1520 * Note is it possible that the taskq dispatch above may have failed, 1521 * most likely due to memory shortage. We still clear the flag so 1522 * future attempts will at least be attempted and will hopefully 1523 * succeed. 1524 */ 1525 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1526 ldcp->reset_active = 0; 1527 1528 mutex_exit(&ldcp->status_lock); 1529 1530 D1(vswp, "%s: exit", __func__); 1531 } 1532 1533 /* 1534 * returns 0 if legal for event signified by flag to have 1535 * occured at the time it did. Otherwise returns 1. 1536 */ 1537 int 1538 vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag) 1539 { 1540 vsw_t *vswp = ldcp->ldc_vswp; 1541 uint64_t state; 1542 uint64_t phase; 1543 1544 if (dir == INBOUND) 1545 state = ldcp->lane_in.lstate; 1546 else 1547 state = ldcp->lane_out.lstate; 1548 1549 phase = ldcp->hphase; 1550 1551 switch (flag) { 1552 case VSW_VER_INFO_RECV: 1553 if (phase > VSW_MILESTONE0) { 1554 DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV" 1555 " when in state %d\n", ldcp->ldc_id, phase); 1556 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1557 return (1); 1558 } 1559 break; 1560 1561 case VSW_VER_ACK_RECV: 1562 case VSW_VER_NACK_RECV: 1563 if (!(state & VSW_VER_INFO_SENT)) { 1564 DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or " 1565 "VER_NACK when in state %d\n", ldcp->ldc_id, phase); 1566 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1567 return (1); 1568 } else 1569 state &= ~VSW_VER_INFO_SENT; 1570 break; 1571 1572 case VSW_ATTR_INFO_RECV: 1573 if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) { 1574 DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV" 1575 " when in state %d\n", ldcp->ldc_id, phase); 1576 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1577 return (1); 1578 } 1579 break; 1580 1581 case VSW_ATTR_ACK_RECV: 1582 case VSW_ATTR_NACK_RECV: 1583 if (!(state & VSW_ATTR_INFO_SENT)) { 1584 DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK" 1585 " or ATTR_NACK when in state %d\n", 1586 ldcp->ldc_id, phase); 1587 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1588 return (1); 1589 } else 1590 state &= ~VSW_ATTR_INFO_SENT; 1591 break; 1592 1593 case VSW_DRING_INFO_RECV: 1594 if (phase < VSW_MILESTONE1) { 1595 DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV" 1596 " when in state %d\n", ldcp->ldc_id, phase); 1597 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1598 return (1); 1599 } 1600 break; 1601 1602 case VSW_DRING_ACK_RECV: 1603 case VSW_DRING_NACK_RECV: 1604 if (!(state & VSW_DRING_INFO_SENT)) { 1605 DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK " 1606 " or DRING_NACK when in state %d\n", 1607 ldcp->ldc_id, phase); 1608 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1609 return (1); 1610 } else 1611 state &= ~VSW_DRING_INFO_SENT; 1612 break; 1613 1614 case VSW_RDX_INFO_RECV: 1615 if (phase < VSW_MILESTONE3) { 1616 DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV" 1617 " when in state %d\n", ldcp->ldc_id, phase); 1618 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1619 return (1); 1620 } 1621 break; 1622 1623 case VSW_RDX_ACK_RECV: 1624 case VSW_RDX_NACK_RECV: 1625 if (!(state & VSW_RDX_INFO_SENT)) { 1626 DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or " 1627 "RDX_NACK when in state %d\n", ldcp->ldc_id, phase); 1628 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1629 return (1); 1630 } else 1631 state &= ~VSW_RDX_INFO_SENT; 1632 break; 1633 1634 case VSW_MCST_INFO_RECV: 1635 if (phase < VSW_MILESTONE3) { 1636 DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV" 1637 " when in state %d\n", ldcp->ldc_id, phase); 1638 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1639 return (1); 1640 } 1641 break; 1642 1643 default: 1644 DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)", 1645 ldcp->ldc_id, flag); 1646 return (1); 1647 } 1648 1649 if (dir == INBOUND) 1650 ldcp->lane_in.lstate = state; 1651 else 1652 ldcp->lane_out.lstate = state; 1653 1654 D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id); 1655 1656 return (0); 1657 } 1658 1659 void 1660 vsw_next_milestone(vsw_ldc_t *ldcp) 1661 { 1662 vsw_t *vswp = ldcp->ldc_vswp; 1663 1664 D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__, 1665 ldcp->ldc_id, ldcp->hphase); 1666 1667 DUMP_FLAGS(ldcp->lane_in.lstate); 1668 DUMP_FLAGS(ldcp->lane_out.lstate); 1669 1670 switch (ldcp->hphase) { 1671 1672 case VSW_MILESTONE0: 1673 /* 1674 * If we haven't started to handshake with our peer, 1675 * start to do so now. 1676 */ 1677 if (ldcp->lane_out.lstate == 0) { 1678 D2(vswp, "%s: (chan %lld) starting handshake " 1679 "with peer", __func__, ldcp->ldc_id); 1680 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1681 } 1682 1683 /* 1684 * Only way to pass this milestone is to have successfully 1685 * negotiated version info. 1686 */ 1687 if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) && 1688 (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) { 1689 1690 D2(vswp, "%s: (chan %lld) leaving milestone 0", 1691 __func__, ldcp->ldc_id); 1692 1693 /* 1694 * Next milestone is passed when attribute 1695 * information has been successfully exchanged. 1696 */ 1697 ldcp->hphase = VSW_MILESTONE1; 1698 vsw_send_attr(ldcp); 1699 1700 } 1701 break; 1702 1703 case VSW_MILESTONE1: 1704 /* 1705 * Only way to pass this milestone is to have successfully 1706 * negotiated attribute information. 1707 */ 1708 if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) { 1709 1710 ldcp->hphase = VSW_MILESTONE2; 1711 1712 /* 1713 * If the peer device has said it wishes to 1714 * use descriptor rings then we send it our ring 1715 * info, otherwise we just set up a private ring 1716 * which we use an internal buffer 1717 */ 1718 if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) 1719 vsw_send_dring_info(ldcp); 1720 } 1721 break; 1722 1723 case VSW_MILESTONE2: 1724 /* 1725 * If peer has indicated in its attribute message that 1726 * it wishes to use descriptor rings then the only way 1727 * to pass this milestone is for us to have received 1728 * valid dring info. 1729 * 1730 * If peer is not using descriptor rings then just fall 1731 * through. 1732 */ 1733 if ((ldcp->lane_in.xfer_mode == VIO_DRING_MODE) && 1734 (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))) 1735 break; 1736 1737 D2(vswp, "%s: (chan %lld) leaving milestone 2", 1738 __func__, ldcp->ldc_id); 1739 1740 ldcp->hphase = VSW_MILESTONE3; 1741 vsw_send_rdx(ldcp); 1742 break; 1743 1744 case VSW_MILESTONE3: 1745 /* 1746 * Pass this milestone when all paramaters have been 1747 * successfully exchanged and RDX sent in both directions. 1748 * 1749 * Mark outbound lane as available to transmit data. 1750 */ 1751 if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) && 1752 (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) { 1753 1754 D2(vswp, "%s: (chan %lld) leaving milestone 3", 1755 __func__, ldcp->ldc_id); 1756 D2(vswp, "%s: ** handshake complete (0x%llx : " 1757 "0x%llx) **", __func__, ldcp->lane_in.lstate, 1758 ldcp->lane_out.lstate); 1759 ldcp->lane_out.lstate |= VSW_LANE_ACTIVE; 1760 ldcp->hphase = VSW_MILESTONE4; 1761 ldcp->hcnt = 0; 1762 DISPLAY_STATE(); 1763 } else { 1764 D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)", 1765 __func__, ldcp->lane_in.lstate, 1766 ldcp->lane_out.lstate); 1767 } 1768 break; 1769 1770 case VSW_MILESTONE4: 1771 D2(vswp, "%s: (chan %lld) in milestone 4", __func__, 1772 ldcp->ldc_id); 1773 break; 1774 1775 default: 1776 DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__, 1777 ldcp->ldc_id, ldcp->hphase); 1778 } 1779 1780 D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id, 1781 ldcp->hphase); 1782 } 1783 1784 /* 1785 * Check if major version is supported. 1786 * 1787 * Returns 0 if finds supported major number, and if necessary 1788 * adjusts the minor field. 1789 * 1790 * Returns 1 if can't match major number exactly. Sets mjor/minor 1791 * to next lowest support values, or to zero if no other values possible. 1792 */ 1793 static int 1794 vsw_supported_version(vio_ver_msg_t *vp) 1795 { 1796 int i; 1797 1798 D1(NULL, "vsw_supported_version: enter"); 1799 1800 for (i = 0; i < VSW_NUM_VER; i++) { 1801 if (vsw_versions[i].ver_major == vp->ver_major) { 1802 /* 1803 * Matching or lower major version found. Update 1804 * minor number if necessary. 1805 */ 1806 if (vp->ver_minor > vsw_versions[i].ver_minor) { 1807 D2(NULL, "%s: adjusting minor value from %d " 1808 "to %d", __func__, vp->ver_minor, 1809 vsw_versions[i].ver_minor); 1810 vp->ver_minor = vsw_versions[i].ver_minor; 1811 } 1812 1813 return (0); 1814 } 1815 1816 if (vsw_versions[i].ver_major < vp->ver_major) { 1817 if (vp->ver_minor > vsw_versions[i].ver_minor) { 1818 D2(NULL, "%s: adjusting minor value from %d " 1819 "to %d", __func__, vp->ver_minor, 1820 vsw_versions[i].ver_minor); 1821 vp->ver_minor = vsw_versions[i].ver_minor; 1822 } 1823 return (1); 1824 } 1825 } 1826 1827 /* No match was possible, zero out fields */ 1828 vp->ver_major = 0; 1829 vp->ver_minor = 0; 1830 1831 D1(NULL, "vsw_supported_version: exit"); 1832 1833 return (1); 1834 } 1835 1836 /* 1837 * Main routine for processing messages received over LDC. 1838 */ 1839 static void 1840 vsw_process_pkt(void *arg) 1841 { 1842 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 1843 vsw_t *vswp = ldcp->ldc_vswp; 1844 size_t msglen; 1845 vio_msg_tag_t tag; 1846 def_msg_t dmsg; 1847 int rv = 0; 1848 1849 1850 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 1851 1852 ASSERT(MUTEX_HELD(&ldcp->ldc_cblock)); 1853 1854 /* 1855 * If channel is up read messages until channel is empty. 1856 */ 1857 do { 1858 msglen = sizeof (dmsg); 1859 rv = ldc_read(ldcp->ldc_handle, (caddr_t)&dmsg, &msglen); 1860 1861 if (rv != 0) { 1862 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n", 1863 __func__, ldcp->ldc_id, rv, msglen); 1864 } 1865 1866 /* channel has been reset */ 1867 if (rv == ECONNRESET) { 1868 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 1869 break; 1870 } 1871 1872 if (msglen == 0) { 1873 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 1874 ldcp->ldc_id); 1875 break; 1876 } 1877 1878 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 1879 ldcp->ldc_id, msglen); 1880 1881 /* 1882 * Figure out what sort of packet we have gotten by 1883 * examining the msg tag, and then switch it appropriately. 1884 */ 1885 bcopy(&dmsg, &tag, sizeof (vio_msg_tag_t)); 1886 1887 switch (tag.vio_msgtype) { 1888 case VIO_TYPE_CTRL: 1889 vsw_dispatch_ctrl_task(ldcp, &dmsg, tag); 1890 break; 1891 case VIO_TYPE_DATA: 1892 vsw_process_data_pkt(ldcp, &dmsg, tag); 1893 break; 1894 case VIO_TYPE_ERR: 1895 vsw_process_err_pkt(ldcp, &dmsg, tag); 1896 break; 1897 default: 1898 DERR(vswp, "%s: Unknown tag(%lx) ", __func__, 1899 "id(%lx)\n", tag.vio_msgtype, ldcp->ldc_id); 1900 break; 1901 } 1902 } while (msglen); 1903 1904 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 1905 } 1906 1907 /* 1908 * Dispatch a task to process a VIO control message. 1909 */ 1910 static void 1911 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t tag) 1912 { 1913 vsw_ctrl_task_t *ctaskp = NULL; 1914 vsw_port_t *port = ldcp->ldc_port; 1915 vsw_t *vswp = port->p_vswp; 1916 1917 D1(vswp, "%s: enter", __func__); 1918 1919 /* 1920 * We need to handle RDX ACK messages in-band as once they 1921 * are exchanged it is possible that we will get an 1922 * immediate (legitimate) data packet. 1923 */ 1924 if ((tag.vio_subtype_env == VIO_RDX) && 1925 (tag.vio_subtype == VIO_SUBTYPE_ACK)) { 1926 1927 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV)) 1928 return; 1929 1930 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV; 1931 D2(vswp, "%s (%ld) handling RDX_ACK in place " 1932 "(ostate 0x%llx : hphase %d)", __func__, 1933 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase); 1934 vsw_next_milestone(ldcp); 1935 return; 1936 } 1937 1938 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 1939 1940 if (ctaskp == NULL) { 1941 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__); 1942 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1943 return; 1944 } 1945 1946 ctaskp->ldcp = ldcp; 1947 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 1948 ctaskp->hss_id = ldcp->hss_id; 1949 1950 /* 1951 * Dispatch task to processing taskq if port is not in 1952 * the process of being detached. 1953 */ 1954 mutex_enter(&port->state_lock); 1955 if (port->state == VSW_PORT_INIT) { 1956 if ((vswp->taskq_p == NULL) || 1957 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt, 1958 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) { 1959 DERR(vswp, "%s: unable to dispatch task to taskq", 1960 __func__); 1961 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 1962 mutex_exit(&port->state_lock); 1963 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1964 return; 1965 } 1966 } else { 1967 DWARN(vswp, "%s: port %d detaching, not dispatching " 1968 "task", __func__, port->p_instance); 1969 } 1970 1971 mutex_exit(&port->state_lock); 1972 1973 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 1974 ldcp->ldc_id); 1975 D1(vswp, "%s: exit", __func__); 1976 } 1977 1978 /* 1979 * Process a VIO ctrl message. Invoked from taskq. 1980 */ 1981 static void 1982 vsw_process_ctrl_pkt(void *arg) 1983 { 1984 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 1985 vsw_ldc_t *ldcp = ctaskp->ldcp; 1986 vsw_t *vswp = ldcp->ldc_vswp; 1987 vio_msg_tag_t tag; 1988 uint16_t env; 1989 1990 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 1991 1992 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 1993 env = tag.vio_subtype_env; 1994 1995 /* stale pkt check */ 1996 if (ctaskp->hss_id < ldcp->hss_id) { 1997 DWARN(vswp, "%s: discarding stale packet belonging to earlier" 1998 " (%ld) handshake session", __func__, ctaskp->hss_id); 1999 return; 2000 } 2001 2002 /* session id check */ 2003 if (ldcp->session_status & VSW_PEER_SESSION) { 2004 if (ldcp->peer_session != tag.vio_sid) { 2005 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2006 __func__, ldcp->ldc_id, tag.vio_sid); 2007 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2008 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2009 return; 2010 } 2011 } 2012 2013 /* 2014 * Switch on vio_subtype envelope, then let lower routines 2015 * decide if its an INFO, ACK or NACK packet. 2016 */ 2017 switch (env) { 2018 case VIO_VER_INFO: 2019 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 2020 break; 2021 case VIO_DRING_REG: 2022 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 2023 break; 2024 case VIO_DRING_UNREG: 2025 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 2026 break; 2027 case VIO_ATTR_INFO: 2028 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 2029 break; 2030 case VNET_MCAST_INFO: 2031 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 2032 break; 2033 case VIO_RDX: 2034 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 2035 break; 2036 default: 2037 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 2038 } 2039 2040 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2041 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2042 } 2043 2044 /* 2045 * Version negotiation. We can end up here either because our peer 2046 * has responded to a handshake message we have sent it, or our peer 2047 * has initiated a handshake with us. If its the former then can only 2048 * be ACK or NACK, if its the later can only be INFO. 2049 * 2050 * If its an ACK we move to the next stage of the handshake, namely 2051 * attribute exchange. If its a NACK we see if we can specify another 2052 * version, if we can't we stop. 2053 * 2054 * If it is an INFO we reset all params associated with communication 2055 * in that direction over this channel (remember connection is 2056 * essentially 2 independent simplex channels). 2057 */ 2058 void 2059 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 2060 { 2061 vio_ver_msg_t *ver_pkt; 2062 vsw_t *vswp = ldcp->ldc_vswp; 2063 2064 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2065 2066 /* 2067 * We know this is a ctrl/version packet so 2068 * cast it into the correct structure. 2069 */ 2070 ver_pkt = (vio_ver_msg_t *)pkt; 2071 2072 switch (ver_pkt->tag.vio_subtype) { 2073 case VIO_SUBTYPE_INFO: 2074 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 2075 2076 /* 2077 * Record the session id, which we will use from now 2078 * until we see another VER_INFO msg. Even then the 2079 * session id in most cases will be unchanged, execpt 2080 * if channel was reset. 2081 */ 2082 if ((ldcp->session_status & VSW_PEER_SESSION) && 2083 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 2084 DERR(vswp, "%s: updating session id for chan %lld " 2085 "from %llx to %llx", __func__, ldcp->ldc_id, 2086 ldcp->peer_session, ver_pkt->tag.vio_sid); 2087 } 2088 2089 ldcp->peer_session = ver_pkt->tag.vio_sid; 2090 ldcp->session_status |= VSW_PEER_SESSION; 2091 2092 /* Legal message at this time ? */ 2093 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 2094 return; 2095 2096 /* 2097 * First check the device class. Currently only expect 2098 * to be talking to a network device. In the future may 2099 * also talk to another switch. 2100 */ 2101 if (ver_pkt->dev_class != VDEV_NETWORK) { 2102 DERR(vswp, "%s: illegal device class %d", __func__, 2103 ver_pkt->dev_class); 2104 2105 ver_pkt->tag.vio_sid = ldcp->local_session; 2106 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2107 2108 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2109 2110 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2111 sizeof (vio_ver_msg_t), B_TRUE); 2112 2113 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2114 vsw_next_milestone(ldcp); 2115 return; 2116 } else { 2117 ldcp->dev_class = ver_pkt->dev_class; 2118 } 2119 2120 /* 2121 * Now check the version. 2122 */ 2123 if (vsw_supported_version(ver_pkt) == 0) { 2124 /* 2125 * Support this major version and possibly 2126 * adjusted minor version. 2127 */ 2128 2129 D2(vswp, "%s: accepted ver %d:%d", __func__, 2130 ver_pkt->ver_major, ver_pkt->ver_minor); 2131 2132 /* Store accepted values */ 2133 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2134 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2135 2136 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2137 2138 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 2139 } else { 2140 /* 2141 * NACK back with the next lower major/minor 2142 * pairing we support (if don't suuport any more 2143 * versions then they will be set to zero. 2144 */ 2145 2146 D2(vswp, "%s: replying with ver %d:%d", __func__, 2147 ver_pkt->ver_major, ver_pkt->ver_minor); 2148 2149 /* Store updated values */ 2150 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2151 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2152 2153 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2154 2155 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2156 } 2157 2158 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2159 ver_pkt->tag.vio_sid = ldcp->local_session; 2160 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2161 sizeof (vio_ver_msg_t), B_TRUE); 2162 2163 vsw_next_milestone(ldcp); 2164 break; 2165 2166 case VIO_SUBTYPE_ACK: 2167 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2168 2169 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2170 return; 2171 2172 /* Store updated values */ 2173 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2174 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2175 2176 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2177 vsw_next_milestone(ldcp); 2178 2179 break; 2180 2181 case VIO_SUBTYPE_NACK: 2182 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2183 2184 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2185 return; 2186 2187 /* 2188 * If our peer sent us a NACK with the ver fields set to 2189 * zero then there is nothing more we can do. Otherwise see 2190 * if we support either the version suggested, or a lesser 2191 * one. 2192 */ 2193 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2194 DERR(vswp, "%s: peer unable to negotiate any " 2195 "further.", __func__); 2196 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2197 vsw_next_milestone(ldcp); 2198 return; 2199 } 2200 2201 /* 2202 * Check to see if we support this major version or 2203 * a lower one. If we don't then maj/min will be set 2204 * to zero. 2205 */ 2206 (void) vsw_supported_version(ver_pkt); 2207 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2208 /* Nothing more we can do */ 2209 DERR(vswp, "%s: version negotiation failed.\n", 2210 __func__); 2211 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2212 vsw_next_milestone(ldcp); 2213 } else { 2214 /* found a supported major version */ 2215 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2216 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2217 2218 D2(vswp, "%s: resending with updated values (%x, %x)", 2219 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2220 2221 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2222 ver_pkt->tag.vio_sid = ldcp->local_session; 2223 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2224 2225 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2226 2227 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2228 sizeof (vio_ver_msg_t), B_TRUE); 2229 2230 vsw_next_milestone(ldcp); 2231 2232 } 2233 break; 2234 2235 default: 2236 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2237 ver_pkt->tag.vio_subtype); 2238 } 2239 2240 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2241 } 2242 2243 /* 2244 * Process an attribute packet. We can end up here either because our peer 2245 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2246 * peer has sent us an attribute INFO message 2247 * 2248 * If its an ACK we then move to the next stage of the handshake which 2249 * is to send our descriptor ring info to our peer. If its a NACK then 2250 * there is nothing more we can (currently) do. 2251 * 2252 * If we get a valid/acceptable INFO packet (and we have already negotiated 2253 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2254 * NACK back and reset channel state to INACTIV. 2255 * 2256 * FUTURE: in time we will probably negotiate over attributes, but for 2257 * the moment unacceptable attributes are regarded as a fatal error. 2258 * 2259 */ 2260 void 2261 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 2262 { 2263 vnet_attr_msg_t *attr_pkt; 2264 vsw_t *vswp = ldcp->ldc_vswp; 2265 vsw_port_t *port = ldcp->ldc_port; 2266 uint64_t macaddr = 0; 2267 int i; 2268 2269 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2270 2271 /* 2272 * We know this is a ctrl/attr packet so 2273 * cast it into the correct structure. 2274 */ 2275 attr_pkt = (vnet_attr_msg_t *)pkt; 2276 2277 switch (attr_pkt->tag.vio_subtype) { 2278 case VIO_SUBTYPE_INFO: 2279 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2280 2281 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 2282 return; 2283 2284 /* 2285 * If the attributes are unacceptable then we NACK back. 2286 */ 2287 if (vsw_check_attr(attr_pkt, ldcp->ldc_port)) { 2288 2289 DERR(vswp, "%s (chan %d): invalid attributes", 2290 __func__, ldcp->ldc_id); 2291 2292 vsw_free_lane_resources(ldcp, INBOUND); 2293 2294 attr_pkt->tag.vio_sid = ldcp->local_session; 2295 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2296 2297 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2298 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 2299 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2300 sizeof (vnet_attr_msg_t), B_TRUE); 2301 2302 vsw_next_milestone(ldcp); 2303 return; 2304 } 2305 2306 /* 2307 * Otherwise store attributes for this lane and update 2308 * lane state. 2309 */ 2310 ldcp->lane_in.mtu = attr_pkt->mtu; 2311 ldcp->lane_in.addr = attr_pkt->addr; 2312 ldcp->lane_in.addr_type = attr_pkt->addr_type; 2313 ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode; 2314 ldcp->lane_in.ack_freq = attr_pkt->ack_freq; 2315 2316 macaddr = ldcp->lane_in.addr; 2317 for (i = ETHERADDRL - 1; i >= 0; i--) { 2318 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 2319 macaddr >>= 8; 2320 } 2321 2322 /* create the fdb entry for this port/mac address */ 2323 (void) vsw_add_fdb(vswp, port); 2324 2325 /* setup device specifc xmit routines */ 2326 mutex_enter(&port->tx_lock); 2327 if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) { 2328 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 2329 port->transmit = vsw_dringsend; 2330 } else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) { 2331 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 2332 vsw_create_privring(ldcp); 2333 port->transmit = vsw_descrsend; 2334 } 2335 mutex_exit(&port->tx_lock); 2336 2337 attr_pkt->tag.vio_sid = ldcp->local_session; 2338 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2339 2340 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2341 2342 ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT; 2343 2344 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2345 sizeof (vnet_attr_msg_t), B_TRUE); 2346 2347 vsw_next_milestone(ldcp); 2348 break; 2349 2350 case VIO_SUBTYPE_ACK: 2351 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2352 2353 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 2354 return; 2355 2356 ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV; 2357 vsw_next_milestone(ldcp); 2358 break; 2359 2360 case VIO_SUBTYPE_NACK: 2361 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2362 2363 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2364 return; 2365 2366 ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV; 2367 vsw_next_milestone(ldcp); 2368 break; 2369 2370 default: 2371 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2372 attr_pkt->tag.vio_subtype); 2373 } 2374 2375 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2376 } 2377 2378 /* 2379 * Process a dring info packet. We can end up here either because our peer 2380 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 2381 * peer has sent us a dring INFO message. 2382 * 2383 * If we get a valid/acceptable INFO packet (and we have already negotiated 2384 * a version) we ACK back and update the lane state, otherwise we NACK back. 2385 * 2386 * FUTURE: nothing to stop client from sending us info on multiple dring's 2387 * but for the moment we will just use the first one we are given. 2388 * 2389 */ 2390 void 2391 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 2392 { 2393 vio_dring_reg_msg_t *dring_pkt; 2394 vsw_t *vswp = ldcp->ldc_vswp; 2395 ldc_mem_info_t minfo; 2396 dring_info_t *dp, *dbp; 2397 int dring_found = 0; 2398 2399 /* 2400 * We know this is a ctrl/dring packet so 2401 * cast it into the correct structure. 2402 */ 2403 dring_pkt = (vio_dring_reg_msg_t *)pkt; 2404 2405 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2406 2407 switch (dring_pkt->tag.vio_subtype) { 2408 case VIO_SUBTYPE_INFO: 2409 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2410 2411 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 2412 return; 2413 2414 /* 2415 * If the dring params are unacceptable then we NACK back. 2416 */ 2417 if (vsw_check_dring_info(dring_pkt)) { 2418 2419 DERR(vswp, "%s (%lld): invalid dring info", 2420 __func__, ldcp->ldc_id); 2421 2422 vsw_free_lane_resources(ldcp, INBOUND); 2423 2424 dring_pkt->tag.vio_sid = ldcp->local_session; 2425 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2426 2427 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2428 2429 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2430 2431 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2432 sizeof (vio_dring_reg_msg_t), B_TRUE); 2433 2434 vsw_next_milestone(ldcp); 2435 return; 2436 } 2437 2438 /* 2439 * Otherwise, attempt to map in the dring using the 2440 * cookie. If that succeeds we send back a unique dring 2441 * identifier that the sending side will use in future 2442 * to refer to this descriptor ring. 2443 */ 2444 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 2445 2446 dp->num_descriptors = dring_pkt->num_descriptors; 2447 dp->descriptor_size = dring_pkt->descriptor_size; 2448 dp->options = dring_pkt->options; 2449 dp->ncookies = dring_pkt->ncookies; 2450 2451 /* 2452 * Note: should only get one cookie. Enforced in 2453 * the ldc layer. 2454 */ 2455 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 2456 sizeof (ldc_mem_cookie_t)); 2457 2458 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 2459 dp->num_descriptors, dp->descriptor_size); 2460 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 2461 dp->options, dp->ncookies); 2462 2463 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 2464 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 2465 LDC_SHADOW_MAP, &(dp->handle))) != 0) { 2466 2467 DERR(vswp, "%s: dring_map failed\n", __func__); 2468 2469 kmem_free(dp, sizeof (dring_info_t)); 2470 vsw_free_lane_resources(ldcp, INBOUND); 2471 2472 dring_pkt->tag.vio_sid = ldcp->local_session; 2473 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2474 2475 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2476 2477 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2478 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2479 sizeof (vio_dring_reg_msg_t), B_TRUE); 2480 2481 vsw_next_milestone(ldcp); 2482 return; 2483 } 2484 2485 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 2486 2487 DERR(vswp, "%s: dring_addr failed\n", __func__); 2488 2489 kmem_free(dp, sizeof (dring_info_t)); 2490 vsw_free_lane_resources(ldcp, INBOUND); 2491 2492 dring_pkt->tag.vio_sid = ldcp->local_session; 2493 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2494 2495 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2496 2497 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2498 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2499 sizeof (vio_dring_reg_msg_t), B_TRUE); 2500 2501 vsw_next_milestone(ldcp); 2502 return; 2503 } else { 2504 /* store the address of the pub part of ring */ 2505 dp->pub_addr = minfo.vaddr; 2506 } 2507 2508 /* no private section as we are importing */ 2509 dp->priv_addr = NULL; 2510 2511 /* 2512 * Using simple mono increasing int for ident at 2513 * the moment. 2514 */ 2515 dp->ident = ldcp->next_ident; 2516 ldcp->next_ident++; 2517 2518 dp->end_idx = 0; 2519 dp->next = NULL; 2520 2521 /* 2522 * Link it onto the end of the list of drings 2523 * for this lane. 2524 */ 2525 if (ldcp->lane_in.dringp == NULL) { 2526 D2(vswp, "%s: adding first INBOUND dring", __func__); 2527 ldcp->lane_in.dringp = dp; 2528 } else { 2529 dbp = ldcp->lane_in.dringp; 2530 2531 while (dbp->next != NULL) 2532 dbp = dbp->next; 2533 2534 dbp->next = dp; 2535 } 2536 2537 /* acknowledge it */ 2538 dring_pkt->tag.vio_sid = ldcp->local_session; 2539 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2540 dring_pkt->dring_ident = dp->ident; 2541 2542 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2543 sizeof (vio_dring_reg_msg_t), B_TRUE); 2544 2545 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 2546 vsw_next_milestone(ldcp); 2547 break; 2548 2549 case VIO_SUBTYPE_ACK: 2550 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2551 2552 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 2553 return; 2554 2555 /* 2556 * Peer is acknowledging our dring info and will have 2557 * sent us a dring identifier which we will use to 2558 * refer to this ring w.r.t. our peer. 2559 */ 2560 dp = ldcp->lane_out.dringp; 2561 if (dp != NULL) { 2562 /* 2563 * Find the ring this ident should be associated 2564 * with. 2565 */ 2566 if (vsw_dring_match(dp, dring_pkt)) { 2567 dring_found = 1; 2568 2569 } else while (dp != NULL) { 2570 if (vsw_dring_match(dp, dring_pkt)) { 2571 dring_found = 1; 2572 break; 2573 } 2574 dp = dp->next; 2575 } 2576 2577 if (dring_found == 0) { 2578 DERR(NULL, "%s: unrecognised ring cookie", 2579 __func__); 2580 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2581 return; 2582 } 2583 2584 } else { 2585 DERR(vswp, "%s: DRING ACK received but no drings " 2586 "allocated", __func__); 2587 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2588 return; 2589 } 2590 2591 /* store ident */ 2592 dp->ident = dring_pkt->dring_ident; 2593 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 2594 vsw_next_milestone(ldcp); 2595 break; 2596 2597 case VIO_SUBTYPE_NACK: 2598 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2599 2600 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 2601 return; 2602 2603 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 2604 vsw_next_milestone(ldcp); 2605 break; 2606 2607 default: 2608 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2609 dring_pkt->tag.vio_subtype); 2610 } 2611 2612 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2613 } 2614 2615 /* 2616 * Process a request from peer to unregister a dring. 2617 * 2618 * For the moment we just restart the handshake if our 2619 * peer endpoint attempts to unregister a dring. 2620 */ 2621 void 2622 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 2623 { 2624 vsw_t *vswp = ldcp->ldc_vswp; 2625 vio_dring_unreg_msg_t *dring_pkt; 2626 2627 /* 2628 * We know this is a ctrl/dring packet so 2629 * cast it into the correct structure. 2630 */ 2631 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 2632 2633 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2634 2635 switch (dring_pkt->tag.vio_subtype) { 2636 case VIO_SUBTYPE_INFO: 2637 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2638 2639 DWARN(vswp, "%s: restarting handshake..", __func__); 2640 break; 2641 2642 case VIO_SUBTYPE_ACK: 2643 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2644 2645 DWARN(vswp, "%s: restarting handshake..", __func__); 2646 break; 2647 2648 case VIO_SUBTYPE_NACK: 2649 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2650 2651 DWARN(vswp, "%s: restarting handshake..", __func__); 2652 break; 2653 2654 default: 2655 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2656 dring_pkt->tag.vio_subtype); 2657 } 2658 2659 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2660 2661 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2662 } 2663 2664 #define SND_MCST_NACK(ldcp, pkt) \ 2665 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 2666 pkt->tag.vio_sid = ldcp->local_session; \ 2667 (void) vsw_send_msg(ldcp, (void *)pkt, \ 2668 sizeof (vnet_mcast_msg_t), B_TRUE); 2669 2670 /* 2671 * Process a multicast request from a vnet. 2672 * 2673 * Vnet's specify a multicast address that they are interested in. This 2674 * address is used as a key into the hash table which forms the multicast 2675 * forwarding database (mFDB). 2676 * 2677 * The table keys are the multicast addresses, while the table entries 2678 * are pointers to lists of ports which wish to receive packets for the 2679 * specified multicast address. 2680 * 2681 * When a multicast packet is being switched we use the address as a key 2682 * into the hash table, and then walk the appropriate port list forwarding 2683 * the pkt to each port in turn. 2684 * 2685 * If a vnet is no longer interested in a particular multicast grouping 2686 * we simply find the correct location in the hash table and then delete 2687 * the relevant port from the port list. 2688 * 2689 * To deal with the case whereby a port is being deleted without first 2690 * removing itself from the lists in the hash table, we maintain a list 2691 * of multicast addresses the port has registered an interest in, within 2692 * the port structure itself. We then simply walk that list of addresses 2693 * using them as keys into the hash table and remove the port from the 2694 * appropriate lists. 2695 */ 2696 static void 2697 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 2698 { 2699 vnet_mcast_msg_t *mcst_pkt; 2700 vsw_port_t *port = ldcp->ldc_port; 2701 vsw_t *vswp = ldcp->ldc_vswp; 2702 int i; 2703 2704 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2705 2706 /* 2707 * We know this is a ctrl/mcast packet so 2708 * cast it into the correct structure. 2709 */ 2710 mcst_pkt = (vnet_mcast_msg_t *)pkt; 2711 2712 switch (mcst_pkt->tag.vio_subtype) { 2713 case VIO_SUBTYPE_INFO: 2714 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2715 2716 /* 2717 * Check if in correct state to receive a multicast 2718 * message (i.e. handshake complete). If not reset 2719 * the handshake. 2720 */ 2721 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 2722 return; 2723 2724 /* 2725 * Before attempting to add or remove address check 2726 * that they are valid multicast addresses. 2727 * If not, then NACK back. 2728 */ 2729 for (i = 0; i < mcst_pkt->count; i++) { 2730 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 2731 DERR(vswp, "%s: invalid multicast address", 2732 __func__); 2733 SND_MCST_NACK(ldcp, mcst_pkt); 2734 return; 2735 } 2736 } 2737 2738 /* 2739 * Now add/remove the addresses. If this fails we 2740 * NACK back. 2741 */ 2742 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 2743 SND_MCST_NACK(ldcp, mcst_pkt); 2744 return; 2745 } 2746 2747 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2748 mcst_pkt->tag.vio_sid = ldcp->local_session; 2749 2750 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 2751 2752 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 2753 sizeof (vnet_mcast_msg_t), B_TRUE); 2754 break; 2755 2756 case VIO_SUBTYPE_ACK: 2757 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2758 2759 /* 2760 * We shouldn't ever get a multicast ACK message as 2761 * at the moment we never request multicast addresses 2762 * to be set on some other device. This may change in 2763 * the future if we have cascading switches. 2764 */ 2765 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 2766 return; 2767 2768 /* Do nothing */ 2769 break; 2770 2771 case VIO_SUBTYPE_NACK: 2772 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2773 2774 /* 2775 * We shouldn't get a multicast NACK packet for the 2776 * same reasons as we shouldn't get a ACK packet. 2777 */ 2778 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 2779 return; 2780 2781 /* Do nothing */ 2782 break; 2783 2784 default: 2785 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2786 mcst_pkt->tag.vio_subtype); 2787 } 2788 2789 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2790 } 2791 2792 static void 2793 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 2794 { 2795 vio_rdx_msg_t *rdx_pkt; 2796 vsw_t *vswp = ldcp->ldc_vswp; 2797 2798 /* 2799 * We know this is a ctrl/rdx packet so 2800 * cast it into the correct structure. 2801 */ 2802 rdx_pkt = (vio_rdx_msg_t *)pkt; 2803 2804 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2805 2806 switch (rdx_pkt->tag.vio_subtype) { 2807 case VIO_SUBTYPE_INFO: 2808 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2809 2810 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 2811 return; 2812 2813 rdx_pkt->tag.vio_sid = ldcp->local_session; 2814 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2815 2816 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 2817 2818 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 2819 2820 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 2821 sizeof (vio_rdx_msg_t), B_TRUE); 2822 2823 vsw_next_milestone(ldcp); 2824 break; 2825 2826 case VIO_SUBTYPE_ACK: 2827 /* 2828 * Should be handled in-band by callback handler. 2829 */ 2830 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 2831 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2832 break; 2833 2834 case VIO_SUBTYPE_NACK: 2835 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2836 2837 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 2838 return; 2839 2840 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 2841 vsw_next_milestone(ldcp); 2842 break; 2843 2844 default: 2845 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2846 rdx_pkt->tag.vio_subtype); 2847 } 2848 2849 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2850 } 2851 2852 static void 2853 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t tag) 2854 { 2855 uint16_t env = tag.vio_subtype_env; 2856 vsw_t *vswp = ldcp->ldc_vswp; 2857 2858 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2859 2860 /* session id check */ 2861 if (ldcp->session_status & VSW_PEER_SESSION) { 2862 if (ldcp->peer_session != tag.vio_sid) { 2863 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2864 __func__, ldcp->ldc_id, tag.vio_sid); 2865 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2866 return; 2867 } 2868 } 2869 2870 /* 2871 * It is an error for us to be getting data packets 2872 * before the handshake has completed. 2873 */ 2874 if (ldcp->hphase != VSW_MILESTONE4) { 2875 DERR(vswp, "%s: got data packet before handshake complete " 2876 "hphase %d (%x: %x)", __func__, ldcp->hphase, 2877 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 2878 DUMP_FLAGS(ldcp->lane_in.lstate); 2879 DUMP_FLAGS(ldcp->lane_out.lstate); 2880 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2881 return; 2882 } 2883 2884 /* 2885 * To reduce the locking contention, release the 2886 * ldc_cblock here and re-acquire it once we are done 2887 * receiving packets. 2888 */ 2889 mutex_exit(&ldcp->ldc_cblock); 2890 mutex_enter(&ldcp->ldc_rxlock); 2891 2892 /* 2893 * Switch on vio_subtype envelope, then let lower routines 2894 * decide if its an INFO, ACK or NACK packet. 2895 */ 2896 if (env == VIO_DRING_DATA) { 2897 vsw_process_data_dring_pkt(ldcp, dpkt); 2898 } else if (env == VIO_PKT_DATA) { 2899 vsw_process_data_raw_pkt(ldcp, dpkt); 2900 } else if (env == VIO_DESC_DATA) { 2901 vsw_process_data_ibnd_pkt(ldcp, dpkt); 2902 } else { 2903 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 2904 } 2905 2906 mutex_exit(&ldcp->ldc_rxlock); 2907 mutex_enter(&ldcp->ldc_cblock); 2908 2909 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2910 } 2911 2912 #define SND_DRING_NACK(ldcp, pkt) \ 2913 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 2914 pkt->tag.vio_sid = ldcp->local_session; \ 2915 (void) vsw_send_msg(ldcp, (void *)pkt, \ 2916 sizeof (vio_dring_msg_t), B_TRUE); 2917 2918 static void 2919 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 2920 { 2921 vio_dring_msg_t *dring_pkt; 2922 vnet_public_desc_t *pub_addr = NULL; 2923 vsw_private_desc_t *priv_addr = NULL; 2924 dring_info_t *dp = NULL; 2925 vsw_t *vswp = ldcp->ldc_vswp; 2926 mblk_t *mp = NULL; 2927 mblk_t *bp = NULL; 2928 mblk_t *bpt = NULL; 2929 size_t nbytes = 0; 2930 uint64_t ncookies = 0; 2931 uint64_t chain = 0; 2932 uint64_t len; 2933 uint32_t pos, start, datalen; 2934 uint32_t range_start, range_end; 2935 int32_t end, num, cnt = 0; 2936 int i, rv, msg_rv = 0; 2937 boolean_t ack_needed = B_FALSE; 2938 boolean_t prev_desc_ack = B_FALSE; 2939 int read_attempts = 0; 2940 struct ether_header *ehp; 2941 2942 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2943 2944 /* 2945 * We know this is a data/dring packet so 2946 * cast it into the correct structure. 2947 */ 2948 dring_pkt = (vio_dring_msg_t *)dpkt; 2949 2950 /* 2951 * Switch on the vio_subtype. If its INFO then we need to 2952 * process the data. If its an ACK we need to make sure 2953 * it makes sense (i.e did we send an earlier data/info), 2954 * and if its a NACK then we maybe attempt a retry. 2955 */ 2956 switch (dring_pkt->tag.vio_subtype) { 2957 case VIO_SUBTYPE_INFO: 2958 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 2959 2960 READ_ENTER(&ldcp->lane_in.dlistrw); 2961 if ((dp = vsw_ident2dring(&ldcp->lane_in, 2962 dring_pkt->dring_ident)) == NULL) { 2963 RW_EXIT(&ldcp->lane_in.dlistrw); 2964 2965 DERR(vswp, "%s(%lld): unable to find dring from " 2966 "ident 0x%llx", __func__, ldcp->ldc_id, 2967 dring_pkt->dring_ident); 2968 2969 SND_DRING_NACK(ldcp, dring_pkt); 2970 return; 2971 } 2972 2973 start = pos = dring_pkt->start_idx; 2974 end = dring_pkt->end_idx; 2975 len = dp->num_descriptors; 2976 2977 range_start = range_end = pos; 2978 2979 D2(vswp, "%s(%lld): start index %ld : end %ld\n", 2980 __func__, ldcp->ldc_id, start, end); 2981 2982 if (end == -1) { 2983 num = -1; 2984 } else if (end >= 0) { 2985 num = end >= pos ? end - pos + 1: (len - pos + 1) + end; 2986 2987 /* basic sanity check */ 2988 if (end > len) { 2989 RW_EXIT(&ldcp->lane_in.dlistrw); 2990 DERR(vswp, "%s(%lld): endpoint %lld outside " 2991 "ring length %lld", __func__, 2992 ldcp->ldc_id, end, len); 2993 2994 SND_DRING_NACK(ldcp, dring_pkt); 2995 return; 2996 } 2997 } else { 2998 RW_EXIT(&ldcp->lane_in.dlistrw); 2999 DERR(vswp, "%s(%lld): invalid endpoint %lld", 3000 __func__, ldcp->ldc_id, end); 3001 SND_DRING_NACK(ldcp, dring_pkt); 3002 return; 3003 } 3004 3005 while (cnt != num) { 3006 vsw_recheck_desc: 3007 if ((rv = ldc_mem_dring_acquire(dp->handle, 3008 pos, pos)) != 0) { 3009 RW_EXIT(&ldcp->lane_in.dlistrw); 3010 DERR(vswp, "%s(%lld): unable to acquire " 3011 "descriptor at pos %d: err %d", 3012 __func__, pos, ldcp->ldc_id, rv); 3013 SND_DRING_NACK(ldcp, dring_pkt); 3014 ldcp->ldc_stats.ierrors++; 3015 return; 3016 } 3017 3018 pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos; 3019 3020 /* 3021 * When given a bounded range of descriptors 3022 * to process, its an error to hit a descriptor 3023 * which is not ready. In the non-bounded case 3024 * (end_idx == -1) this simply indicates we have 3025 * reached the end of the current active range. 3026 */ 3027 if (pub_addr->hdr.dstate != VIO_DESC_READY) { 3028 /* unbound - no error */ 3029 if (end == -1) { 3030 if (read_attempts == vsw_read_attempts) 3031 break; 3032 3033 delay(drv_usectohz(vsw_desc_delay)); 3034 read_attempts++; 3035 goto vsw_recheck_desc; 3036 } 3037 3038 /* bounded - error - so NACK back */ 3039 RW_EXIT(&ldcp->lane_in.dlistrw); 3040 DERR(vswp, "%s(%lld): descriptor not READY " 3041 "(%d)", __func__, ldcp->ldc_id, 3042 pub_addr->hdr.dstate); 3043 SND_DRING_NACK(ldcp, dring_pkt); 3044 return; 3045 } 3046 3047 DTRACE_PROBE1(read_attempts, int, read_attempts); 3048 3049 range_end = pos; 3050 3051 /* 3052 * If we ACK'd the previous descriptor then now 3053 * record the new range start position for later 3054 * ACK's. 3055 */ 3056 if (prev_desc_ack) { 3057 range_start = pos; 3058 3059 D2(vswp, "%s(%lld): updating range start to be " 3060 "%d", __func__, ldcp->ldc_id, range_start); 3061 3062 prev_desc_ack = B_FALSE; 3063 } 3064 3065 /* 3066 * Data is padded to align on 8 byte boundary, 3067 * datalen is actual data length, i.e. minus that 3068 * padding. 3069 */ 3070 datalen = pub_addr->nbytes; 3071 3072 /* 3073 * Does peer wish us to ACK when we have finished 3074 * with this descriptor ? 3075 */ 3076 if (pub_addr->hdr.ack) 3077 ack_needed = B_TRUE; 3078 3079 D2(vswp, "%s(%lld): processing desc %lld at pos" 3080 " 0x%llx : dstate 0x%lx : datalen 0x%lx", 3081 __func__, ldcp->ldc_id, pos, pub_addr, 3082 pub_addr->hdr.dstate, datalen); 3083 3084 /* 3085 * Mark that we are starting to process descriptor. 3086 */ 3087 pub_addr->hdr.dstate = VIO_DESC_ACCEPTED; 3088 3089 /* 3090 * Ensure that we ask ldc for an aligned 3091 * number of bytes. 3092 */ 3093 nbytes = (datalen + VNET_IPALIGN + 7) & ~7; 3094 3095 mp = vio_multipool_allocb(&ldcp->vmp, nbytes); 3096 if (mp == NULL) { 3097 ldcp->ldc_stats.rx_vio_allocb_fail++; 3098 /* 3099 * No free receive buffers available, so 3100 * fallback onto allocb(9F). Make sure that 3101 * we get a data buffer which is a multiple 3102 * of 8 as this is required by ldc_mem_copy. 3103 */ 3104 DTRACE_PROBE(allocb); 3105 if ((mp = allocb(datalen + VNET_IPALIGN + 8, 3106 BPRI_MED)) == NULL) { 3107 DERR(vswp, "%s(%ld): allocb failed", 3108 __func__, ldcp->ldc_id); 3109 pub_addr->hdr.dstate = VIO_DESC_DONE; 3110 (void) ldc_mem_dring_release(dp->handle, 3111 pos, pos); 3112 ldcp->ldc_stats.ierrors++; 3113 ldcp->ldc_stats.rx_allocb_fail++; 3114 break; 3115 } 3116 } 3117 3118 ncookies = pub_addr->ncookies; 3119 rv = ldc_mem_copy(ldcp->ldc_handle, 3120 (caddr_t)mp->b_rptr, 0, &nbytes, 3121 pub_addr->memcookie, ncookies, LDC_COPY_IN); 3122 3123 if (rv != 0) { 3124 DERR(vswp, "%s(%d): unable to copy in data " 3125 "from %d cookies in desc %d (rv %d)", 3126 __func__, ldcp->ldc_id, ncookies, pos, rv); 3127 freemsg(mp); 3128 3129 pub_addr->hdr.dstate = VIO_DESC_DONE; 3130 (void) ldc_mem_dring_release(dp->handle, 3131 pos, pos); 3132 ldcp->ldc_stats.ierrors++; 3133 break; 3134 } else { 3135 D2(vswp, "%s(%d): copied in %ld bytes" 3136 " using %d cookies", __func__, 3137 ldcp->ldc_id, nbytes, ncookies); 3138 } 3139 3140 /* adjust the read pointer to skip over the padding */ 3141 mp->b_rptr += VNET_IPALIGN; 3142 3143 /* point to the actual end of data */ 3144 mp->b_wptr = mp->b_rptr + datalen; 3145 3146 /* update statistics */ 3147 ehp = (struct ether_header *)mp->b_rptr; 3148 if (IS_BROADCAST(ehp)) 3149 ldcp->ldc_stats.brdcstrcv++; 3150 else if (IS_MULTICAST(ehp)) 3151 ldcp->ldc_stats.multircv++; 3152 3153 ldcp->ldc_stats.ipackets++; 3154 ldcp->ldc_stats.rbytes += datalen; 3155 3156 /* build a chain of received packets */ 3157 if (bp == NULL) { 3158 /* first pkt */ 3159 bp = mp; 3160 bp->b_next = bp->b_prev = NULL; 3161 bpt = bp; 3162 chain = 1; 3163 } else { 3164 mp->b_next = mp->b_prev = NULL; 3165 bpt->b_next = mp; 3166 bpt = mp; 3167 chain++; 3168 } 3169 3170 /* mark we are finished with this descriptor */ 3171 pub_addr->hdr.dstate = VIO_DESC_DONE; 3172 3173 (void) ldc_mem_dring_release(dp->handle, pos, pos); 3174 3175 /* 3176 * Send an ACK back to peer if requested. 3177 */ 3178 if (ack_needed) { 3179 ack_needed = B_FALSE; 3180 3181 dring_pkt->start_idx = range_start; 3182 dring_pkt->end_idx = range_end; 3183 3184 DERR(vswp, "%s(%lld): processed %d %d, ACK" 3185 " requested", __func__, ldcp->ldc_id, 3186 dring_pkt->start_idx, dring_pkt->end_idx); 3187 3188 dring_pkt->dring_process_state = VIO_DP_ACTIVE; 3189 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3190 dring_pkt->tag.vio_sid = ldcp->local_session; 3191 3192 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3193 sizeof (vio_dring_msg_t), B_FALSE); 3194 3195 /* 3196 * Check if ACK was successfully sent. If not 3197 * we break and deal with that below. 3198 */ 3199 if (msg_rv != 0) 3200 break; 3201 3202 prev_desc_ack = B_TRUE; 3203 range_start = pos; 3204 } 3205 3206 /* next descriptor */ 3207 pos = (pos + 1) % len; 3208 cnt++; 3209 3210 /* 3211 * Break out of loop here and stop processing to 3212 * allow some other network device (or disk) to 3213 * get access to the cpu. 3214 */ 3215 if (chain > vsw_chain_len) { 3216 D3(vswp, "%s(%lld): switching chain of %d " 3217 "msgs", __func__, ldcp->ldc_id, chain); 3218 break; 3219 } 3220 } 3221 RW_EXIT(&ldcp->lane_in.dlistrw); 3222 3223 /* 3224 * If when we attempted to send the ACK we found that the 3225 * channel had been reset then now handle this. We deal with 3226 * it here as we cannot reset the channel while holding the 3227 * dlistrw lock, and we don't want to acquire/release it 3228 * continuously in the above loop, as a channel reset should 3229 * be a rare event. 3230 */ 3231 if (msg_rv == ECONNRESET) { 3232 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3233 break; 3234 } 3235 3236 /* send the chain of packets to be switched */ 3237 if (bp != NULL) { 3238 DTRACE_PROBE1(vsw_rcv_msgs, int, chain); 3239 D3(vswp, "%s(%lld): switching chain of %d msgs", 3240 __func__, ldcp->ldc_id, chain); 3241 vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT, 3242 ldcp->ldc_port, NULL); 3243 } 3244 3245 DTRACE_PROBE1(msg_cnt, int, cnt); 3246 3247 /* 3248 * We are now finished so ACK back with the state 3249 * set to STOPPING so our peer knows we are finished 3250 */ 3251 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3252 dring_pkt->tag.vio_sid = ldcp->local_session; 3253 3254 dring_pkt->dring_process_state = VIO_DP_STOPPED; 3255 3256 DTRACE_PROBE(stop_process_sent); 3257 3258 /* 3259 * We have not processed any more descriptors beyond 3260 * the last one we ACK'd. 3261 */ 3262 if (prev_desc_ack) 3263 range_start = range_end; 3264 3265 dring_pkt->start_idx = range_start; 3266 dring_pkt->end_idx = range_end; 3267 3268 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 3269 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3270 dring_pkt->end_idx); 3271 3272 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3273 sizeof (vio_dring_msg_t), B_TRUE); 3274 break; 3275 3276 case VIO_SUBTYPE_ACK: 3277 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 3278 /* 3279 * Verify that the relevant descriptors are all 3280 * marked as DONE 3281 */ 3282 READ_ENTER(&ldcp->lane_out.dlistrw); 3283 if ((dp = vsw_ident2dring(&ldcp->lane_out, 3284 dring_pkt->dring_ident)) == NULL) { 3285 RW_EXIT(&ldcp->lane_out.dlistrw); 3286 DERR(vswp, "%s: unknown ident in ACK", __func__); 3287 return; 3288 } 3289 3290 start = end = 0; 3291 start = dring_pkt->start_idx; 3292 end = dring_pkt->end_idx; 3293 len = dp->num_descriptors; 3294 3295 3296 mutex_enter(&dp->dlock); 3297 dp->last_ack_recv = end; 3298 ldcp->ldc_stats.dring_data_acks++; 3299 mutex_exit(&dp->dlock); 3300 3301 (void) vsw_reclaim_dring(dp, start); 3302 3303 /* 3304 * If our peer is stopping processing descriptors then 3305 * we check to make sure it has processed all the descriptors 3306 * we have updated. If not then we send it a new message 3307 * to prompt it to restart. 3308 */ 3309 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) { 3310 DTRACE_PROBE(stop_process_recv); 3311 D2(vswp, "%s(%lld): got stopping msg : %d : %d", 3312 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3313 dring_pkt->end_idx); 3314 3315 /* 3316 * Check next descriptor in public section of ring. 3317 * If its marked as READY then we need to prompt our 3318 * peer to start processing the ring again. 3319 */ 3320 i = (end + 1) % len; 3321 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 3322 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3323 3324 /* 3325 * Hold the restart lock across all of this to 3326 * make sure that its not possible for us to 3327 * decide that a msg needs to be sent in the future 3328 * but the sending code having already checked is 3329 * about to exit. 3330 */ 3331 mutex_enter(&dp->restart_lock); 3332 ldcp->ldc_stats.dring_stopped_acks++; 3333 mutex_enter(&priv_addr->dstate_lock); 3334 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 3335 3336 mutex_exit(&priv_addr->dstate_lock); 3337 3338 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3339 dring_pkt->tag.vio_sid = ldcp->local_session; 3340 3341 dring_pkt->seq_num = 3342 atomic_inc_64_nv(&ldcp->lane_out.seq_num); 3343 3344 dring_pkt->start_idx = (end + 1) % len; 3345 dring_pkt->end_idx = -1; 3346 3347 D2(vswp, "%s(%lld) : sending restart msg:" 3348 " %d : %d", __func__, ldcp->ldc_id, 3349 dring_pkt->start_idx, dring_pkt->end_idx); 3350 3351 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3352 sizeof (vio_dring_msg_t), B_FALSE); 3353 ldcp->ldc_stats.dring_data_msgs++; 3354 3355 } else { 3356 mutex_exit(&priv_addr->dstate_lock); 3357 dp->restart_reqd = B_TRUE; 3358 } 3359 mutex_exit(&dp->restart_lock); 3360 } 3361 RW_EXIT(&ldcp->lane_out.dlistrw); 3362 3363 /* only do channel reset after dropping dlistrw lock */ 3364 if (msg_rv == ECONNRESET) 3365 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3366 3367 break; 3368 3369 case VIO_SUBTYPE_NACK: 3370 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 3371 __func__, ldcp->ldc_id); 3372 /* 3373 * Something is badly wrong if we are getting NACK's 3374 * for our data pkts. So reset the channel. 3375 */ 3376 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3377 3378 break; 3379 3380 default: 3381 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3382 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 3383 } 3384 3385 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3386 } 3387 3388 /* 3389 * VIO_PKT_DATA (a.k.a raw data mode ) 3390 * 3391 * Note - currently not supported. Do nothing. 3392 */ 3393 static void 3394 vsw_process_data_raw_pkt(vsw_ldc_t *ldcp, void *dpkt) 3395 { 3396 _NOTE(ARGUNUSED(dpkt)) 3397 3398 D1(NULL, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 3399 DERR(NULL, "%s (%lld): currently unsupported", __func__, ldcp->ldc_id); 3400 D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 3401 } 3402 3403 /* 3404 * Process an in-band descriptor message (most likely from 3405 * OBP). 3406 */ 3407 static void 3408 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3409 { 3410 vnet_ibnd_desc_t *ibnd_desc; 3411 dring_info_t *dp = NULL; 3412 vsw_private_desc_t *priv_addr = NULL; 3413 vsw_t *vswp = ldcp->ldc_vswp; 3414 mblk_t *mp = NULL; 3415 size_t nbytes = 0; 3416 size_t off = 0; 3417 uint64_t idx = 0; 3418 uint32_t num = 1, len, datalen = 0; 3419 uint64_t ncookies = 0; 3420 int i, rv; 3421 int j = 0; 3422 3423 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3424 3425 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3426 3427 switch (ibnd_desc->hdr.tag.vio_subtype) { 3428 case VIO_SUBTYPE_INFO: 3429 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3430 3431 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3432 return; 3433 3434 /* 3435 * Data is padded to align on a 8 byte boundary, 3436 * nbytes is actual data length, i.e. minus that 3437 * padding. 3438 */ 3439 datalen = ibnd_desc->nbytes; 3440 3441 D2(vswp, "%s(%lld): processing inband desc : " 3442 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 3443 3444 ncookies = ibnd_desc->ncookies; 3445 3446 /* 3447 * allocb(9F) returns an aligned data block. We 3448 * need to ensure that we ask ldc for an aligned 3449 * number of bytes also. 3450 */ 3451 nbytes = datalen; 3452 if (nbytes & 0x7) { 3453 off = 8 - (nbytes & 0x7); 3454 nbytes += off; 3455 } 3456 3457 mp = allocb(datalen, BPRI_MED); 3458 if (mp == NULL) { 3459 DERR(vswp, "%s(%lld): allocb failed", 3460 __func__, ldcp->ldc_id); 3461 ldcp->ldc_stats.rx_allocb_fail++; 3462 return; 3463 } 3464 3465 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 3466 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 3467 LDC_COPY_IN); 3468 3469 if (rv != 0) { 3470 DERR(vswp, "%s(%d): unable to copy in data from " 3471 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 3472 freemsg(mp); 3473 ldcp->ldc_stats.ierrors++; 3474 return; 3475 } 3476 3477 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 3478 __func__, ldcp->ldc_id, nbytes, ncookies); 3479 3480 /* point to the actual end of data */ 3481 mp->b_wptr = mp->b_rptr + datalen; 3482 ldcp->ldc_stats.ipackets++; 3483 ldcp->ldc_stats.rbytes += datalen; 3484 3485 /* 3486 * We ACK back every in-band descriptor message we process 3487 */ 3488 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 3489 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 3490 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 3491 sizeof (vnet_ibnd_desc_t), B_TRUE); 3492 3493 /* send the packet to be switched */ 3494 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 3495 ldcp->ldc_port, NULL); 3496 3497 break; 3498 3499 case VIO_SUBTYPE_ACK: 3500 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3501 3502 /* Verify the ACK is valid */ 3503 idx = ibnd_desc->hdr.desc_handle; 3504 3505 if (idx >= VSW_RING_NUM_EL) { 3506 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 3507 "(idx %ld)", vswp->instance, idx); 3508 return; 3509 } 3510 3511 if ((dp = ldcp->lane_out.dringp) == NULL) { 3512 DERR(vswp, "%s: no dring found", __func__); 3513 return; 3514 } 3515 3516 len = dp->num_descriptors; 3517 /* 3518 * If the descriptor we are being ACK'ed for is not the 3519 * one we expected, then pkts were lost somwhere, either 3520 * when we tried to send a msg, or a previous ACK msg from 3521 * our peer. In either case we now reclaim the descriptors 3522 * in the range from the last ACK we received up to the 3523 * current ACK. 3524 */ 3525 if (idx != dp->last_ack_recv) { 3526 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 3527 __func__, dp->last_ack_recv, idx); 3528 num = idx >= dp->last_ack_recv ? 3529 idx - dp->last_ack_recv + 1: 3530 (len - dp->last_ack_recv + 1) + idx; 3531 } 3532 3533 /* 3534 * When we sent the in-band message to our peer we 3535 * marked the copy in our private ring as READY. We now 3536 * check that the descriptor we are being ACK'ed for is in 3537 * fact READY, i.e. it is one we have shared with our peer. 3538 * 3539 * If its not we flag an error, but still reset the descr 3540 * back to FREE. 3541 */ 3542 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 3543 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3544 mutex_enter(&priv_addr->dstate_lock); 3545 if (priv_addr->dstate != VIO_DESC_READY) { 3546 DERR(vswp, "%s: (%ld) desc at index %ld not " 3547 "READY (0x%lx)", __func__, 3548 ldcp->ldc_id, idx, priv_addr->dstate); 3549 DERR(vswp, "%s: bound %d: ncookies %ld : " 3550 "datalen %ld", __func__, 3551 priv_addr->bound, priv_addr->ncookies, 3552 priv_addr->datalen); 3553 } 3554 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 3555 ldcp->ldc_id, idx); 3556 /* release resources associated with sent msg */ 3557 bzero(priv_addr->datap, priv_addr->datalen); 3558 priv_addr->datalen = 0; 3559 priv_addr->dstate = VIO_DESC_FREE; 3560 mutex_exit(&priv_addr->dstate_lock); 3561 } 3562 /* update to next expected value */ 3563 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 3564 3565 break; 3566 3567 case VIO_SUBTYPE_NACK: 3568 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3569 3570 /* 3571 * We should only get a NACK if our peer doesn't like 3572 * something about a message we have sent it. If this 3573 * happens we just release the resources associated with 3574 * the message. (We are relying on higher layers to decide 3575 * whether or not to resend. 3576 */ 3577 3578 /* limit check */ 3579 idx = ibnd_desc->hdr.desc_handle; 3580 3581 if (idx >= VSW_RING_NUM_EL) { 3582 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 3583 __func__, idx); 3584 return; 3585 } 3586 3587 if ((dp = ldcp->lane_out.dringp) == NULL) { 3588 DERR(vswp, "%s: no dring found", __func__); 3589 return; 3590 } 3591 3592 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 3593 3594 /* move to correct location in ring */ 3595 priv_addr += idx; 3596 3597 /* release resources associated with sent msg */ 3598 mutex_enter(&priv_addr->dstate_lock); 3599 bzero(priv_addr->datap, priv_addr->datalen); 3600 priv_addr->datalen = 0; 3601 priv_addr->dstate = VIO_DESC_FREE; 3602 mutex_exit(&priv_addr->dstate_lock); 3603 3604 break; 3605 3606 default: 3607 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3608 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 3609 } 3610 3611 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3612 } 3613 3614 static void 3615 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t tag) 3616 { 3617 _NOTE(ARGUNUSED(epkt)) 3618 3619 vsw_t *vswp = ldcp->ldc_vswp; 3620 uint16_t env = tag.vio_subtype_env; 3621 3622 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 3623 3624 /* 3625 * Error vio_subtypes have yet to be defined. So for 3626 * the moment we can't do anything. 3627 */ 3628 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 3629 3630 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 3631 } 3632 3633 /* transmit the packet over the given port */ 3634 int 3635 vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt) 3636 { 3637 vsw_ldc_list_t *ldcl = &port->p_ldclist; 3638 vsw_ldc_t *ldcp; 3639 mblk_t *tmp; 3640 int status = 0; 3641 3642 READ_ENTER(&ldcl->lockrw); 3643 /* 3644 * Note for now, we have a single channel. 3645 */ 3646 ldcp = ldcl->head; 3647 if (ldcp == NULL) { 3648 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 3649 freemsg(mp); 3650 RW_EXIT(&ldcl->lockrw); 3651 return (1); 3652 } 3653 3654 /* 3655 * If the TX thread is enabled, then queue the packets 3656 * and signal the tx thread. 3657 */ 3658 if (ldcp->tx_thread != NULL) { 3659 mutex_enter(&ldcp->tx_thr_lock); 3660 if (ldcp->tx_mhead == NULL) { 3661 ldcp->tx_mhead = mp; 3662 ldcp->tx_mtail = mpt; 3663 cv_signal(&ldcp->tx_thr_cv); 3664 } else { 3665 ldcp->tx_mtail->b_next = mp; 3666 ldcp->tx_mtail = mpt; 3667 } 3668 mutex_exit(&ldcp->tx_thr_lock); 3669 } else { 3670 while (mp != NULL) { 3671 tmp = mp->b_next; 3672 mp->b_next = mp->b_prev = NULL; 3673 (void) vsw_ldcsend(ldcp, mp, 1); 3674 mp = tmp; 3675 } 3676 } 3677 3678 RW_EXIT(&ldcl->lockrw); 3679 3680 return (status); 3681 } 3682 3683 /* 3684 * Transmit the packet over the given LDC channel. 3685 * 3686 * The 'retries' argument indicates how many times a packet 3687 * is retried before it is dropped. Note, the retry is done 3688 * only for a resource related failure, for all other failures 3689 * the packet is dropped immediately. 3690 * 3691 * The 'tx_failure' counter is used as mechanism to track 3692 * continuous failures. Once these failures are more than 3693 * 'vsw_ldc_tx_max_failures' tunable, the packets are tried only 3694 * once and then they are dropped. This is done to avoid 3695 * buffering too many packets. 3696 */ 3697 static int 3698 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, int retries) 3699 { 3700 int i; 3701 int rc; 3702 int status = 0; 3703 vsw_port_t *port = ldcp->ldc_port; 3704 dring_info_t *dp = NULL; 3705 3706 3707 for (i = 0; i < retries; ) { 3708 /* 3709 * Send the message out using the appropriate 3710 * transmit function which will free mblock when it 3711 * is finished with it. 3712 */ 3713 mutex_enter(&port->tx_lock); 3714 if (port->transmit != NULL) { 3715 status = (*port->transmit)(ldcp, mp); 3716 } 3717 if (status == LDC_TX_SUCCESS) { 3718 ldcp->tx_failures = 0; 3719 mutex_exit(&port->tx_lock); 3720 break; 3721 } else if (ldcp->tx_failures > vsw_ldc_tx_max_failures) { 3722 /* 3723 * If the failures crossed the threshold then 3724 * break here. 3725 */ 3726 ldcp->ldc_stats.oerrors++; 3727 mutex_exit(&port->tx_lock); 3728 break; 3729 } else { 3730 ldcp->tx_failures++; 3731 } 3732 i++; /* increment the counter here */ 3733 3734 /* If its the last retry, then update the oerror */ 3735 if ((i == retries) && (status == LDC_TX_NORESOURCES)) { 3736 ldcp->ldc_stats.oerrors++; 3737 } 3738 mutex_exit(&port->tx_lock); 3739 3740 if (status != LDC_TX_NORESOURCES) { 3741 /* 3742 * No retrying required for errors un-related 3743 * to resources. 3744 */ 3745 break; 3746 } 3747 READ_ENTER(&ldcp->lane_out.dlistrw); 3748 if ((dp = ldcp->lane_out.dringp) == NULL) { 3749 RW_EXIT(&ldcp->lane_out.dlistrw); 3750 break; 3751 } 3752 rc = vsw_reclaim_dring(dp, dp->end_idx); 3753 RW_EXIT(&ldcp->lane_out.dlistrw); 3754 3755 /* 3756 * Delay only if none were reclaimed 3757 * and its not the last retry. 3758 */ 3759 if ((rc == 0) && (i < retries)) { 3760 delay(drv_usectohz(vsw_ldc_tx_delay)); 3761 } 3762 } 3763 freemsg(mp); 3764 return (status); 3765 } 3766 3767 /* 3768 * Send packet out via descriptor ring to a logical device. 3769 */ 3770 static int 3771 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 3772 { 3773 vio_dring_msg_t dring_pkt; 3774 dring_info_t *dp = NULL; 3775 vsw_private_desc_t *priv_desc = NULL; 3776 vnet_public_desc_t *pub = NULL; 3777 vsw_t *vswp = ldcp->ldc_vswp; 3778 mblk_t *bp; 3779 size_t n, size; 3780 caddr_t bufp; 3781 int idx; 3782 int status = LDC_TX_SUCCESS; 3783 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 3784 3785 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 3786 3787 /* TODO: make test a macro */ 3788 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 3789 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 3790 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 3791 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 3792 ldcp->lane_out.lstate); 3793 ldcp->ldc_stats.oerrors++; 3794 return (LDC_TX_FAILURE); 3795 } 3796 3797 /* 3798 * Note - using first ring only, this may change 3799 * in the future. 3800 */ 3801 READ_ENTER(&ldcp->lane_out.dlistrw); 3802 if ((dp = ldcp->lane_out.dringp) == NULL) { 3803 RW_EXIT(&ldcp->lane_out.dlistrw); 3804 DERR(vswp, "%s(%lld): no dring for outbound lane on" 3805 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 3806 ldcp->ldc_stats.oerrors++; 3807 return (LDC_TX_FAILURE); 3808 } 3809 3810 size = msgsize(mp); 3811 if (size > (size_t)ETHERMAX) { 3812 RW_EXIT(&ldcp->lane_out.dlistrw); 3813 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 3814 ldcp->ldc_id, size); 3815 ldcp->ldc_stats.oerrors++; 3816 return (LDC_TX_FAILURE); 3817 } 3818 3819 /* 3820 * Find a free descriptor 3821 * 3822 * Note: for the moment we are assuming that we will only 3823 * have one dring going from the switch to each of its 3824 * peers. This may change in the future. 3825 */ 3826 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 3827 D2(vswp, "%s(%lld): no descriptor available for ring " 3828 "at 0x%llx", __func__, ldcp->ldc_id, dp); 3829 3830 /* nothing more we can do */ 3831 status = LDC_TX_NORESOURCES; 3832 ldcp->ldc_stats.tx_no_desc++; 3833 goto vsw_dringsend_free_exit; 3834 } else { 3835 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 3836 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 3837 } 3838 3839 /* copy data into the descriptor */ 3840 bufp = priv_desc->datap; 3841 bufp += VNET_IPALIGN; 3842 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 3843 n = MBLKL(bp); 3844 bcopy(bp->b_rptr, bufp, n); 3845 bufp += n; 3846 } 3847 3848 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 3849 3850 pub = priv_desc->descp; 3851 pub->nbytes = priv_desc->datalen; 3852 3853 /* update statistics */ 3854 if (IS_BROADCAST(ehp)) 3855 ldcp->ldc_stats.brdcstxmt++; 3856 else if (IS_MULTICAST(ehp)) 3857 ldcp->ldc_stats.multixmt++; 3858 ldcp->ldc_stats.opackets++; 3859 ldcp->ldc_stats.obytes += priv_desc->datalen; 3860 3861 mutex_enter(&priv_desc->dstate_lock); 3862 pub->hdr.dstate = VIO_DESC_READY; 3863 mutex_exit(&priv_desc->dstate_lock); 3864 3865 /* 3866 * Determine whether or not we need to send a message to our 3867 * peer prompting them to read our newly updated descriptor(s). 3868 */ 3869 mutex_enter(&dp->restart_lock); 3870 if (dp->restart_reqd) { 3871 dp->restart_reqd = B_FALSE; 3872 ldcp->ldc_stats.dring_data_msgs++; 3873 mutex_exit(&dp->restart_lock); 3874 3875 /* 3876 * Send a vio_dring_msg to peer to prompt them to read 3877 * the updated descriptor ring. 3878 */ 3879 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 3880 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 3881 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 3882 dring_pkt.tag.vio_sid = ldcp->local_session; 3883 3884 /* Note - for now using first ring */ 3885 dring_pkt.dring_ident = dp->ident; 3886 dring_pkt.seq_num = atomic_inc_64_nv(&ldcp->lane_out.seq_num); 3887 3888 /* 3889 * If last_ack_recv is -1 then we know we've not 3890 * received any ack's yet, so this must be the first 3891 * msg sent, so set the start to the begining of the ring. 3892 */ 3893 mutex_enter(&dp->dlock); 3894 if (dp->last_ack_recv == -1) { 3895 dring_pkt.start_idx = 0; 3896 } else { 3897 dring_pkt.start_idx = 3898 (dp->last_ack_recv + 1) % dp->num_descriptors; 3899 } 3900 dring_pkt.end_idx = -1; 3901 mutex_exit(&dp->dlock); 3902 3903 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 3904 ldcp->ldc_id, dp, dring_pkt.dring_ident); 3905 D3(vswp, "%s(%lld): start %lld : end %lld : seq %lld\n", 3906 __func__, ldcp->ldc_id, dring_pkt.start_idx, 3907 dring_pkt.end_idx, dring_pkt.seq_num); 3908 3909 RW_EXIT(&ldcp->lane_out.dlistrw); 3910 3911 (void) vsw_send_msg(ldcp, (void *)&dring_pkt, 3912 sizeof (vio_dring_msg_t), B_TRUE); 3913 3914 return (status); 3915 3916 } else { 3917 mutex_exit(&dp->restart_lock); 3918 D2(vswp, "%s(%lld): updating descp %d", __func__, 3919 ldcp->ldc_id, idx); 3920 } 3921 3922 vsw_dringsend_free_exit: 3923 3924 RW_EXIT(&ldcp->lane_out.dlistrw); 3925 3926 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 3927 return (status); 3928 } 3929 3930 /* 3931 * Send an in-band descriptor message over ldc. 3932 */ 3933 static int 3934 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 3935 { 3936 vsw_t *vswp = ldcp->ldc_vswp; 3937 vnet_ibnd_desc_t ibnd_msg; 3938 vsw_private_desc_t *priv_desc = NULL; 3939 dring_info_t *dp = NULL; 3940 size_t n, size = 0; 3941 caddr_t bufp; 3942 mblk_t *bp; 3943 int idx, i; 3944 int status = LDC_TX_SUCCESS; 3945 static int warn_msg = 1; 3946 3947 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3948 3949 ASSERT(mp != NULL); 3950 3951 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 3952 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 3953 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 3954 __func__, ldcp->ldc_id, ldcp->ldc_status, 3955 ldcp->lane_out.lstate); 3956 ldcp->ldc_stats.oerrors++; 3957 return (LDC_TX_FAILURE); 3958 } 3959 3960 /* 3961 * only expect single dring to exist, which we use 3962 * as an internal buffer, rather than a transfer channel. 3963 */ 3964 READ_ENTER(&ldcp->lane_out.dlistrw); 3965 if ((dp = ldcp->lane_out.dringp) == NULL) { 3966 DERR(vswp, "%s(%lld): no dring for outbound lane", 3967 __func__, ldcp->ldc_id); 3968 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 3969 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 3970 RW_EXIT(&ldcp->lane_out.dlistrw); 3971 ldcp->ldc_stats.oerrors++; 3972 return (LDC_TX_FAILURE); 3973 } 3974 3975 size = msgsize(mp); 3976 if (size > (size_t)ETHERMAX) { 3977 RW_EXIT(&ldcp->lane_out.dlistrw); 3978 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 3979 ldcp->ldc_id, size); 3980 ldcp->ldc_stats.oerrors++; 3981 return (LDC_TX_FAILURE); 3982 } 3983 3984 /* 3985 * Find a free descriptor in our buffer ring 3986 */ 3987 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 3988 RW_EXIT(&ldcp->lane_out.dlistrw); 3989 if (warn_msg) { 3990 DERR(vswp, "%s(%lld): no descriptor available for ring " 3991 "at 0x%llx", __func__, ldcp->ldc_id, dp); 3992 warn_msg = 0; 3993 } 3994 3995 /* nothing more we can do */ 3996 status = LDC_TX_NORESOURCES; 3997 goto vsw_descrsend_free_exit; 3998 } else { 3999 D2(vswp, "%s(%lld): free private descriptor found at pos " 4000 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4001 warn_msg = 1; 4002 } 4003 4004 /* copy data into the descriptor */ 4005 bufp = priv_desc->datap; 4006 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4007 n = MBLKL(bp); 4008 bcopy(bp->b_rptr, bufp, n); 4009 bufp += n; 4010 } 4011 4012 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4013 4014 /* create and send the in-band descp msg */ 4015 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4016 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4017 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4018 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4019 4020 ibnd_msg.hdr.seq_num = atomic_inc_64_nv(&ldcp->lane_out.seq_num); 4021 4022 /* 4023 * Copy the mem cookies describing the data from the 4024 * private region of the descriptor ring into the inband 4025 * descriptor. 4026 */ 4027 for (i = 0; i < priv_desc->ncookies; i++) { 4028 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4029 sizeof (ldc_mem_cookie_t)); 4030 } 4031 4032 ibnd_msg.hdr.desc_handle = idx; 4033 ibnd_msg.ncookies = priv_desc->ncookies; 4034 ibnd_msg.nbytes = size; 4035 4036 ldcp->ldc_stats.opackets++; 4037 ldcp->ldc_stats.obytes += size; 4038 4039 RW_EXIT(&ldcp->lane_out.dlistrw); 4040 4041 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 4042 sizeof (vnet_ibnd_desc_t), B_TRUE); 4043 4044 vsw_descrsend_free_exit: 4045 4046 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4047 return (status); 4048 } 4049 4050 static void 4051 vsw_send_ver(void *arg) 4052 { 4053 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4054 vsw_t *vswp = ldcp->ldc_vswp; 4055 lane_t *lp = &ldcp->lane_out; 4056 vio_ver_msg_t ver_msg; 4057 4058 D1(vswp, "%s enter", __func__); 4059 4060 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4061 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4062 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 4063 ver_msg.tag.vio_sid = ldcp->local_session; 4064 4065 ver_msg.ver_major = vsw_versions[0].ver_major; 4066 ver_msg.ver_minor = vsw_versions[0].ver_minor; 4067 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 4068 4069 lp->lstate |= VSW_VER_INFO_SENT; 4070 lp->ver_major = ver_msg.ver_major; 4071 lp->ver_minor = ver_msg.ver_minor; 4072 4073 DUMP_TAG(ver_msg.tag); 4074 4075 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 4076 4077 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 4078 } 4079 4080 static void 4081 vsw_send_attr(vsw_ldc_t *ldcp) 4082 { 4083 vsw_t *vswp = ldcp->ldc_vswp; 4084 lane_t *lp = &ldcp->lane_out; 4085 vnet_attr_msg_t attr_msg; 4086 4087 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4088 4089 /* 4090 * Subtype is set to INFO by default 4091 */ 4092 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4093 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4094 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 4095 attr_msg.tag.vio_sid = ldcp->local_session; 4096 4097 /* payload copied from default settings for lane */ 4098 attr_msg.mtu = lp->mtu; 4099 attr_msg.addr_type = lp->addr_type; 4100 attr_msg.xfer_mode = lp->xfer_mode; 4101 attr_msg.ack_freq = lp->xfer_mode; 4102 4103 READ_ENTER(&vswp->if_lockrw); 4104 bcopy(&(vswp->if_addr), &(attr_msg.addr), ETHERADDRL); 4105 RW_EXIT(&vswp->if_lockrw); 4106 4107 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 4108 4109 DUMP_TAG(attr_msg.tag); 4110 4111 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 4112 4113 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4114 } 4115 4116 /* 4117 * Create dring info msg (which also results in the creation of 4118 * a dring). 4119 */ 4120 static vio_dring_reg_msg_t * 4121 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 4122 { 4123 vio_dring_reg_msg_t *mp; 4124 dring_info_t *dp; 4125 vsw_t *vswp = ldcp->ldc_vswp; 4126 4127 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 4128 4129 /* 4130 * If we can't create a dring, obviously no point sending 4131 * a message. 4132 */ 4133 if ((dp = vsw_create_dring(ldcp)) == NULL) 4134 return (NULL); 4135 4136 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 4137 4138 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 4139 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 4140 mp->tag.vio_subtype_env = VIO_DRING_REG; 4141 mp->tag.vio_sid = ldcp->local_session; 4142 4143 /* payload */ 4144 mp->num_descriptors = dp->num_descriptors; 4145 mp->descriptor_size = dp->descriptor_size; 4146 mp->options = dp->options; 4147 mp->ncookies = dp->ncookies; 4148 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 4149 4150 mp->dring_ident = 0; 4151 4152 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 4153 4154 return (mp); 4155 } 4156 4157 static void 4158 vsw_send_dring_info(vsw_ldc_t *ldcp) 4159 { 4160 vio_dring_reg_msg_t *dring_msg; 4161 vsw_t *vswp = ldcp->ldc_vswp; 4162 4163 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 4164 4165 dring_msg = vsw_create_dring_info_pkt(ldcp); 4166 if (dring_msg == NULL) { 4167 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 4168 vswp->instance, __func__); 4169 return; 4170 } 4171 4172 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 4173 4174 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 4175 4176 (void) vsw_send_msg(ldcp, dring_msg, 4177 sizeof (vio_dring_reg_msg_t), B_TRUE); 4178 4179 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 4180 4181 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 4182 } 4183 4184 static void 4185 vsw_send_rdx(vsw_ldc_t *ldcp) 4186 { 4187 vsw_t *vswp = ldcp->ldc_vswp; 4188 vio_rdx_msg_t rdx_msg; 4189 4190 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4191 4192 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4193 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4194 rdx_msg.tag.vio_subtype_env = VIO_RDX; 4195 rdx_msg.tag.vio_sid = ldcp->local_session; 4196 4197 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 4198 4199 DUMP_TAG(rdx_msg.tag); 4200 4201 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 4202 4203 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4204 } 4205 4206 /* 4207 * Generic routine to send message out over ldc channel. 4208 * 4209 * It is possible that when we attempt to write over the ldc channel 4210 * that we get notified that it has been reset. Depending on the value 4211 * of the handle_reset flag we either handle that event here or simply 4212 * notify the caller that the channel was reset. 4213 */ 4214 static int 4215 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset) 4216 { 4217 int rv; 4218 size_t msglen = size; 4219 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 4220 vsw_t *vswp = ldcp->ldc_vswp; 4221 4222 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 4223 ldcp->ldc_id, size); 4224 4225 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 4226 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 4227 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 4228 4229 mutex_enter(&ldcp->ldc_txlock); 4230 do { 4231 msglen = size; 4232 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 4233 } while (rv == EWOULDBLOCK && --vsw_wretries > 0); 4234 4235 if ((rv != 0) || (msglen != size)) { 4236 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) " 4237 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen); 4238 ldcp->ldc_stats.oerrors++; 4239 } 4240 mutex_exit(&ldcp->ldc_txlock); 4241 4242 /* 4243 * If channel has been reset we either handle it here or 4244 * simply report back that it has been reset and let caller 4245 * decide what to do. 4246 */ 4247 if (rv == ECONNRESET) { 4248 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 4249 4250 /* 4251 * N.B - must never be holding the dlistrw lock when 4252 * we do a reset of the channel. 4253 */ 4254 if (handle_reset) { 4255 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 4256 } 4257 } 4258 4259 return (rv); 4260 } 4261 4262 /* 4263 * Remove the specified address from the list of address maintained 4264 * in this port node. 4265 */ 4266 mcst_addr_t * 4267 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 4268 { 4269 vsw_t *vswp = NULL; 4270 vsw_port_t *port = NULL; 4271 mcst_addr_t *prev_p = NULL; 4272 mcst_addr_t *curr_p = NULL; 4273 4274 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 4275 __func__, devtype, addr); 4276 4277 if (devtype == VSW_VNETPORT) { 4278 port = (vsw_port_t *)arg; 4279 mutex_enter(&port->mca_lock); 4280 prev_p = curr_p = port->mcap; 4281 } else { 4282 vswp = (vsw_t *)arg; 4283 mutex_enter(&vswp->mca_lock); 4284 prev_p = curr_p = vswp->mcap; 4285 } 4286 4287 while (curr_p != NULL) { 4288 if (curr_p->addr == addr) { 4289 D2(NULL, "%s: address found", __func__); 4290 /* match found */ 4291 if (prev_p == curr_p) { 4292 /* list head */ 4293 if (devtype == VSW_VNETPORT) 4294 port->mcap = curr_p->nextp; 4295 else 4296 vswp->mcap = curr_p->nextp; 4297 } else { 4298 prev_p->nextp = curr_p->nextp; 4299 } 4300 break; 4301 } else { 4302 prev_p = curr_p; 4303 curr_p = curr_p->nextp; 4304 } 4305 } 4306 4307 if (devtype == VSW_VNETPORT) 4308 mutex_exit(&port->mca_lock); 4309 else 4310 mutex_exit(&vswp->mca_lock); 4311 4312 D1(NULL, "%s: exit", __func__); 4313 4314 return (curr_p); 4315 } 4316 4317 /* 4318 * Creates a descriptor ring (dring) and links it into the 4319 * link of outbound drings for this channel. 4320 * 4321 * Returns NULL if creation failed. 4322 */ 4323 static dring_info_t * 4324 vsw_create_dring(vsw_ldc_t *ldcp) 4325 { 4326 vsw_private_desc_t *priv_addr = NULL; 4327 vsw_t *vswp = ldcp->ldc_vswp; 4328 ldc_mem_info_t minfo; 4329 dring_info_t *dp, *tp; 4330 int i; 4331 4332 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4333 4334 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4335 4336 /* create public section of ring */ 4337 if ((ldc_mem_dring_create(VSW_RING_NUM_EL, 4338 VSW_PUB_SIZE, &dp->handle)) != 0) { 4339 4340 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 4341 "failed", ldcp->ldc_id); 4342 goto create_fail_exit; 4343 } 4344 4345 ASSERT(dp->handle != NULL); 4346 4347 /* 4348 * Get the base address of the public section of the ring. 4349 */ 4350 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 4351 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 4352 ldcp->ldc_id); 4353 goto dring_fail_exit; 4354 } else { 4355 ASSERT(minfo.vaddr != 0); 4356 dp->pub_addr = minfo.vaddr; 4357 } 4358 4359 dp->num_descriptors = VSW_RING_NUM_EL; 4360 dp->descriptor_size = VSW_PUB_SIZE; 4361 dp->options = VIO_TX_DRING; 4362 dp->ncookies = 1; /* guaranteed by ldc */ 4363 4364 /* 4365 * create private portion of ring 4366 */ 4367 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 4368 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP); 4369 4370 if (vsw_setup_ring(ldcp, dp)) { 4371 DERR(vswp, "%s: unable to setup ring", __func__); 4372 goto dring_fail_exit; 4373 } 4374 4375 /* haven't used any descriptors yet */ 4376 dp->end_idx = 0; 4377 dp->last_ack_recv = -1; 4378 4379 /* bind dring to the channel */ 4380 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 4381 LDC_SHADOW_MAP, LDC_MEM_RW, 4382 &dp->cookie[0], &dp->ncookies)) != 0) { 4383 DERR(vswp, "vsw_create_dring: unable to bind to channel " 4384 "%lld", ldcp->ldc_id); 4385 goto dring_fail_exit; 4386 } 4387 4388 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4389 dp->restart_reqd = B_TRUE; 4390 4391 /* 4392 * Only ever create rings for outgoing lane. Link it onto 4393 * end of list. 4394 */ 4395 WRITE_ENTER(&ldcp->lane_out.dlistrw); 4396 if (ldcp->lane_out.dringp == NULL) { 4397 D2(vswp, "vsw_create_dring: adding first outbound ring"); 4398 ldcp->lane_out.dringp = dp; 4399 } else { 4400 tp = ldcp->lane_out.dringp; 4401 while (tp->next != NULL) 4402 tp = tp->next; 4403 4404 tp->next = dp; 4405 } 4406 RW_EXIT(&ldcp->lane_out.dlistrw); 4407 4408 return (dp); 4409 4410 dring_fail_exit: 4411 (void) ldc_mem_dring_destroy(dp->handle); 4412 4413 create_fail_exit: 4414 if (dp->priv_addr != NULL) { 4415 priv_addr = dp->priv_addr; 4416 for (i = 0; i < VSW_RING_NUM_EL; i++) { 4417 if (priv_addr->memhandle != NULL) 4418 (void) ldc_mem_free_handle( 4419 priv_addr->memhandle); 4420 priv_addr++; 4421 } 4422 kmem_free(dp->priv_addr, 4423 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL)); 4424 } 4425 mutex_destroy(&dp->dlock); 4426 4427 kmem_free(dp, sizeof (dring_info_t)); 4428 return (NULL); 4429 } 4430 4431 /* 4432 * Create a ring consisting of just a private portion and link 4433 * it into the list of rings for the outbound lane. 4434 * 4435 * These type of rings are used primarily for temporary data 4436 * storage (i.e. as data buffers). 4437 */ 4438 void 4439 vsw_create_privring(vsw_ldc_t *ldcp) 4440 { 4441 dring_info_t *dp, *tp; 4442 vsw_t *vswp = ldcp->ldc_vswp; 4443 4444 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4445 4446 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4447 4448 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4449 4450 /* no public section */ 4451 dp->pub_addr = NULL; 4452 4453 dp->priv_addr = kmem_zalloc( 4454 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP); 4455 4456 dp->num_descriptors = VSW_RING_NUM_EL; 4457 4458 if (vsw_setup_ring(ldcp, dp)) { 4459 DERR(vswp, "%s: setup of ring failed", __func__); 4460 kmem_free(dp->priv_addr, 4461 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL)); 4462 mutex_destroy(&dp->dlock); 4463 kmem_free(dp, sizeof (dring_info_t)); 4464 return; 4465 } 4466 4467 /* haven't used any descriptors yet */ 4468 dp->end_idx = 0; 4469 4470 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4471 dp->restart_reqd = B_TRUE; 4472 4473 /* 4474 * Only ever create rings for outgoing lane. Link it onto 4475 * end of list. 4476 */ 4477 WRITE_ENTER(&ldcp->lane_out.dlistrw); 4478 if (ldcp->lane_out.dringp == NULL) { 4479 D2(vswp, "%s: adding first outbound privring", __func__); 4480 ldcp->lane_out.dringp = dp; 4481 } else { 4482 tp = ldcp->lane_out.dringp; 4483 while (tp->next != NULL) 4484 tp = tp->next; 4485 4486 tp->next = dp; 4487 } 4488 RW_EXIT(&ldcp->lane_out.dlistrw); 4489 4490 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4491 } 4492 4493 /* 4494 * Setup the descriptors in the dring. Returns 0 on success, 1 on 4495 * failure. 4496 */ 4497 int 4498 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 4499 { 4500 vnet_public_desc_t *pub_addr = NULL; 4501 vsw_private_desc_t *priv_addr = NULL; 4502 vsw_t *vswp = ldcp->ldc_vswp; 4503 uint64_t *tmpp; 4504 uint64_t offset = 0; 4505 uint32_t ncookies = 0; 4506 static char *name = "vsw_setup_ring"; 4507 int i, j, nc, rv; 4508 4509 priv_addr = dp->priv_addr; 4510 pub_addr = dp->pub_addr; 4511 4512 /* public section may be null but private should never be */ 4513 ASSERT(priv_addr != NULL); 4514 4515 /* 4516 * Allocate the region of memory which will be used to hold 4517 * the data the descriptors will refer to. 4518 */ 4519 dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ); 4520 dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 4521 4522 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 4523 dp->data_sz, dp->data_addr); 4524 4525 tmpp = (uint64_t *)dp->data_addr; 4526 offset = VSW_RING_EL_DATA_SZ / sizeof (tmpp); 4527 4528 /* 4529 * Initialise some of the private and public (if they exist) 4530 * descriptor fields. 4531 */ 4532 for (i = 0; i < VSW_RING_NUM_EL; i++) { 4533 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 4534 4535 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 4536 &priv_addr->memhandle)) != 0) { 4537 DERR(vswp, "%s: alloc mem handle failed", name); 4538 goto setup_ring_cleanup; 4539 } 4540 4541 priv_addr->datap = (void *)tmpp; 4542 4543 rv = ldc_mem_bind_handle(priv_addr->memhandle, 4544 (caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ, 4545 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 4546 &(priv_addr->memcookie[0]), &ncookies); 4547 if (rv != 0) { 4548 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 4549 "(rv %d)", name, ldcp->ldc_id, rv); 4550 goto setup_ring_cleanup; 4551 } 4552 priv_addr->bound = 1; 4553 4554 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 4555 name, i, priv_addr->memcookie[0].addr, 4556 priv_addr->memcookie[0].size); 4557 4558 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 4559 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 4560 "invalid num of cookies (%d) for size 0x%llx", 4561 name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ); 4562 4563 goto setup_ring_cleanup; 4564 } else { 4565 for (j = 1; j < ncookies; j++) { 4566 rv = ldc_mem_nextcookie(priv_addr->memhandle, 4567 &(priv_addr->memcookie[j])); 4568 if (rv != 0) { 4569 DERR(vswp, "%s: ldc_mem_nextcookie " 4570 "failed rv (%d)", name, rv); 4571 goto setup_ring_cleanup; 4572 } 4573 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 4574 "size 0x%llx", name, j, 4575 priv_addr->memcookie[j].addr, 4576 priv_addr->memcookie[j].size); 4577 } 4578 4579 } 4580 priv_addr->ncookies = ncookies; 4581 priv_addr->dstate = VIO_DESC_FREE; 4582 4583 if (pub_addr != NULL) { 4584 4585 /* link pub and private sides */ 4586 priv_addr->descp = pub_addr; 4587 4588 pub_addr->ncookies = priv_addr->ncookies; 4589 4590 for (nc = 0; nc < pub_addr->ncookies; nc++) { 4591 bcopy(&priv_addr->memcookie[nc], 4592 &pub_addr->memcookie[nc], 4593 sizeof (ldc_mem_cookie_t)); 4594 } 4595 4596 pub_addr->hdr.dstate = VIO_DESC_FREE; 4597 pub_addr++; 4598 } 4599 4600 /* 4601 * move to next element in the dring and the next 4602 * position in the data buffer. 4603 */ 4604 priv_addr++; 4605 tmpp += offset; 4606 } 4607 4608 return (0); 4609 4610 setup_ring_cleanup: 4611 priv_addr = dp->priv_addr; 4612 4613 for (j = 0; j < i; j++) { 4614 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 4615 (void) ldc_mem_free_handle(priv_addr->memhandle); 4616 4617 mutex_destroy(&priv_addr->dstate_lock); 4618 4619 priv_addr++; 4620 } 4621 kmem_free(dp->data_addr, dp->data_sz); 4622 4623 return (1); 4624 } 4625 4626 /* 4627 * Searches the private section of a ring for a free descriptor, 4628 * starting at the location of the last free descriptor found 4629 * previously. 4630 * 4631 * Returns 0 if free descriptor is available, and updates state 4632 * of private descriptor to VIO_DESC_READY, otherwise returns 1. 4633 * 4634 * FUTURE: might need to return contiguous range of descriptors 4635 * as dring info msg assumes all will be contiguous. 4636 */ 4637 static int 4638 vsw_dring_find_free_desc(dring_info_t *dringp, 4639 vsw_private_desc_t **priv_p, int *idx) 4640 { 4641 vsw_private_desc_t *addr = NULL; 4642 int num = VSW_RING_NUM_EL; 4643 int ret = 1; 4644 4645 D1(NULL, "%s enter\n", __func__); 4646 4647 ASSERT(dringp->priv_addr != NULL); 4648 4649 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 4650 __func__, dringp, dringp->end_idx); 4651 4652 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 4653 4654 mutex_enter(&addr->dstate_lock); 4655 if (addr->dstate == VIO_DESC_FREE) { 4656 addr->dstate = VIO_DESC_READY; 4657 *priv_p = addr; 4658 *idx = dringp->end_idx; 4659 dringp->end_idx = (dringp->end_idx + 1) % num; 4660 ret = 0; 4661 4662 } 4663 mutex_exit(&addr->dstate_lock); 4664 4665 /* ring full */ 4666 if (ret == 1) { 4667 D2(NULL, "%s: no desp free: started at %d", __func__, 4668 dringp->end_idx); 4669 } 4670 4671 D1(NULL, "%s: exit\n", __func__); 4672 4673 return (ret); 4674 } 4675 4676 /* 4677 * Map from a dring identifier to the ring itself. Returns 4678 * pointer to ring or NULL if no match found. 4679 * 4680 * Should be called with dlistrw rwlock held as reader. 4681 */ 4682 static dring_info_t * 4683 vsw_ident2dring(lane_t *lane, uint64_t ident) 4684 { 4685 dring_info_t *dp = NULL; 4686 4687 if ((dp = lane->dringp) == NULL) { 4688 return (NULL); 4689 } else { 4690 if (dp->ident == ident) 4691 return (dp); 4692 4693 while (dp != NULL) { 4694 if (dp->ident == ident) 4695 break; 4696 dp = dp->next; 4697 } 4698 } 4699 4700 return (dp); 4701 } 4702 4703 /* 4704 * Set the default lane attributes. These are copied into 4705 * the attr msg we send to our peer. If they are not acceptable 4706 * then (currently) the handshake ends. 4707 */ 4708 static void 4709 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 4710 { 4711 bzero(lp, sizeof (lane_t)); 4712 4713 READ_ENTER(&vswp->if_lockrw); 4714 ether_copy(&(vswp->if_addr), &(lp->addr)); 4715 RW_EXIT(&vswp->if_lockrw); 4716 4717 lp->mtu = VSW_MTU; 4718 lp->addr_type = ADDR_TYPE_MAC; 4719 lp->xfer_mode = VIO_DRING_MODE; 4720 lp->ack_freq = 0; /* for shared mode */ 4721 4722 /* 4723 * As the seq_num is incremented before sending, 4724 * initialize it with VNET_ISS - 1. 4725 */ 4726 atomic_swap_64(&lp->seq_num, (VNET_ISS - 1)); 4727 } 4728 4729 /* 4730 * Verify that the attributes are acceptable. 4731 * 4732 * FUTURE: If some attributes are not acceptable, change them 4733 * our desired values. 4734 */ 4735 static int 4736 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port) 4737 { 4738 int ret = 0; 4739 4740 D1(NULL, "vsw_check_attr enter\n"); 4741 4742 /* 4743 * Note we currently only support in-band descriptors 4744 * and descriptor rings, not packet based transfer (VIO_PKT_MODE) 4745 */ 4746 if ((pkt->xfer_mode != VIO_DESC_MODE) && 4747 (pkt->xfer_mode != VIO_DRING_MODE)) { 4748 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode); 4749 ret = 1; 4750 } 4751 4752 /* Only support MAC addresses at moment. */ 4753 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 4754 D2(NULL, "vsw_check_attr: invalid addr_type %x, " 4755 "or address 0x%llx\n", pkt->addr_type, pkt->addr); 4756 ret = 1; 4757 } 4758 4759 /* 4760 * MAC address supplied by device should match that stored 4761 * in the vsw-port OBP node. Need to decide what to do if they 4762 * don't match, for the moment just warn but don't fail. 4763 */ 4764 if (bcmp(&pkt->addr, &port->p_macaddr, ETHERADDRL) != 0) { 4765 DERR(NULL, "vsw_check_attr: device supplied address " 4766 "0x%llx doesn't match node address 0x%llx\n", 4767 pkt->addr, port->p_macaddr); 4768 } 4769 4770 /* 4771 * Ack freq only makes sense in pkt mode, in shared 4772 * mode the ring descriptors say whether or not to 4773 * send back an ACK. 4774 */ 4775 if ((pkt->xfer_mode == VIO_DRING_MODE) && 4776 (pkt->ack_freq > 0)) { 4777 D2(NULL, "vsw_check_attr: non zero ack freq " 4778 " in SHM mode\n"); 4779 ret = 1; 4780 } 4781 4782 /* 4783 * Note: for the moment we only support ETHER 4784 * frames. This may change in the future. 4785 */ 4786 if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) { 4787 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n", 4788 pkt->mtu); 4789 ret = 1; 4790 } 4791 4792 D1(NULL, "vsw_check_attr exit\n"); 4793 4794 return (ret); 4795 } 4796 4797 /* 4798 * Returns 1 if there is a problem, 0 otherwise. 4799 */ 4800 static int 4801 vsw_check_dring_info(vio_dring_reg_msg_t *pkt) 4802 { 4803 _NOTE(ARGUNUSED(pkt)) 4804 4805 int ret = 0; 4806 4807 D1(NULL, "vsw_check_dring_info enter\n"); 4808 4809 if ((pkt->num_descriptors == 0) || 4810 (pkt->descriptor_size == 0) || 4811 (pkt->ncookies != 1)) { 4812 DERR(NULL, "vsw_check_dring_info: invalid dring msg"); 4813 ret = 1; 4814 } 4815 4816 D1(NULL, "vsw_check_dring_info exit\n"); 4817 4818 return (ret); 4819 } 4820 4821 /* 4822 * Returns 1 if two memory cookies match. Otherwise returns 0. 4823 */ 4824 static int 4825 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2) 4826 { 4827 if ((m1->addr != m2->addr) || 4828 (m2->size != m2->size)) { 4829 return (0); 4830 } else { 4831 return (1); 4832 } 4833 } 4834 4835 /* 4836 * Returns 1 if ring described in reg message matches that 4837 * described by dring_info structure. Otherwise returns 0. 4838 */ 4839 static int 4840 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 4841 { 4842 if ((msg->descriptor_size != dp->descriptor_size) || 4843 (msg->num_descriptors != dp->num_descriptors) || 4844 (msg->ncookies != dp->ncookies) || 4845 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 4846 return (0); 4847 } else { 4848 return (1); 4849 } 4850 4851 } 4852 4853 static caddr_t 4854 vsw_print_ethaddr(uint8_t *a, char *ebuf) 4855 { 4856 (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x", 4857 a[0], a[1], a[2], a[3], a[4], a[5]); 4858 return (ebuf); 4859 } 4860 4861 /* 4862 * Reset and free all the resources associated with 4863 * the channel. 4864 */ 4865 static void 4866 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 4867 { 4868 dring_info_t *dp, *dpp; 4869 lane_t *lp = NULL; 4870 int rv = 0; 4871 4872 ASSERT(ldcp != NULL); 4873 4874 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 4875 4876 if (dir == INBOUND) { 4877 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 4878 " of channel %lld", __func__, ldcp->ldc_id); 4879 lp = &ldcp->lane_in; 4880 } else { 4881 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 4882 " of channel %lld", __func__, ldcp->ldc_id); 4883 lp = &ldcp->lane_out; 4884 } 4885 4886 lp->lstate = VSW_LANE_INACTIV; 4887 4888 /* 4889 * As the seq_num is incremented before sending, 4890 * initialize it with VNET_ISS - 1. 4891 */ 4892 atomic_swap_64(&lp->seq_num, (VNET_ISS - 1)); 4893 4894 if (lp->dringp) { 4895 if (dir == INBOUND) { 4896 WRITE_ENTER(&lp->dlistrw); 4897 dp = lp->dringp; 4898 while (dp != NULL) { 4899 dpp = dp->next; 4900 if (dp->handle != NULL) 4901 (void) ldc_mem_dring_unmap(dp->handle); 4902 kmem_free(dp, sizeof (dring_info_t)); 4903 dp = dpp; 4904 } 4905 RW_EXIT(&lp->dlistrw); 4906 } else { 4907 /* 4908 * unbind, destroy exported dring, free dring struct 4909 */ 4910 WRITE_ENTER(&lp->dlistrw); 4911 dp = lp->dringp; 4912 rv = vsw_free_ring(dp); 4913 RW_EXIT(&lp->dlistrw); 4914 } 4915 if (rv == 0) { 4916 lp->dringp = NULL; 4917 } 4918 } 4919 4920 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 4921 } 4922 4923 /* 4924 * Free ring and all associated resources. 4925 * 4926 * Should be called with dlistrw rwlock held as writer. 4927 */ 4928 static int 4929 vsw_free_ring(dring_info_t *dp) 4930 { 4931 vsw_private_desc_t *paddr = NULL; 4932 dring_info_t *dpp; 4933 int i, rv = 1; 4934 4935 while (dp != NULL) { 4936 mutex_enter(&dp->dlock); 4937 dpp = dp->next; 4938 if (dp->priv_addr != NULL) { 4939 /* 4940 * First unbind and free the memory handles 4941 * stored in each descriptor within the ring. 4942 */ 4943 for (i = 0; i < VSW_RING_NUM_EL; i++) { 4944 paddr = (vsw_private_desc_t *) 4945 dp->priv_addr + i; 4946 if (paddr->memhandle != NULL) { 4947 if (paddr->bound == 1) { 4948 rv = ldc_mem_unbind_handle( 4949 paddr->memhandle); 4950 4951 if (rv != 0) { 4952 DERR(NULL, "error " 4953 "unbinding handle for " 4954 "ring 0x%llx at pos %d", 4955 dp, i); 4956 mutex_exit(&dp->dlock); 4957 return (rv); 4958 } 4959 paddr->bound = 0; 4960 } 4961 4962 rv = ldc_mem_free_handle( 4963 paddr->memhandle); 4964 if (rv != 0) { 4965 DERR(NULL, "error freeing " 4966 "handle for ring 0x%llx " 4967 "at pos %d", dp, i); 4968 mutex_exit(&dp->dlock); 4969 return (rv); 4970 } 4971 paddr->memhandle = NULL; 4972 } 4973 mutex_destroy(&paddr->dstate_lock); 4974 } 4975 kmem_free(dp->priv_addr, 4976 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL)); 4977 } 4978 4979 /* 4980 * Now unbind and destroy the ring itself. 4981 */ 4982 if (dp->handle != NULL) { 4983 (void) ldc_mem_dring_unbind(dp->handle); 4984 (void) ldc_mem_dring_destroy(dp->handle); 4985 } 4986 4987 if (dp->data_addr != NULL) { 4988 kmem_free(dp->data_addr, dp->data_sz); 4989 } 4990 4991 mutex_exit(&dp->dlock); 4992 mutex_destroy(&dp->dlock); 4993 mutex_destroy(&dp->restart_lock); 4994 kmem_free(dp, sizeof (dring_info_t)); 4995 4996 dp = dpp; 4997 } 4998 return (0); 4999 } 5000 5001 /* 5002 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 5003 * This thread is woken up by the LDC interrupt handler to process 5004 * LDC packets and receive data. 5005 */ 5006 static void 5007 vsw_ldc_rx_worker(void *arg) 5008 { 5009 callb_cpr_t cprinfo; 5010 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5011 vsw_t *vswp = ldcp->ldc_vswp; 5012 5013 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5014 CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr, 5015 "vsw_rx_thread"); 5016 mutex_enter(&ldcp->rx_thr_lock); 5017 ldcp->rx_thr_flags |= VSW_WTHR_RUNNING; 5018 while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) { 5019 5020 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5021 /* 5022 * Wait until the data is received or a stop 5023 * request is received. 5024 */ 5025 while (!(ldcp->rx_thr_flags & 5026 (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) { 5027 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5028 } 5029 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock) 5030 5031 /* 5032 * First process the stop request. 5033 */ 5034 if (ldcp->rx_thr_flags & VSW_WTHR_STOP) { 5035 D2(vswp, "%s(%lld):Rx thread stopped\n", 5036 __func__, ldcp->ldc_id); 5037 break; 5038 } 5039 ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD; 5040 mutex_exit(&ldcp->rx_thr_lock); 5041 D1(vswp, "%s(%lld):calling vsw_process_pkt\n", 5042 __func__, ldcp->ldc_id); 5043 mutex_enter(&ldcp->ldc_cblock); 5044 vsw_process_pkt(ldcp); 5045 mutex_exit(&ldcp->ldc_cblock); 5046 mutex_enter(&ldcp->rx_thr_lock); 5047 } 5048 5049 /* 5050 * Update the run status and wakeup the thread that 5051 * has sent the stop request. 5052 */ 5053 ldcp->rx_thr_flags &= ~VSW_WTHR_RUNNING; 5054 cv_signal(&ldcp->rx_thr_cv); 5055 CALLB_CPR_EXIT(&cprinfo); 5056 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5057 thread_exit(); 5058 } 5059 5060 /* vsw_stop_rx_thread -- Co-ordinate with receive thread to stop it */ 5061 static void 5062 vsw_stop_rx_thread(vsw_ldc_t *ldcp) 5063 { 5064 vsw_t *vswp = ldcp->ldc_vswp; 5065 5066 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5067 /* 5068 * Send a stop request by setting the stop flag and 5069 * wait until the receive thread stops. 5070 */ 5071 mutex_enter(&ldcp->rx_thr_lock); 5072 if (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5073 ldcp->rx_thr_flags |= VSW_WTHR_STOP; 5074 cv_signal(&ldcp->rx_thr_cv); 5075 while (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5076 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5077 } 5078 } 5079 mutex_exit(&ldcp->rx_thr_lock); 5080 ldcp->rx_thread = NULL; 5081 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5082 } 5083 5084 /* 5085 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data. 5086 * This thread is woken up by the vsw_portsend to transmit 5087 * packets. 5088 */ 5089 static void 5090 vsw_ldc_tx_worker(void *arg) 5091 { 5092 callb_cpr_t cprinfo; 5093 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5094 vsw_t *vswp = ldcp->ldc_vswp; 5095 mblk_t *mp; 5096 mblk_t *tmp; 5097 5098 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5099 CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr, 5100 "vnet_tx_thread"); 5101 mutex_enter(&ldcp->tx_thr_lock); 5102 ldcp->tx_thr_flags |= VSW_WTHR_RUNNING; 5103 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) { 5104 5105 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5106 /* 5107 * Wait until the data is received or a stop 5108 * request is received. 5109 */ 5110 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) && 5111 (ldcp->tx_mhead == NULL)) { 5112 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5113 } 5114 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock) 5115 5116 /* 5117 * First process the stop request. 5118 */ 5119 if (ldcp->tx_thr_flags & VSW_WTHR_STOP) { 5120 D2(vswp, "%s(%lld):tx thread stopped\n", 5121 __func__, ldcp->ldc_id); 5122 break; 5123 } 5124 mp = ldcp->tx_mhead; 5125 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 5126 mutex_exit(&ldcp->tx_thr_lock); 5127 D2(vswp, "%s(%lld):calling vsw_ldcsend\n", 5128 __func__, ldcp->ldc_id); 5129 while (mp != NULL) { 5130 tmp = mp->b_next; 5131 mp->b_next = mp->b_prev = NULL; 5132 (void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries); 5133 mp = tmp; 5134 } 5135 mutex_enter(&ldcp->tx_thr_lock); 5136 } 5137 5138 /* 5139 * Update the run status and wakeup the thread that 5140 * has sent the stop request. 5141 */ 5142 ldcp->tx_thr_flags &= ~VSW_WTHR_RUNNING; 5143 cv_signal(&ldcp->tx_thr_cv); 5144 CALLB_CPR_EXIT(&cprinfo); 5145 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5146 thread_exit(); 5147 } 5148 5149 /* vsw_stop_tx_thread -- Co-ordinate with receive thread to stop it */ 5150 static void 5151 vsw_stop_tx_thread(vsw_ldc_t *ldcp) 5152 { 5153 vsw_t *vswp = ldcp->ldc_vswp; 5154 5155 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5156 /* 5157 * Send a stop request by setting the stop flag and 5158 * wait until the receive thread stops. 5159 */ 5160 mutex_enter(&ldcp->tx_thr_lock); 5161 if (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5162 ldcp->tx_thr_flags |= VSW_WTHR_STOP; 5163 cv_signal(&ldcp->tx_thr_cv); 5164 while (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5165 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5166 } 5167 } 5168 mutex_exit(&ldcp->tx_thr_lock); 5169 ldcp->tx_thread = NULL; 5170 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5171 } 5172 5173 /* vsw_reclaim_dring -- reclaim descriptors */ 5174 static int 5175 vsw_reclaim_dring(dring_info_t *dp, int start) 5176 { 5177 int i, j, len; 5178 vsw_private_desc_t *priv_addr; 5179 vnet_public_desc_t *pub_addr; 5180 5181 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 5182 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 5183 len = dp->num_descriptors; 5184 5185 D2(NULL, "%s: start index %ld\n", __func__, start); 5186 5187 j = 0; 5188 for (i = start; j < len; i = (i + 1) % len, j++) { 5189 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 5190 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 5191 5192 mutex_enter(&priv_addr->dstate_lock); 5193 if (pub_addr->hdr.dstate != VIO_DESC_DONE) { 5194 mutex_exit(&priv_addr->dstate_lock); 5195 DTRACE_PROBE1(vsw_reclaimed, int, j); 5196 break; 5197 } 5198 pub_addr->hdr.dstate = VIO_DESC_FREE; 5199 priv_addr->dstate = VIO_DESC_FREE; 5200 /* clear all the fields */ 5201 priv_addr->datalen = 0; 5202 pub_addr->hdr.ack = 0; 5203 mutex_exit(&priv_addr->dstate_lock); 5204 5205 D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx", 5206 i, pub_addr->hdr.dstate, priv_addr->dstate); 5207 } 5208 return (j); 5209 } 5210 5211 /* 5212 * Debugging routines 5213 */ 5214 static void 5215 display_state(void) 5216 { 5217 vsw_t *vswp; 5218 vsw_port_list_t *plist; 5219 vsw_port_t *port; 5220 vsw_ldc_list_t *ldcl; 5221 vsw_ldc_t *ldcp; 5222 extern vsw_t *vsw_head; 5223 5224 cmn_err(CE_NOTE, "***** system state *****"); 5225 5226 for (vswp = vsw_head; vswp; vswp = vswp->next) { 5227 plist = &vswp->plist; 5228 READ_ENTER(&plist->lockrw); 5229 cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n", 5230 vswp->instance, plist->num_ports); 5231 5232 for (port = plist->head; port != NULL; port = port->p_next) { 5233 ldcl = &port->p_ldclist; 5234 cmn_err(CE_CONT, "port %d : %d ldcs attached\n", 5235 port->p_instance, ldcl->num_ldcs); 5236 READ_ENTER(&ldcl->lockrw); 5237 ldcp = ldcl->head; 5238 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 5239 cmn_err(CE_CONT, "chan %lu : dev %d : " 5240 "status %d : phase %u\n", 5241 ldcp->ldc_id, ldcp->dev_class, 5242 ldcp->ldc_status, ldcp->hphase); 5243 cmn_err(CE_CONT, "chan %lu : lsession %lu : " 5244 "psession %lu\n", ldcp->ldc_id, 5245 ldcp->local_session, ldcp->peer_session); 5246 5247 cmn_err(CE_CONT, "Inbound lane:\n"); 5248 display_lane(&ldcp->lane_in); 5249 cmn_err(CE_CONT, "Outbound lane:\n"); 5250 display_lane(&ldcp->lane_out); 5251 } 5252 RW_EXIT(&ldcl->lockrw); 5253 } 5254 RW_EXIT(&plist->lockrw); 5255 } 5256 cmn_err(CE_NOTE, "***** system state *****"); 5257 } 5258 5259 static void 5260 display_lane(lane_t *lp) 5261 { 5262 dring_info_t *drp; 5263 5264 cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n", 5265 lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu); 5266 cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n", 5267 lp->addr_type, lp->addr, lp->xfer_mode); 5268 cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp); 5269 5270 cmn_err(CE_CONT, "Dring info:\n"); 5271 for (drp = lp->dringp; drp != NULL; drp = drp->next) { 5272 cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n", 5273 drp->num_descriptors, drp->descriptor_size); 5274 cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle); 5275 cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n", 5276 (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr); 5277 cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n", 5278 drp->ident, drp->end_idx); 5279 display_ring(drp); 5280 } 5281 } 5282 5283 static void 5284 display_ring(dring_info_t *dringp) 5285 { 5286 uint64_t i; 5287 uint64_t priv_count = 0; 5288 uint64_t pub_count = 0; 5289 vnet_public_desc_t *pub_addr = NULL; 5290 vsw_private_desc_t *priv_addr = NULL; 5291 5292 for (i = 0; i < VSW_RING_NUM_EL; i++) { 5293 if (dringp->pub_addr != NULL) { 5294 pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i; 5295 5296 if (pub_addr->hdr.dstate == VIO_DESC_FREE) 5297 pub_count++; 5298 } 5299 5300 if (dringp->priv_addr != NULL) { 5301 priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i; 5302 5303 if (priv_addr->dstate == VIO_DESC_FREE) 5304 priv_count++; 5305 } 5306 } 5307 cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n", 5308 i, priv_count, pub_count); 5309 } 5310 5311 static void 5312 dump_flags(uint64_t state) 5313 { 5314 int i; 5315 5316 typedef struct flag_name { 5317 int flag_val; 5318 char *flag_name; 5319 } flag_name_t; 5320 5321 flag_name_t flags[] = { 5322 VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT", 5323 VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV", 5324 VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV", 5325 VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT", 5326 VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV", 5327 VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT", 5328 VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT", 5329 VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV", 5330 VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT", 5331 VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV", 5332 VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT", 5333 VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV", 5334 VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT", 5335 VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV", 5336 VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT", 5337 VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV", 5338 VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT", 5339 VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV", 5340 VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT", 5341 VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV", 5342 VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT", 5343 VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV", 5344 VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT", 5345 VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV", 5346 VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT", 5347 VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV", 5348 VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT", 5349 VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV", 5350 VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT", 5351 VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV", 5352 VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"}; 5353 5354 DERR(NULL, "DUMP_FLAGS: %llx\n", state); 5355 for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) { 5356 if (state & flags[i].flag_val) 5357 DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name); 5358 } 5359 } 5360