/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/callb.h>

/* Port add/deletion/etc routines */
static	int vsw_port_delete(vsw_port_t *port);
static	int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static	int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
static	int vsw_init_ldcs(vsw_port_t *port);
static	int vsw_uninit_ldcs(vsw_port_t *port);
static	int vsw_ldc_init(vsw_ldc_t *ldcp);
static	int vsw_ldc_uninit(vsw_ldc_t *ldcp);
static	int vsw_drain_ldcs(vsw_port_t *port);
static	int vsw_drain_port_taskq(vsw_port_t *port);
static	void vsw_marker_task(void *);
static	int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
int	vsw_detach_ports(vsw_t *vswp);
int	vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
int	vsw_port_detach(vsw_t *vswp, int p_instance);
int	vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt);
int	vsw_port_attach(vsw_t *vswp, int p_instance,
	uint64_t *ldcids, int nids, struct ether_addr *macaddr);
vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);

/* Interrupt routines */
static	uint_t vsw_ldc_cb(uint64_t event, caddr_t arg);

/* Handshake routines */
static	void vsw_ldc_reinit(vsw_ldc_t *);
static	void vsw_process_conn_evt(vsw_ldc_t *, uint16_t);
static	void vsw_conn_task(void *);
static	int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static	void vsw_next_milestone(vsw_ldc_t *);
static	int vsw_supported_version(vio_ver_msg_t *);

/* Data processing routines */
static	void vsw_process_pkt(void *);
static	void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t);
static	void vsw_process_ctrl_pkt(void *);
static	void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static	void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static	void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static	void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static	void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static	void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static	void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
static	void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
static	void vsw_process_data_raw_pkt(vsw_ldc_t *, void *);
static	void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static	void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);

/* Switching/data transmit routines */
static	int vsw_dringsend(vsw_ldc_t *, mblk_t *);
static	int vsw_descrsend(vsw_ldc_t *, mblk_t *);
static	int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, int retries);

/* Packet creation routines */
static void vsw_send_ver(void *);
static void vsw_send_attr(vsw_ldc_t *);
static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);
static int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);

/* Dring routines */
static dring_info_t *vsw_create_dring(vsw_ldc_t *);
static void vsw_create_privring(vsw_ldc_t *);
static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
    int *);
static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);
static int vsw_reclaim_dring(dring_info_t *dp, int start);

static void vsw_set_lane_attr(vsw_t *, lane_t *);
static int vsw_check_attr(vnet_attr_msg_t *, vsw_port_t *);
static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
static int vsw_check_dring_info(vio_dring_reg_msg_t *);

/* Rcv/Tx thread routines */
static void vsw_stop_tx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_tx_worker(void *arg);
static uint_t vsw_rx_softintr(caddr_t arg1, caddr_t arg2);
static void vsw_stop_rx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_rx_worker(void *arg);

/* Misc support routines */
static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static int vsw_free_ring(dring_info_t *);
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
static int vsw_get_same_dest_list(struct ether_header *ehp,
    mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);
static mblk_t *vsw_dupmsgchain(mblk_t *mp);
static void vsw_mac_rx(vsw_t *vswp, int caller, mac_resource_handle_t mrh,
    mblk_t *mp, mblk_t *mpt, vsw_macrx_flags_t flags);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);
/*
 * Functions imported from other files.
 */
extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
extern void vsw_reconfig_hw(vsw_t *);
extern int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
extern int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port);
extern void vsw_del_mcst_port(vsw_port_t *port);
extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);

#define	VSW_NUM_VMPOOLS		3	/* number of vio mblk pools */
#define	VSW_PORT_REF_DELAY	30	/* delay for port ref_cnt to become 0 */

/*
 * Tunables used in this file.
 */
extern int vsw_num_handshakes;
extern int vsw_wretries;
extern int vsw_desc_delay;
extern int vsw_read_attempts;
extern int vsw_ldc_tx_delay;
extern int vsw_ldc_tx_retries;
extern int vsw_ldc_tx_max_failures;
extern boolean_t vsw_ldc_rxthr_enabled;
extern boolean_t vsw_ldc_txthr_enabled;
extern uint32_t vsw_chain_len;
extern uint32_t vsw_mblk_size1;
extern uint32_t vsw_mblk_size2;
extern uint32_t vsw_mblk_size3;
extern uint32_t vsw_num_mblks1;
extern uint32_t vsw_num_mblks2;
extern uint32_t vsw_num_mblks3;

/*
 * Acquire all of a channel's locks (callback, then receive, then
 * transmit); LDC_EXIT_LOCK releases them in the reverse order.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
				mutex_enter(&((ldcp)->ldc_cblock));\
				mutex_enter(&((ldcp)->ldc_rxlock));\
				mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
				mutex_exit(&((ldcp)->ldc_txlock));\
				mutex_exit(&((ldcp)->ldc_rxlock));\
				mutex_exit(&((ldcp)->ldc_cblock));

/* supported versions */
static	ver_sup_t	vsw_versions[] = { {1, 0} };

/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

#define	DUMP_TAG(tag) \
{			\
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype);	\
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env);	\
}

#define	DUMP_TAG_PTR(tag) \
{			\
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype);	\
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env);	\
}

#define	DUMP_FLAGS(flags) dump_flags(flags);
#define	DISPLAY_STATE()	display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */
/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids,
	struct ether_addr *macaddr)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port, **prev_port;
	int		i;

	D1(vswp, "%s: enter : port %d", __func__, p_instance);

	/* port already exists? */
	READ_ENTER(&plist->lockrw);
	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			DWARN(vswp, "%s: port instance %d already attached",
			    __func__, p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}
	}
	RW_EXIT(&plist->lockrw);

	port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
	port->p_vswp = vswp;
	port->p_instance = p_instance;
	port->p_ldclist.num_ldcs = 0;
	port->p_ldclist.head = NULL;
	port->addr_set = VSW_ADDR_UNSET;

	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);

	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
	port->state = VSW_PORT_INIT;

	if (nids > VSW_PORT_MAX_LDCS) {
		D2(vswp, "%s: using first %d of %d ldc ids",
		    __func__, VSW_PORT_MAX_LDCS, nids);
		nids = VSW_PORT_MAX_LDCS;
	}

	D2(vswp, "%s: %d nids", __func__, nids);
	for (i = 0; i < nids; i++) {
		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
			DERR(vswp, "%s: ldc_attach failed", __func__);

			rw_destroy(&port->p_ldclist.lockrw);

			cv_destroy(&port->state_cv);
			mutex_destroy(&port->state_lock);

			mutex_destroy(&port->tx_lock);
			mutex_destroy(&port->mca_lock);
			kmem_free(port, sizeof (vsw_port_t));
			return (1);
		}
	}

	ether_copy(macaddr, &port->p_macaddr);

	if (vswp->switching_setup_done == B_TRUE) {
		/*
		 * If the underlying physical device has been setup,
		 * program the mac address of this port in it.
		 * Otherwise, port macaddr will be set after the physical
		 * device is successfully setup by the timeout handler.
		 */
		mutex_enter(&vswp->hw_lock);
		(void) vsw_set_hw(vswp, port, VSW_VNETPORT);
		mutex_exit(&vswp->hw_lock);
	}

	WRITE_ENTER(&plist->lockrw);

	/* create the fdb entry for this port/mac address */
	(void) vsw_add_fdb(vswp, port);

	/* link it into the list of ports for this vsw instance */
	prev_port = (vsw_port_t **)(&plist->head);
	port->p_next = *prev_port;
	*prev_port = port;
	plist->num_ports++;

	RW_EXIT(&plist->lockrw);

	/*
	 * Initialise the port and any ldc's under it.
	 */
	(void) vsw_init_ldcs(port);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
	vsw_port_t	*port = NULL;
	vsw_port_list_t	*plist = &vswp->plist;

	D1(vswp, "%s: enter: port id %d", __func__, p_instance);

	WRITE_ENTER(&plist->lockrw);

	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	if (vsw_plist_del_node(vswp, port)) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	/* Remove the fdb entry for this port/mac address */
	(void) vsw_del_fdb(vswp, port);

	/* Remove any multicast addresses. */
	vsw_del_mcst_port(port);

	/*
	 * No longer need to hold writer lock on port list now
	 * that we have unlinked the target port from the list.
	 */
	RW_EXIT(&plist->lockrw);

	/* Remove address if it was programmed into HW. */
	mutex_enter(&vswp->hw_lock);

	/*
	 * Port's address may not have been set in hardware. This could
	 * happen if the underlying physical device is not yet available and
	 * vsw_setup_switching_timeout() may be in progress.
	 * We remove its addr from hardware only if it has been set before.
	 */
	if (port->addr_set != VSW_ADDR_UNSET)
		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);

	if (vswp->recfg_reqd)
		vsw_reconfig_hw(vswp);

	mutex_exit(&vswp->hw_lock);

	if (vsw_port_delete(port)) {
		return (1);
	}

	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
	return (0);
}

/*
 * Detach all active ports.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_detach_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port = NULL;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&plist->lockrw);

	while ((port = plist->head) != NULL) {
		if (vsw_plist_del_node(vswp, port)) {
			DERR(vswp, "%s: Error deleting port %d"
			    " from port list", __func__, port->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}

		/* Remove address if it was programmed into HW. */
		mutex_enter(&vswp->hw_lock);
		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);
		mutex_exit(&vswp->hw_lock);

		/* Remove the fdb entry for this port/mac address */
		(void) vsw_del_fdb(vswp, port);

		/* Remove any multicast addresses. */
		vsw_del_mcst_port(port);

		/*
		 * No longer need to hold the lock on the port list
		 * now that we have unlinked the target port from the
		 * list.
		 */
		RW_EXIT(&plist->lockrw);
		if (vsw_port_delete(port)) {
			DERR(vswp, "%s: Error deleting port %d",
			    __func__, port->p_instance);
			return (1);
		}
		WRITE_ENTER(&plist->lockrw);
	}
	RW_EXIT(&plist->lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Delete the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_delete(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

	(void) vsw_uninit_ldcs(port);

	/*
	 * Wait for any pending ctrl msg tasks which reference this
	 * port to finish.
	 */
	if (vsw_drain_port_taskq(port))
		return (1);

	/*
	 * Wait for port reference count to hit zero.
	 */
	while (port->ref_cnt != 0) {
		delay(drv_usectohz(VSW_PORT_REF_DELAY));
	}

	/*
	 * Wait for any active callbacks to finish.
	 */
	if (vsw_drain_ldcs(port))
		return (1);

	ldcl = &port->p_ldclist;
	WRITE_ENTER(&ldcl->lockrw);
	while (ldcl->num_ldcs > 0) {
		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {
			cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld",
			    vswp->instance, ldcl->head->ldc_id);
			RW_EXIT(&ldcl->lockrw);
			return (1);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	rw_destroy(&port->p_ldclist.lockrw);

	mutex_destroy(&port->mca_lock);
	mutex_destroy(&port->tx_lock);
	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	kmem_free(port, sizeof (vsw_port_t));

	D1(vswp, "%s: exit", __func__);

	return (0);
}
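
/*
 * A note on the teardown ordering in vsw_port_delete() above: channel
 * callbacks are disabled first (vsw_uninit_ldcs), then pending taskq
 * work and the port reference count are drained, and only then are any
 * in-flight callbacks drained and the channels detached. Freeing the
 * port structure before these drains complete could leave a callback
 * or task referencing freed memory.
 */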

/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp = NULL;
	ldc_attr_t	attr;
	ldc_status_t	istatus;
	int		status = DDI_FAILURE;
	int		rv;
	char		kname[MAXNAMELEN];
	enum		{ PROG_init = 0x0, PROG_mblks = 0x1,
			    PROG_callback = 0x2, PROG_rx_thread = 0x4,
			    PROG_tx_thread = 0x8}
			progress;

	progress = PROG_init;

	D1(vswp, "%s: enter", __func__);

	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		DERR(vswp, "%s: kmem_zalloc failed", __func__);
		return (1);
	}
	ldcp->ldc_id = ldc_id;

	/* Allocate pools of receive mblks */
	rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS,
	    vsw_mblk_size1, vsw_mblk_size2, vsw_mblk_size3,
	    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3);
	if (rv) {
		DWARN(vswp, "%s: unable to create free mblk pools for"
		    " channel %ld (rv %d)", __func__, ldc_id, rv);
		kmem_free(ldcp, sizeof (vsw_ldc_t));
		return (1);
	}

	progress |= PROG_mblks;

	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);
	rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL);
	rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL);

	/* required for handshake with peer */
	ldcp->local_session = (uint64_t)ddi_get_lbolt();
	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hss_id = 1;	/* Initial handshake session id */

	/* only set for outbound lane, inbound set by peer */
	vsw_set_lane_attr(vswp, &ldcp->lane_out);

	attr.devclass = LDC_DEV_NT_SVC;
	attr.instance = ddi_get_instance(vswp->dip);
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = VSW_LDC_MTU;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
		    __func__, ldc_id, status);
		goto ldc_attach_fail;
	}

	if (vsw_ldc_rxthr_enabled) {
		ldcp->rx_thr_flags = 0;

		mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		progress |= PROG_rx_thread;
		if (ldcp->rx_thread == NULL) {
			DWARN(vswp, "%s(%lld): Failed to create worker thread",
			    __func__, ldc_id);
			goto ldc_attach_fail;
		}
	}

	if (vsw_ldc_txthr_enabled) {
		ldcp->tx_thr_flags = 0;
		ldcp->tx_mhead = ldcp->tx_mtail = NULL;

		mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		progress |= PROG_tx_thread;
		if (ldcp->tx_thread == NULL) {
			DWARN(vswp, "%s(%lld): Failed to create worker thread",
			    __func__, ldc_id);
			goto ldc_attach_fail;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
		    __func__, ldc_id, status);
		(void) ldc_fini(ldcp->ldc_handle);
		goto ldc_attach_fail;
	}

	progress |= PROG_callback;

	mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: ldc_status failed", __func__);
		mutex_destroy(&ldcp->status_lock);
		goto ldc_attach_fail;
	}

	ldcp->ldc_status = istatus;
	ldcp->ldc_port = port;
	ldcp->ldc_vswp = vswp;

	(void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance,
	    kname, &ldcp->ldc_stats);
	if (ldcp->ksp == NULL) {
		DERR(vswp, "%s: kstats setup failed", __func__);
		goto ldc_attach_fail;
	}

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldcl->lockrw);
	ldcp->ldc_next = ldcl->head;
	ldcl->head = ldcp;
	ldcl->num_ldcs++;
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);

ldc_attach_fail:

	if (progress & PROG_callback) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
	}

	if (progress & PROG_rx_thread) {
		if (ldcp->rx_thread != NULL) {
			vsw_stop_rx_thread(ldcp);
		}
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}

	if (progress & PROG_tx_thread) {
		if (ldcp->tx_thread != NULL) {
			vsw_stop_tx_thread(ldcp);
		}
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
	}
	if (ldcp->ksp != NULL) {
		vgen_destroy_kstats(ldcp->ksp);
	}
	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	mutex_destroy(&ldcp->drain_cv_lock);

	cv_destroy(&ldcp->drain_cv);

	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	if (progress & PROG_mblks) {
		vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);
	}
	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (1);
}
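
/*
 * vsw_ldc_attach() above uses the PROG_* bitmask so that its failure
 * path only unwinds the resources that were actually set up: each
 * allocation step sets its bit before the next step can fail, and
 * ldc_attach_fail tears down exactly the bits that are set.
 */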

/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_t	*ldcp, *prev_ldcp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	int		rv;

	prev_ldcp = NULL;
	for (ldcp = ldcl->head; ldcp != NULL; ldcp = ldcp->ldc_next) {
		if (ldcp->ldc_id == ldc_id) {
			break;
		}
		prev_ldcp = ldcp;
	}

	/* specified ldc id not found */
	if (ldcp == NULL) {
		DERR(vswp, "%s: ldcp = NULL", __func__);
		return (1);
	}

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/* Stop the receive thread */
	if (ldcp->rx_thread != NULL) {
		vsw_stop_rx_thread(ldcp);
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}

	/* Stop the tx thread */
	if (ldcp->tx_thread != NULL) {
		vsw_stop_tx_thread(ldcp);
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
		if (ldcp->tx_mhead != NULL) {
			freemsgchain(ldcp->tx_mhead);
			ldcp->tx_mhead = ldcp->tx_mtail = NULL;
		}
	}

	/* Destroy kstats */
	vgen_destroy_kstats(ldcp->ksp);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
	 */
	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	/*
	 * If the close fails we are in serious trouble, as we won't
	 * be able to delete the parent port.
	 */
	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
		DERR(vswp, "%s: error %d closing channel %lld",
		    __func__, rv, ldcp->ldc_id);
		return (1);
	}

	(void) ldc_fini(ldcp->ldc_handle);

	ldcp->ldc_status = LDC_INIT;
	ldcp->ldc_handle = NULL;
	ldcp->ldc_vswp = NULL;

	/*
	 * Most likely some mblks are still in use and
	 * have not been returned to the pool. These mblks are
	 * added to the pool that is maintained in the device instance.
	 * Another attempt will be made to destroy the pool
	 * when the device detaches.
	 */
	vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);

	/* unlink it from the list */
	if (prev_ldcp == NULL)
		ldcl->head = ldcp->ldc_next;
	else
		prev_ldcp->ldc_next = ldcp->ldc_next;
	ldcl->num_ldcs--;

	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	cv_destroy(&ldcp->drain_cv);
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->status_lock);
	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (0);
}

/*
 * Open and attempt to bring up the channel. Note that channel
 * can only be brought up if peer has also opened channel.
 *
 * Returns 0 if can open and bring up channel, otherwise
 * returns 1.
 */
static int
vsw_ldc_init(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	ldc_status_t	istatus = 0;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	LDC_ENTER_LOCK(ldcp);

	/* don't start at 0 in case clients don't like that */
	ldcp->next_ident = 1;

	rv = ldc_open(ldcp->ldc_handle);
	if (rv != 0) {
		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
		    __func__, ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
		    __func__, ldcp->ldc_id, istatus);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = istatus;
	mutex_exit(&ldcp->status_lock);

	rv = ldc_up(ldcp->ldc_handle);
	if (rv != 0) {
		/*
		 * Not a fatal error for ldc_up() to fail, as peer
		 * end point may simply not be ready yet.
		 */
		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
		    ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	/*
	 * ldc_up() call is non-blocking so need to explicitly
	 * check channel status to see if in fact the channel
	 * is UP.
	 */
	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	if (ldcp->ldc_status == LDC_UP) {
		D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
		    ldcp->ldc_id, istatus);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		return (0);
	}

	mutex_exit(&ldcp->status_lock);
	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/* disable callbacks on the channel */
static int
vsw_ldc_uninit(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	int	rv;

	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);

	LDC_ENTER_LOCK(ldcp);

	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		DERR(vswp, "vsw_ldc_uninit(%lld): error disabling "
		    "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = LDC_INIT;
	mutex_exit(&ldcp->status_lock);

	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);

	return (0);
}

/* Bring up all the channels under the specified port. */
static int
vsw_init_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_init(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	return (0);
}

/* Disable callbacks on all the channels under the specified port. */
static int
vsw_uninit_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	D1(NULL, "vsw_uninit_ldcs: enter\n");

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_uninit(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	D1(NULL, "vsw_uninit_ldcs: exit\n");

	return (0);
}
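
/*
 * Note that the drain protocol below is cooperative: vsw_ldc_cb()
 * signals drain_cv on its way out whenever drain_state is
 * VSW_LDC_DRAINING, so the cv_timedwait() loop in vsw_drain_ldcs() is
 * woken either by an exiting callback or by the timeout expiring.
 */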
/*
 * Wait until the callback(s) associated with the ldcs under the specified
 * port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 *
 * A short explanation of what we are doing below:
 *
 * The simplest approach would be to have a reference counter in
 * the ldc structure which is incremented/decremented by the callbacks as
 * they use the channel. The drain function could then simply disable any
 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
 * there is a tiny window here - before the callback is able to get the lock
 * on the channel it is interrupted and this function gets to execute. It
 * sees that the ref count is zero and believes it is free to delete the
 * associated data structures.
 *
 * We get around this by taking advantage of the fact that before the ldc
 * framework invokes a callback it sets a flag to indicate that there is a
 * callback active (or about to become active). If we attempt to
 * unregister a callback while this active flag is set then the unregister
 * will fail with EWOULDBLOCK.
 *
 * If the unregister fails we do a cv_timedwait. We will either be signaled
 * by the callback as it is exiting (note we have to wait a short period to
 * allow the callback to return fully to the ldc framework and it to clear
 * the active flag), or by the timer expiring. In either case we again attempt
 * the unregister. We repeat this until we can successfully unregister the
 * callback.
 *
 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
 * the case where the callback has finished but the ldc framework has not yet
 * cleared the active flag. In this case we would never get a cv_signal.
 */
static int
vsw_drain_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	READ_ENTER(&ldcl->lockrw);

	ldcp = ldcl->head;

	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		/*
		 * If we can unregister the channel callback then we
		 * know that there is no callback either running or
		 * scheduled to run for this channel so move on to next
		 * channel in the list.
		 */
		mutex_enter(&ldcp->drain_cv_lock);

		/* prompt active callbacks to quit */
		ldcp->drain_state = VSW_LDC_DRAINING;

		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
			D2(vswp, "%s: unreg callback for chan %ld", __func__,
			    ldcp->ldc_id);
			mutex_exit(&ldcp->drain_cv_lock);
			continue;
		} else {
			/*
			 * If we end up here we know that either 1) a callback
			 * is currently executing, 2) is about to start (i.e.
			 * the ldc framework has set the active flag but
			 * has not actually invoked the callback yet), or 3)
			 * has finished and has returned to the ldc framework
			 * but the ldc framework has not yet cleared the
			 * active bit.
			 *
			 * Wait for it to finish.
			 */
			while (ldc_unreg_callback(ldcp->ldc_handle)
			    == EWOULDBLOCK)
				(void) cv_timedwait(&ldcp->drain_cv,
				    &ldcp->drain_cv_lock, lbolt + hz);

			mutex_exit(&ldcp->drain_cv_lock);
			D2(vswp, "%s: unreg callback for chan %ld after "
			    "timeout", __func__, ldcp->ldc_id);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Wait until all tasks which reference this port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 */
static int
vsw_drain_port_taskq(vsw_port_t *port)
{
	vsw_t	*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Mark the port as in the process of being detached, and
	 * dispatch a marker task to the queue so we know when all
	 * relevant tasks have completed.
	 */
	mutex_enter(&port->state_lock);
	port->state = VSW_PORT_DETACHING;

	if ((vswp->taskq_p == NULL) ||
	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
	    port, DDI_NOSLEEP) != DDI_SUCCESS)) {
		DERR(vswp, "%s: unable to dispatch marker task",
		    __func__);
		mutex_exit(&port->state_lock);
		return (1);
	}

	/*
	 * Wait for the marker task to finish.
	 */
	while (port->state != VSW_PORT_DETACHABLE)
		cv_wait(&port->state_cv, &port->state_lock);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Marker task dispatched to the taskq; once it runs, all tasks queued
 * before it (which may reference the port) must have completed.
 */
static void
vsw_marker_task(void *arg)
{
	vsw_port_t	*port = arg;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&port->state_lock);

	/*
	 * No further tasks should be dispatched which reference
	 * this port so ok to mark it as safe to detach.
	 */
	port->state = VSW_PORT_DETACHABLE;

	cv_signal(&port->state_cv);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);
}

/* Look up a port by instance number; callers hold plist->lockrw. */
vsw_port_t *
vsw_lookup_port(vsw_t *vswp, int p_instance)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port;

	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			D2(vswp, "vsw_lookup_port: found p_instance\n");
			return (port);
		}
	}

	return (NULL);
}

/*
 * Search for and remove the specified port from the port
 * list. Returns 0 if able to locate and remove port, otherwise
 * returns 1.
 */
static int
vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*curr_p, *prev_p;

	if (plist->head == NULL)
		return (1);

	curr_p = prev_p = plist->head;

	while (curr_p != NULL) {
		if (curr_p == port) {
			if (prev_p == curr_p) {
				plist->head = curr_p->p_next;
			} else {
				prev_p->p_next = curr_p->p_next;
			}
			plist->num_ports--;
			break;
		} else {
			prev_p = curr_p;
			curr_p = curr_p->p_next;
		}
	}
	return (curr_p == NULL ? 1 : 0);
}
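
/*
 * A note on context: vsw_ldc_cb() below is invoked from the LDC
 * framework's callback context, so it must not block for long. It holds
 * ldc_cblock for the duration of the callback; the rx worker thread and
 * the drain logic coordinate with it through rx_thr_lock and the
 * drain_cv/drain_cv_lock pair respectively.
 */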
/*
 * Interrupt handler for ldc messages.
 */
static uint_t
vsw_ldc_cb(uint64_t event, caddr_t arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	mutex_enter(&ldcp->ldc_cblock);
	ldcp->ldc_stats.callbacks++;

	mutex_enter(&ldcp->status_lock);
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		mutex_exit(&ldcp->status_lock);
		mutex_exit(&ldcp->ldc_cblock);
		return (LDC_SUCCESS);
	}
	mutex_exit(&ldcp->status_lock);

	if (event & LDC_EVT_UP) {
		/*
		 * Channel has come up.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		/*
		 * Data available for reading.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) data READ",
		    __func__, ldcp->ldc_id, event);

		if (ldcp->rx_thread != NULL) {
			/*
			 * If the receive thread is enabled, then
			 * wakeup the receive thread to process the
			 * LDC messages.
			 */
			mutex_exit(&ldcp->ldc_cblock);
			mutex_enter(&ldcp->rx_thr_lock);
			if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) {
				ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD;
				cv_signal(&ldcp->rx_thr_cv);
			}
			mutex_exit(&ldcp->rx_thr_lock);
			mutex_enter(&ldcp->ldc_cblock);
		} else {
			vsw_process_pkt(ldcp);
		}

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		goto vsw_cb_exit;
	}

	if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) {
		D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
	}

	/*
	 * Catch either LDC_EVT_WRITE which we don't support or any
	 * unknown event.
	 */
	if (event &
	    ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) {
		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
	}

vsw_cb_exit:
	mutex_exit(&ldcp->ldc_cblock);

	/*
	 * Let the drain function know we are finishing if it
	 * is waiting.
	 */
	mutex_enter(&ldcp->drain_cv_lock);
	if (ldcp->drain_state == VSW_LDC_DRAINING)
		cv_signal(&ldcp->drain_cv);
	mutex_exit(&ldcp->drain_cv_lock);

	return (LDC_SUCCESS);
}

/*
 * Reinitialise data structures associated with the channel.
 */
static void
vsw_ldc_reinit(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_port_t	*port;
	vsw_ldc_list_t	*ldcl;

	D1(vswp, "%s: enter", __func__);

	port = ldcp->ldc_port;
	ldcl = &port->p_ldclist;

	READ_ENTER(&ldcl->lockrw);

	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
	    ldcp->lane_in.lstate, ldcp->lane_out.lstate);

	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);
	RW_EXIT(&ldcl->lockrw);

	ldcp->lane_in.lstate = 0;
	ldcp->lane_out.lstate = 0;

	/*
	 * Remove parent port from any multicast groups
	 * it may have registered with. Client must resend
	 * multicast add command after handshake completes.
	 */
	(void) vsw_del_fdb(vswp, port);

	vsw_del_mcst_port(port);

	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hcnt = 0;
	ldcp->hphase = VSW_MILESTONE0;
	ldcp->tx_failures = 0;

	D1(vswp, "%s: exit", __func__);
}

/*
 * Process a connection event.
 *
 * Note - care must be taken to ensure that this function is
 * not called with the dlistrw lock held.
 */
static void
vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_conn_evt_t	*conn = NULL;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Check if either a reset or restart event is pending
	 * or in progress. If so just return.
	 *
	 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT
	 * being received by the callback handler, or a ECONNRESET error
	 * code being returned from a ldc_read() or ldc_write() call.
	 *
	 * A VSW_CONN_RESTART event occurs when some error checking code
	 * decides that there is a problem with data from the channel,
	 * and that the handshake should be restarted.
	 */
	if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) &&
	    (ldstub((uint8_t *)&ldcp->reset_active)))
		return;

	/*
	 * If it is an LDC_UP event we first check the recorded
	 * state of the channel. If this is UP then we know that
	 * the channel moving to the UP state has already been dealt
	 * with and don't need to dispatch a new task.
	 *
	 * The reason for this check is that when we do a ldc_up(),
	 * depending on the state of the peer, we may or may not get
	 * a LDC_UP event. As we can't depend on getting a LDC_UP evt
	 * every time we do ldc_up() we explicitly check the channel
	 * status to see if it has come up (ldc_up() is asynch and will
	 * complete at some undefined time), and take the appropriate
	 * action.
	 *
	 * The flip side of this is that we may get a LDC_UP event
	 * when we have already seen that the channel is up and have
	 * dealt with that.
	 */
	mutex_enter(&ldcp->status_lock);
	if (evt == VSW_CONN_UP) {
		if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) {
			mutex_exit(&ldcp->status_lock);
			return;
		}
	}
	mutex_exit(&ldcp->status_lock);

	/*
	 * The transaction group id allows us to identify and discard
	 * any tasks which are still pending on the taskq and refer
	 * to the handshake session we are about to restart or reset.
	 * These stale messages no longer have any real meaning.
	 */
	(void) atomic_inc_32(&ldcp->hss_id);

	ASSERT(vswp->taskq_p != NULL);

	if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) {
		cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for"
		    " connection event", vswp->instance);
		goto err_exit;
	}

	conn->evt = evt;
	conn->ldcp = ldcp;

	if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn,
	    DDI_NOSLEEP) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task",
		    vswp->instance);

		kmem_free(conn, sizeof (vsw_conn_evt_t));
		goto err_exit;
	}

	D1(vswp, "%s: exit", __func__);
	return;

err_exit:
	/*
	 * Have most likely failed due to memory shortage. Clear the flag so
	 * that future requests will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;
}

/*
 * Deal with events relating to a connection. Invoked from a taskq.
 */
static void
vsw_conn_task(void *arg)
{
	vsw_conn_evt_t	*conn = (vsw_conn_evt_t *)arg;
	vsw_ldc_t	*ldcp = NULL;
	vsw_t		*vswp = NULL;
	uint16_t	evt;
	ldc_status_t	curr_status;

	ldcp = conn->ldcp;
	evt = conn->evt;
	vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: enter", __func__);

	/* can safely free now have copied out data */
	kmem_free(conn, sizeof (vsw_conn_evt_t));

	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	/*
	 * If we wish to restart the handshake on this channel, then if
	 * the channel is UP we bring it DOWN to flush the underlying
	 * ldc queue.
	 */
	if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP))
		(void) ldc_down(ldcp->ldc_handle);

	/*
	 * re-init all the associated data structures.
	 */
	vsw_ldc_reinit(ldcp);

	/*
	 * Bring the channel back up (note it does no harm to
	 * do this even if the channel is already UP, it just
	 * becomes effectively a no-op).
	 */
	(void) ldc_up(ldcp->ldc_handle);

	/*
	 * Check if channel is now UP. This will only happen if
	 * peer has also done a ldc_up().
	 */
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	ldcp->ldc_status = curr_status;

	/* channel UP so restart handshake by sending version info */
	if (curr_status == LDC_UP) {
		if (ldcp->hcnt++ > vsw_num_handshakes) {
			cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted"
			    " handshake attempts (%d) on channel %ld",
			    vswp->instance, ldcp->hcnt, ldcp->ldc_id);
			mutex_exit(&ldcp->status_lock);
			return;
		}

		if (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
		    DDI_NOSLEEP) != DDI_SUCCESS) {
			cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task",
			    vswp->instance);

			/*
			 * Don't count as valid restart attempt if couldn't
			 * send version msg.
			 */
			if (ldcp->hcnt > 0)
				ldcp->hcnt--;
		}
	}

	/*
	 * Mark that the process is complete by clearing the flag.
	 *
	 * Note it is possible that the taskq dispatch above may have failed,
	 * most likely due to memory shortage. We still clear the flag so
	 * future attempts will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;

	mutex_exit(&ldcp->status_lock);

	D1(vswp, "%s: exit", __func__);
}
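
/*
 * The reset_active byte acts as a simple lock on reset handling:
 * vsw_process_conn_evt() claims it atomically with ldstub() before
 * dispatching a reset/restart task, and vsw_conn_task() (or the
 * dispatch error path) clears it once the event has been fully
 * handled, so overlapping resets collapse into a single restart.
 */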
/*
 * Returns 0 if it was legal for the event signified by flag to have
 * occurred at the time it did. Otherwise returns 1.
 */
int
vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	uint64_t	state;
	uint64_t	phase;

	if (dir == INBOUND)
		state = ldcp->lane_in.lstate;
	else
		state = ldcp->lane_out.lstate;

	phase = ldcp->hphase;

	switch (flag) {
	case VSW_VER_INFO_RECV:
		if (phase > VSW_MILESTONE0) {
			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_VER_ACK_RECV:
	case VSW_VER_NACK_RECV:
		if (!(state & VSW_VER_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or "
			    "VER_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_VER_INFO_SENT;
		break;

	case VSW_ATTR_INFO_RECV:
		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_ATTR_ACK_RECV:
	case VSW_ATTR_NACK_RECV:
		if (!(state & VSW_ATTR_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
			    " or ATTR_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_ATTR_INFO_SENT;
		break;

	case VSW_DRING_INFO_RECV:
		if (phase < VSW_MILESTONE1) {
			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_DRING_ACK_RECV:
	case VSW_DRING_NACK_RECV:
		if (!(state & VSW_DRING_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK"
			    " or DRING_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_DRING_INFO_SENT;
		break;

	case VSW_RDX_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_RDX_ACK_RECV:
	case VSW_RDX_NACK_RECV:
		if (!(state & VSW_RDX_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or "
			    "RDX_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_RDX_INFO_SENT;
		break;

	case VSW_MCST_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	default:
		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
		    ldcp->ldc_id, flag);
		return (1);
	}

	if (dir == INBOUND)
		ldcp->lane_in.lstate = state;
	else
		ldcp->lane_out.lstate = state;

	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);

	return (0);
}

void
vsw_next_milestone(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s (chan
 %lld): enter (phase %ld)", __func__,
	    ldcp->ldc_id, ldcp->hphase);

	DUMP_FLAGS(ldcp->lane_in.lstate);
	DUMP_FLAGS(ldcp->lane_out.lstate);

	switch (ldcp->hphase) {

	case VSW_MILESTONE0:
		/*
		 * If we haven't started to handshake with our peer,
		 * start to do so now.
		 */
		if (ldcp->lane_out.lstate == 0) {
			D2(vswp, "%s: (chan %lld) starting handshake "
			    "with peer", __func__, ldcp->ldc_id);
			vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		}

		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated version info.
		 */
		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
		    (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 0",
			    __func__, ldcp->ldc_id);

			/*
			 * Next milestone is passed when attribute
			 * information has been successfully exchanged.
			 */
			ldcp->hphase = VSW_MILESTONE1;
			vsw_send_attr(ldcp);

		}
		break;

	case VSW_MILESTONE1:
		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated attribute information.
		 */
		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {

			ldcp->hphase = VSW_MILESTONE2;

			/*
			 * If the peer device has said it wishes to
			 * use descriptor rings then we send it our ring
			 * info, otherwise we just set up a private ring
			 * which we use as an internal buffer.
			 */
			if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE)
				vsw_send_dring_info(ldcp);
		}
		break;

	case VSW_MILESTONE2:
		/*
		 * If peer has indicated in its attribute message that
		 * it wishes to use descriptor rings then the only way
		 * to pass this milestone is for us to have received
		 * valid dring info.
		 *
		 * If peer is not using descriptor rings then just fall
		 * through.
		 */
		if ((ldcp->lane_in.xfer_mode == VIO_DRING_MODE) &&
		    (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT)))
			break;

		D2(vswp, "%s: (chan %lld) leaving milestone 2",
		    __func__, ldcp->ldc_id);

		ldcp->hphase = VSW_MILESTONE3;
		vsw_send_rdx(ldcp);
		break;

	case VSW_MILESTONE3:
		/*
		 * Pass this milestone when all parameters have been
		 * successfully exchanged and RDX sent in both directions.
		 *
		 * Mark outbound lane as available to transmit data.
		 */
		if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) &&
		    (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 3",
			    __func__, ldcp->ldc_id);
			D2(vswp, "%s: ** handshake complete (0x%llx : "
			    "0x%llx) **", __func__, ldcp->lane_in.lstate,
			    ldcp->lane_out.lstate);
			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
			ldcp->hphase = VSW_MILESTONE4;
			ldcp->hcnt = 0;
			DISPLAY_STATE();
		} else {
			D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)",
			    __func__, ldcp->lane_in.lstate,
			    ldcp->lane_out.lstate);
		}
		break;

	case VSW_MILESTONE4:
		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
		    ldcp->ldc_id);
		break;

	default:
		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
		    ldcp->ldc_id, ldcp->hphase);
	}

	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
	    ldcp->hphase);
}
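
/*
 * For reference, the handshake above therefore progresses:
 *
 *	MILESTONE0: version negotiation	(VER_INFO/ACK exchanged)
 *	MILESTONE1: attribute exchange	(ATTR_INFO/ACK exchanged)
 *	MILESTONE2: dring registration	(only if peer uses dring mode)
 *	MILESTONE3: ready-to-receive	(RDX sent/acked in both directions)
 *	MILESTONE4: handshake complete, outbound lane active
 */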

/*
 * Check if major version is supported.
 *
 * Returns 0 if it finds a supported major number, and if necessary
 * adjusts the minor field.
 *
 * Returns 1 if it can't match the major number exactly. Sets major/minor
 * to the next lowest supported values, or to zero if no other values are
 * possible.
 */
static int
vsw_supported_version(vio_ver_msg_t *vp)
{
	int	i;

	D1(NULL, "vsw_supported_version: enter");

	for (i = 0; i < VSW_NUM_VER; i++) {
		if (vsw_versions[i].ver_major == vp->ver_major) {
			/*
			 * Matching major version found. Update
			 * minor number if necessary.
			 */
			if (vp->ver_minor > vsw_versions[i].ver_minor) {
				D2(NULL, "%s: adjusting minor value from %d "
				    "to %d", __func__, vp->ver_minor,
				    vsw_versions[i].ver_minor);
				vp->ver_minor = vsw_versions[i].ver_minor;
			}

			return (0);
		}

		if (vsw_versions[i].ver_major < vp->ver_major) {
			vp->ver_major = vsw_versions[i].ver_major;
			if (vp->ver_minor > vsw_versions[i].ver_minor) {
				D2(NULL, "%s: adjusting minor value from %d "
				    "to %d", __func__, vp->ver_minor,
				    vsw_versions[i].ver_minor);
				vp->ver_minor = vsw_versions[i].ver_minor;
			}
			return (1);
		}
	}

	/* No match was possible, zero out fields */
	vp->ver_major = 0;
	vp->ver_minor = 0;

	D1(NULL, "vsw_supported_version: exit");

	return (1);
}
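
/*
 * Worked example with the table above (vsw_versions[] = {{1, 0}}):
 * a peer proposing 1.5 is accepted with the minor adjusted down to 1.0
 * (return 0); a peer proposing 2.0 gets 1.0 back with a return of 1,
 * prompting a NACK carrying the highest version we do support; a peer
 * proposing 0.9 matches nothing, so both fields are zeroed.
 */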

/*
 * Main routine for processing messages received over LDC.
 */
static void
vsw_process_pkt(void *arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	size_t		msglen;
	vio_msg_tag_t	tag;
	def_msg_t	dmsg;
	int		rv = 0;

	D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	ASSERT(MUTEX_HELD(&ldcp->ldc_cblock));

	/*
	 * If channel is up read messages until channel is empty.
	 */
	do {
		msglen = sizeof (dmsg);
		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&dmsg, &msglen);

		if (rv != 0) {
			DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n",
			    __func__, ldcp->ldc_id, rv, msglen);
		}

		/* channel has been reset */
		if (rv == ECONNRESET) {
			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
			break;
		}

		if (msglen == 0) {
			D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__,
			    ldcp->ldc_id);
			break;
		}

		D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__,
		    ldcp->ldc_id, msglen);

		/*
		 * Figure out what sort of packet we have gotten by
		 * examining the msg tag, and then switch it appropriately.
		 */
		bcopy(&dmsg, &tag, sizeof (vio_msg_tag_t));

		switch (tag.vio_msgtype) {
		case VIO_TYPE_CTRL:
			vsw_dispatch_ctrl_task(ldcp, &dmsg, tag);
			break;
		case VIO_TYPE_DATA:
			vsw_process_data_pkt(ldcp, &dmsg, tag);
			break;
		case VIO_TYPE_ERR:
			vsw_process_err_pkt(ldcp, &dmsg, tag);
			break;
		default:
			DERR(vswp, "%s: Unknown tag(%lx) id(%lx)\n",
			    __func__, tag.vio_msgtype, ldcp->ldc_id);
			break;
		}
	} while (msglen);

	D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id);
}

/*
 * Dispatch a task to process a VIO control message.
 */
static void
vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t tag)
{
	vsw_ctrl_task_t	*ctaskp = NULL;
	vsw_port_t	*port = ldcp->ldc_port;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * We need to handle RDX ACK messages in-band as once they
	 * are exchanged it is possible that we will get an
	 * immediate (legitimate) data packet.
	 */
	if ((tag.vio_subtype_env == VIO_RDX) &&
	    (tag.vio_subtype == VIO_SUBTYPE_ACK)) {

		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV))
			return;

		ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV;
		D2(vswp, "%s (%ld) handling RDX_ACK in place "
		    "(ostate 0x%llx : hphase %d)", __func__,
		    ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase);
		vsw_next_milestone(ldcp);
		return;
	}

	ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP);

	if (ctaskp == NULL) {
		DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__);
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		return;
	}

	ctaskp->ldcp = ldcp;
	bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t));
	ctaskp->hss_id = ldcp->hss_id;

	/*
	 * Dispatch task to processing taskq if port is not in
	 * the process of being detached.
	 */
	mutex_enter(&port->state_lock);
	if (port->state == VSW_PORT_INIT) {
		if ((vswp->taskq_p == NULL) ||
		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt,
		    ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) {
			DERR(vswp, "%s: unable to dispatch task to taskq",
			    __func__);
			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
			mutex_exit(&port->state_lock);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return;
		}
	} else {
		DWARN(vswp, "%s: port %d detaching, not dispatching "
		    "task", __func__, port->p_instance);
		kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
	}

	mutex_exit(&port->state_lock);

	D2(vswp, "%s: dispatched task to taskq for chan %d", __func__,
	    ldcp->ldc_id);
	D1(vswp, "%s: exit", __func__);
}
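
/*
 * Control messages are copied out of the LDC read buffer and handled on
 * the vsw taskq rather than in the callback itself. The hss_id stamped
 * into each task above is what allows vsw_process_ctrl_pkt() below to
 * recognise and drop tasks that were queued before a handshake restart.
 */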
	 */
	switch (env) {
	case VIO_VER_INFO:
		vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp);
		break;
	case VIO_DRING_REG:
		vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp);
		break;
	case VIO_DRING_UNREG:
		vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp);
		break;
	case VIO_ATTR_INFO:
		vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp);
		break;
	case VNET_MCAST_INFO:
		vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp);
		break;
	case VIO_RDX:
		vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp);
		break;
	default:
		DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env);
	}

	kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * Version negotiation. We can end up here either because our peer
 * has responded to a handshake message we have sent it, or because our
 * peer has initiated a handshake with us. If it is the former, the
 * message can only be an ACK or a NACK; if it is the latter, it can
 * only be an INFO.
 *
 * If it is an ACK we move to the next stage of the handshake, namely
 * attribute exchange. If it is a NACK we see if we can specify another
 * version; if we can't, we stop.
 *
 * If it is an INFO we reset all params associated with communication
 * in that direction over this channel (remember, the connection is
 * essentially two independent simplex channels).
 */
void
vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vio_ver_msg_t	*ver_pkt;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a ctrl/version packet so
	 * cast it into the correct structure.
	 */
	ver_pkt = (vio_ver_msg_t *)pkt;

	switch (ver_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n");

		/*
		 * Record the session id, which we will use from now
		 * until we see another VER_INFO msg. Even then the
		 * session id in most cases will be unchanged, except
		 * if the channel was reset.
		 */
		if ((ldcp->session_status & VSW_PEER_SESSION) &&
		    (ldcp->peer_session != ver_pkt->tag.vio_sid)) {
			DERR(vswp, "%s: updating session id for chan %lld "
			    "from %llx to %llx", __func__, ldcp->ldc_id,
			    ldcp->peer_session, ver_pkt->tag.vio_sid);
		}

		ldcp->peer_session = ver_pkt->tag.vio_sid;
		ldcp->session_status |= VSW_PEER_SESSION;

		/* Legal message at this time? */
		if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV))
			return;

		/*
		 * First check the device class. Currently we only expect
		 * to be talking to a network device. In the future we may
		 * also talk to another switch.
		 */
		if (ver_pkt->dev_class != VDEV_NETWORK) {
			DERR(vswp, "%s: illegal device class %d", __func__,
			    ver_pkt->dev_class);

			ver_pkt->tag.vio_sid = ldcp->local_session;
			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);

			(void) vsw_send_msg(ldcp, (void *)ver_pkt,
			    sizeof (vio_ver_msg_t), B_TRUE);

			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
			vsw_next_milestone(ldcp);
			return;
		} else {
			ldcp->dev_class = ver_pkt->dev_class;
		}

		/*
		 * Now check the version.
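		 *
		 * As an illustrative walk-through (version values assumed,
		 * not taken from the actual vsw_versions[] table): if the
		 * peer proposes 1.5 and our highest supported pairing is
		 * 1.3, vsw_supported_version() lowers the minor number and
		 * we ACK 1.3. If the peer proposes a major number we do
		 * not support at all, we NACK back the next lowest pairing
		 * we do support and let the peer retry with that.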
		 */
		if (vsw_supported_version(ver_pkt) == 0) {
			/*
			 * We support this major version, and possibly an
			 * adjusted minor version.
			 */

			D2(vswp, "%s: accepted ver %d:%d", __func__,
			    ver_pkt->ver_major, ver_pkt->ver_minor);

			/* Store accepted values */
			ldcp->lane_in.ver_major = ver_pkt->ver_major;
			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;

			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;

			ldcp->lane_in.lstate |= VSW_VER_ACK_SENT;
		} else {
			/*
			 * NACK back with the next lower major/minor
			 * pairing we support (if we don't support any
			 * lower versions then both will have been set
			 * to zero).
			 */

			D2(vswp, "%s: replying with ver %d:%d", __func__,
			    ver_pkt->ver_major, ver_pkt->ver_minor);

			/* Store updated values */
			ldcp->lane_in.ver_major = ver_pkt->ver_major;
			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;

			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
		}

		DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
		ver_pkt->tag.vio_sid = ldcp->local_session;
		(void) vsw_send_msg(ldcp, (void *)ver_pkt,
		    sizeof (vio_ver_msg_t), B_TRUE);

		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV))
			return;

		/* Store updated values */
		ldcp->lane_in.ver_major = ver_pkt->ver_major;
		ldcp->lane_in.ver_minor = ver_pkt->ver_minor;

		ldcp->lane_out.lstate |= VSW_VER_ACK_RECV;
		vsw_next_milestone(ldcp);

		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV))
			return;

		/*
		 * If our peer sent us a NACK with the ver fields set to
		 * zero then there is nothing more we can do. Otherwise see
		 * if we support either the version suggested, or a lesser
		 * one.
		 */
		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
			DERR(vswp, "%s: peer unable to negotiate any "
			    "further.", __func__);
			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
			vsw_next_milestone(ldcp);
			return;
		}

		/*
		 * Check to see if we support this major version or
		 * a lower one. If we don't, then maj/min will be set
		 * to zero.
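		 *
		 * For example (a sketch with an assumed supported table of
		 * {1.0}): a peer NACK carrying 1.2 is rewritten in place
		 * to 1.0 and we resend our INFO with that pairing, while a
		 * NACK carrying a major number below every table entry
		 * leaves no candidate, so both fields come back as zero
		 * and we give up.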
		 */
		(void) vsw_supported_version(ver_pkt);
		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
			/* Nothing more we can do */
			DERR(vswp, "%s: version negotiation failed.\n",
			    __func__);
			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
			vsw_next_milestone(ldcp);
		} else {
			/* found a supported major version */
			ldcp->lane_out.ver_major = ver_pkt->ver_major;
			ldcp->lane_out.ver_minor = ver_pkt->ver_minor;

			D2(vswp, "%s: resending with updated values (%x, %x)",
			    __func__, ver_pkt->ver_major, ver_pkt->ver_minor);

			ldcp->lane_out.lstate |= VSW_VER_INFO_SENT;
			ver_pkt->tag.vio_sid = ldcp->local_session;
			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;

			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);

			(void) vsw_send_msg(ldcp, (void *)ver_pkt,
			    sizeof (vio_ver_msg_t), B_TRUE);

			vsw_next_milestone(ldcp);
		}
		break;

	default:
		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
		    ver_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
}

/*
 * Process an attribute packet. We can end up here either because our peer
 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or because
 * our peer has sent us an attribute INFO message.
 *
 * If it is an ACK we then move to the next stage of the handshake, which
 * is to send our descriptor ring info to our peer. If it is a NACK then
 * there is nothing more we can (currently) do.
 *
 * If we get a valid/acceptable INFO packet (and we have already negotiated
 * a version) we ACK back and set the channel state to ATTR_RECV; otherwise
 * we NACK back and reset the channel state to INACTIV.
 *
 * FUTURE: in time we will probably negotiate over attributes, but for
 * the moment unacceptable attributes are regarded as a fatal error.
 */
void
vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vnet_attr_msg_t		*attr_pkt;
	vsw_t			*vswp = ldcp->ldc_vswp;
	vsw_port_t		*port = ldcp->ldc_port;
	uint64_t		macaddr = 0;
	int			i;

	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a ctrl/attr packet so
	 * cast it into the correct structure.
	 */
	attr_pkt = (vnet_attr_msg_t *)pkt;

	switch (attr_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV))
			return;

		/*
		 * If the attributes are unacceptable then we NACK back.
		 */
		if (vsw_check_attr(attr_pkt, ldcp->ldc_port)) {

			DERR(vswp, "%s (chan %d): invalid attributes",
			    __func__, ldcp->ldc_id);

			vsw_free_lane_resources(ldcp, INBOUND);

			attr_pkt->tag.vio_sid = ldcp->local_session;
			attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
			ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT;
			(void) vsw_send_msg(ldcp, (void *)attr_pkt,
			    sizeof (vnet_attr_msg_t), B_TRUE);

			vsw_next_milestone(ldcp);
			return;
		}

		/*
		 * Otherwise store attributes for this lane and update
		 * lane state.
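		 *
		 * The peer's MAC address arrives packed in a uint64_t and
		 * is unpacked below most-significant octet first; e.g. an
		 * (illustrative) addr of 0x0003ba0f4422 becomes the ether
		 * address 00:03:ba:0f:44:22.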
		 */
		ldcp->lane_in.mtu = attr_pkt->mtu;
		ldcp->lane_in.addr = attr_pkt->addr;
		ldcp->lane_in.addr_type = attr_pkt->addr_type;
		ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode;
		ldcp->lane_in.ack_freq = attr_pkt->ack_freq;

		macaddr = ldcp->lane_in.addr;
		for (i = ETHERADDRL - 1; i >= 0; i--) {
			port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF;
			macaddr >>= 8;
		}

		/* create the fdb entry for this port/mac address */
		(void) vsw_add_fdb(vswp, port);

		/* set up device-specific xmit routines */
		mutex_enter(&port->tx_lock);
		if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) {
			D2(vswp, "%s: mode = VIO_DRING_MODE", __func__);
			port->transmit = vsw_dringsend;
			ldcp->lane_out.xfer_mode = VIO_DRING_MODE;
		} else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) {
			D2(vswp, "%s: mode = VIO_DESC_MODE", __func__);
			vsw_create_privring(ldcp);
			port->transmit = vsw_descrsend;
			ldcp->lane_out.xfer_mode = VIO_DESC_MODE;
		}
		mutex_exit(&port->tx_lock);

		attr_pkt->tag.vio_sid = ldcp->local_session;
		attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;

		DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);

		ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT;

		(void) vsw_send_msg(ldcp, (void *)attr_pkt,
		    sizeof (vnet_attr_msg_t), B_TRUE);

		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV))
			return;

		ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV))
			return;

		ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	default:
		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
		    attr_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
}

/*
 * Process a dring info packet. We can end up here either because our peer
 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or because
 * our peer has sent us a dring INFO message.
 *
 * If we get a valid/acceptable INFO packet (and we have already negotiated
 * a version) we ACK back and update the lane state; otherwise we NACK back.
 *
 * FUTURE: there is nothing to stop a client from sending us info on
 * multiple drings, but for the moment we will just use the first one
 * we are given.
 */
void
vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vio_dring_reg_msg_t	*dring_pkt;
	vsw_t			*vswp = ldcp->ldc_vswp;
	ldc_mem_info_t		minfo;
	dring_info_t		*dp, *dbp;
	int			dring_found = 0;

	/*
	 * We know this is a ctrl/dring packet so
	 * cast it into the correct structure.
	 */
	dring_pkt = (vio_dring_reg_msg_t *)pkt;

	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);

	switch (dring_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
			return;

		/*
		 * If the dring params are unacceptable then we NACK back.
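		 *
		 * (The precise validation lives in vsw_check_dring_info();
		 * loosely sketched, a registration with a zero descriptor
		 * count or size, or with other than the single memory
		 * cookie expected here, would be rejected.)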
2418 */ 2419 if (vsw_check_dring_info(dring_pkt)) { 2420 2421 DERR(vswp, "%s (%lld): invalid dring info", 2422 __func__, ldcp->ldc_id); 2423 2424 vsw_free_lane_resources(ldcp, INBOUND); 2425 2426 dring_pkt->tag.vio_sid = ldcp->local_session; 2427 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2428 2429 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2430 2431 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2432 2433 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2434 sizeof (vio_dring_reg_msg_t), B_TRUE); 2435 2436 vsw_next_milestone(ldcp); 2437 return; 2438 } 2439 2440 /* 2441 * Otherwise, attempt to map in the dring using the 2442 * cookie. If that succeeds we send back a unique dring 2443 * identifier that the sending side will use in future 2444 * to refer to this descriptor ring. 2445 */ 2446 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 2447 2448 dp->num_descriptors = dring_pkt->num_descriptors; 2449 dp->descriptor_size = dring_pkt->descriptor_size; 2450 dp->options = dring_pkt->options; 2451 dp->ncookies = dring_pkt->ncookies; 2452 2453 /* 2454 * Note: should only get one cookie. Enforced in 2455 * the ldc layer. 2456 */ 2457 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 2458 sizeof (ldc_mem_cookie_t)); 2459 2460 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 2461 dp->num_descriptors, dp->descriptor_size); 2462 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 2463 dp->options, dp->ncookies); 2464 2465 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 2466 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 2467 LDC_SHADOW_MAP, &(dp->handle))) != 0) { 2468 2469 DERR(vswp, "%s: dring_map failed\n", __func__); 2470 2471 kmem_free(dp, sizeof (dring_info_t)); 2472 vsw_free_lane_resources(ldcp, INBOUND); 2473 2474 dring_pkt->tag.vio_sid = ldcp->local_session; 2475 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2476 2477 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2478 2479 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2480 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2481 sizeof (vio_dring_reg_msg_t), B_TRUE); 2482 2483 vsw_next_milestone(ldcp); 2484 return; 2485 } 2486 2487 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 2488 2489 DERR(vswp, "%s: dring_addr failed\n", __func__); 2490 2491 kmem_free(dp, sizeof (dring_info_t)); 2492 vsw_free_lane_resources(ldcp, INBOUND); 2493 2494 dring_pkt->tag.vio_sid = ldcp->local_session; 2495 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2496 2497 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2498 2499 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2500 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2501 sizeof (vio_dring_reg_msg_t), B_TRUE); 2502 2503 vsw_next_milestone(ldcp); 2504 return; 2505 } else { 2506 /* store the address of the pub part of ring */ 2507 dp->pub_addr = minfo.vaddr; 2508 } 2509 2510 /* no private section as we are importing */ 2511 dp->priv_addr = NULL; 2512 2513 /* 2514 * Using simple mono increasing int for ident at 2515 * the moment. 2516 */ 2517 dp->ident = ldcp->next_ident; 2518 ldcp->next_ident++; 2519 2520 dp->end_idx = 0; 2521 dp->next = NULL; 2522 2523 /* 2524 * Link it onto the end of the list of drings 2525 * for this lane. 
		 */
		if (ldcp->lane_in.dringp == NULL) {
			D2(vswp, "%s: adding first INBOUND dring", __func__);
			ldcp->lane_in.dringp = dp;
		} else {
			dbp = ldcp->lane_in.dringp;

			while (dbp->next != NULL)
				dbp = dbp->next;

			dbp->next = dp;
		}

		/* acknowledge it */
		dring_pkt->tag.vio_sid = ldcp->local_session;
		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
		dring_pkt->dring_ident = dp->ident;

		(void) vsw_send_msg(ldcp, (void *)dring_pkt,
		    sizeof (vio_dring_reg_msg_t), B_TRUE);

		ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT;
		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV))
			return;

		/*
		 * Peer is acknowledging our dring info and will have
		 * sent us a dring identifier which we will use to
		 * refer to this ring w.r.t. our peer.
		 */
		dp = ldcp->lane_out.dringp;
		if (dp != NULL) {
			/*
			 * Walk the list to find the ring this ident
			 * should be associated with.
			 */
			while (dp != NULL) {
				if (vsw_dring_match(dp, dring_pkt)) {
					dring_found = 1;
					break;
				}
				dp = dp->next;
			}

			if (dring_found == 0) {
				DERR(NULL, "%s: unrecognised ring cookie",
				    __func__);
				vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
				return;
			}

		} else {
			DERR(vswp, "%s: DRING ACK received but no drings "
			    "allocated", __func__);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return;
		}

		/* store ident */
		dp->ident = dring_pkt->dring_ident;
		ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV))
			return;

		ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	default:
		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
		    dring_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
}

/*
 * Process a request from our peer to unregister a dring.
 *
 * For the moment we just restart the handshake if our
 * peer endpoint attempts to unregister a dring.
 */
void
vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vsw_t			*vswp = ldcp->ldc_vswp;
	vio_dring_unreg_msg_t	*dring_pkt;

	/*
	 * We know this is a ctrl/dring packet so
	 * cast it into the correct structure.
	 */
	dring_pkt = (vio_dring_unreg_msg_t *)pkt;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	switch (dring_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		DWARN(vswp, "%s: restarting handshake..", __func__);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		DWARN(vswp, "%s: restarting handshake..", __func__);
		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		DWARN(vswp, "%s: restarting handshake..", __func__);
		break;

	default:
		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
		    dring_pkt->tag.vio_subtype);
	}

	vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * Wrapped in do { } while (0) so the macro expands to a single
 * statement and stays safe in unbraced if/else contexts.
 */
#define	SND_MCST_NACK(ldcp, pkt) \
	do { \
		pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
		pkt->tag.vio_sid = ldcp->local_session; \
		(void) vsw_send_msg(ldcp, (void *)pkt, \
		    sizeof (vnet_mcast_msg_t), B_TRUE); \
	} while (0)

/*
 * Process a multicast request from a vnet.
 *
 * Vnets specify a multicast address that they are interested in. This
 * address is used as a key into the hash table which forms the multicast
 * forwarding database (mFDB).
 *
 * The table keys are the multicast addresses, while the table entries
 * are pointers to lists of ports which wish to receive packets for the
 * specified multicast address.
 *
 * When a multicast packet is being switched we use the address as a key
 * into the hash table, and then walk the appropriate port list forwarding
 * the pkt to each port in turn.
 *
 * If a vnet is no longer interested in a particular multicast grouping
 * we simply find the correct location in the hash table and then delete
 * the relevant port from the port list.
 *
 * To deal with the case whereby a port is being deleted without first
 * removing itself from the lists in the hash table, we maintain a list
 * of multicast addresses the port has registered an interest in, within
 * the port structure itself. We then simply walk that list of addresses
 * using them as keys into the hash table and remove the port from the
 * appropriate lists.
 */
static void
vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vnet_mcast_msg_t	*mcst_pkt;
	vsw_port_t		*port = ldcp->ldc_port;
	vsw_t			*vswp = ldcp->ldc_vswp;
	int			i;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a ctrl/mcast packet so
	 * cast it into the correct structure.
	 */
	mcst_pkt = (vnet_mcast_msg_t *)pkt;

	switch (mcst_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		/*
		 * Check if in correct state to receive a multicast
		 * message (i.e. handshake complete). If not, reset
		 * the handshake.
		 */
		if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV))
			return;

		/*
		 * Before attempting to add or remove addresses check
		 * that they are valid multicast addresses.
		 * If not, then NACK back.
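		 *
		 * The multicast bit is the least significant bit of the
		 * first octet of the ether address, so, for example, an
		 * IPv4 multicast address such as 01:00:5e:xx:xx:xx passes
		 * the check below, while a unicast address (first octet
		 * even) is NACK'd.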
2730 */ 2731 for (i = 0; i < mcst_pkt->count; i++) { 2732 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 2733 DERR(vswp, "%s: invalid multicast address", 2734 __func__); 2735 SND_MCST_NACK(ldcp, mcst_pkt); 2736 return; 2737 } 2738 } 2739 2740 /* 2741 * Now add/remove the addresses. If this fails we 2742 * NACK back. 2743 */ 2744 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 2745 SND_MCST_NACK(ldcp, mcst_pkt); 2746 return; 2747 } 2748 2749 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2750 mcst_pkt->tag.vio_sid = ldcp->local_session; 2751 2752 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 2753 2754 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 2755 sizeof (vnet_mcast_msg_t), B_TRUE); 2756 break; 2757 2758 case VIO_SUBTYPE_ACK: 2759 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2760 2761 /* 2762 * We shouldn't ever get a multicast ACK message as 2763 * at the moment we never request multicast addresses 2764 * to be set on some other device. This may change in 2765 * the future if we have cascading switches. 2766 */ 2767 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 2768 return; 2769 2770 /* Do nothing */ 2771 break; 2772 2773 case VIO_SUBTYPE_NACK: 2774 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2775 2776 /* 2777 * We shouldn't get a multicast NACK packet for the 2778 * same reasons as we shouldn't get a ACK packet. 2779 */ 2780 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 2781 return; 2782 2783 /* Do nothing */ 2784 break; 2785 2786 default: 2787 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2788 mcst_pkt->tag.vio_subtype); 2789 } 2790 2791 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2792 } 2793 2794 static void 2795 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 2796 { 2797 vio_rdx_msg_t *rdx_pkt; 2798 vsw_t *vswp = ldcp->ldc_vswp; 2799 2800 /* 2801 * We know this is a ctrl/rdx packet so 2802 * cast it into the correct structure. 2803 */ 2804 rdx_pkt = (vio_rdx_msg_t *)pkt; 2805 2806 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2807 2808 switch (rdx_pkt->tag.vio_subtype) { 2809 case VIO_SUBTYPE_INFO: 2810 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2811 2812 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 2813 return; 2814 2815 rdx_pkt->tag.vio_sid = ldcp->local_session; 2816 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2817 2818 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 2819 2820 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 2821 2822 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 2823 sizeof (vio_rdx_msg_t), B_TRUE); 2824 2825 vsw_next_milestone(ldcp); 2826 break; 2827 2828 case VIO_SUBTYPE_ACK: 2829 /* 2830 * Should be handled in-band by callback handler. 
		 */
		DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__);
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV))
			return;

		ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	default:
		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
		    rdx_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

static void
vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t tag)
{
	uint16_t	env = tag.vio_subtype_env;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/* session id check */
	if (ldcp->session_status & VSW_PEER_SESSION) {
		if (ldcp->peer_session != tag.vio_sid) {
			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
			    __func__, ldcp->ldc_id, tag.vio_sid);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return;
		}
	}

	/*
	 * It is an error for us to be getting data packets
	 * before the handshake has completed.
	 */
	if (ldcp->hphase != VSW_MILESTONE4) {
		DERR(vswp, "%s: got data packet before handshake complete "
		    "hphase %d (%x: %x)", __func__, ldcp->hphase,
		    ldcp->lane_in.lstate, ldcp->lane_out.lstate);
		DUMP_FLAGS(ldcp->lane_in.lstate);
		DUMP_FLAGS(ldcp->lane_out.lstate);
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		return;
	}

	/*
	 * To reduce the locking contention, release the
	 * ldc_cblock here and re-acquire it once we are done
	 * receiving packets.
	 */
	mutex_exit(&ldcp->ldc_cblock);
	mutex_enter(&ldcp->ldc_rxlock);

	/*
	 * Switch on vio_subtype envelope, then let lower routines
	 * decide if it is an INFO, ACK or NACK packet.
	 */
	if (env == VIO_DRING_DATA) {
		vsw_process_data_dring_pkt(ldcp, dpkt);
	} else if (env == VIO_PKT_DATA) {
		vsw_process_data_raw_pkt(ldcp, dpkt);
	} else if (env == VIO_DESC_DATA) {
		vsw_process_data_ibnd_pkt(ldcp, dpkt);
	} else {
		DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env);
	}

	mutex_exit(&ldcp->ldc_rxlock);
	mutex_enter(&ldcp->ldc_cblock);

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * Wrapped in do { } while (0) so the macro expands to a single
 * statement and stays safe in unbraced if/else contexts.
 */
#define	SND_DRING_NACK(ldcp, pkt) \
	do { \
		pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
		pkt->tag.vio_sid = ldcp->local_session; \
		(void) vsw_send_msg(ldcp, (void *)pkt, \
		    sizeof (vio_dring_msg_t), B_TRUE); \
	} while (0)

static void
vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt)
{
	vio_dring_msg_t		*dring_pkt;
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;
	dring_info_t		*dp = NULL;
	vsw_t			*vswp = ldcp->ldc_vswp;
	mblk_t			*mp = NULL;
	mblk_t			*bp = NULL;
	mblk_t			*bpt = NULL;
	size_t			nbytes = 0;
	uint64_t		ncookies = 0;
	uint64_t		chain = 0;
	uint64_t		len;
	uint32_t		pos, start, datalen;
	uint32_t		range_start, range_end;
	int32_t			end, num, cnt = 0;
	int			i, rv, msg_rv = 0;
	boolean_t		ack_needed = B_FALSE;
	boolean_t		prev_desc_ack = B_FALSE;
	int			read_attempts = 0;
	struct ether_header	*ehp;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a data/dring packet so
	 * cast it into the correct structure.
	 */
	dring_pkt = (vio_dring_msg_t *)dpkt;

	/*
	 * Switch on the vio_subtype. If it is INFO then we need to
	 * process the data. If it is an ACK we need to make sure
	 * it makes sense (i.e. did we send an earlier data/info),
	 * and if it is a NACK then we may attempt a retry.
	 */
	switch (dring_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);

		READ_ENTER(&ldcp->lane_in.dlistrw);
		if ((dp = vsw_ident2dring(&ldcp->lane_in,
		    dring_pkt->dring_ident)) == NULL) {
			RW_EXIT(&ldcp->lane_in.dlistrw);

			DERR(vswp, "%s(%lld): unable to find dring from "
			    "ident 0x%llx", __func__, ldcp->ldc_id,
			    dring_pkt->dring_ident);

			SND_DRING_NACK(ldcp, dring_pkt);
			return;
		}

		start = pos = dring_pkt->start_idx;
		end = dring_pkt->end_idx;
		len = dp->num_descriptors;

		range_start = range_end = pos;

		D2(vswp, "%s(%lld): start index %ld : end %ld\n",
		    __func__, ldcp->ldc_id, start, end);

		if (end == -1) {
			num = -1;
		} else if (end >= 0) {
			num = end >= pos ?
			    end - pos + 1 : (len - pos + 1) + end;

			/* basic sanity check */
			if (end > len) {
				RW_EXIT(&ldcp->lane_in.dlistrw);
				DERR(vswp, "%s(%lld): endpoint %lld outside "
				    "ring length %lld", __func__,
				    ldcp->ldc_id, end, len);

				SND_DRING_NACK(ldcp, dring_pkt);
				return;
			}
		} else {
			RW_EXIT(&ldcp->lane_in.dlistrw);
			DERR(vswp, "%s(%lld): invalid endpoint %lld",
			    __func__, ldcp->ldc_id, end);
			SND_DRING_NACK(ldcp, dring_pkt);
			return;
		}

		while (cnt != num) {
vsw_recheck_desc:
			if ((rv = ldc_mem_dring_acquire(dp->handle,
			    pos, pos)) != 0) {
				RW_EXIT(&ldcp->lane_in.dlistrw);
				DERR(vswp, "%s(%lld): unable to acquire "
				    "descriptor at pos %d: err %d",
				    __func__, ldcp->ldc_id, pos, rv);
				SND_DRING_NACK(ldcp, dring_pkt);
				ldcp->ldc_stats.ierrors++;
				return;
			}

			pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos;

			/*
			 * When given a bounded range of descriptors
			 * to process, it is an error to hit a descriptor
			 * which is not ready. In the non-bounded case
			 * (end_idx == -1) this simply indicates we have
			 * reached the end of the current active range.
			 */
			if (pub_addr->hdr.dstate != VIO_DESC_READY) {
				/* unbound - no error */
				if (end == -1) {
					if (read_attempts == vsw_read_attempts)
						break;

					delay(drv_usectohz(vsw_desc_delay));
					read_attempts++;
					goto vsw_recheck_desc;
				}

				/* bounded - error - so NACK back */
				RW_EXIT(&ldcp->lane_in.dlistrw);
				DERR(vswp, "%s(%lld): descriptor not READY "
				    "(%d)", __func__, ldcp->ldc_id,
				    pub_addr->hdr.dstate);
				SND_DRING_NACK(ldcp, dring_pkt);
				return;
			}

			DTRACE_PROBE1(read_attempts, int, read_attempts);

			range_end = pos;

			/*
			 * If we ACK'd the previous descriptor then now
			 * record the new range start position for later
			 * ACK's.
			 */
			if (prev_desc_ack) {
				range_start = pos;

				D2(vswp, "%s(%lld): updating range start to be "
				    "%d", __func__, ldcp->ldc_id, range_start);

				prev_desc_ack = B_FALSE;
			}

			/*
			 * Data is padded to align on an 8 byte boundary;
			 * datalen is the actual data length, i.e. minus
			 * that padding.
			 */
			datalen = pub_addr->nbytes;

			/*
			 * Does our peer wish us to ACK when we have finished
			 * with this descriptor?
			 */
			if (pub_addr->hdr.ack)
				ack_needed = B_TRUE;

			D2(vswp, "%s(%lld): processing desc %lld at pos"
			    " 0x%llx : dstate 0x%lx : datalen 0x%lx",
			    __func__, ldcp->ldc_id, pos, pub_addr,
			    pub_addr->hdr.dstate, datalen);

			/*
			 * Mark that we are starting to process descriptor.
			 */
			pub_addr->hdr.dstate = VIO_DESC_ACCEPTED;

			/*
			 * Ensure that we ask ldc for an aligned
			 * number of bytes.
			 */
			nbytes = (datalen + VNET_IPALIGN + 7) & ~7;

			mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
			if (mp == NULL) {
				ldcp->ldc_stats.rx_vio_allocb_fail++;
				/*
				 * No free receive buffers available, so
				 * fall back onto allocb(9F). Make sure that
				 * we get a data buffer which is a multiple
				 * of 8 as this is required by ldc_mem_copy.
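				 *
				 * (The + 8 below leaves room for nbytes,
				 * which was rounded up to a multiple of 8
				 * above: e.g. if datalen + VNET_IPALIGN is
				 * 61, nbytes becomes 64, and 61 + 8 = 69
				 * still covers the copy.)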
3105 */ 3106 DTRACE_PROBE(allocb); 3107 if ((mp = allocb(datalen + VNET_IPALIGN + 8, 3108 BPRI_MED)) == NULL) { 3109 DERR(vswp, "%s(%ld): allocb failed", 3110 __func__, ldcp->ldc_id); 3111 pub_addr->hdr.dstate = VIO_DESC_DONE; 3112 (void) ldc_mem_dring_release(dp->handle, 3113 pos, pos); 3114 ldcp->ldc_stats.ierrors++; 3115 ldcp->ldc_stats.rx_allocb_fail++; 3116 break; 3117 } 3118 } 3119 3120 ncookies = pub_addr->ncookies; 3121 rv = ldc_mem_copy(ldcp->ldc_handle, 3122 (caddr_t)mp->b_rptr, 0, &nbytes, 3123 pub_addr->memcookie, ncookies, LDC_COPY_IN); 3124 3125 if (rv != 0) { 3126 DERR(vswp, "%s(%d): unable to copy in data " 3127 "from %d cookies in desc %d (rv %d)", 3128 __func__, ldcp->ldc_id, ncookies, pos, rv); 3129 freemsg(mp); 3130 3131 pub_addr->hdr.dstate = VIO_DESC_DONE; 3132 (void) ldc_mem_dring_release(dp->handle, 3133 pos, pos); 3134 ldcp->ldc_stats.ierrors++; 3135 break; 3136 } else { 3137 D2(vswp, "%s(%d): copied in %ld bytes" 3138 " using %d cookies", __func__, 3139 ldcp->ldc_id, nbytes, ncookies); 3140 } 3141 3142 /* adjust the read pointer to skip over the padding */ 3143 mp->b_rptr += VNET_IPALIGN; 3144 3145 /* point to the actual end of data */ 3146 mp->b_wptr = mp->b_rptr + datalen; 3147 3148 /* update statistics */ 3149 ehp = (struct ether_header *)mp->b_rptr; 3150 if (IS_BROADCAST(ehp)) 3151 ldcp->ldc_stats.brdcstrcv++; 3152 else if (IS_MULTICAST(ehp)) 3153 ldcp->ldc_stats.multircv++; 3154 3155 ldcp->ldc_stats.ipackets++; 3156 ldcp->ldc_stats.rbytes += datalen; 3157 3158 /* build a chain of received packets */ 3159 if (bp == NULL) { 3160 /* first pkt */ 3161 bp = mp; 3162 bp->b_next = bp->b_prev = NULL; 3163 bpt = bp; 3164 chain = 1; 3165 } else { 3166 mp->b_next = mp->b_prev = NULL; 3167 bpt->b_next = mp; 3168 bpt = mp; 3169 chain++; 3170 } 3171 3172 /* mark we are finished with this descriptor */ 3173 pub_addr->hdr.dstate = VIO_DESC_DONE; 3174 3175 (void) ldc_mem_dring_release(dp->handle, pos, pos); 3176 3177 /* 3178 * Send an ACK back to peer if requested. 3179 */ 3180 if (ack_needed) { 3181 ack_needed = B_FALSE; 3182 3183 dring_pkt->start_idx = range_start; 3184 dring_pkt->end_idx = range_end; 3185 3186 DERR(vswp, "%s(%lld): processed %d %d, ACK" 3187 " requested", __func__, ldcp->ldc_id, 3188 dring_pkt->start_idx, dring_pkt->end_idx); 3189 3190 dring_pkt->dring_process_state = VIO_DP_ACTIVE; 3191 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3192 dring_pkt->tag.vio_sid = ldcp->local_session; 3193 3194 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3195 sizeof (vio_dring_msg_t), B_FALSE); 3196 3197 /* 3198 * Check if ACK was successfully sent. If not 3199 * we break and deal with that below. 3200 */ 3201 if (msg_rv != 0) 3202 break; 3203 3204 prev_desc_ack = B_TRUE; 3205 range_start = pos; 3206 } 3207 3208 /* next descriptor */ 3209 pos = (pos + 1) % len; 3210 cnt++; 3211 3212 /* 3213 * Break out of loop here and stop processing to 3214 * allow some other network device (or disk) to 3215 * get access to the cpu. 3216 */ 3217 if (chain > vsw_chain_len) { 3218 D3(vswp, "%s(%lld): switching chain of %d " 3219 "msgs", __func__, ldcp->ldc_id, chain); 3220 break; 3221 } 3222 } 3223 RW_EXIT(&ldcp->lane_in.dlistrw); 3224 3225 /* 3226 * If when we attempted to send the ACK we found that the 3227 * channel had been reset then now handle this. We deal with 3228 * it here as we cannot reset the channel while holding the 3229 * dlistrw lock, and we don't want to acquire/release it 3230 * continuously in the above loop, as a channel reset should 3231 * be a rare event. 
3232 */ 3233 if (msg_rv == ECONNRESET) { 3234 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3235 break; 3236 } 3237 3238 /* send the chain of packets to be switched */ 3239 if (bp != NULL) { 3240 DTRACE_PROBE1(vsw_rcv_msgs, int, chain); 3241 D3(vswp, "%s(%lld): switching chain of %d msgs", 3242 __func__, ldcp->ldc_id, chain); 3243 vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT, 3244 ldcp->ldc_port, NULL); 3245 } 3246 3247 DTRACE_PROBE1(msg_cnt, int, cnt); 3248 3249 /* 3250 * We are now finished so ACK back with the state 3251 * set to STOPPING so our peer knows we are finished 3252 */ 3253 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3254 dring_pkt->tag.vio_sid = ldcp->local_session; 3255 3256 dring_pkt->dring_process_state = VIO_DP_STOPPED; 3257 3258 DTRACE_PROBE(stop_process_sent); 3259 3260 /* 3261 * We have not processed any more descriptors beyond 3262 * the last one we ACK'd. 3263 */ 3264 if (prev_desc_ack) 3265 range_start = range_end; 3266 3267 dring_pkt->start_idx = range_start; 3268 dring_pkt->end_idx = range_end; 3269 3270 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 3271 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3272 dring_pkt->end_idx); 3273 3274 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3275 sizeof (vio_dring_msg_t), B_TRUE); 3276 break; 3277 3278 case VIO_SUBTYPE_ACK: 3279 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 3280 /* 3281 * Verify that the relevant descriptors are all 3282 * marked as DONE 3283 */ 3284 READ_ENTER(&ldcp->lane_out.dlistrw); 3285 if ((dp = vsw_ident2dring(&ldcp->lane_out, 3286 dring_pkt->dring_ident)) == NULL) { 3287 RW_EXIT(&ldcp->lane_out.dlistrw); 3288 DERR(vswp, "%s: unknown ident in ACK", __func__); 3289 return; 3290 } 3291 3292 start = end = 0; 3293 start = dring_pkt->start_idx; 3294 end = dring_pkt->end_idx; 3295 len = dp->num_descriptors; 3296 3297 3298 mutex_enter(&dp->dlock); 3299 dp->last_ack_recv = end; 3300 ldcp->ldc_stats.dring_data_acks++; 3301 mutex_exit(&dp->dlock); 3302 3303 (void) vsw_reclaim_dring(dp, start); 3304 3305 /* 3306 * If our peer is stopping processing descriptors then 3307 * we check to make sure it has processed all the descriptors 3308 * we have updated. If not then we send it a new message 3309 * to prompt it to restart. 3310 */ 3311 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) { 3312 DTRACE_PROBE(stop_process_recv); 3313 D2(vswp, "%s(%lld): got stopping msg : %d : %d", 3314 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3315 dring_pkt->end_idx); 3316 3317 /* 3318 * Check next descriptor in public section of ring. 3319 * If its marked as READY then we need to prompt our 3320 * peer to start processing the ring again. 3321 */ 3322 i = (end + 1) % len; 3323 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 3324 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3325 3326 /* 3327 * Hold the restart lock across all of this to 3328 * make sure that its not possible for us to 3329 * decide that a msg needs to be sent in the future 3330 * but the sending code having already checked is 3331 * about to exit. 
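			 *
			 * Illustrative interleaving without the lock: the
			 * sender marks a descriptor READY and finds
			 * restart_reqd still B_FALSE (so sends nothing),
			 * while we saw the descriptor before it went READY
			 * and only now set restart_reqd; each side then
			 * waits on the other and the ring stalls.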
3332 */ 3333 mutex_enter(&dp->restart_lock); 3334 ldcp->ldc_stats.dring_stopped_acks++; 3335 mutex_enter(&priv_addr->dstate_lock); 3336 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 3337 3338 mutex_exit(&priv_addr->dstate_lock); 3339 3340 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3341 dring_pkt->tag.vio_sid = ldcp->local_session; 3342 3343 dring_pkt->seq_num = 3344 atomic_inc_64_nv(&ldcp->lane_out.seq_num); 3345 3346 dring_pkt->start_idx = (end + 1) % len; 3347 dring_pkt->end_idx = -1; 3348 3349 D2(vswp, "%s(%lld) : sending restart msg:" 3350 " %d : %d", __func__, ldcp->ldc_id, 3351 dring_pkt->start_idx, dring_pkt->end_idx); 3352 3353 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3354 sizeof (vio_dring_msg_t), B_FALSE); 3355 ldcp->ldc_stats.dring_data_msgs++; 3356 3357 } else { 3358 mutex_exit(&priv_addr->dstate_lock); 3359 dp->restart_reqd = B_TRUE; 3360 } 3361 mutex_exit(&dp->restart_lock); 3362 } 3363 RW_EXIT(&ldcp->lane_out.dlistrw); 3364 3365 /* only do channel reset after dropping dlistrw lock */ 3366 if (msg_rv == ECONNRESET) 3367 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3368 3369 break; 3370 3371 case VIO_SUBTYPE_NACK: 3372 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 3373 __func__, ldcp->ldc_id); 3374 /* 3375 * Something is badly wrong if we are getting NACK's 3376 * for our data pkts. So reset the channel. 3377 */ 3378 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3379 3380 break; 3381 3382 default: 3383 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3384 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 3385 } 3386 3387 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3388 } 3389 3390 /* 3391 * VIO_PKT_DATA (a.k.a raw data mode ) 3392 * 3393 * Note - currently not supported. Do nothing. 3394 */ 3395 static void 3396 vsw_process_data_raw_pkt(vsw_ldc_t *ldcp, void *dpkt) 3397 { 3398 _NOTE(ARGUNUSED(dpkt)) 3399 3400 D1(NULL, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 3401 DERR(NULL, "%s (%lld): currently unsupported", __func__, ldcp->ldc_id); 3402 D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 3403 } 3404 3405 /* 3406 * Process an in-band descriptor message (most likely from 3407 * OBP). 3408 */ 3409 static void 3410 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3411 { 3412 vnet_ibnd_desc_t *ibnd_desc; 3413 dring_info_t *dp = NULL; 3414 vsw_private_desc_t *priv_addr = NULL; 3415 vsw_t *vswp = ldcp->ldc_vswp; 3416 mblk_t *mp = NULL; 3417 size_t nbytes = 0; 3418 size_t off = 0; 3419 uint64_t idx = 0; 3420 uint32_t num = 1, len, datalen = 0; 3421 uint64_t ncookies = 0; 3422 int i, rv; 3423 int j = 0; 3424 3425 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3426 3427 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3428 3429 switch (ibnd_desc->hdr.tag.vio_subtype) { 3430 case VIO_SUBTYPE_INFO: 3431 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3432 3433 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3434 return; 3435 3436 /* 3437 * Data is padded to align on a 8 byte boundary, 3438 * nbytes is actual data length, i.e. minus that 3439 * padding. 3440 */ 3441 datalen = ibnd_desc->nbytes; 3442 3443 D2(vswp, "%s(%lld): processing inband desc : " 3444 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 3445 3446 ncookies = ibnd_desc->ncookies; 3447 3448 /* 3449 * allocb(9F) returns an aligned data block. We 3450 * need to ensure that we ask ldc for an aligned 3451 * number of bytes also. 
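		 *
		 * For example (illustrative values): datalen = 61 gives
		 * nbytes & 0x7 == 5, so off = 3 and nbytes is rounded up
		 * to 64 before the ldc_mem_copy() below.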
		 */
		nbytes = datalen;
		if (nbytes & 0x7) {
			off = 8 - (nbytes & 0x7);
			nbytes += off;
		}

		mp = allocb(nbytes, BPRI_MED);
		if (mp == NULL) {
			DERR(vswp, "%s(%lld): allocb failed",
			    __func__, ldcp->ldc_id);
			ldcp->ldc_stats.rx_allocb_fail++;
			return;
		}

		rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr,
		    0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies,
		    LDC_COPY_IN);

		if (rv != 0) {
			DERR(vswp, "%s(%d): unable to copy in data from "
			    "%d cookie(s)", __func__, ldcp->ldc_id, ncookies);
			freemsg(mp);
			ldcp->ldc_stats.ierrors++;
			return;
		}

		D2(vswp, "%s(%d): copied in %ld bytes using %d cookies",
		    __func__, ldcp->ldc_id, nbytes, ncookies);

		/* point to the actual end of data */
		mp->b_wptr = mp->b_rptr + datalen;
		ldcp->ldc_stats.ipackets++;
		ldcp->ldc_stats.rbytes += datalen;

		/*
		 * We ACK back every in-band descriptor message we process
		 */
		ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK;
		ibnd_desc->hdr.tag.vio_sid = ldcp->local_session;
		(void) vsw_send_msg(ldcp, (void *)ibnd_desc,
		    sizeof (vnet_ibnd_desc_t), B_TRUE);

		/* send the packet to be switched */
		vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT,
		    ldcp->ldc_port, NULL);

		break;

	case VIO_SUBTYPE_ACK:
		D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		/* Verify the ACK is valid */
		idx = ibnd_desc->hdr.desc_handle;

		if (idx >= VSW_RING_NUM_EL) {
			cmn_err(CE_WARN, "!vsw%d: corrupted ACK received "
			    "(idx %ld)", vswp->instance, idx);
			return;
		}

		if ((dp = ldcp->lane_out.dringp) == NULL) {
			DERR(vswp, "%s: no dring found", __func__);
			return;
		}

		len = dp->num_descriptors;
		/*
		 * If the descriptor we are being ACK'ed for is not the
		 * one we expected, then pkts were lost somewhere, either
		 * when we tried to send a msg, or a previous ACK msg from
		 * our peer. In either case we now reclaim the descriptors
		 * in the range from the last ACK we received up to the
		 * current ACK.
		 */
		if (idx != dp->last_ack_recv) {
			DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)",
			    __func__, dp->last_ack_recv, idx);
			num = idx >= dp->last_ack_recv ?
			    idx - dp->last_ack_recv + 1 :
			    (len - dp->last_ack_recv + 1) + idx;
		}

		/*
		 * When we sent the in-band message to our peer we
		 * marked the copy in our private ring as READY. We now
		 * check that the descriptor we are being ACK'ed for is in
		 * fact READY, i.e. it is one we have shared with our peer.
		 *
		 * If it is not, we flag an error, but still reset the descr
		 * back to FREE.
		 */
		for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) {
			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
			mutex_enter(&priv_addr->dstate_lock);
			if (priv_addr->dstate != VIO_DESC_READY) {
				DERR(vswp, "%s: (%ld) desc at index %ld not "
				    "READY (0x%lx)", __func__,
				    ldcp->ldc_id, idx, priv_addr->dstate);
				DERR(vswp, "%s: bound %d: ncookies %ld : "
				    "datalen %ld", __func__,
				    priv_addr->bound, priv_addr->ncookies,
				    priv_addr->datalen);
			}
			D2(vswp, "%s: (%lld) freeing descp at %lld", __func__,
			    ldcp->ldc_id, idx);
			/* release resources associated with sent msg */
			priv_addr->datalen = 0;
			priv_addr->dstate = VIO_DESC_FREE;
			mutex_exit(&priv_addr->dstate_lock);
		}
		/* update to next expected value */
		dp->last_ack_recv = (idx + 1) % dp->num_descriptors;

		break;

	case VIO_SUBTYPE_NACK:
		DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		/*
		 * We should only get a NACK if our peer doesn't like
		 * something about a message we have sent it. If this
		 * happens we just release the resources associated with
		 * the message. (We are relying on higher layers to decide
		 * whether or not to resend.)
		 */

		/* limit check */
		idx = ibnd_desc->hdr.desc_handle;

		if (idx >= VSW_RING_NUM_EL) {
			DERR(vswp, "%s: corrupted NACK received (idx %lld)",
			    __func__, idx);
			return;
		}

		if ((dp = ldcp->lane_out.dringp) == NULL) {
			DERR(vswp, "%s: no dring found", __func__);
			return;
		}

		priv_addr = (vsw_private_desc_t *)dp->priv_addr;

		/* move to correct location in ring */
		priv_addr += idx;

		/* release resources associated with sent msg */
		mutex_enter(&priv_addr->dstate_lock);
		priv_addr->datalen = 0;
		priv_addr->dstate = VIO_DESC_FREE;
		mutex_exit(&priv_addr->dstate_lock);

		break;

	default:
		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
		    ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype);
	}

	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
}

static void
vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t tag)
{
	_NOTE(ARGUNUSED(epkt))

	vsw_t		*vswp = ldcp->ldc_vswp;
	uint16_t	env = tag.vio_subtype_env;

	D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id);

	/*
	 * Error vio_subtypes have yet to be defined. So for
	 * the moment we can't do anything.
	 */
	D2(vswp, "%s: (%x) vio_subtype env", __func__, env);

	D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
}

/* transmit the packet over the given port */
int
vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;
	mblk_t		*tmp;
	int		status = 0;

	READ_ENTER(&ldcl->lockrw);
	/*
	 * Note for now, we have a single channel.
	 */
	ldcp = ldcl->head;
	if (ldcp == NULL) {
		DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n");
		freemsgchain(mp);
		RW_EXIT(&ldcl->lockrw);
		return (1);
	}

	/*
	 * If the TX thread is enabled, then queue the packets
	 * and signal the tx thread.
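	 *
	 * The chain (mp .. mpt) is appended at tx_mtail, and the thread
	 * is only signalled on the empty-to-non-empty transition below,
	 * since the tx thread drains the whole chain once woken.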
	 */
	if (ldcp->tx_thread != NULL) {
		mutex_enter(&ldcp->tx_thr_lock);
		if (ldcp->tx_mhead == NULL) {
			ldcp->tx_mhead = mp;
			ldcp->tx_mtail = mpt;
			cv_signal(&ldcp->tx_thr_cv);
		} else {
			ldcp->tx_mtail->b_next = mp;
			ldcp->tx_mtail = mpt;
		}
		mutex_exit(&ldcp->tx_thr_lock);
	} else {
		while (mp != NULL) {
			tmp = mp->b_next;
			mp->b_next = mp->b_prev = NULL;
			(void) vsw_ldcsend(ldcp, mp, 1);
			mp = tmp;
		}
	}

	RW_EXIT(&ldcl->lockrw);

	return (status);
}

/*
 * Transmit the packet over the given LDC channel.
 *
 * The 'retries' argument indicates how many times a packet
 * is retried before it is dropped. Note, the retry is done
 * only for a resource related failure; for all other failures
 * the packet is dropped immediately.
 *
 * The 'tx_failures' counter is used as a mechanism to track
 * continuous failures. Once these failures exceed the
 * 'vsw_ldc_tx_max_failures' tunable, packets are tried only
 * once and then dropped. This is done to avoid buffering
 * too many packets.
 */
static int
vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, int retries)
{
	int		i;
	int		rc;
	int		status = 0;
	vsw_port_t	*port = ldcp->ldc_port;
	dring_info_t	*dp = NULL;

	for (i = 0; i < retries; ) {
		/*
		 * Send the message out using the appropriate
		 * transmit function which will free the mblk when it
		 * is finished with it.
		 */
		mutex_enter(&port->tx_lock);
		if (port->transmit != NULL) {
			status = (*port->transmit)(ldcp, mp);
		}
		if (status == LDC_TX_SUCCESS) {
			ldcp->tx_failures = 0;
			mutex_exit(&port->tx_lock);
			break;
		} else if (ldcp->tx_failures > vsw_ldc_tx_max_failures) {
			/*
			 * If the failures crossed the threshold then
			 * break here.
			 */
			ldcp->ldc_stats.oerrors++;
			mutex_exit(&port->tx_lock);
			break;
		} else {
			ldcp->tx_failures++;
		}
		i++;	/* increment the counter here */

		/* If it's the last retry, then update the oerror */
		if ((i == retries) && (status == LDC_TX_NORESOURCES)) {
			ldcp->ldc_stats.oerrors++;
		}
		mutex_exit(&port->tx_lock);

		if (status != LDC_TX_NORESOURCES) {
			/*
			 * No retrying required for errors unrelated
			 * to resources.
			 */
			break;
		}
		READ_ENTER(&ldcp->lane_out.dlistrw);
		if (((dp = ldcp->lane_out.dringp) != NULL) &&
		    (ldcp->lane_out.xfer_mode == VIO_DRING_MODE)) {
			rc = vsw_reclaim_dring(dp, dp->end_idx);
		} else {
			/*
			 * If there is no dring or the xfer_mode is
			 * set to DESC_MODE (i.e., OBP), then simply
			 * break here.
			 */
			RW_EXIT(&ldcp->lane_out.dlistrw);
			break;
		}
		RW_EXIT(&ldcp->lane_out.dlistrw);

		/*
		 * Delay only if none were reclaimed
		 * and it's not the last retry.
		 */
		if ((rc == 0) && (i < retries)) {
			delay(drv_usectohz(vsw_ldc_tx_delay));
		}
	}
	freemsg(mp);
	return (status);
}

/*
 * Send packet out via descriptor ring to a logical device.
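 *
 * In outline (a summary of the code below, not additional behaviour):
 *
 *	1. find a free private descriptor and copy the mblk chain
 *	   into its data buffer;
 *	2. set the bound public descriptor's nbytes and mark it
 *	   VIO_DESC_READY;
 *	3. if the peer has stopped processing (restart_reqd), send a
 *	   VIO_DRING_DATA message to prompt it to start again.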
3775 */ 3776 static int 3777 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 3778 { 3779 vio_dring_msg_t dring_pkt; 3780 dring_info_t *dp = NULL; 3781 vsw_private_desc_t *priv_desc = NULL; 3782 vnet_public_desc_t *pub = NULL; 3783 vsw_t *vswp = ldcp->ldc_vswp; 3784 mblk_t *bp; 3785 size_t n, size; 3786 caddr_t bufp; 3787 int idx; 3788 int status = LDC_TX_SUCCESS; 3789 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 3790 3791 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 3792 3793 /* TODO: make test a macro */ 3794 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 3795 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 3796 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 3797 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 3798 ldcp->lane_out.lstate); 3799 ldcp->ldc_stats.oerrors++; 3800 return (LDC_TX_FAILURE); 3801 } 3802 3803 /* 3804 * Note - using first ring only, this may change 3805 * in the future. 3806 */ 3807 READ_ENTER(&ldcp->lane_out.dlistrw); 3808 if ((dp = ldcp->lane_out.dringp) == NULL) { 3809 RW_EXIT(&ldcp->lane_out.dlistrw); 3810 DERR(vswp, "%s(%lld): no dring for outbound lane on" 3811 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 3812 ldcp->ldc_stats.oerrors++; 3813 return (LDC_TX_FAILURE); 3814 } 3815 3816 size = msgsize(mp); 3817 if (size > (size_t)ETHERMAX) { 3818 RW_EXIT(&ldcp->lane_out.dlistrw); 3819 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 3820 ldcp->ldc_id, size); 3821 ldcp->ldc_stats.oerrors++; 3822 return (LDC_TX_FAILURE); 3823 } 3824 3825 /* 3826 * Find a free descriptor 3827 * 3828 * Note: for the moment we are assuming that we will only 3829 * have one dring going from the switch to each of its 3830 * peers. This may change in the future. 3831 */ 3832 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 3833 D2(vswp, "%s(%lld): no descriptor available for ring " 3834 "at 0x%llx", __func__, ldcp->ldc_id, dp); 3835 3836 /* nothing more we can do */ 3837 status = LDC_TX_NORESOURCES; 3838 ldcp->ldc_stats.tx_no_desc++; 3839 goto vsw_dringsend_free_exit; 3840 } else { 3841 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 3842 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 3843 } 3844 3845 /* copy data into the descriptor */ 3846 bufp = priv_desc->datap; 3847 bufp += VNET_IPALIGN; 3848 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 3849 n = MBLKL(bp); 3850 bcopy(bp->b_rptr, bufp, n); 3851 bufp += n; 3852 } 3853 3854 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 3855 3856 pub = priv_desc->descp; 3857 pub->nbytes = priv_desc->datalen; 3858 3859 /* update statistics */ 3860 if (IS_BROADCAST(ehp)) 3861 ldcp->ldc_stats.brdcstxmt++; 3862 else if (IS_MULTICAST(ehp)) 3863 ldcp->ldc_stats.multixmt++; 3864 ldcp->ldc_stats.opackets++; 3865 ldcp->ldc_stats.obytes += priv_desc->datalen; 3866 3867 mutex_enter(&priv_desc->dstate_lock); 3868 pub->hdr.dstate = VIO_DESC_READY; 3869 mutex_exit(&priv_desc->dstate_lock); 3870 3871 /* 3872 * Determine whether or not we need to send a message to our 3873 * peer prompting them to read our newly updated descriptor(s). 3874 */ 3875 mutex_enter(&dp->restart_lock); 3876 if (dp->restart_reqd) { 3877 dp->restart_reqd = B_FALSE; 3878 ldcp->ldc_stats.dring_data_msgs++; 3879 mutex_exit(&dp->restart_lock); 3880 3881 /* 3882 * Send a vio_dring_msg to peer to prompt them to read 3883 * the updated descriptor ring. 
		 */
		dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA;
		dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
		dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA;
		dring_pkt.tag.vio_sid = ldcp->local_session;

		/* Note - for now using first ring */
		dring_pkt.dring_ident = dp->ident;
		dring_pkt.seq_num = atomic_inc_64_nv(&ldcp->lane_out.seq_num);

		/*
		 * If last_ack_recv is -1 then we know we've not
		 * received any ack's yet, so this must be the first
		 * msg sent, so set the start to the beginning of the ring.
		 */
		mutex_enter(&dp->dlock);
		if (dp->last_ack_recv == -1) {
			dring_pkt.start_idx = 0;
		} else {
			dring_pkt.start_idx =
			    (dp->last_ack_recv + 1) % dp->num_descriptors;
		}
		dring_pkt.end_idx = -1;
		mutex_exit(&dp->dlock);

		D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__,
		    ldcp->ldc_id, dp, dring_pkt.dring_ident);
		D3(vswp, "%s(%lld): start %lld : end %lld : seq %lld\n",
		    __func__, ldcp->ldc_id, dring_pkt.start_idx,
		    dring_pkt.end_idx, dring_pkt.seq_num);

		RW_EXIT(&ldcp->lane_out.dlistrw);

		(void) vsw_send_msg(ldcp, (void *)&dring_pkt,
		    sizeof (vio_dring_msg_t), B_TRUE);

		return (status);

	} else {
		mutex_exit(&dp->restart_lock);
		D2(vswp, "%s(%lld): updating descp %d", __func__,
		    ldcp->ldc_id, idx);
	}

vsw_dringsend_free_exit:

	RW_EXIT(&ldcp->lane_out.dlistrw);

	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
	return (status);
}

/*
 * Send an in-band descriptor message over ldc.
 */
static int
vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vnet_ibnd_desc_t ibnd_msg;
	vsw_private_desc_t *priv_desc = NULL;
	dring_info_t	*dp = NULL;
	size_t		n, size = 0;
	caddr_t		bufp;
	mblk_t		*bp;
	int		idx, i;
	int		status = LDC_TX_SUCCESS;
	static int	warn_msg = 1;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	ASSERT(mp != NULL);

	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
		DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt",
		    __func__, ldcp->ldc_id, ldcp->ldc_status,
		    ldcp->lane_out.lstate);
		ldcp->ldc_stats.oerrors++;
		return (LDC_TX_FAILURE);
	}

	/*
	 * We only expect a single dring to exist, which we use
	 * as an internal buffer rather than a transfer channel.
3969 */ 3970 READ_ENTER(&ldcp->lane_out.dlistrw); 3971 if ((dp = ldcp->lane_out.dringp) == NULL) { 3972 DERR(vswp, "%s(%lld): no dring for outbound lane", 3973 __func__, ldcp->ldc_id); 3974 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 3975 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 3976 RW_EXIT(&ldcp->lane_out.dlistrw); 3977 ldcp->ldc_stats.oerrors++; 3978 return (LDC_TX_FAILURE); 3979 } 3980 3981 size = msgsize(mp); 3982 if (size > (size_t)ETHERMAX) { 3983 RW_EXIT(&ldcp->lane_out.dlistrw); 3984 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 3985 ldcp->ldc_id, size); 3986 ldcp->ldc_stats.oerrors++; 3987 return (LDC_TX_FAILURE); 3988 } 3989 3990 /* 3991 * Find a free descriptor in our buffer ring 3992 */ 3993 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 3994 RW_EXIT(&ldcp->lane_out.dlistrw); 3995 if (warn_msg) { 3996 DERR(vswp, "%s(%lld): no descriptor available for ring " 3997 "at 0x%llx", __func__, ldcp->ldc_id, dp); 3998 warn_msg = 0; 3999 } 4000 4001 /* nothing more we can do */ 4002 status = LDC_TX_NORESOURCES; 4003 goto vsw_descrsend_free_exit; 4004 } else { 4005 D2(vswp, "%s(%lld): free private descriptor found at pos " 4006 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4007 warn_msg = 1; 4008 } 4009 4010 /* copy data into the descriptor */ 4011 bufp = priv_desc->datap; 4012 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4013 n = MBLKL(bp); 4014 bcopy(bp->b_rptr, bufp, n); 4015 bufp += n; 4016 } 4017 4018 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4019 4020 /* create and send the in-band descp msg */ 4021 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4022 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4023 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4024 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4025 4026 ibnd_msg.hdr.seq_num = atomic_inc_64_nv(&ldcp->lane_out.seq_num); 4027 4028 /* 4029 * Copy the mem cookies describing the data from the 4030 * private region of the descriptor ring into the inband 4031 * descriptor. 
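 *
 * For reference, the in-band descriptor message fields
 * populated in this function:
 *
 *	ibnd_msg.hdr.tag	 - VIO_TYPE_DATA / VIO_SUBTYPE_INFO /
 *				   VIO_DESC_DATA
 *	ibnd_msg.hdr.seq_num	 - next outbound sequence number
 *	ibnd_msg.hdr.desc_handle - index of the private descriptor
 *	ibnd_msg.memcookie[]	 - cookies describing the data
 *	ibnd_msg.ncookies	 - number of valid cookies
 *	ibnd_msg.nbytes		 - length of the packet data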
4032 */
4033 for (i = 0; i < priv_desc->ncookies; i++) {
4034 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i],
4035 sizeof (ldc_mem_cookie_t));
4036 }
4037
4038 ibnd_msg.hdr.desc_handle = idx;
4039 ibnd_msg.ncookies = priv_desc->ncookies;
4040 ibnd_msg.nbytes = size;
4041
4042 ldcp->ldc_stats.opackets++;
4043 ldcp->ldc_stats.obytes += size;
4044
4045 RW_EXIT(&ldcp->lane_out.dlistrw);
4046
4047 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg,
4048 sizeof (vnet_ibnd_desc_t), B_TRUE);
4049
4050 vsw_descrsend_free_exit:
4051
4052 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
4053 return (status);
4054 }
4055
4056 static void
4057 vsw_send_ver(void *arg)
4058 {
4059 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
4060 vsw_t *vswp = ldcp->ldc_vswp;
4061 lane_t *lp = &ldcp->lane_out;
4062 vio_ver_msg_t ver_msg;
4063
4064 D1(vswp, "%s enter", __func__);
4065
4066 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
4067 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
4068 ver_msg.tag.vio_subtype_env = VIO_VER_INFO;
4069 ver_msg.tag.vio_sid = ldcp->local_session;
4070
4071 ver_msg.ver_major = vsw_versions[0].ver_major;
4072 ver_msg.ver_minor = vsw_versions[0].ver_minor;
4073 ver_msg.dev_class = VDEV_NETWORK_SWITCH;
4074
4075 lp->lstate |= VSW_VER_INFO_SENT;
4076 lp->ver_major = ver_msg.ver_major;
4077 lp->ver_minor = ver_msg.ver_minor;
4078
4079 DUMP_TAG(ver_msg.tag);
4080
4081 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE);
4082
4083 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id);
4084 }
4085
4086 static void
4087 vsw_send_attr(vsw_ldc_t *ldcp)
4088 {
4089 vsw_t *vswp = ldcp->ldc_vswp;
4090 lane_t *lp = &ldcp->lane_out;
4091 vnet_attr_msg_t attr_msg;
4092
4093 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
4094
4095 /*
4096 * Subtype is set to INFO by default
4097 */
4098 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
4099 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
4100 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO;
4101 attr_msg.tag.vio_sid = ldcp->local_session;
4102
4103 /* payload copied from default settings for lane */
4104 attr_msg.mtu = lp->mtu;
4105 attr_msg.addr_type = lp->addr_type;
4106 attr_msg.xfer_mode = lp->xfer_mode;
4107 attr_msg.ack_freq = lp->ack_freq;
4108
4109 READ_ENTER(&vswp->if_lockrw);
4110 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet);
4111 RW_EXIT(&vswp->if_lockrw);
4112
4113 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT;
4114
4115 DUMP_TAG(attr_msg.tag);
4116
4117 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE);
4118
4119 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
4120 }
4121
4122 /*
4123 * Create dring info msg (which also results in the creation of
4124 * a dring).
4125 */
4126 static vio_dring_reg_msg_t *
4127 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp)
4128 {
4129 vio_dring_reg_msg_t *mp;
4130 dring_info_t *dp;
4131 vsw_t *vswp = ldcp->ldc_vswp;
4132
4133 D1(vswp, "vsw_create_dring_info_pkt enter\n");
4134
4135 /*
4136 * If we can't create a dring, obviously no point sending
4137 * a message.
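 *
 * The registration exchange this message starts looks roughly
 * like this (the peer side is the expected protocol, not code
 * in this file):
 *
 *	local				peer
 *	VIO_DRING_REG/INFO	-->	map the ring via the
 *					supplied cookie(s)
 *				<--	VIO_DRING_REG/ACK carrying
 *					the dring_ident to use
 *
 * which is why dring_ident is left as 0 below.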
4138 */
4139 if ((dp = vsw_create_dring(ldcp)) == NULL)
4140 return (NULL);
4141
4142 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP);
4143
4144 mp->tag.vio_msgtype = VIO_TYPE_CTRL;
4145 mp->tag.vio_subtype = VIO_SUBTYPE_INFO;
4146 mp->tag.vio_subtype_env = VIO_DRING_REG;
4147 mp->tag.vio_sid = ldcp->local_session;
4148
4149 /* payload */
4150 mp->num_descriptors = dp->num_descriptors;
4151 mp->descriptor_size = dp->descriptor_size;
4152 mp->options = dp->options;
4153 mp->ncookies = dp->ncookies;
4154 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t));
4155
4156 mp->dring_ident = 0;
4157
4158 D1(vswp, "vsw_create_dring_info_pkt exit\n");
4159
4160 return (mp);
4161 }
4162
4163 static void
4164 vsw_send_dring_info(vsw_ldc_t *ldcp)
4165 {
4166 vio_dring_reg_msg_t *dring_msg;
4167 vsw_t *vswp = ldcp->ldc_vswp;
4168
4169 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id);
4170
4171 dring_msg = vsw_create_dring_info_pkt(ldcp);
4172 if (dring_msg == NULL) {
4173 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg",
4174 vswp->instance, __func__);
4175 return;
4176 }
4177
4178 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT;
4179
4180 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg);
4181
4182 (void) vsw_send_msg(ldcp, dring_msg,
4183 sizeof (vio_dring_reg_msg_t), B_TRUE);
4184
4185 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t));
4186
4187 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id);
4188 }
4189
4190 static void
4191 vsw_send_rdx(vsw_ldc_t *ldcp)
4192 {
4193 vsw_t *vswp = ldcp->ldc_vswp;
4194 vio_rdx_msg_t rdx_msg;
4195
4196 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
4197
4198 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
4199 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
4200 rdx_msg.tag.vio_subtype_env = VIO_RDX;
4201 rdx_msg.tag.vio_sid = ldcp->local_session;
4202
4203 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT;
4204
4205 DUMP_TAG(rdx_msg.tag);
4206
4207 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE);
4208
4209 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
4210 }
4211
4212 /*
4213 * Generic routine to send message out over ldc channel.
4214 *
4215 * It is possible that when we attempt to write over the ldc channel
4216 * that we get notified that it has been reset. Depending on the value
4217 * of the handle_reset flag we either handle that event here or simply
4218 * notify the caller that the channel was reset.
4219 */
4220 static int
4221 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset)
4222 {
4223 int rv, retries = vsw_wretries;
4224 size_t msglen = size;
4225 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp;
4226 vsw_t *vswp = ldcp->ldc_vswp;
4227
4228 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes",
4229 ldcp->ldc_id, size);
4230
4231 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype);
4232 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype);
4233 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env);
4234
4235 mutex_enter(&ldcp->ldc_txlock);
4236 do {
4237 msglen = size;
4238 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen);
4239 } while (rv == EWOULDBLOCK && --retries > 0);
4240
4241 if ((rv != 0) || (msglen != size)) {
4242 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) "
4243 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen);
4244 ldcp->ldc_stats.oerrors++;
4245 }
4246 mutex_exit(&ldcp->ldc_txlock);
4247
4248 /*
4249 * If channel has been reset we either handle it here or
4250 * simply report back that it has been reset and let caller
4251 * decide what to do.
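 *
 * Illustrative call pattern for a caller that holds dlistrw and
 * so must not let the reset be handled in here:
 *
 *	rv = vsw_send_msg(ldcp, &msg, sizeof (msg), B_FALSE);
 *	RW_EXIT(&ldcp->lane_out.dlistrw);
 *	if (rv == ECONNRESET)
 *		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);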
4252 */ 4253 if (rv == ECONNRESET) { 4254 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 4255 4256 /* 4257 * N.B - must never be holding the dlistrw lock when 4258 * we do a reset of the channel. 4259 */ 4260 if (handle_reset) { 4261 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 4262 } 4263 } 4264 4265 return (rv); 4266 } 4267 4268 /* 4269 * Remove the specified address from the list of address maintained 4270 * in this port node. 4271 */ 4272 mcst_addr_t * 4273 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 4274 { 4275 vsw_t *vswp = NULL; 4276 vsw_port_t *port = NULL; 4277 mcst_addr_t *prev_p = NULL; 4278 mcst_addr_t *curr_p = NULL; 4279 4280 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 4281 __func__, devtype, addr); 4282 4283 if (devtype == VSW_VNETPORT) { 4284 port = (vsw_port_t *)arg; 4285 mutex_enter(&port->mca_lock); 4286 prev_p = curr_p = port->mcap; 4287 } else { 4288 vswp = (vsw_t *)arg; 4289 mutex_enter(&vswp->mca_lock); 4290 prev_p = curr_p = vswp->mcap; 4291 } 4292 4293 while (curr_p != NULL) { 4294 if (curr_p->addr == addr) { 4295 D2(NULL, "%s: address found", __func__); 4296 /* match found */ 4297 if (prev_p == curr_p) { 4298 /* list head */ 4299 if (devtype == VSW_VNETPORT) 4300 port->mcap = curr_p->nextp; 4301 else 4302 vswp->mcap = curr_p->nextp; 4303 } else { 4304 prev_p->nextp = curr_p->nextp; 4305 } 4306 break; 4307 } else { 4308 prev_p = curr_p; 4309 curr_p = curr_p->nextp; 4310 } 4311 } 4312 4313 if (devtype == VSW_VNETPORT) 4314 mutex_exit(&port->mca_lock); 4315 else 4316 mutex_exit(&vswp->mca_lock); 4317 4318 D1(NULL, "%s: exit", __func__); 4319 4320 return (curr_p); 4321 } 4322 4323 /* 4324 * Creates a descriptor ring (dring) and links it into the 4325 * link of outbound drings for this channel. 4326 * 4327 * Returns NULL if creation failed. 4328 */ 4329 static dring_info_t * 4330 vsw_create_dring(vsw_ldc_t *ldcp) 4331 { 4332 vsw_private_desc_t *priv_addr = NULL; 4333 vsw_t *vswp = ldcp->ldc_vswp; 4334 ldc_mem_info_t minfo; 4335 dring_info_t *dp, *tp; 4336 int i; 4337 4338 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4339 4340 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4341 4342 /* create public section of ring */ 4343 if ((ldc_mem_dring_create(VSW_RING_NUM_EL, 4344 VSW_PUB_SIZE, &dp->handle)) != 0) { 4345 4346 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 4347 "failed", ldcp->ldc_id); 4348 goto create_fail_exit; 4349 } 4350 4351 ASSERT(dp->handle != NULL); 4352 4353 /* 4354 * Get the base address of the public section of the ring. 
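 *
 * ldc_mem_dring_info() fills in an ldc_mem_info_t; only the
 * virtual address is used here, becoming pub_addr, the base of
 * the public descriptors exported to the peer.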
4355 */ 4356 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 4357 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 4358 ldcp->ldc_id); 4359 goto dring_fail_exit; 4360 } else { 4361 ASSERT(minfo.vaddr != 0); 4362 dp->pub_addr = minfo.vaddr; 4363 } 4364 4365 dp->num_descriptors = VSW_RING_NUM_EL; 4366 dp->descriptor_size = VSW_PUB_SIZE; 4367 dp->options = VIO_TX_DRING; 4368 dp->ncookies = 1; /* guaranteed by ldc */ 4369 4370 /* 4371 * create private portion of ring 4372 */ 4373 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 4374 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP); 4375 4376 if (vsw_setup_ring(ldcp, dp)) { 4377 DERR(vswp, "%s: unable to setup ring", __func__); 4378 goto dring_fail_exit; 4379 } 4380 4381 /* haven't used any descriptors yet */ 4382 dp->end_idx = 0; 4383 dp->last_ack_recv = -1; 4384 4385 /* bind dring to the channel */ 4386 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 4387 LDC_SHADOW_MAP, LDC_MEM_RW, 4388 &dp->cookie[0], &dp->ncookies)) != 0) { 4389 DERR(vswp, "vsw_create_dring: unable to bind to channel " 4390 "%lld", ldcp->ldc_id); 4391 goto dring_fail_exit; 4392 } 4393 4394 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4395 dp->restart_reqd = B_TRUE; 4396 4397 /* 4398 * Only ever create rings for outgoing lane. Link it onto 4399 * end of list. 4400 */ 4401 WRITE_ENTER(&ldcp->lane_out.dlistrw); 4402 if (ldcp->lane_out.dringp == NULL) { 4403 D2(vswp, "vsw_create_dring: adding first outbound ring"); 4404 ldcp->lane_out.dringp = dp; 4405 } else { 4406 tp = ldcp->lane_out.dringp; 4407 while (tp->next != NULL) 4408 tp = tp->next; 4409 4410 tp->next = dp; 4411 } 4412 RW_EXIT(&ldcp->lane_out.dlistrw); 4413 4414 return (dp); 4415 4416 dring_fail_exit: 4417 (void) ldc_mem_dring_destroy(dp->handle); 4418 4419 create_fail_exit: 4420 if (dp->priv_addr != NULL) { 4421 priv_addr = dp->priv_addr; 4422 for (i = 0; i < VSW_RING_NUM_EL; i++) { 4423 if (priv_addr->memhandle != NULL) 4424 (void) ldc_mem_free_handle( 4425 priv_addr->memhandle); 4426 priv_addr++; 4427 } 4428 kmem_free(dp->priv_addr, 4429 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL)); 4430 } 4431 mutex_destroy(&dp->dlock); 4432 4433 kmem_free(dp, sizeof (dring_info_t)); 4434 return (NULL); 4435 } 4436 4437 /* 4438 * Create a ring consisting of just a private portion and link 4439 * it into the list of rings for the outbound lane. 4440 * 4441 * These type of rings are used primarily for temporary data 4442 * storage (i.e. as data buffers). 4443 */ 4444 void 4445 vsw_create_privring(vsw_ldc_t *ldcp) 4446 { 4447 dring_info_t *dp, *tp; 4448 vsw_t *vswp = ldcp->ldc_vswp; 4449 4450 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4451 4452 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4453 4454 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4455 4456 /* no public section */ 4457 dp->pub_addr = NULL; 4458 4459 dp->priv_addr = kmem_zalloc( 4460 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP); 4461 4462 dp->num_descriptors = VSW_RING_NUM_EL; 4463 4464 if (vsw_setup_ring(ldcp, dp)) { 4465 DERR(vswp, "%s: setup of ring failed", __func__); 4466 kmem_free(dp->priv_addr, 4467 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL)); 4468 mutex_destroy(&dp->dlock); 4469 kmem_free(dp, sizeof (dring_info_t)); 4470 return; 4471 } 4472 4473 /* haven't used any descriptors yet */ 4474 dp->end_idx = 0; 4475 4476 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4477 dp->restart_reqd = B_TRUE; 4478 4479 /* 4480 * Only ever create rings for outgoing lane. 
Link it onto 4481 * end of list. 4482 */ 4483 WRITE_ENTER(&ldcp->lane_out.dlistrw); 4484 if (ldcp->lane_out.dringp == NULL) { 4485 D2(vswp, "%s: adding first outbound privring", __func__); 4486 ldcp->lane_out.dringp = dp; 4487 } else { 4488 tp = ldcp->lane_out.dringp; 4489 while (tp->next != NULL) 4490 tp = tp->next; 4491 4492 tp->next = dp; 4493 } 4494 RW_EXIT(&ldcp->lane_out.dlistrw); 4495 4496 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4497 } 4498 4499 /* 4500 * Setup the descriptors in the dring. Returns 0 on success, 1 on 4501 * failure. 4502 */ 4503 int 4504 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 4505 { 4506 vnet_public_desc_t *pub_addr = NULL; 4507 vsw_private_desc_t *priv_addr = NULL; 4508 vsw_t *vswp = ldcp->ldc_vswp; 4509 uint64_t *tmpp; 4510 uint64_t offset = 0; 4511 uint32_t ncookies = 0; 4512 static char *name = "vsw_setup_ring"; 4513 int i, j, nc, rv; 4514 4515 priv_addr = dp->priv_addr; 4516 pub_addr = dp->pub_addr; 4517 4518 /* public section may be null but private should never be */ 4519 ASSERT(priv_addr != NULL); 4520 4521 /* 4522 * Allocate the region of memory which will be used to hold 4523 * the data the descriptors will refer to. 4524 */ 4525 dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ); 4526 dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 4527 4528 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 4529 dp->data_sz, dp->data_addr); 4530 4531 tmpp = (uint64_t *)dp->data_addr; 4532 offset = VSW_RING_EL_DATA_SZ / sizeof (tmpp); 4533 4534 /* 4535 * Initialise some of the private and public (if they exist) 4536 * descriptor fields. 4537 */ 4538 for (i = 0; i < VSW_RING_NUM_EL; i++) { 4539 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 4540 4541 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 4542 &priv_addr->memhandle)) != 0) { 4543 DERR(vswp, "%s: alloc mem handle failed", name); 4544 goto setup_ring_cleanup; 4545 } 4546 4547 priv_addr->datap = (void *)tmpp; 4548 4549 rv = ldc_mem_bind_handle(priv_addr->memhandle, 4550 (caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ, 4551 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 4552 &(priv_addr->memcookie[0]), &ncookies); 4553 if (rv != 0) { 4554 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 4555 "(rv %d)", name, ldcp->ldc_id, rv); 4556 goto setup_ring_cleanup; 4557 } 4558 priv_addr->bound = 1; 4559 4560 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 4561 name, i, priv_addr->memcookie[0].addr, 4562 priv_addr->memcookie[0].size); 4563 4564 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 4565 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 4566 "invalid num of cookies (%d) for size 0x%llx", 4567 name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ); 4568 4569 goto setup_ring_cleanup; 4570 } else { 4571 for (j = 1; j < ncookies; j++) { 4572 rv = ldc_mem_nextcookie(priv_addr->memhandle, 4573 &(priv_addr->memcookie[j])); 4574 if (rv != 0) { 4575 DERR(vswp, "%s: ldc_mem_nextcookie " 4576 "failed rv (%d)", name, rv); 4577 goto setup_ring_cleanup; 4578 } 4579 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 4580 "size 0x%llx", name, j, 4581 priv_addr->memcookie[j].addr, 4582 priv_addr->memcookie[j].size); 4583 } 4584 4585 } 4586 priv_addr->ncookies = ncookies; 4587 priv_addr->dstate = VIO_DESC_FREE; 4588 4589 if (pub_addr != NULL) { 4590 4591 /* link pub and private sides */ 4592 priv_addr->descp = pub_addr; 4593 4594 pub_addr->ncookies = priv_addr->ncookies; 4595 4596 for (nc = 0; nc < pub_addr->ncookies; nc++) { 4597 bcopy(&priv_addr->memcookie[nc], 4598 &pub_addr->memcookie[nc], 
4599 sizeof (ldc_mem_cookie_t));
4600 }
4601
4602 pub_addr->hdr.dstate = VIO_DESC_FREE;
4603 pub_addr++;
4604 }
4605
4606 /*
4607 * move to next element in the dring and the next
4608 * position in the data buffer.
4609 */
4610 priv_addr++;
4611 tmpp += offset;
4612 }
4613
4614 return (0);
4615
4616 setup_ring_cleanup:
4617 priv_addr = dp->priv_addr;
4618
4619 for (j = 0; j < i; j++) {
4620 (void) ldc_mem_unbind_handle(priv_addr->memhandle);
4621 (void) ldc_mem_free_handle(priv_addr->memhandle);
4622 priv_addr->memhandle = NULL; /* avoid double free by caller's cleanup */
4623 mutex_destroy(&priv_addr->dstate_lock);
4624
4625 priv_addr++;
4626 }
4627 kmem_free(dp->data_addr, dp->data_sz);
4628
4629 return (1);
4630 }
4631
4632 /*
4633 * Searches the private section of a ring for a free descriptor,
4634 * starting at the location of the last free descriptor found
4635 * previously.
4636 *
4637 * Returns 0 if a free descriptor is available, and updates the state
4638 * of the private descriptor to VIO_DESC_READY, otherwise returns 1.
4639 *
4640 * FUTURE: might need to return contiguous range of descriptors
4641 * as dring info msg assumes all will be contiguous.
4642 */
4643 static int
4644 vsw_dring_find_free_desc(dring_info_t *dringp,
4645 vsw_private_desc_t **priv_p, int *idx)
4646 {
4647 vsw_private_desc_t *addr = NULL;
4648 int num = VSW_RING_NUM_EL;
4649 int ret = 1;
4650
4651 D1(NULL, "%s enter\n", __func__);
4652
4653 ASSERT(dringp->priv_addr != NULL);
4654
4655 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld",
4656 __func__, dringp, dringp->end_idx);
4657
4658 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx;
4659
4660 mutex_enter(&addr->dstate_lock);
4661 if (addr->dstate == VIO_DESC_FREE) {
4662 addr->dstate = VIO_DESC_READY;
4663 *priv_p = addr;
4664 *idx = dringp->end_idx;
4665 dringp->end_idx = (dringp->end_idx + 1) % num;
4666 ret = 0;
4667
4668 }
4669 mutex_exit(&addr->dstate_lock);
4670
4671 /* ring full */
4672 if (ret == 1) {
4673 D2(NULL, "%s: no descriptors free: started at %d", __func__,
4674 dringp->end_idx);
4675 }
4676
4677 D1(NULL, "%s: exit\n", __func__);
4678
4679 return (ret);
4680 }
4681
4682 /*
4683 * Map from a dring identifier to the ring itself. Returns
4684 * pointer to ring or NULL if no match found.
4685 *
4686 * Should be called with dlistrw rwlock held as reader.
4687 */
4688 static dring_info_t *
4689 vsw_ident2dring(lane_t *lane, uint64_t ident)
4690 {
4691 dring_info_t *dp = NULL;
4692
4693 if ((dp = lane->dringp) == NULL) {
4694 return (NULL);
4695 } else {
4696 if (dp->ident == ident)
4697 return (dp);
4698
4699 while (dp != NULL) {
4700 if (dp->ident == ident)
4701 break;
4702 dp = dp->next;
4703 }
4704 }
4705
4706 return (dp);
4707 }
4708
4709 /*
4710 * Set the default lane attributes. These are copied into
4711 * the attr msg we send to our peer. If they are not acceptable
4712 * then (currently) the handshake ends.
4713 */
4714 static void
4715 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp)
4716 {
4717 bzero(lp, sizeof (lane_t));
4718
4719 READ_ENTER(&vswp->if_lockrw);
4720 ether_copy(&(vswp->if_addr), &(lp->addr));
4721 RW_EXIT(&vswp->if_lockrw);
4722
4723 lp->mtu = VSW_MTU;
4724 lp->addr_type = ADDR_TYPE_MAC;
4725 lp->xfer_mode = VIO_DRING_MODE;
4726 lp->ack_freq = 0; /* for shared mode */
4727
4728 /*
4729 * As the seq_num is incremented before sending,
4730 * initialize it with VNET_ISS - 1.
4731 */
4732 atomic_swap_64(&lp->seq_num, (VNET_ISS - 1));
4733 }
4734
4735 /*
4736 * Verify that the attributes are acceptable.
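 *
 * For reference, an attribute set that passes all the checks
 * below matches the defaults from vsw_set_lane_attr():
 *
 *	mtu	  - non-zero and no larger than VSW_MTU
 *	addr_type - ADDR_TYPE_MAC, with a non-zero address
 *	xfer_mode - VIO_DRING_MODE or VIO_DESC_MODE
 *	ack_freq  - 0 whenever xfer_mode is VIO_DRING_MODE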
4737 *
4738 * FUTURE: If some attributes are not acceptable, change them
4739 * to our desired values.
4740 */
4741 static int
4742 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port)
4743 {
4744 int ret = 0;
4745 struct ether_addr ea;
4746
4747 D1(NULL, "vsw_check_attr enter\n");
4748
4749 /*
4750 * Note we currently only support in-band descriptors
4751 * and descriptor rings, not packet based transfer (VIO_PKT_MODE)
4752 */
4753 if ((pkt->xfer_mode != VIO_DESC_MODE) &&
4754 (pkt->xfer_mode != VIO_DRING_MODE)) {
4755 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode);
4756 ret = 1;
4757 }
4758
4759 /* Only support MAC addresses at the moment. */
4760 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) {
4761 D2(NULL, "vsw_check_attr: invalid addr_type %x, "
4762 "or address 0x%llx\n", pkt->addr_type, pkt->addr);
4763 ret = 1;
4764 }
4765
4766 /*
4767 * MAC address supplied by device should match that stored
4768 * in the vsw-port OBP node. Need to decide what to do if they
4769 * don't match, for the moment just warn but don't fail.
4770 */
4771 vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet);
4772 if (ether_cmp(&ea, &port->p_macaddr) != 0) {
4773 DERR(NULL, "vsw_check_attr: device supplied address "
4774 "0x%llx doesn't match node address 0x%llx\n",
4775 pkt->addr, port->p_macaddr);
4776 }
4777
4778 /*
4779 * Ack freq only makes sense in pkt mode, in shared
4780 * mode the ring descriptors say whether or not to
4781 * send back an ACK.
4782 */
4783 if ((pkt->xfer_mode == VIO_DRING_MODE) &&
4784 (pkt->ack_freq > 0)) {
4785 D2(NULL, "vsw_check_attr: non-zero ack freq "
4786 "in SHM mode\n");
4787 ret = 1;
4788 }
4789
4790 /*
4791 * Note: for the moment we only support ETHER
4792 * frames. This may change in the future.
4793 */
4794 if ((pkt->mtu > VSW_MTU) || (pkt->mtu == 0)) {
4795 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n",
4796 pkt->mtu);
4797 ret = 1;
4798 }
4799
4800 D1(NULL, "vsw_check_attr exit\n");
4801
4802 return (ret);
4803 }
4804
4805 /*
4806 * Returns 1 if there is a problem, 0 otherwise.
4807 */
4808 static int
4809 vsw_check_dring_info(vio_dring_reg_msg_t *pkt)
4810 {
4811
4812
4813 int ret = 0;
4814
4815 D1(NULL, "vsw_check_dring_info enter\n");
4816
4817 if ((pkt->num_descriptors == 0) ||
4818 (pkt->descriptor_size == 0) ||
4819 (pkt->ncookies != 1)) {
4820 DERR(NULL, "vsw_check_dring_info: invalid dring msg");
4821 ret = 1;
4822 }
4823
4824 D1(NULL, "vsw_check_dring_info exit\n");
4825
4826 return (ret);
4827 }
4828
4829 /*
4830 * Returns 1 if two memory cookies match. Otherwise returns 0.
4831 */
4832 static int
4833 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2)
4834 {
4835 if ((m1->addr != m2->addr) ||
4836 (m1->size != m2->size)) {
4837 return (0);
4838 } else {
4839 return (1);
4840 }
4841 }
4842
4843 /*
4844 * Returns 1 if ring described in reg message matches that
4845 * described by dring_info structure. Otherwise returns 0.
4846 */ 4847 static int 4848 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 4849 { 4850 if ((msg->descriptor_size != dp->descriptor_size) || 4851 (msg->num_descriptors != dp->num_descriptors) || 4852 (msg->ncookies != dp->ncookies) || 4853 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 4854 return (0); 4855 } else { 4856 return (1); 4857 } 4858 4859 } 4860 4861 static caddr_t 4862 vsw_print_ethaddr(uint8_t *a, char *ebuf) 4863 { 4864 (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x", 4865 a[0], a[1], a[2], a[3], a[4], a[5]); 4866 return (ebuf); 4867 } 4868 4869 /* 4870 * Reset and free all the resources associated with 4871 * the channel. 4872 */ 4873 static void 4874 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 4875 { 4876 dring_info_t *dp, *dpp; 4877 lane_t *lp = NULL; 4878 int rv = 0; 4879 4880 ASSERT(ldcp != NULL); 4881 4882 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 4883 4884 if (dir == INBOUND) { 4885 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 4886 " of channel %lld", __func__, ldcp->ldc_id); 4887 lp = &ldcp->lane_in; 4888 } else { 4889 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 4890 " of channel %lld", __func__, ldcp->ldc_id); 4891 lp = &ldcp->lane_out; 4892 } 4893 4894 lp->lstate = VSW_LANE_INACTIV; 4895 4896 /* 4897 * As the seq_num is incremented before sending, 4898 * initialize it with VNET_ISS - 1. 4899 */ 4900 atomic_swap_64(&lp->seq_num, (VNET_ISS - 1)); 4901 4902 if (lp->dringp) { 4903 if (dir == INBOUND) { 4904 WRITE_ENTER(&lp->dlistrw); 4905 dp = lp->dringp; 4906 while (dp != NULL) { 4907 dpp = dp->next; 4908 if (dp->handle != NULL) 4909 (void) ldc_mem_dring_unmap(dp->handle); 4910 kmem_free(dp, sizeof (dring_info_t)); 4911 dp = dpp; 4912 } 4913 RW_EXIT(&lp->dlistrw); 4914 } else { 4915 /* 4916 * unbind, destroy exported dring, free dring struct 4917 */ 4918 WRITE_ENTER(&lp->dlistrw); 4919 dp = lp->dringp; 4920 rv = vsw_free_ring(dp); 4921 RW_EXIT(&lp->dlistrw); 4922 } 4923 if (rv == 0) { 4924 lp->dringp = NULL; 4925 } 4926 } 4927 4928 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 4929 } 4930 4931 /* 4932 * Free ring and all associated resources. 4933 * 4934 * Should be called with dlistrw rwlock held as writer. 4935 */ 4936 static int 4937 vsw_free_ring(dring_info_t *dp) 4938 { 4939 vsw_private_desc_t *paddr = NULL; 4940 dring_info_t *dpp; 4941 int i, rv = 1; 4942 4943 while (dp != NULL) { 4944 mutex_enter(&dp->dlock); 4945 dpp = dp->next; 4946 if (dp->priv_addr != NULL) { 4947 /* 4948 * First unbind and free the memory handles 4949 * stored in each descriptor within the ring. 4950 */ 4951 for (i = 0; i < VSW_RING_NUM_EL; i++) { 4952 paddr = (vsw_private_desc_t *) 4953 dp->priv_addr + i; 4954 if (paddr->memhandle != NULL) { 4955 if (paddr->bound == 1) { 4956 rv = ldc_mem_unbind_handle( 4957 paddr->memhandle); 4958 4959 if (rv != 0) { 4960 DERR(NULL, "error " 4961 "unbinding handle for " 4962 "ring 0x%llx at pos %d", 4963 dp, i); 4964 mutex_exit(&dp->dlock); 4965 return (rv); 4966 } 4967 paddr->bound = 0; 4968 } 4969 4970 rv = ldc_mem_free_handle( 4971 paddr->memhandle); 4972 if (rv != 0) { 4973 DERR(NULL, "error freeing " 4974 "handle for ring 0x%llx " 4975 "at pos %d", dp, i); 4976 mutex_exit(&dp->dlock); 4977 return (rv); 4978 } 4979 paddr->memhandle = NULL; 4980 } 4981 mutex_destroy(&paddr->dstate_lock); 4982 } 4983 kmem_free(dp->priv_addr, 4984 (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL)); 4985 } 4986 4987 /* 4988 * Now unbind and destroy the ring itself. 
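 *
 * Teardown mirrors creation in reverse: the ring is unbound
 * from the channel before ldc_mem_dring_destroy() releases it,
 * matching the create/bind ordering in vsw_create_dring().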
4989 */ 4990 if (dp->handle != NULL) { 4991 (void) ldc_mem_dring_unbind(dp->handle); 4992 (void) ldc_mem_dring_destroy(dp->handle); 4993 } 4994 4995 if (dp->data_addr != NULL) { 4996 kmem_free(dp->data_addr, dp->data_sz); 4997 } 4998 4999 mutex_exit(&dp->dlock); 5000 mutex_destroy(&dp->dlock); 5001 mutex_destroy(&dp->restart_lock); 5002 kmem_free(dp, sizeof (dring_info_t)); 5003 5004 dp = dpp; 5005 } 5006 return (0); 5007 } 5008 5009 /* 5010 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 5011 * This thread is woken up by the LDC interrupt handler to process 5012 * LDC packets and receive data. 5013 */ 5014 static void 5015 vsw_ldc_rx_worker(void *arg) 5016 { 5017 callb_cpr_t cprinfo; 5018 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5019 vsw_t *vswp = ldcp->ldc_vswp; 5020 5021 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5022 CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr, 5023 "vsw_rx_thread"); 5024 mutex_enter(&ldcp->rx_thr_lock); 5025 ldcp->rx_thr_flags |= VSW_WTHR_RUNNING; 5026 while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) { 5027 5028 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5029 /* 5030 * Wait until the data is received or a stop 5031 * request is received. 5032 */ 5033 while (!(ldcp->rx_thr_flags & 5034 (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) { 5035 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5036 } 5037 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock) 5038 5039 /* 5040 * First process the stop request. 5041 */ 5042 if (ldcp->rx_thr_flags & VSW_WTHR_STOP) { 5043 D2(vswp, "%s(%lld):Rx thread stopped\n", 5044 __func__, ldcp->ldc_id); 5045 break; 5046 } 5047 ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD; 5048 mutex_exit(&ldcp->rx_thr_lock); 5049 D1(vswp, "%s(%lld):calling vsw_process_pkt\n", 5050 __func__, ldcp->ldc_id); 5051 mutex_enter(&ldcp->ldc_cblock); 5052 vsw_process_pkt(ldcp); 5053 mutex_exit(&ldcp->ldc_cblock); 5054 mutex_enter(&ldcp->rx_thr_lock); 5055 } 5056 5057 /* 5058 * Update the run status and wakeup the thread that 5059 * has sent the stop request. 5060 */ 5061 ldcp->rx_thr_flags &= ~VSW_WTHR_RUNNING; 5062 cv_signal(&ldcp->rx_thr_cv); 5063 CALLB_CPR_EXIT(&cprinfo); 5064 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5065 thread_exit(); 5066 } 5067 5068 /* vsw_stop_rx_thread -- Co-ordinate with receive thread to stop it */ 5069 static void 5070 vsw_stop_rx_thread(vsw_ldc_t *ldcp) 5071 { 5072 vsw_t *vswp = ldcp->ldc_vswp; 5073 5074 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5075 /* 5076 * Send a stop request by setting the stop flag and 5077 * wait until the receive thread stops. 5078 */ 5079 mutex_enter(&ldcp->rx_thr_lock); 5080 if (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5081 ldcp->rx_thr_flags |= VSW_WTHR_STOP; 5082 cv_signal(&ldcp->rx_thr_cv); 5083 while (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5084 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5085 } 5086 } 5087 mutex_exit(&ldcp->rx_thr_lock); 5088 ldcp->rx_thread = NULL; 5089 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5090 } 5091 5092 /* 5093 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data. 5094 * This thread is woken up by the vsw_portsend to transmit 5095 * packets. 
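 *
 * Producer side, as this worker consumes it: vsw_portsend is
 * expected to append packets to the tx_mhead/tx_mtail chain
 * under tx_thr_lock and then cv_signal(&ldcp->tx_thr_cv) to
 * wake this thread.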
5096 */
5097 static void
5098 vsw_ldc_tx_worker(void *arg)
5099 {
5100 callb_cpr_t cprinfo;
5101 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
5102 vsw_t *vswp = ldcp->ldc_vswp;
5103 mblk_t *mp;
5104 mblk_t *tmp;
5105
5106 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
5107 CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr,
5108 "vsw_tx_thread");
5109 mutex_enter(&ldcp->tx_thr_lock);
5110 ldcp->tx_thr_flags |= VSW_WTHR_RUNNING;
5111 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) {
5112
5113 CALLB_CPR_SAFE_BEGIN(&cprinfo);
5114 /*
5115 * Wait until there is data to transmit or a stop
5116 * request is received.
5117 */
5118 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) &&
5119 (ldcp->tx_mhead == NULL)) {
5120 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock);
5121 }
5122 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock)
5123
5124 /*
5125 * First process the stop request.
5126 */
5127 if (ldcp->tx_thr_flags & VSW_WTHR_STOP) {
5128 D2(vswp, "%s(%lld):tx thread stopped\n",
5129 __func__, ldcp->ldc_id);
5130 break;
5131 }
5132 mp = ldcp->tx_mhead;
5133 ldcp->tx_mhead = ldcp->tx_mtail = NULL;
5134 mutex_exit(&ldcp->tx_thr_lock);
5135 D2(vswp, "%s(%lld):calling vsw_ldcsend\n",
5136 __func__, ldcp->ldc_id);
5137 while (mp != NULL) {
5138 tmp = mp->b_next;
5139 mp->b_next = mp->b_prev = NULL;
5140 (void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries);
5141 mp = tmp;
5142 }
5143 mutex_enter(&ldcp->tx_thr_lock);
5144 }
5145
5146 /*
5147 * Update the run status and wakeup the thread that
5148 * has sent the stop request.
5149 */
5150 ldcp->tx_thr_flags &= ~VSW_WTHR_RUNNING;
5151 cv_signal(&ldcp->tx_thr_cv);
5152 CALLB_CPR_EXIT(&cprinfo);
5153 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
5154 thread_exit();
5155 }
5156
5157 /* vsw_stop_tx_thread -- Co-ordinate with transmit thread to stop it */
5158 static void
5159 vsw_stop_tx_thread(vsw_ldc_t *ldcp)
5160 {
5161 vsw_t *vswp = ldcp->ldc_vswp;
5162
5163 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
5164 /*
5165 * Send a stop request by setting the stop flag and
5166 * wait until the transmit thread stops.
5167 */ 5168 mutex_enter(&ldcp->tx_thr_lock); 5169 if (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5170 ldcp->tx_thr_flags |= VSW_WTHR_STOP; 5171 cv_signal(&ldcp->tx_thr_cv); 5172 while (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5173 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5174 } 5175 } 5176 mutex_exit(&ldcp->tx_thr_lock); 5177 ldcp->tx_thread = NULL; 5178 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5179 } 5180 5181 /* vsw_reclaim_dring -- reclaim descriptors */ 5182 static int 5183 vsw_reclaim_dring(dring_info_t *dp, int start) 5184 { 5185 int i, j, len; 5186 vsw_private_desc_t *priv_addr; 5187 vnet_public_desc_t *pub_addr; 5188 5189 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 5190 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 5191 len = dp->num_descriptors; 5192 5193 D2(NULL, "%s: start index %ld\n", __func__, start); 5194 5195 j = 0; 5196 for (i = start; j < len; i = (i + 1) % len, j++) { 5197 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 5198 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 5199 5200 mutex_enter(&priv_addr->dstate_lock); 5201 if (pub_addr->hdr.dstate != VIO_DESC_DONE) { 5202 mutex_exit(&priv_addr->dstate_lock); 5203 break; 5204 } 5205 pub_addr->hdr.dstate = VIO_DESC_FREE; 5206 priv_addr->dstate = VIO_DESC_FREE; 5207 /* clear all the fields */ 5208 priv_addr->datalen = 0; 5209 pub_addr->hdr.ack = 0; 5210 mutex_exit(&priv_addr->dstate_lock); 5211 5212 D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx", 5213 i, pub_addr->hdr.dstate, priv_addr->dstate); 5214 } 5215 return (j); 5216 } 5217 5218 /* 5219 * Debugging routines 5220 */ 5221 static void 5222 display_state(void) 5223 { 5224 vsw_t *vswp; 5225 vsw_port_list_t *plist; 5226 vsw_port_t *port; 5227 vsw_ldc_list_t *ldcl; 5228 vsw_ldc_t *ldcp; 5229 extern vsw_t *vsw_head; 5230 5231 cmn_err(CE_NOTE, "***** system state *****"); 5232 5233 for (vswp = vsw_head; vswp; vswp = vswp->next) { 5234 plist = &vswp->plist; 5235 READ_ENTER(&plist->lockrw); 5236 cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n", 5237 vswp->instance, plist->num_ports); 5238 5239 for (port = plist->head; port != NULL; port = port->p_next) { 5240 ldcl = &port->p_ldclist; 5241 cmn_err(CE_CONT, "port %d : %d ldcs attached\n", 5242 port->p_instance, ldcl->num_ldcs); 5243 READ_ENTER(&ldcl->lockrw); 5244 ldcp = ldcl->head; 5245 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 5246 cmn_err(CE_CONT, "chan %lu : dev %d : " 5247 "status %d : phase %u\n", 5248 ldcp->ldc_id, ldcp->dev_class, 5249 ldcp->ldc_status, ldcp->hphase); 5250 cmn_err(CE_CONT, "chan %lu : lsession %lu : " 5251 "psession %lu\n", ldcp->ldc_id, 5252 ldcp->local_session, ldcp->peer_session); 5253 5254 cmn_err(CE_CONT, "Inbound lane:\n"); 5255 display_lane(&ldcp->lane_in); 5256 cmn_err(CE_CONT, "Outbound lane:\n"); 5257 display_lane(&ldcp->lane_out); 5258 } 5259 RW_EXIT(&ldcl->lockrw); 5260 } 5261 RW_EXIT(&plist->lockrw); 5262 } 5263 cmn_err(CE_NOTE, "***** system state *****"); 5264 } 5265 5266 static void 5267 display_lane(lane_t *lp) 5268 { 5269 dring_info_t *drp; 5270 5271 cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n", 5272 lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu); 5273 cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n", 5274 lp->addr_type, lp->addr, lp->xfer_mode); 5275 cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp); 5276 5277 cmn_err(CE_CONT, "Dring info:\n"); 5278 for (drp = lp->dringp; drp != NULL; drp = drp->next) { 5279 cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n", 5280 drp->num_descriptors, 
drp->descriptor_size); 5281 cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle); 5282 cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n", 5283 (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr); 5284 cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n", 5285 drp->ident, drp->end_idx); 5286 display_ring(drp); 5287 } 5288 } 5289 5290 static void 5291 display_ring(dring_info_t *dringp) 5292 { 5293 uint64_t i; 5294 uint64_t priv_count = 0; 5295 uint64_t pub_count = 0; 5296 vnet_public_desc_t *pub_addr = NULL; 5297 vsw_private_desc_t *priv_addr = NULL; 5298 5299 for (i = 0; i < VSW_RING_NUM_EL; i++) { 5300 if (dringp->pub_addr != NULL) { 5301 pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i; 5302 5303 if (pub_addr->hdr.dstate == VIO_DESC_FREE) 5304 pub_count++; 5305 } 5306 5307 if (dringp->priv_addr != NULL) { 5308 priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i; 5309 5310 if (priv_addr->dstate == VIO_DESC_FREE) 5311 priv_count++; 5312 } 5313 } 5314 cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n", 5315 i, priv_count, pub_count); 5316 } 5317 5318 static void 5319 dump_flags(uint64_t state) 5320 { 5321 int i; 5322 5323 typedef struct flag_name { 5324 int flag_val; 5325 char *flag_name; 5326 } flag_name_t; 5327 5328 flag_name_t flags[] = { 5329 VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT", 5330 VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV", 5331 VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV", 5332 VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT", 5333 VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV", 5334 VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT", 5335 VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT", 5336 VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV", 5337 VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT", 5338 VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV", 5339 VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT", 5340 VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV", 5341 VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT", 5342 VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV", 5343 VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT", 5344 VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV", 5345 VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT", 5346 VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV", 5347 VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT", 5348 VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV", 5349 VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT", 5350 VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV", 5351 VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT", 5352 VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV", 5353 VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT", 5354 VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV", 5355 VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT", 5356 VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV", 5357 VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT", 5358 VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV", 5359 VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"}; 5360 5361 DERR(NULL, "DUMP_FLAGS: %llx\n", state); 5362 for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) { 5363 if (state & flags[i].flag_val) 5364 DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name); 5365 } 5366 } 5367