/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mac.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/callb.h>

/* Port add/deletion/etc routines */
static int vsw_port_delete(vsw_port_t *port);
static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_init_ldcs(vsw_port_t *port);
static int vsw_uninit_ldcs(vsw_port_t *port);
static int vsw_ldc_init(vsw_ldc_t *ldcp);
static int vsw_ldc_uninit(vsw_ldc_t *ldcp);
static int vsw_drain_ldcs(vsw_port_t *port);
static int vsw_drain_port_taskq(vsw_port_t *port);
static void vsw_marker_task(void *);
static int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
int vsw_detach_ports(vsw_t *vswp);
int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
int vsw_port_detach(vsw_t *vswp, int p_instance);
int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count);
int vsw_port_attach(vsw_t *vswp, int p_instance,
    uint64_t *ldcids, int nids, struct ether_addr *macaddr);
vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);

/* Interrupt routines */
static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);

/* Handshake routines */
static void vsw_ldc_reinit(vsw_ldc_t *);
static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t);
static void vsw_conn_task(void *);
static int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static void vsw_next_milestone(vsw_ldc_t *);
static int vsw_supported_version(vio_ver_msg_t *);
static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp);
static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp);

/* Data processing routines */
static void vsw_process_pkt(void *);
static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *);
static void vsw_process_ctrl_pkt(void *);
static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *,
    uint32_t);
static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
static void vsw_process_pkt_data_nop(void *, void *, uint32_t);
static void vsw_process_pkt_data(void *, void *, uint32_t);
static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *);

/* Switching/data transmit routines */
static int vsw_dringsend(vsw_ldc_t *, mblk_t *);
static int vsw_descrsend(vsw_ldc_t *, mblk_t *);
static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp);
static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries);
static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);
static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);

/* Packet creation routines */
static void vsw_send_ver(void *);
static void vsw_send_attr(vsw_ldc_t *);
static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);
static int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);

/* Dring routines */
static dring_info_t *vsw_create_dring(vsw_ldc_t *);
static void vsw_create_privring(vsw_ldc_t *);
static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
    int *);
static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);
static int vsw_reclaim_dring(dring_info_t *dp, int start);

static void vsw_set_lane_attr(vsw_t *, lane_t *);
static int vsw_check_attr(vnet_attr_msg_t *, vsw_ldc_t *);
static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
static int vsw_check_dring_info(vio_dring_reg_msg_t *);

/* Rcv/Tx thread routines */
static void vsw_stop_tx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_tx_worker(void *arg);
static void vsw_stop_rx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_rx_worker(void *arg);

/* Misc support routines */
static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static int vsw_free_ring(dring_info_t *);
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
static int vsw_get_same_dest_list(struct ether_header *ehp,
    mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);
static mblk_t *vsw_dupmsgchain(mblk_t *mp);
static void vsw_mac_rx(vsw_t *vswp, int caller, mac_resource_handle_t mrh,
    mblk_t *mp, mblk_t *mpt, vsw_macrx_flags_t flags);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);

/*
 * Functions imported from other files.
 */
extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
extern void vsw_reconfig_hw(vsw_t *);
extern int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
extern int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port);
extern void vsw_del_mcst_port(vsw_port_t *port);
extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);

#define	VSW_NUM_VMPOOLS		3	/* number of vio mblk pools */
#define	VSW_PORT_REF_DELAY	30	/* delay for port ref_cnt to become 0 */

/*
 * Tunables used in this file.
 */
extern int vsw_num_handshakes;
extern int vsw_wretries;
extern int vsw_desc_delay;
extern int vsw_read_attempts;
extern int vsw_ldc_tx_delay;
extern int vsw_ldc_tx_retries;
extern boolean_t vsw_ldc_rxthr_enabled;
extern boolean_t vsw_ldc_txthr_enabled;
extern uint32_t vsw_ntxds;
extern uint32_t vsw_max_tx_qcount;
extern uint32_t vsw_chain_len;
extern uint32_t vsw_mblk_size1;
extern uint32_t vsw_mblk_size2;
extern uint32_t vsw_mblk_size3;
extern uint32_t vsw_num_mblks1;
extern uint32_t vsw_num_mblks2;
extern uint32_t vsw_num_mblks3;
extern boolean_t vsw_obp_ver_proto_workaround;

#define	LDC_ENTER_LOCK(ldcp)	\
	mutex_enter(&((ldcp)->ldc_cblock));\
	mutex_enter(&((ldcp)->ldc_rxlock));\
	mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
	mutex_exit(&((ldcp)->ldc_txlock));\
	mutex_exit(&((ldcp)->ldc_rxlock));\
	mutex_exit(&((ldcp)->ldc_cblock));

#define	VSW_VER_EQ(ldcp, major, minor)	\
	((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor == (minor))

#define	VSW_VER_LT(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major < (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor < (minor)))

/* supported versions */
static ver_sup_t vsw_versions[] = { {1, 2} };

/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

#define	DUMP_TAG(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \
}

#define	DUMP_TAG_PTR(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \
}

#define	DUMP_FLAGS(flags) dump_flags(flags);
#define	DISPLAY_STATE()	display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */
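/*
 * Illustration of the version handling above: with vsw_versions[] set to
 * { {1, 2} }, a peer proposing version 1.3 has its minor number negotiated
 * down to 1.2 by vsw_supported_version(), while a channel that settled on
 * 1.0 or 1.1 satisfies VSW_VER_LT(ldcp, 1, 2) and is driven through the
 * pre-1.2 protocol paths set up by vsw_reset_vnet_proto_ops().
 */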
/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids,
    struct ether_addr *macaddr)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port, **prev_port;
	int		i;

	D1(vswp, "%s: enter : port %d", __func__, p_instance);

	/* port already exists? */
	READ_ENTER(&plist->lockrw);
	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			DWARN(vswp, "%s: port instance %d already attached",
			    __func__, p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}
	}
	RW_EXIT(&plist->lockrw);

	port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
	port->p_vswp = vswp;
	port->p_instance = p_instance;
	port->p_ldclist.num_ldcs = 0;
	port->p_ldclist.head = NULL;
	port->addr_set = VSW_ADDR_UNSET;

	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);

	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
	port->state = VSW_PORT_INIT;

	if (nids > VSW_PORT_MAX_LDCS) {
		D2(vswp, "%s: using first of %d ldc ids",
		    __func__, nids);
		nids = VSW_PORT_MAX_LDCS;
	}

	D2(vswp, "%s: %d nids", __func__, nids);
	for (i = 0; i < nids; i++) {
		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
			DERR(vswp, "%s: ldc_attach failed", __func__);

			rw_destroy(&port->p_ldclist.lockrw);

			cv_destroy(&port->state_cv);
			mutex_destroy(&port->state_lock);

			mutex_destroy(&port->tx_lock);
			mutex_destroy(&port->mca_lock);
			kmem_free(port, sizeof (vsw_port_t));
			return (1);
		}
	}

	ether_copy(macaddr, &port->p_macaddr);

	if (vswp->switching_setup_done == B_TRUE) {
		/*
		 * If the underlying physical device has been set up,
		 * program the mac address of this port in it.
		 * Otherwise, the port macaddr will be set after the physical
		 * device is successfully set up by the timeout handler.
		 */
		mutex_enter(&vswp->hw_lock);
		(void) vsw_set_hw(vswp, port, VSW_VNETPORT);
		mutex_exit(&vswp->hw_lock);
	}

	WRITE_ENTER(&plist->lockrw);

	/* create the fdb entry for this port/mac address */
	(void) vsw_add_fdb(vswp, port);

	/* link it into the list of ports for this vsw instance */
	prev_port = (vsw_port_t **)(&plist->head);
	port->p_next = *prev_port;
	*prev_port = port;
	plist->num_ports++;

	RW_EXIT(&plist->lockrw);

	/*
	 * Initialise the port and any ldc's under it.
	 */
	(void) vsw_init_ldcs(port);

	D1(vswp, "%s: exit", __func__);
	return (0);
}
/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
	vsw_port_t	*port = NULL;
	vsw_port_list_t	*plist = &vswp->plist;

	D1(vswp, "%s: enter: port id %d", __func__, p_instance);

	WRITE_ENTER(&plist->lockrw);

	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	if (vsw_plist_del_node(vswp, port)) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	/* Remove the fdb entry for this port/mac address */
	(void) vsw_del_fdb(vswp, port);

	/* Remove any multicast addresses.. */
	vsw_del_mcst_port(port);

	/*
	 * No longer need to hold writer lock on port list now
	 * that we have unlinked the target port from the list.
	 */
	RW_EXIT(&plist->lockrw);

	/* Remove address if it was programmed into HW. */
	mutex_enter(&vswp->hw_lock);

	/*
	 * Port's address may not have been set in hardware. This could
	 * happen if the underlying physical device is not yet available and
	 * vsw_setup_switching_timeout() may be in progress.
	 * We remove its addr from hardware only if it has been set before.
	 */
	if (port->addr_set != VSW_ADDR_UNSET)
		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);

	if (vswp->recfg_reqd)
		vsw_reconfig_hw(vswp);

	mutex_exit(&vswp->hw_lock);

	if (vsw_port_delete(port)) {
		return (1);
	}

	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
	return (0);
}

/*
 * Detach all active ports.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_detach_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port = NULL;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&plist->lockrw);

	while ((port = plist->head) != NULL) {
		if (vsw_plist_del_node(vswp, port)) {
			DERR(vswp, "%s: Error deleting port %d"
			    " from port list", __func__, port->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}

		/* Remove address if it was programmed into HW. */
		mutex_enter(&vswp->hw_lock);
		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);
		mutex_exit(&vswp->hw_lock);

		/* Remove the fdb entry for this port/mac address */
		(void) vsw_del_fdb(vswp, port);

		/* Remove any multicast addresses.. */
		vsw_del_mcst_port(port);

		/*
		 * No longer need to hold the lock on the port list
		 * now that we have unlinked the target port from the
		 * list.
		 */
		RW_EXIT(&plist->lockrw);
		if (vsw_port_delete(port)) {
			DERR(vswp, "%s: Error deleting port %d",
			    __func__, port->p_instance);
			return (1);
		}
		WRITE_ENTER(&plist->lockrw);
	}
	RW_EXIT(&plist->lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}
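/*
 * Note on teardown ordering: vsw_port_delete() below proceeds in a strict
 * sequence - callbacks are first disabled (vsw_uninit_ldcs), pending taskq
 * tasks and the port reference count are then drained, any active callbacks
 * are waited for (vsw_drain_ldcs), and only then are the channels detached
 * and the port memory freed. Freeing any earlier would allow an in-flight
 * callback or task to reference freed memory.
 */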
/*
 * Delete the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_delete(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

	(void) vsw_uninit_ldcs(port);

	/*
	 * Wait for any pending ctrl msg tasks which reference this
	 * port to finish.
	 */
	if (vsw_drain_port_taskq(port))
		return (1);

	/*
	 * Wait for port reference count to hit zero.
	 */
	while (port->ref_cnt != 0) {
		delay(drv_usectohz(VSW_PORT_REF_DELAY));
	}

	/*
	 * Wait for any active callbacks to finish
	 */
	if (vsw_drain_ldcs(port))
		return (1);

	ldcl = &port->p_ldclist;
	WRITE_ENTER(&ldcl->lockrw);
	while (ldcl->num_ldcs > 0) {
		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {
			cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld",
			    vswp->instance, ldcl->head->ldc_id);
			RW_EXIT(&ldcl->lockrw);
			return (1);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	rw_destroy(&port->p_ldclist.lockrw);

	mutex_destroy(&port->mca_lock);
	mutex_destroy(&port->tx_lock);
	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	kmem_free(port, sizeof (vsw_port_t));

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp = NULL;
	ldc_attr_t	attr;
	ldc_status_t	istatus;
	int		status = DDI_FAILURE;
	int		rv;
	char		kname[MAXNAMELEN];
	enum		{ PROG_init = 0x0, PROG_mblks = 0x1,
			    PROG_callback = 0x2, PROG_rx_thread = 0x4,
			    PROG_tx_thread = 0x8}
			progress;

	progress = PROG_init;

	D1(vswp, "%s: enter", __func__);

	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		DERR(vswp, "%s: kmem_zalloc failed", __func__);
		return (1);
	}
	ldcp->ldc_id = ldc_id;

	/* Allocate pools of receive mblks */
	rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS,
	    vsw_mblk_size1, vsw_mblk_size2, vsw_mblk_size3,
	    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3);
	if (rv) {
		DWARN(vswp, "%s: unable to create free mblk pools for"
		    " channel %ld (rv %d)", __func__, ldc_id, rv);
		kmem_free(ldcp, sizeof (vsw_ldc_t));
		return (1);
	}

	progress |= PROG_mblks;

	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);
	rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL);
	rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL);

	/* required for handshake with peer */
	ldcp->local_session = (uint64_t)ddi_get_lbolt();
	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hss_id = 1;	/* Initial handshake session id */

	/* only set for outbound lane, inbound set by peer */
	vsw_set_lane_attr(vswp, &ldcp->lane_out);

	attr.devclass = LDC_DEV_NT_SVC;
	attr.instance = ddi_get_instance(vswp->dip);
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = VSW_LDC_MTU;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
		    __func__, ldc_id, status);
		goto ldc_attach_fail;
	}

	if (vsw_ldc_rxthr_enabled) {
		ldcp->rx_thr_flags = 0;

		mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		progress |= PROG_rx_thread;
		if (ldcp->rx_thread == NULL) {
			DWARN(vswp, "%s(%lld): Failed to create worker thread",
			    __func__, ldc_id);
			goto ldc_attach_fail;
		}
	}

	if (vsw_ldc_txthr_enabled) {
		ldcp->tx_thr_flags = 0;
		ldcp->tx_mhead = ldcp->tx_mtail = NULL;

		mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		progress |= PROG_tx_thread;
		if (ldcp->tx_thread == NULL) {
			DWARN(vswp, "%s(%lld): Failed to create worker thread",
			    __func__, ldc_id);
			goto ldc_attach_fail;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
		    __func__, ldc_id, status);
		(void) ldc_fini(ldcp->ldc_handle);
		goto ldc_attach_fail;
	}
	/*
	 * allocate a message for ldc_read()s, big enough to hold ctrl and
	 * data msgs, including raw data msgs used to recv priority frames.
	 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + ETHERMAX;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);

	progress |= PROG_callback;

	mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: ldc_status failed", __func__);
		mutex_destroy(&ldcp->status_lock);
		goto ldc_attach_fail;
	}

	ldcp->ldc_status = istatus;
	ldcp->ldc_port = port;
	ldcp->ldc_vswp = vswp;

	vsw_reset_vnet_proto_ops(ldcp);

	(void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance,
	    kname, &ldcp->ldc_stats);
	if (ldcp->ksp == NULL) {
		DERR(vswp, "%s: kstats setup failed", __func__);
		goto ldc_attach_fail;
	}

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldcl->lockrw);
	ldcp->ldc_next = ldcl->head;
	ldcl->head = ldcp;
	ldcl->num_ldcs++;
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);

ldc_attach_fail:

	if (progress & PROG_callback) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}

	if (progress & PROG_rx_thread) {
		if (ldcp->rx_thread != NULL) {
			vsw_stop_rx_thread(ldcp);
		}
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}

	if (progress & PROG_tx_thread) {
		if (ldcp->tx_thread != NULL) {
			vsw_stop_tx_thread(ldcp);
		}
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
	}
	if (ldcp->ksp != NULL) {
		vgen_destroy_kstats(ldcp->ksp);
	}
	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	mutex_destroy(&ldcp->drain_cv_lock);

	cv_destroy(&ldcp->drain_cv);

	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	if (progress & PROG_mblks) {
		vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);
	}
	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (1);
}
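/*
 * Note on the error path above: the 'progress' bitmask records each stage
 * of attach as it completes, so ldc_attach_fail unwinds exactly the
 * resources that were successfully set up (mblk pools, worker threads,
 * callback registration) and nothing more.
 */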
/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_t	*ldcp, *prev_ldcp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	int		rv;

	prev_ldcp = ldcl->head;
	for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) {
		if (ldcp->ldc_id == ldc_id) {
			break;
		}
	}

	/* specified ldc id not found */
	if (ldcp == NULL) {
		DERR(vswp, "%s: ldcp = NULL", __func__);
		return (1);
	}

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/* Stop the receive thread */
	if (ldcp->rx_thread != NULL) {
		vsw_stop_rx_thread(ldcp);
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}
	kmem_free(ldcp->ldcmsg, ldcp->msglen);

	/* Stop the tx thread */
	if (ldcp->tx_thread != NULL) {
		vsw_stop_tx_thread(ldcp);
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
		if (ldcp->tx_mhead != NULL) {
			freemsgchain(ldcp->tx_mhead);
			ldcp->tx_mhead = ldcp->tx_mtail = NULL;
			ldcp->tx_cnt = 0;
		}
	}

	/* Destroy kstats */
	vgen_destroy_kstats(ldcp->ksp);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
	 */
	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	/*
	 * If the close fails we are in serious trouble, as we won't
	 * be able to delete the parent port.
	 */
	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
		DERR(vswp, "%s: error %d closing channel %lld",
		    __func__, rv, ldcp->ldc_id);
		return (1);
	}

	(void) ldc_fini(ldcp->ldc_handle);

	ldcp->ldc_status = LDC_INIT;
	ldcp->ldc_handle = NULL;
	ldcp->ldc_vswp = NULL;

	/*
	 * Most likely some mblks are still in use and
	 * have not been returned to the pool. These mblks are
	 * added to the pool that is maintained in the device instance.
	 * Another attempt will be made to destroy the pool
	 * when the device detaches.
	 */
	vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);

	/* unlink it from the list */
	prev_ldcp = ldcp->ldc_next;
	ldcl->num_ldcs--;

	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	cv_destroy(&ldcp->drain_cv);
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->status_lock);
	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (0);
}

/*
 * Open and attempt to bring up the channel. Note that channel
 * can only be brought up if peer has also opened channel.
 *
 * Returns 0 if it can open and bring up the channel, otherwise
 * returns 1.
 */
static int
vsw_ldc_init(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	ldc_status_t	istatus = 0;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	LDC_ENTER_LOCK(ldcp);

	/* don't start at 0 in case clients don't like that */
	ldcp->next_ident = 1;

	rv = ldc_open(ldcp->ldc_handle);
	if (rv != 0) {
		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
		    __func__, ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
		    __func__, ldcp->ldc_id, istatus);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = istatus;
	mutex_exit(&ldcp->status_lock);

	rv = ldc_up(ldcp->ldc_handle);
	if (rv != 0) {
		/*
		 * Not a fatal error for ldc_up() to fail, as peer
		 * end point may simply not be ready yet.
		 */
		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
		    ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	/*
	 * ldc_up() call is non-blocking so need to explicitly
	 * check channel status to see if in fact the channel
	 * is UP.
	 */
	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	if (ldcp->ldc_status == LDC_UP) {
		D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
		    ldcp->ldc_id, istatus);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		return (0);
	}

	mutex_exit(&ldcp->status_lock);
	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/* disable callbacks on the channel */
static int
vsw_ldc_uninit(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	int	rv;

	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);

	LDC_ENTER_LOCK(ldcp);

	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		DERR(vswp, "vsw_ldc_uninit(%lld): error disabling "
		    "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = LDC_INIT;
	mutex_exit(&ldcp->status_lock);

	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);

	return (0);
}

static int
vsw_init_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_init(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	return (0);
}

static int
vsw_uninit_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	D1(NULL, "vsw_uninit_ldcs: enter\n");

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_uninit(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	D1(NULL, "vsw_uninit_ldcs: exit\n");

	return (0);
}
/*
 * Wait until the callback(s) associated with the ldcs under the specified
 * port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 *
 * A short explanation of what we are doing below...
 *
 * The simplest approach would be to have a reference counter in
 * the ldc structure which is incremented/decremented by the callbacks as
 * they use the channel. The drain function could then simply disable any
 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
 * there is a tiny window here - before the callback is able to get the lock
 * on the channel it is interrupted and this function gets to execute. It
 * sees that the ref count is zero and believes it is free to delete the
 * associated data structures.
 *
 * We get around this by taking advantage of the fact that before the ldc
 * framework invokes a callback it sets a flag to indicate that there is a
 * callback active (or about to become active). If we attempt to unregister
 * a callback while this active flag is set then the unregister will fail
 * with EWOULDBLOCK.
 *
 * If the unregister fails we do a cv_timedwait. We will either be signaled
 * by the callback as it is exiting (note we have to wait a short period to
 * allow the callback to return fully to the ldc framework and it to clear
 * the active flag), or by the timer expiring. In either case we again attempt
 * the unregister. We repeat this until we can successfully unregister the
 * callback.
 *
 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
 * the case where the callback has finished but the ldc framework has not yet
 * cleared the active flag. In this case we would never get a cv_signal.
 */
static int
vsw_drain_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	READ_ENTER(&ldcl->lockrw);

	ldcp = ldcl->head;

	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		/*
		 * If we can unregister the channel callback then we
		 * know that there is no callback either running or
		 * scheduled to run for this channel so move on to next
		 * channel in the list.
		 */
		mutex_enter(&ldcp->drain_cv_lock);

		/* prompt active callbacks to quit */
		ldcp->drain_state = VSW_LDC_DRAINING;

		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
			D2(vswp, "%s: unreg callback for chan %ld", __func__,
			    ldcp->ldc_id);
			mutex_exit(&ldcp->drain_cv_lock);
			continue;
		} else {
			/*
			 * If we end up here we know that either 1) a callback
			 * is currently executing, 2) is about to start (i.e.
			 * the ldc framework has set the active flag but
			 * has not actually invoked the callback yet, or 3)
			 * has finished and has returned to the ldc framework
			 * but the ldc framework has not yet cleared the
			 * active bit.
			 *
			 * Wait for it to finish.
			 */
			while (ldc_unreg_callback(ldcp->ldc_handle)
			    == EWOULDBLOCK)
				(void) cv_timedwait(&ldcp->drain_cv,
				    &ldcp->drain_cv_lock, lbolt + hz);

			mutex_exit(&ldcp->drain_cv_lock);
			D2(vswp, "%s: unreg callback for chan %ld after "
			    "timeout", __func__, ldcp->ldc_id);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Wait until all tasks which reference this port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 */
static int
vsw_drain_port_taskq(vsw_port_t *port)
{
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Mark the port as in the process of being detached, and
	 * dispatch a marker task to the queue so we know when all
	 * relevant tasks have completed.
	 */
	mutex_enter(&port->state_lock);
	port->state = VSW_PORT_DETACHING;

	if ((vswp->taskq_p == NULL) ||
	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
	    port, DDI_NOSLEEP) != DDI_SUCCESS)) {
		DERR(vswp, "%s: unable to dispatch marker task",
		    __func__);
		mutex_exit(&port->state_lock);
		return (1);
	}

	/*
	 * Wait for the marker task to finish.
	 */
	while (port->state != VSW_PORT_DETACHABLE)
		cv_wait(&port->state_cv, &port->state_lock);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static void
vsw_marker_task(void *arg)
{
	vsw_port_t	*port = arg;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&port->state_lock);

	/*
	 * No further tasks should be dispatched which reference
	 * this port so ok to mark it as safe to detach.
	 */
	port->state = VSW_PORT_DETACHABLE;

	cv_signal(&port->state_cv);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);
}

vsw_port_t *
vsw_lookup_port(vsw_t *vswp, int p_instance)
{
	vsw_port_list_t *plist = &vswp->plist;
	vsw_port_t	*port;

	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			D2(vswp, "vsw_lookup_port: found p_instance\n");
			return (port);
		}
	}

	return (NULL);
}

/*
 * Search for and remove the specified port from the port
 * list. Returns 0 if able to locate and remove port, otherwise
 * returns 1.
 */
static int
vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
{
	vsw_port_list_t *plist = &vswp->plist;
	vsw_port_t	*curr_p, *prev_p;

	if (plist->head == NULL)
		return (1);

	curr_p = prev_p = plist->head;

	while (curr_p != NULL) {
		if (curr_p == port) {
			if (prev_p == curr_p) {
				plist->head = curr_p->p_next;
			} else {
				prev_p->p_next = curr_p->p_next;
			}
			plist->num_ports--;
			break;
		} else {
			prev_p = curr_p;
			curr_p = curr_p->p_next;
		}
	}
	return (0);
}
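/*
 * The callback below co-operates with vsw_drain_ldcs() above: on its way
 * out it signals drain_cv whenever drain_state is VSW_LDC_DRAINING, so a
 * drain which could not unregister the callback (EWOULDBLOCK) is woken as
 * soon as the in-flight callback completes, rather than having to wait for
 * its cv_timedwait() to expire.
 */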
/*
 * Interrupt handler for ldc messages.
 */
static uint_t
vsw_ldc_cb(uint64_t event, caddr_t arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	mutex_enter(&ldcp->ldc_cblock);
	ldcp->ldc_stats.callbacks++;

	mutex_enter(&ldcp->status_lock);
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		mutex_exit(&ldcp->status_lock);
		mutex_exit(&ldcp->ldc_cblock);
		return (LDC_SUCCESS);
	}
	mutex_exit(&ldcp->status_lock);

	if (event & LDC_EVT_UP) {
		/*
		 * Channel has come up.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		/*
		 * Data available for reading.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) data READ",
		    __func__, ldcp->ldc_id, event);

		if (ldcp->rx_thread != NULL) {
			/*
			 * If the receive thread is enabled, then
			 * wakeup the receive thread to process the
			 * LDC messages.
			 */
			mutex_exit(&ldcp->ldc_cblock);
			mutex_enter(&ldcp->rx_thr_lock);
			if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) {
				ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD;
				cv_signal(&ldcp->rx_thr_cv);
			}
			mutex_exit(&ldcp->rx_thr_lock);
			mutex_enter(&ldcp->ldc_cblock);
		} else {
			vsw_process_pkt(ldcp);
		}

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		goto vsw_cb_exit;
	}

	if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) {
		D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
	}

	/*
	 * Catch either LDC_EVT_WRITE which we don't support or any
	 * unknown event.
	 */
	if (event &
	    ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) {
		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
	}

vsw_cb_exit:
	mutex_exit(&ldcp->ldc_cblock);

	/*
	 * Let the drain function know we are finishing if it
	 * is waiting.
	 */
	mutex_enter(&ldcp->drain_cv_lock);
	if (ldcp->drain_state == VSW_LDC_DRAINING)
		cv_signal(&ldcp->drain_cv);
	mutex_exit(&ldcp->drain_cv_lock);

	return (LDC_SUCCESS);
}

/*
 * Reinitialise data structures associated with the channel.
 */
static void
vsw_ldc_reinit(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_port_t	*port;
	vsw_ldc_list_t	*ldcl;

	D1(vswp, "%s: enter", __func__);

	port = ldcp->ldc_port;
	ldcl = &port->p_ldclist;

	READ_ENTER(&ldcl->lockrw);

	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
	    ldcp->lane_in.lstate, ldcp->lane_out.lstate);

	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);
	RW_EXIT(&ldcl->lockrw);

	ldcp->lane_in.lstate = 0;
	ldcp->lane_out.lstate = 0;

	/*
	 * Remove parent port from any multicast groups
	 * it may have registered with. Client must resend
	 * multicast add command after handshake completes.
	 */
	(void) vsw_del_fdb(vswp, port);

	vsw_del_mcst_port(port);

	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hcnt = 0;
	ldcp->hphase = VSW_MILESTONE0;

	vsw_reset_vnet_proto_ops(ldcp);

	D1(vswp, "%s: exit", __func__);
}

/*
 * Process a connection event.
 *
 * Note - care must be taken to ensure that this function is
 * not called with the dlistrw lock held.
 */
static void
vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_conn_evt_t	*conn = NULL;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Check if either a reset or restart event is pending
	 * or in progress. If so just return.
	 *
	 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT
	 * being received by the callback handler, or a ECONNRESET error
	 * code being returned from a ldc_read() or ldc_write() call.
	 *
	 * A VSW_CONN_RESTART event occurs when some error checking code
	 * decides that there is a problem with data from the channel,
	 * and that the handshake should be restarted.
	 */
	if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) &&
	    (ldstub((uint8_t *)&ldcp->reset_active)))
		return;

	/*
	 * If it is an LDC_UP event we first check the recorded
	 * state of the channel. If this is UP then we know that
	 * the channel moving to the UP state has already been dealt
	 * with and don't need to dispatch a new task.
	 *
	 * The reason for this check is that when we do a ldc_up(),
	 * depending on the state of the peer, we may or may not get
	 * a LDC_UP event. As we can't depend on getting a LDC_UP evt
	 * every time we do ldc_up() we explicitly check the channel
	 * status to see has it come up (ldc_up() is asynch and will
	 * complete at some undefined time), and take the appropriate
	 * action.
	 *
	 * The flip side of this is that we may get a LDC_UP event
	 * when we have already seen that the channel is up and have
	 * dealt with that.
	 */
	mutex_enter(&ldcp->status_lock);
	if (evt == VSW_CONN_UP) {
		if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) {
			mutex_exit(&ldcp->status_lock);
			return;
		}
	}
	mutex_exit(&ldcp->status_lock);

	/*
	 * The transaction group id allows us to identify and discard
	 * any tasks which are still pending on the taskq and refer
	 * to the handshake session we are about to restart or reset.
	 * These stale messages no longer have any real meaning.
	 */
	(void) atomic_inc_32(&ldcp->hss_id);

	ASSERT(vswp->taskq_p != NULL);

	if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) {
		cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for"
		    " connection event", vswp->instance);
		goto err_exit;
	}

	conn->evt = evt;
	conn->ldcp = ldcp;

	if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn,
	    DDI_NOSLEEP) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task",
		    vswp->instance);

		kmem_free(conn, sizeof (vsw_conn_evt_t));
		goto err_exit;
	}

	D1(vswp, "%s: exit", __func__);
	return;

err_exit:
	/*
	 * We have most likely failed due to memory shortage. Clear the flag
	 * so that future requests will at least be attempted and will
	 * hopefully succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;
}

/*
 * Deal with events relating to a connection. Invoked from a taskq.
 */
static void
vsw_conn_task(void *arg)
{
	vsw_conn_evt_t	*conn = (vsw_conn_evt_t *)arg;
	vsw_ldc_t	*ldcp = NULL;
	vsw_t		*vswp = NULL;
	uint16_t	evt;
	ldc_status_t	curr_status;

	ldcp = conn->ldcp;
	evt = conn->evt;
	vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: enter", __func__);

	/* can safely free now have copied out data */
	kmem_free(conn, sizeof (vsw_conn_evt_t));

	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	/*
	 * If we wish to restart the handshake on this channel, then if
	 * the channel is UP we bring it DOWN to flush the underlying
	 * ldc queue.
	 */
	if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP))
		(void) ldc_down(ldcp->ldc_handle);

	/*
	 * re-init all the associated data structures.
	 */
	vsw_ldc_reinit(ldcp);

	/*
	 * Bring the channel back up (note it does no harm to
	 * do this even if the channel is already UP, it just
	 * becomes effectively a no-op).
	 */
	(void) ldc_up(ldcp->ldc_handle);

	/*
	 * Check if channel is now UP. This will only happen if
	 * peer has also done a ldc_up().
	 */
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	ldcp->ldc_status = curr_status;

	/* channel UP so restart handshake by sending version info */
	if (curr_status == LDC_UP) {
		if (ldcp->hcnt++ > vsw_num_handshakes) {
			cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted"
			    " handshake attempts (%d) on channel %ld",
			    vswp->instance, ldcp->hcnt, ldcp->ldc_id);
			mutex_exit(&ldcp->status_lock);
			return;
		}

		if (vsw_obp_ver_proto_workaround == B_FALSE &&
		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
		    DDI_NOSLEEP) != DDI_SUCCESS)) {
			cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task",
			    vswp->instance);

			/*
			 * Don't count as valid restart attempt if couldn't
			 * send version msg.
			 */
			if (ldcp->hcnt > 0)
				ldcp->hcnt--;
		}
	}

	/*
	 * Mark that the process is complete by clearing the flag.
	 *
	 * Note it is possible that the taskq dispatch above may have failed,
	 * most likely due to memory shortage. We still clear the flag so
	 * future attempts will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;

	mutex_exit(&ldcp->status_lock);

	D1(vswp, "%s: exit", __func__);
}
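/*
 * Example of the check below: a VER_ACK arriving before we have sent a
 * VER_INFO (i.e. while VSW_VER_INFO_SENT is clear in the relevant lane
 * state) is treated as a protocol violation and forces a restart of the
 * handshake via vsw_process_conn_evt(VSW_CONN_RESTART).
 */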
/*
 * Returns 0 if it is legal for the event signified by the flag to have
 * occurred at the time it did. Otherwise returns 1.
 */
int
vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	uint64_t	state;
	uint64_t	phase;

	if (dir == INBOUND)
		state = ldcp->lane_in.lstate;
	else
		state = ldcp->lane_out.lstate;

	phase = ldcp->hphase;

	switch (flag) {
	case VSW_VER_INFO_RECV:
		if (phase > VSW_MILESTONE0) {
			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_VER_ACK_RECV:
	case VSW_VER_NACK_RECV:
		if (!(state & VSW_VER_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or "
			    "VER_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_VER_INFO_SENT;
		break;

	case VSW_ATTR_INFO_RECV:
		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_ATTR_ACK_RECV:
	case VSW_ATTR_NACK_RECV:
		if (!(state & VSW_ATTR_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
			    " or ATTR_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_ATTR_INFO_SENT;
		break;

	case VSW_DRING_INFO_RECV:
		if (phase < VSW_MILESTONE1) {
			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_DRING_ACK_RECV:
	case VSW_DRING_NACK_RECV:
		if (!(state & VSW_DRING_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK "
			    " or DRING_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_DRING_INFO_SENT;
		break;

	case VSW_RDX_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_RDX_ACK_RECV:
	case VSW_RDX_NACK_RECV:
		if (!(state & VSW_RDX_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or "
			    "RDX_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_RDX_INFO_SENT;
		break;

	case VSW_MCST_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	default:
		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
		    ldcp->ldc_id, flag);
		return (1);
	}

	if (dir == INBOUND)
		ldcp->lane_in.lstate = state;
	else
		ldcp->lane_out.lstate = state;

	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);

	return (0);
}
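/*
 * A successful handshake moves the channel through the milestones in
 * order: MILESTONE0 (version negotiated), MILESTONE1 (attributes
 * exchanged), MILESTONE2 (dring info exchanged, when dring mode is in
 * use), MILESTONE3 (RDX exchanged in both directions), and finally
 * MILESTONE4, at which point the outbound lane is marked VSW_LANE_ACTIVE
 * and data may flow.
 */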
void
vsw_next_milestone(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
	    ldcp->ldc_id, ldcp->hphase);

	DUMP_FLAGS(ldcp->lane_in.lstate);
	DUMP_FLAGS(ldcp->lane_out.lstate);

	switch (ldcp->hphase) {

	case VSW_MILESTONE0:
		/*
		 * If we haven't started to handshake with our peer,
		 * start to do so now.
		 */
		if (ldcp->lane_out.lstate == 0) {
			D2(vswp, "%s: (chan %lld) starting handshake "
			    "with peer", __func__, ldcp->ldc_id);
			vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		}

		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated version info.
		 */
		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
		    (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 0",
			    __func__, ldcp->ldc_id);

			vsw_set_vnet_proto_ops(ldcp);

			/*
			 * Next milestone is passed when attribute
			 * information has been successfully exchanged.
			 */
			ldcp->hphase = VSW_MILESTONE1;
			vsw_send_attr(ldcp);

		}
		break;

	case VSW_MILESTONE1:
		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated attribute information.
		 */
		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {

			ldcp->hphase = VSW_MILESTONE2;

			/*
			 * If the peer device has said it wishes to
			 * use descriptor rings then we send it our ring
			 * info, otherwise we just set up a private ring
			 * which we use as an internal buffer.
			 */
			if ((VSW_VER_EQ(ldcp, 1, 2) &&
			    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
			    (VSW_VER_LT(ldcp, 1, 2) &&
			    (ldcp->lane_in.xfer_mode ==
			    VIO_DRING_MODE_V1_0))) {
				vsw_send_dring_info(ldcp);
			}
		}
		break;

	case VSW_MILESTONE2:
		/*
		 * If peer has indicated in its attribute message that
		 * it wishes to use descriptor rings then the only way
		 * to pass this milestone is for us to have received
		 * valid dring info.
		 *
		 * If peer is not using descriptor rings then just fall
		 * through.
		 */
		if ((VSW_VER_EQ(ldcp, 1, 2) &&
		    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    (VSW_VER_LT(ldcp, 1, 2) &&
		    (ldcp->lane_in.xfer_mode ==
		    VIO_DRING_MODE_V1_0))) {
			if (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))
				break;
		}

		D2(vswp, "%s: (chan %lld) leaving milestone 2",
		    __func__, ldcp->ldc_id);

		ldcp->hphase = VSW_MILESTONE3;
		vsw_send_rdx(ldcp);
		break;

	case VSW_MILESTONE3:
		/*
		 * Pass this milestone when all parameters have been
		 * successfully exchanged and RDX sent in both directions.
		 *
		 * Mark outbound lane as available to transmit data.
		 */
		if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) &&
		    (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 3",
			    __func__, ldcp->ldc_id);
			D2(vswp, "%s: ** handshake complete (0x%llx : "
			    "0x%llx) **", __func__, ldcp->lane_in.lstate,
			    ldcp->lane_out.lstate);
			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
			ldcp->hphase = VSW_MILESTONE4;
			ldcp->hcnt = 0;
			DISPLAY_STATE();
		} else {
			D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)",
			    __func__, ldcp->lane_in.lstate,
			    ldcp->lane_out.lstate);
		}
		break;

	case VSW_MILESTONE4:
		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
		    ldcp->ldc_id);
		break;

	default:
		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
		    ldcp->ldc_id, ldcp->hphase);
	}

	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
	    ldcp->hphase);
}

/*
 * Check if major version is supported.
 *
 * Returns 0 if it finds a supported major number, and if necessary
 * adjusts the minor field.
 *
 * Returns 1 if it can't match the major number exactly. Sets major/minor
 * to the next lowest supported values, or to zero if no other values are
 * possible.
 */
static int
vsw_supported_version(vio_ver_msg_t *vp)
{
	int	i;

	D1(NULL, "vsw_supported_version: enter");

	for (i = 0; i < VSW_NUM_VER; i++) {
		if (vsw_versions[i].ver_major == vp->ver_major) {
			/*
			 * Matching or lower major version found. Update
			 * minor number if necessary.
			 */
			if (vp->ver_minor > vsw_versions[i].ver_minor) {
				D2(NULL, "%s: adjusting minor value from %d "
				    "to %d", __func__, vp->ver_minor,
				    vsw_versions[i].ver_minor);
				vp->ver_minor = vsw_versions[i].ver_minor;
			}

			return (0);
		}

		/*
		 * If the message contains a higher major version number, set
		 * the message's major/minor versions to the current values
		 * and return false, so this message will get resent with
		 * these values.
		 */
		if (vsw_versions[i].ver_major < vp->ver_major) {
			D2(NULL, "%s: adjusting major and minor "
			    "values to %d, %d\n",
			    __func__, vsw_versions[i].ver_major,
			    vsw_versions[i].ver_minor);
			vp->ver_major = vsw_versions[i].ver_major;
			vp->ver_minor = vsw_versions[i].ver_minor;
			return (1);
		}
	}

	/* No match was possible, zero out fields */
	vp->ver_major = 0;
	vp->ver_minor = 0;

	D1(NULL, "vsw_supported_version: exit");

	return (1);
}

/*
 * Set vnet-protocol-version dependent functions based on version.
 */
static void
vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	lane_t	*lp = &ldcp->lane_out;

	if (VSW_VER_EQ(ldcp, 1, 2)) {
		/* Version 1.2 */

		if (VSW_PRI_ETH_DEFINED(vswp)) {
			/*
			 * Enable priority routines and pkt mode only if
			 * at least one pri-eth-type is specified in MD.
			 */
			ldcp->tx = vsw_ldctx_pri;
			ldcp->rx_pktdata = vsw_process_pkt_data;

			/* set xfer mode for vsw_send_attr() */
			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
		} else {
			/* no priority eth types defined in MD */

			ldcp->tx = vsw_ldctx;
			ldcp->rx_pktdata = vsw_process_pkt_data_nop;

			/* set xfer mode for vsw_send_attr() */
			lp->xfer_mode = VIO_DRING_MODE_V1_2;
		}
	} else {
		/* Versions prior to 1.2 */

		vsw_reset_vnet_proto_ops(ldcp);
	}
}

/*
 * Reset vnet-protocol-version dependent functions to v1.0.
 */
static void
vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp)
{
	lane_t	*lp = &ldcp->lane_out;

	ldcp->tx = vsw_ldctx;
	ldcp->rx_pktdata = vsw_process_pkt_data_nop;

	/* set xfer mode for vsw_send_attr() */
	lp->xfer_mode = VIO_DRING_MODE_V1_0;
}

/*
 * Main routine for processing messages received over LDC.
 */
static void
vsw_process_pkt(void *arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	size_t		msglen;
	vio_msg_tag_t	*tagp;
	uint64_t	*ldcmsg;
	int		rv = 0;

	D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	ASSERT(MUTEX_HELD(&ldcp->ldc_cblock));

	ldcmsg = ldcp->ldcmsg;
	/*
	 * If channel is up read messages until channel is empty.
	 */
	do {
		msglen = ldcp->msglen;
		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);

		if (rv != 0) {
			DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n",
			    __func__, ldcp->ldc_id, rv, msglen);
		}

		/* channel has been reset */
		if (rv == ECONNRESET) {
			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
			break;
		}

		if (msglen == 0) {
			D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__,
			    ldcp->ldc_id);
			break;
		}

		D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__,
		    ldcp->ldc_id, msglen);

		/*
		 * Figure out what sort of packet we have gotten by
		 * examining the msg tag, and then switch it appropriately.
		 */
		tagp = (vio_msg_tag_t *)ldcmsg;

		switch (tagp->vio_msgtype) {
		case VIO_TYPE_CTRL:
			vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp);
			break;
		case VIO_TYPE_DATA:
			vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen);
			break;
		case VIO_TYPE_ERR:
			vsw_process_err_pkt(ldcp, ldcmsg, tagp);
			break;
		default:
			DERR(vswp, "%s: Unknown tag(%lx) id(%lx)\n",
			    __func__, tagp->vio_msgtype, ldcp->ldc_id);
			break;
		}
	} while (msglen);

	D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id);
}
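/*
 * Control messages are dispatched to vswp->taskq_p (below) so that
 * handshake processing does not hold up the LDC callback. The hss_id
 * copied into each task lets vsw_process_ctrl_pkt() discard tasks that
 * were queued under an earlier handshake session and are now stale.
 */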
2024 */ 2025 if ((tagp->vio_subtype_env == VIO_RDX) && 2026 (tagp->vio_subtype == VIO_SUBTYPE_ACK)) { 2027 2028 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV)) 2029 return; 2030 2031 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV; 2032 D2(vswp, "%s (%ld) handling RDX_ACK in place " 2033 "(ostate 0x%llx : hphase %d)", __func__, 2034 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase); 2035 vsw_next_milestone(ldcp); 2036 return; 2037 } 2038 2039 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 2040 2041 if (ctaskp == NULL) { 2042 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__); 2043 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2044 return; 2045 } 2046 2047 ctaskp->ldcp = ldcp; 2048 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 2049 ctaskp->hss_id = ldcp->hss_id; 2050 2051 /* 2052 * Dispatch task to processing taskq if port is not in 2053 * the process of being detached. 2054 */ 2055 mutex_enter(&port->state_lock); 2056 if (port->state == VSW_PORT_INIT) { 2057 if ((vswp->taskq_p == NULL) || 2058 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt, 2059 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) { 2060 DERR(vswp, "%s: unable to dispatch task to taskq", 2061 __func__); 2062 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2063 mutex_exit(&port->state_lock); 2064 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2065 return; 2066 } 2067 } else { 2068 DWARN(vswp, "%s: port %d detaching, not dispatching " 2069 "task", __func__, port->p_instance); /* not dispatched, so free the task here */ kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2070 } 2071 2072 mutex_exit(&port->state_lock); 2073 2074 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 2075 ldcp->ldc_id); 2076 D1(vswp, "%s: exit", __func__); 2077 } 2078 2079 /* 2080 * Process a VIO ctrl message. Invoked from taskq. 2081 */ 2082 static void 2083 vsw_process_ctrl_pkt(void *arg) 2084 { 2085 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 2086 vsw_ldc_t *ldcp = ctaskp->ldcp; 2087 vsw_t *vswp = ldcp->ldc_vswp; 2088 vio_msg_tag_t tag; 2089 uint16_t env; 2090 2091 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2092 2093 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 2094 env = tag.vio_subtype_env; 2095 2096 /* stale pkt check */ 2097 if (ctaskp->hss_id < ldcp->hss_id) { 2098 DWARN(vswp, "%s: discarding stale packet belonging to earlier" 2099 " (%ld) handshake session", __func__, ctaskp->hss_id); /* free the task; it is not going to be processed */ kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2100 return; 2101 } 2102 2103 /* session id check */ 2104 if (ldcp->session_status & VSW_PEER_SESSION) { 2105 if (ldcp->peer_session != tag.vio_sid) { 2106 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2107 __func__, ldcp->ldc_id, tag.vio_sid); 2108 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2109 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2110 return; 2111 } 2112 } 2113 2114 /* 2115 * Switch on vio_subtype envelope, then let lower routines 2116 * decide if it's an INFO, ACK or NACK packet.
2117 */ 2118 switch (env) { 2119 case VIO_VER_INFO: 2120 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 2121 break; 2122 case VIO_DRING_REG: 2123 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 2124 break; 2125 case VIO_DRING_UNREG: 2126 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 2127 break; 2128 case VIO_ATTR_INFO: 2129 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 2130 break; 2131 case VNET_MCAST_INFO: 2132 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 2133 break; 2134 case VIO_RDX: 2135 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 2136 break; 2137 default: 2138 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 2139 } 2140 2141 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2142 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2143 } 2144 2145 /* 2146 * Version negotiation. We can end up here either because our peer 2147 * has responded to a handshake message we have sent it, or our peer 2148 * has initiated a handshake with us. If it's the former it can only 2149 * be an ACK or NACK; if it's the latter it can only be an INFO. 2150 * 2151 * If it's an ACK we move to the next stage of the handshake, namely 2152 * attribute exchange. If it's a NACK we see if we can specify another 2153 * version; if we can't, we stop. 2154 * 2155 * If it is an INFO we reset all params associated with communication 2156 * in that direction over this channel (remember connection is 2157 * essentially 2 independent simplex channels). 2158 */ 2159 void 2160 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 2161 { 2162 vio_ver_msg_t *ver_pkt; 2163 vsw_t *vswp = ldcp->ldc_vswp; 2164 2165 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2166 2167 /* 2168 * We know this is a ctrl/version packet so 2169 * cast it into the correct structure. 2170 */ 2171 ver_pkt = (vio_ver_msg_t *)pkt; 2172 2173 switch (ver_pkt->tag.vio_subtype) { 2174 case VIO_SUBTYPE_INFO: 2175 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 2176 2177 /* 2178 * Record the session id, which we will use from now 2179 * until we see another VER_INFO msg. Even then the 2180 * session id in most cases will be unchanged, except 2181 * if channel was reset. 2182 */ 2183 if ((ldcp->session_status & VSW_PEER_SESSION) && 2184 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 2185 DERR(vswp, "%s: updating session id for chan %lld " 2186 "from %llx to %llx", __func__, ldcp->ldc_id, 2187 ldcp->peer_session, ver_pkt->tag.vio_sid); 2188 } 2189 2190 ldcp->peer_session = ver_pkt->tag.vio_sid; 2191 ldcp->session_status |= VSW_PEER_SESSION; 2192 2193 /* Legal message at this time? */ 2194 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 2195 return; 2196 2197 /* 2198 * First check the device class. Currently only expect 2199 * to be talking to a network device. In the future may 2200 * also talk to another switch. 2201 */ 2202 if (ver_pkt->dev_class != VDEV_NETWORK) { 2203 DERR(vswp, "%s: illegal device class %d", __func__, 2204 ver_pkt->dev_class); 2205 2206 ver_pkt->tag.vio_sid = ldcp->local_session; 2207 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2208 2209 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2210 2211 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2212 sizeof (vio_ver_msg_t), B_TRUE); 2213 2214 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2215 vsw_next_milestone(ldcp); 2216 return; 2217 } else { 2218 ldcp->dev_class = ver_pkt->dev_class; 2219 } 2220 2221 /* 2222 * Now check the version.
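*
* Worked example (assuming vsw_versions[] held only {1, 2}): a
* peer INFO of 1.3 matches major 1, has its minor clipped to 2
* and is ACK'd as 1.2; an INFO of 2.0 is rewritten to 1.2 and
* NACK'd, inviting the peer to retry at that version; an INFO
* of 0.9 matches nothing, is zeroed out and NACK'd, which ends
* negotiation.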
2223 */ 2224 if (vsw_supported_version(ver_pkt) == 0) { 2225 /* 2226 * Support this major version and possibly 2227 * adjusted minor version. 2228 */ 2229 2230 D2(vswp, "%s: accepted ver %d:%d", __func__, 2231 ver_pkt->ver_major, ver_pkt->ver_minor); 2232 2233 /* Store accepted values */ 2234 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2235 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2236 2237 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2238 2239 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 2240 2241 if (vsw_obp_ver_proto_workaround == B_TRUE) { 2242 /* 2243 * Send a version info message 2244 * using the accepted version that 2245 * we are about to ack. Also note that 2246 * we send our ver info before we ack. 2247 * Otherwise, as soon as it receives the 2248 * ack, OBP sends an attr info msg, which 2249 * breaks vsw_check_flag() invoked 2250 * from vsw_process_ctrl_attr_pkt(), 2251 * as we also need VSW_VER_ACK_RECV to 2252 * be set in lane_out.lstate before 2253 * we can receive attr info. 2254 */ 2255 vsw_send_ver(ldcp); 2256 } 2257 } else { 2258 /* 2259 * NACK back with the next lower major/minor 2260 * pairing we support (if we don't support any more 2261 * versions they will be set to zero). 2262 */ 2263 2264 D2(vswp, "%s: replying with ver %d:%d", __func__, 2265 ver_pkt->ver_major, ver_pkt->ver_minor); 2266 2267 /* Store updated values */ 2268 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2269 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2270 2271 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2272 2273 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2274 } 2275 2276 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2277 ver_pkt->tag.vio_sid = ldcp->local_session; 2278 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2279 sizeof (vio_ver_msg_t), B_TRUE); 2280 2281 vsw_next_milestone(ldcp); 2282 break; 2283 2284 case VIO_SUBTYPE_ACK: 2285 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2286 2287 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2288 return; 2289 2290 /* Store updated values */ 2291 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2292 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2293 2294 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2295 vsw_next_milestone(ldcp); 2296 2297 break; 2298 2299 case VIO_SUBTYPE_NACK: 2300 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2301 2302 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2303 return; 2304 2305 /* 2306 * If our peer sent us a NACK with the ver fields set to 2307 * zero then there is nothing more we can do. Otherwise see 2308 * if we support either the version suggested, or a lesser 2309 * one. 2310 */ 2311 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2312 DERR(vswp, "%s: peer unable to negotiate any " 2313 "further.", __func__); 2314 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2315 vsw_next_milestone(ldcp); 2316 return; 2317 } 2318 2319 /* 2320 * Check to see if we support this major version or 2321 * a lower one. If we don't then maj/min will be set 2322 * to zero.
2323 */ 2324 (void) vsw_supported_version(ver_pkt); 2325 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2326 /* Nothing more we can do */ 2327 DERR(vswp, "%s: version negotiation failed.\n", 2328 __func__); 2329 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2330 vsw_next_milestone(ldcp); 2331 } else { 2332 /* found a supported major version */ 2333 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2334 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2335 2336 D2(vswp, "%s: resending with updated values (%x, %x)", 2337 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2338 2339 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2340 ver_pkt->tag.vio_sid = ldcp->local_session; 2341 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2342 2343 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2344 2345 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2346 sizeof (vio_ver_msg_t), B_TRUE); 2347 2348 vsw_next_milestone(ldcp); 2349 2350 } 2351 break; 2352 2353 default: 2354 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2355 ver_pkt->tag.vio_subtype); 2356 } 2357 2358 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2359 } 2360 2361 /* 2362 * Process an attribute packet. We can end up here either because our peer 2363 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2364 * peer has sent us an attribute INFO message. 2365 * 2366 * If it's an ACK we then move to the next stage of the handshake which 2367 * is to send our descriptor ring info to our peer. If it's a NACK then 2368 * there is nothing more we can (currently) do. 2369 * 2370 * If we get a valid/acceptable INFO packet (and we have already negotiated 2371 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2372 * NACK back and reset channel state to INACTIVE. 2373 * 2374 * FUTURE: in time we will probably negotiate over attributes, but for 2375 * the moment unacceptable attributes are regarded as a fatal error. 2376 * 2377 */ 2378 void 2379 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 2380 { 2381 vnet_attr_msg_t *attr_pkt; 2382 vsw_t *vswp = ldcp->ldc_vswp; 2383 vsw_port_t *port = ldcp->ldc_port; 2384 uint64_t macaddr = 0; 2385 int i; 2386 2387 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2388 2389 /* 2390 * We know this is a ctrl/attr packet so 2391 * cast it into the correct structure. 2392 */ 2393 attr_pkt = (vnet_attr_msg_t *)pkt; 2394 2395 switch (attr_pkt->tag.vio_subtype) { 2396 case VIO_SUBTYPE_INFO: 2397 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2398 2399 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 2400 return; 2401 2402 /* 2403 * If the attributes are unacceptable then we NACK back. 2404 */ 2405 if (vsw_check_attr(attr_pkt, ldcp)) { 2406 2407 DERR(vswp, "%s (chan %d): invalid attributes", 2408 __func__, ldcp->ldc_id); 2409 2410 vsw_free_lane_resources(ldcp, INBOUND); 2411 2412 attr_pkt->tag.vio_sid = ldcp->local_session; 2413 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2414 2415 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2416 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 2417 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2418 sizeof (vnet_attr_msg_t), B_TRUE); 2419 2420 vsw_next_milestone(ldcp); 2421 return; 2422 } 2423 2424 /* 2425 * Otherwise store attributes for this lane and update 2426 * lane state.
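*
* The peer's MAC address arrives as a uint64_t; the loop below
* unpacks it into p_macaddr most significant octet first, so
* e.g. an addr of 0x0003ba0f4521 becomes 00:03:ba:0f:45:21.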
2427 */ 2428 ldcp->lane_in.mtu = attr_pkt->mtu; 2429 ldcp->lane_in.addr = attr_pkt->addr; 2430 ldcp->lane_in.addr_type = attr_pkt->addr_type; 2431 ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode; 2432 ldcp->lane_in.ack_freq = attr_pkt->ack_freq; 2433 2434 macaddr = ldcp->lane_in.addr; 2435 for (i = ETHERADDRL - 1; i >= 0; i--) { 2436 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 2437 macaddr >>= 8; 2438 } 2439 2440 /* create the fdb entry for this port/mac address */ 2441 (void) vsw_add_fdb(vswp, port); 2442 2443 /* set up device-specific xmit routines */ 2444 mutex_enter(&port->tx_lock); 2445 if ((VSW_VER_EQ(ldcp, 1, 2) && 2446 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) || 2447 (VSW_VER_LT(ldcp, 1, 2) && 2448 (ldcp->lane_in.xfer_mode == VIO_DRING_MODE_V1_0))) { 2449 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 2450 port->transmit = vsw_dringsend; 2451 } else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) { 2452 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 2453 vsw_create_privring(ldcp); 2454 port->transmit = vsw_descrsend; 2455 ldcp->lane_out.xfer_mode = VIO_DESC_MODE; 2456 } 2457 mutex_exit(&port->tx_lock); 2458 2459 attr_pkt->tag.vio_sid = ldcp->local_session; 2460 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2461 2462 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2463 2464 ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT; 2465 2466 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2467 sizeof (vnet_attr_msg_t), B_TRUE); 2468 2469 vsw_next_milestone(ldcp); 2470 break; 2471 2472 case VIO_SUBTYPE_ACK: 2473 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2474 2475 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 2476 return; 2477 2478 ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV; 2479 vsw_next_milestone(ldcp); 2480 break; 2481 2482 case VIO_SUBTYPE_NACK: 2483 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2484 2485 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2486 return; 2487 2488 ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV; 2489 vsw_next_milestone(ldcp); 2490 break; 2491 2492 default: 2493 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2494 attr_pkt->tag.vio_subtype); 2495 } 2496 2497 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2498 } 2499 2500 /* 2501 * Process a dring info packet. We can end up here either because our peer 2502 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 2503 * peer has sent us a dring INFO message. 2504 * 2505 * If we get a valid/acceptable INFO packet (and we have already negotiated 2506 * a version) we ACK back and update the lane state, otherwise we NACK back. 2507 * 2508 * FUTURE: nothing to stop client from sending us info on multiple drings, 2509 * but for the moment we will just use the first one we are given. 2510 * 2511 */ 2512 void 2513 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 2514 { 2515 vio_dring_reg_msg_t *dring_pkt; 2516 vsw_t *vswp = ldcp->ldc_vswp; 2517 ldc_mem_info_t minfo; 2518 dring_info_t *dp, *dbp; 2519 int dring_found = 0; 2520 2521 /* 2522 * We know this is a ctrl/dring packet so 2523 * cast it into the correct structure. 2524 */ 2525 dring_pkt = (vio_dring_reg_msg_t *)pkt; 2526 2527 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2528 2529 switch (dring_pkt->tag.vio_subtype) { 2530 case VIO_SUBTYPE_INFO: 2531 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2532 2533 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 2534 return; 2535 2536 /* 2537 * If the dring params are unacceptable then we NACK back.
2538 */ 2539 if (vsw_check_dring_info(dring_pkt)) { 2540 2541 DERR(vswp, "%s (%lld): invalid dring info", 2542 __func__, ldcp->ldc_id); 2543 2544 vsw_free_lane_resources(ldcp, INBOUND); 2545 2546 dring_pkt->tag.vio_sid = ldcp->local_session; 2547 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2548 2549 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2550 2551 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2552 2553 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2554 sizeof (vio_dring_reg_msg_t), B_TRUE); 2555 2556 vsw_next_milestone(ldcp); 2557 return; 2558 } 2559 2560 /* 2561 * Otherwise, attempt to map in the dring using the 2562 * cookie. If that succeeds we send back a unique dring 2563 * identifier that the sending side will use in future 2564 * to refer to this descriptor ring. 2565 */ 2566 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 2567 2568 dp->num_descriptors = dring_pkt->num_descriptors; 2569 dp->descriptor_size = dring_pkt->descriptor_size; 2570 dp->options = dring_pkt->options; 2571 dp->ncookies = dring_pkt->ncookies; 2572 2573 /* 2574 * Note: should only get one cookie. Enforced in 2575 * the ldc layer. 2576 */ 2577 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 2578 sizeof (ldc_mem_cookie_t)); 2579 2580 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 2581 dp->num_descriptors, dp->descriptor_size); 2582 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 2583 dp->options, dp->ncookies); 2584 2585 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 2586 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 2587 LDC_SHADOW_MAP, &(dp->handle))) != 0) { 2588 2589 DERR(vswp, "%s: dring_map failed\n", __func__); 2590 2591 kmem_free(dp, sizeof (dring_info_t)); 2592 vsw_free_lane_resources(ldcp, INBOUND); 2593 2594 dring_pkt->tag.vio_sid = ldcp->local_session; 2595 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2596 2597 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2598 2599 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2600 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2601 sizeof (vio_dring_reg_msg_t), B_TRUE); 2602 2603 vsw_next_milestone(ldcp); 2604 return; 2605 } 2606 2607 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 2608 2609 DERR(vswp, "%s: dring_addr failed\n", __func__); 2610 2611 kmem_free(dp, sizeof (dring_info_t)); 2612 vsw_free_lane_resources(ldcp, INBOUND); 2613 2614 dring_pkt->tag.vio_sid = ldcp->local_session; 2615 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2616 2617 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2618 2619 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2620 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2621 sizeof (vio_dring_reg_msg_t), B_TRUE); 2622 2623 vsw_next_milestone(ldcp); 2624 return; 2625 } else { 2626 /* store the address of the pub part of ring */ 2627 dp->pub_addr = minfo.vaddr; 2628 } 2629 2630 /* no private section as we are importing */ 2631 dp->priv_addr = NULL; 2632 2633 /* 2634 * Using a simple monotonically increasing integer for 2635 * the ident at the moment. 2636 */ 2637 dp->ident = ldcp->next_ident; 2638 ldcp->next_ident++; 2639 2640 dp->end_idx = 0; 2641 dp->next = NULL; 2642 2643 /* 2644 * Link it onto the end of the list of drings 2645 * for this lane.
2646 */ 2647 if (ldcp->lane_in.dringp == NULL) { 2648 D2(vswp, "%s: adding first INBOUND dring", __func__); 2649 ldcp->lane_in.dringp = dp; 2650 } else { 2651 dbp = ldcp->lane_in.dringp; 2652 2653 while (dbp->next != NULL) 2654 dbp = dbp->next; 2655 2656 dbp->next = dp; 2657 } 2658 2659 /* acknowledge it */ 2660 dring_pkt->tag.vio_sid = ldcp->local_session; 2661 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2662 dring_pkt->dring_ident = dp->ident; 2663 2664 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2665 sizeof (vio_dring_reg_msg_t), B_TRUE); 2666 2667 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 2668 vsw_next_milestone(ldcp); 2669 break; 2670 2671 case VIO_SUBTYPE_ACK: 2672 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2673 2674 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 2675 return; 2676 2677 /* 2678 * Peer is acknowledging our dring info and will have 2679 * sent us a dring identifier which we will use to 2680 * refer to this ring w.r.t. our peer. 2681 */ 2682 dp = ldcp->lane_out.dringp; 2683 if (dp != NULL) { 2684 /* 2685 * Walk the list to find the ring this ident 2686 * should be associated with. 2687 */ 2691 while (dp != NULL) { 2692 if (vsw_dring_match(dp, dring_pkt)) { 2693 dring_found = 1; 2694 break; 2695 } 2696 dp = dp->next; 2697 } 2698 2699 if (dring_found == 0) { 2700 DERR(vswp, "%s: unrecognised ring cookie", 2701 __func__); 2702 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2703 return; 2704 } 2705 2706 } else { 2707 DERR(vswp, "%s: DRING ACK received but no drings " 2708 "allocated", __func__); 2709 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2710 return; 2711 } 2712 2713 /* store ident */ 2714 dp->ident = dring_pkt->dring_ident; 2715 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 2716 vsw_next_milestone(ldcp); 2717 break; 2718 2719 case VIO_SUBTYPE_NACK: 2720 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2721 2722 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 2723 return; 2724 2725 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 2726 vsw_next_milestone(ldcp); 2727 break; 2728 2729 default: 2730 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2731 dring_pkt->tag.vio_subtype); 2732 } 2733 2734 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2735 } 2736 2737 /* 2738 * Process a request from peer to unregister a dring. 2739 * 2740 * For the moment we just restart the handshake if our 2741 * peer endpoint attempts to unregister a dring. 2742 */ 2743 void 2744 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 2745 { 2746 vsw_t *vswp = ldcp->ldc_vswp; 2747 vio_dring_unreg_msg_t *dring_pkt; 2748 2749 /* 2750 * We know this is a ctrl/dring packet so 2751 * cast it into the correct structure.
2752 */ 2753 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 2754 2755 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2756 2757 switch (dring_pkt->tag.vio_subtype) { 2758 case VIO_SUBTYPE_INFO: 2759 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2760 2761 DWARN(vswp, "%s: restarting handshake..", __func__); 2762 break; 2763 2764 case VIO_SUBTYPE_ACK: 2765 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2766 2767 DWARN(vswp, "%s: restarting handshake..", __func__); 2768 break; 2769 2770 case VIO_SUBTYPE_NACK: 2771 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2772 2773 DWARN(vswp, "%s: restarting handshake..", __func__); 2774 break; 2775 2776 default: 2777 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2778 dring_pkt->tag.vio_subtype); 2779 } 2780 2781 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2782 2783 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2784 } 2785 2786 #define SND_MCST_NACK(ldcp, pkt) \ do { /* do..while (0) so the macro expands as a single statement */ \ 2787 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 2788 pkt->tag.vio_sid = ldcp->local_session; \ 2789 (void) vsw_send_msg(ldcp, (void *)pkt, \ 2790 sizeof (vnet_mcast_msg_t), B_TRUE); \ } while (0) 2791 2792 /* 2793 * Process a multicast request from a vnet. 2794 * 2795 * Vnets specify a multicast address that they are interested in. This 2796 * address is used as a key into the hash table which forms the multicast 2797 * forwarding database (mFDB). 2798 * 2799 * The table keys are the multicast addresses, while the table entries 2800 * are pointers to lists of ports which wish to receive packets for the 2801 * specified multicast address. 2802 * 2803 * When a multicast packet is being switched we use the address as a key 2804 * into the hash table, and then walk the appropriate port list forwarding 2805 * the pkt to each port in turn. 2806 * 2807 * If a vnet is no longer interested in a particular multicast grouping 2808 * we simply find the correct location in the hash table and then delete 2809 * the relevant port from the port list. 2810 * 2811 * To deal with the case whereby a port is being deleted without first 2812 * removing itself from the lists in the hash table, we maintain a list 2813 * of multicast addresses the port has registered an interest in, within 2814 * the port structure itself. We then simply walk that list of addresses 2815 * using them as keys into the hash table and remove the port from the 2816 * appropriate lists. 2817 */ 2818 static void 2819 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 2820 { 2821 vnet_mcast_msg_t *mcst_pkt; 2822 vsw_port_t *port = ldcp->ldc_port; 2823 vsw_t *vswp = ldcp->ldc_vswp; 2824 int i; 2825 2826 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2827 2828 /* 2829 * We know this is a ctrl/mcast packet so 2830 * cast it into the correct structure. 2831 */ 2832 mcst_pkt = (vnet_mcast_msg_t *)pkt; 2833 2834 switch (mcst_pkt->tag.vio_subtype) { 2835 case VIO_SUBTYPE_INFO: 2836 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2837 2838 /* 2839 * Check if in correct state to receive a multicast 2840 * message (i.e. handshake complete). If not reset 2841 * the handshake. 2842 */ 2843 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 2844 return; 2845 2846 /* 2847 * Before attempting to add or remove address check 2848 * that they are valid multicast addresses. 2849 * If not, then NACK back.
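*
* A valid multicast address has the group bit (the least
* significant bit of the first octet) set, e.g. 01:00:5e:00:00:01;
* a unicast address such as 00:14:4f:00:00:01 fails the check
* below and draws a NACK.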
2850 */ 2851 for (i = 0; i < mcst_pkt->count; i++) { 2852 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 2853 DERR(vswp, "%s: invalid multicast address", 2854 __func__); 2855 SND_MCST_NACK(ldcp, mcst_pkt); 2856 return; 2857 } 2858 } 2859 2860 /* 2861 * Now add/remove the addresses. If this fails we 2862 * NACK back. 2863 */ 2864 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 2865 SND_MCST_NACK(ldcp, mcst_pkt); 2866 return; 2867 } 2868 2869 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2870 mcst_pkt->tag.vio_sid = ldcp->local_session; 2871 2872 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 2873 2874 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 2875 sizeof (vnet_mcast_msg_t), B_TRUE); 2876 break; 2877 2878 case VIO_SUBTYPE_ACK: 2879 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2880 2881 /* 2882 * We shouldn't ever get a multicast ACK message as 2883 * at the moment we never request multicast addresses 2884 * to be set on some other device. This may change in 2885 * the future if we have cascading switches. 2886 */ 2887 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 2888 return; 2889 2890 /* Do nothing */ 2891 break; 2892 2893 case VIO_SUBTYPE_NACK: 2894 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2895 2896 /* 2897 * We shouldn't get a multicast NACK packet for the 2898 * same reasons as we shouldn't get an ACK packet. 2899 */ 2900 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 2901 return; 2902 2903 /* Do nothing */ 2904 break; 2905 2906 default: 2907 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2908 mcst_pkt->tag.vio_subtype); 2909 } 2910 2911 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2912 } 2913 2914 static void 2915 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 2916 { 2917 vio_rdx_msg_t *rdx_pkt; 2918 vsw_t *vswp = ldcp->ldc_vswp; 2919 2920 /* 2921 * We know this is a ctrl/rdx packet so 2922 * cast it into the correct structure. 2923 */ 2924 rdx_pkt = (vio_rdx_msg_t *)pkt; 2925 2926 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2927 2928 switch (rdx_pkt->tag.vio_subtype) { 2929 case VIO_SUBTYPE_INFO: 2930 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2931 2932 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 2933 return; 2934 2935 rdx_pkt->tag.vio_sid = ldcp->local_session; 2936 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2937 2938 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 2939 2940 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 2941 2942 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 2943 sizeof (vio_rdx_msg_t), B_TRUE); 2944 2945 vsw_next_milestone(ldcp); 2946 break; 2947 2948 case VIO_SUBTYPE_ACK: 2949 /* 2950 * Should be handled in-band by callback handler.
2951 */ 2952 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 2953 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2954 break; 2955 2956 case VIO_SUBTYPE_NACK: 2957 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2958 2959 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 2960 return; 2961 2962 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 2963 vsw_next_milestone(ldcp); 2964 break; 2965 2966 default: 2967 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2968 rdx_pkt->tag.vio_subtype); 2969 } 2970 2971 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2972 } 2973 2974 static void 2975 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp, 2976 uint32_t msglen) 2977 { 2978 uint16_t env = tagp->vio_subtype_env; 2979 vsw_t *vswp = ldcp->ldc_vswp; 2980 2981 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2982 2983 /* session id check */ 2984 if (ldcp->session_status & VSW_PEER_SESSION) { 2985 if (ldcp->peer_session != tagp->vio_sid) { 2986 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2987 __func__, ldcp->ldc_id, tagp->vio_sid); 2988 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2989 return; 2990 } 2991 } 2992 2993 /* 2994 * It is an error for us to be getting data packets 2995 * before the handshake has completed. 2996 */ 2997 if (ldcp->hphase != VSW_MILESTONE4) { 2998 DERR(vswp, "%s: got data packet before handshake complete " 2999 "hphase %d (%x: %x)", __func__, ldcp->hphase, 3000 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3001 DUMP_FLAGS(ldcp->lane_in.lstate); 3002 DUMP_FLAGS(ldcp->lane_out.lstate); 3003 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3004 return; 3005 } 3006 3007 /* 3008 * To reduce the locking contention, release the 3009 * ldc_cblock here and re-acquire it once we are done 3010 * receiving packets. 3011 */ 3012 mutex_exit(&ldcp->ldc_cblock); 3013 mutex_enter(&ldcp->ldc_rxlock); 3014 3015 /* 3016 * Switch on vio_subtype envelope, then let lower routines 3017 * decide if it's an INFO, ACK or NACK packet.
3018 */ 3019 if (env == VIO_DRING_DATA) { 3020 vsw_process_data_dring_pkt(ldcp, dpkt); 3021 } else if (env == VIO_PKT_DATA) { 3022 ldcp->rx_pktdata(ldcp, dpkt, msglen); 3023 } else if (env == VIO_DESC_DATA) { 3024 vsw_process_data_ibnd_pkt(ldcp, dpkt); 3025 } else { 3026 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 3027 } 3028 3029 mutex_exit(&ldcp->ldc_rxlock); 3030 mutex_enter(&ldcp->ldc_cblock); 3031 3032 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3033 } 3034 3035 #define SND_DRING_NACK(ldcp, pkt) \ do { /* do..while (0) so the macro expands as a single statement */ \ 3036 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3037 pkt->tag.vio_sid = ldcp->local_session; \ 3038 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3039 sizeof (vio_dring_msg_t), B_TRUE); \ } while (0) 3040 3041 static void 3042 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 3043 { 3044 vio_dring_msg_t *dring_pkt; 3045 vnet_public_desc_t *pub_addr = NULL; 3046 vsw_private_desc_t *priv_addr = NULL; 3047 dring_info_t *dp = NULL; 3048 vsw_t *vswp = ldcp->ldc_vswp; 3049 mblk_t *mp = NULL; 3050 mblk_t *bp = NULL; 3051 mblk_t *bpt = NULL; 3052 size_t nbytes = 0; 3053 uint64_t ncookies = 0; 3054 uint64_t chain = 0; 3055 uint64_t len; 3056 uint32_t pos, start, datalen; 3057 uint32_t range_start, range_end; 3058 int32_t end, num, cnt = 0; 3059 int i, rv, msg_rv = 0; 3060 boolean_t ack_needed = B_FALSE; 3061 boolean_t prev_desc_ack = B_FALSE; 3062 int read_attempts = 0; 3063 struct ether_header *ehp; 3064 3065 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3066 3067 /* 3068 * We know this is a data/dring packet so 3069 * cast it into the correct structure. 3070 */ 3071 dring_pkt = (vio_dring_msg_t *)dpkt; 3072 3073 /* 3074 * Switch on the vio_subtype. If it's an INFO then we need to 3075 * process the data. If it's an ACK we need to make sure 3076 * it makes sense (i.e., did we send an earlier data/info), 3077 * and if it's a NACK then we may attempt a retry. 3078 */ 3079 switch (dring_pkt->tag.vio_subtype) { 3080 case VIO_SUBTYPE_INFO: 3081 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 3082 3083 READ_ENTER(&ldcp->lane_in.dlistrw); 3084 if ((dp = vsw_ident2dring(&ldcp->lane_in, 3085 dring_pkt->dring_ident)) == NULL) { 3086 RW_EXIT(&ldcp->lane_in.dlistrw); 3087 3088 DERR(vswp, "%s(%lld): unable to find dring from " 3089 "ident 0x%llx", __func__, ldcp->ldc_id, 3090 dring_pkt->dring_ident); 3091 3092 SND_DRING_NACK(ldcp, dring_pkt); 3093 return; 3094 } 3095 3096 start = pos = dring_pkt->start_idx; 3097 end = dring_pkt->end_idx; 3098 len = dp->num_descriptors; 3099 3100 range_start = range_end = pos; 3101 3102 D2(vswp, "%s(%lld): start index %ld : end %ld\n", 3103 __func__, ldcp->ldc_id, start, end); 3104 3105 if (end == -1) { 3106 num = -1; 3107 } else if (end >= 0) { 3108 num = end >= pos ?
end - pos + 1 : (len - pos + 1) + end; 3109 3110 /* basic sanity check */ 3111 if (end > len) { 3112 RW_EXIT(&ldcp->lane_in.dlistrw); 3113 DERR(vswp, "%s(%lld): endpoint %lld outside " 3114 "ring length %lld", __func__, 3115 ldcp->ldc_id, end, len); 3116 3117 SND_DRING_NACK(ldcp, dring_pkt); 3118 return; 3119 } 3120 } else { 3121 RW_EXIT(&ldcp->lane_in.dlistrw); 3122 DERR(vswp, "%s(%lld): invalid endpoint %lld", 3123 __func__, ldcp->ldc_id, end); 3124 SND_DRING_NACK(ldcp, dring_pkt); 3125 return; 3126 } 3127 3128 while (cnt != num) { 3129 vsw_recheck_desc: 3130 if ((rv = ldc_mem_dring_acquire(dp->handle, 3131 pos, pos)) != 0) { 3132 RW_EXIT(&ldcp->lane_in.dlistrw); 3133 DERR(vswp, "%s(%lld): unable to acquire " 3134 "descriptor at pos %d: err %d", 3135 __func__, ldcp->ldc_id, pos, rv); 3136 SND_DRING_NACK(ldcp, dring_pkt); 3137 ldcp->ldc_stats.ierrors++; 3138 return; 3139 } 3140 3141 pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos; 3142 3143 /* 3144 * When given a bounded range of descriptors 3145 * to process, it's an error to hit a descriptor 3146 * which is not ready. In the non-bounded case 3147 * (end_idx == -1) this simply indicates we have 3148 * reached the end of the current active range. 3149 */ 3150 if (pub_addr->hdr.dstate != VIO_DESC_READY) { 3151 /* unbound - no error */ 3152 if (end == -1) { 3153 if (read_attempts == vsw_read_attempts) 3154 break; 3155 3156 delay(drv_usectohz(vsw_desc_delay)); 3157 read_attempts++; 3158 goto vsw_recheck_desc; 3159 } 3160 3161 /* bounded - error - so NACK back */ 3162 RW_EXIT(&ldcp->lane_in.dlistrw); 3163 DERR(vswp, "%s(%lld): descriptor not READY " 3164 "(%d)", __func__, ldcp->ldc_id, 3165 pub_addr->hdr.dstate); 3166 SND_DRING_NACK(ldcp, dring_pkt); 3167 return; 3168 } 3169 3170 DTRACE_PROBE1(read_attempts, int, read_attempts); 3171 3172 range_end = pos; 3173 3174 /* 3175 * If we ACK'd the previous descriptor then now 3176 * record the new range start position for later 3177 * ACK's. 3178 */ 3179 if (prev_desc_ack) { 3180 range_start = pos; 3181 3182 D2(vswp, "%s(%lld): updating range start to be " 3183 "%d", __func__, ldcp->ldc_id, range_start); 3184 3185 prev_desc_ack = B_FALSE; 3186 } 3187 3188 /* 3189 * Data is padded to align on an 8 byte boundary; 3190 * datalen is the actual data length, i.e. minus that 3191 * padding. 3192 */ 3193 datalen = pub_addr->nbytes; 3194 3195 /* 3196 * Does peer wish us to ACK when we have finished 3197 * with this descriptor ? 3198 */ 3199 if (pub_addr->hdr.ack) 3200 ack_needed = B_TRUE; 3201 3202 D2(vswp, "%s(%lld): processing desc %lld at " 3203 "0x%llx : dstate 0x%lx : datalen 0x%lx", 3204 __func__, ldcp->ldc_id, pos, pub_addr, 3205 pub_addr->hdr.dstate, datalen); 3206 3207 /* 3208 * Mark that we are starting to process descriptor. 3209 */ 3210 pub_addr->hdr.dstate = VIO_DESC_ACCEPTED; 3211 3212 /* 3213 * Ensure that we ask ldc for an aligned 3214 * number of bytes. 3215 */ 3216 nbytes = (datalen + VNET_IPALIGN + 7) & ~7; 3217 3218 mp = vio_multipool_allocb(&ldcp->vmp, nbytes); 3219 if (mp == NULL) { 3220 ldcp->ldc_stats.rx_vio_allocb_fail++; 3221 /* 3222 * No free receive buffers available, so 3223 * fall back on allocb(9F). Make sure that 3224 * we get a data buffer which is a multiple 3225 * of 8 as this is required by ldc_mem_copy.
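*
* For example, a 60 byte frame with (say) a 6 byte VNET_IPALIGN
* gives nbytes = (60 + 6 + 7) & ~7 = 72 above, while
* datalen + VNET_IPALIGN + 8 = 74 here, so the fallback buffer
* always covers the aligned ldc_mem_copy() length.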
3226 */ 3227 DTRACE_PROBE(allocb); 3228 if ((mp = allocb(datalen + VNET_IPALIGN + 8, 3229 BPRI_MED)) == NULL) { 3230 DERR(vswp, "%s(%ld): allocb failed", 3231 __func__, ldcp->ldc_id); 3232 pub_addr->hdr.dstate = VIO_DESC_DONE; 3233 (void) ldc_mem_dring_release(dp->handle, 3234 pos, pos); 3235 ldcp->ldc_stats.ierrors++; 3236 ldcp->ldc_stats.rx_allocb_fail++; 3237 break; 3238 } 3239 } 3240 3241 ncookies = pub_addr->ncookies; 3242 rv = ldc_mem_copy(ldcp->ldc_handle, 3243 (caddr_t)mp->b_rptr, 0, &nbytes, 3244 pub_addr->memcookie, ncookies, LDC_COPY_IN); 3245 3246 if (rv != 0) { 3247 DERR(vswp, "%s(%d): unable to copy in data " 3248 "from %d cookies in desc %d (rv %d)", 3249 __func__, ldcp->ldc_id, ncookies, pos, rv); 3250 freemsg(mp); 3251 3252 pub_addr->hdr.dstate = VIO_DESC_DONE; 3253 (void) ldc_mem_dring_release(dp->handle, 3254 pos, pos); 3255 ldcp->ldc_stats.ierrors++; 3256 break; 3257 } else { 3258 D2(vswp, "%s(%d): copied in %ld bytes" 3259 " using %d cookies", __func__, 3260 ldcp->ldc_id, nbytes, ncookies); 3261 } 3262 3263 /* adjust the read pointer to skip over the padding */ 3264 mp->b_rptr += VNET_IPALIGN; 3265 3266 /* point to the actual end of data */ 3267 mp->b_wptr = mp->b_rptr + datalen; 3268 3269 /* update statistics */ 3270 ehp = (struct ether_header *)mp->b_rptr; 3271 if (IS_BROADCAST(ehp)) 3272 ldcp->ldc_stats.brdcstrcv++; 3273 else if (IS_MULTICAST(ehp)) 3274 ldcp->ldc_stats.multircv++; 3275 3276 ldcp->ldc_stats.ipackets++; 3277 ldcp->ldc_stats.rbytes += datalen; 3278 3279 /* build a chain of received packets */ 3280 if (bp == NULL) { 3281 /* first pkt */ 3282 bp = mp; 3283 bp->b_next = bp->b_prev = NULL; 3284 bpt = bp; 3285 chain = 1; 3286 } else { 3287 mp->b_next = mp->b_prev = NULL; 3288 bpt->b_next = mp; 3289 bpt = mp; 3290 chain++; 3291 } 3292 3293 /* mark we are finished with this descriptor */ 3294 pub_addr->hdr.dstate = VIO_DESC_DONE; 3295 3296 (void) ldc_mem_dring_release(dp->handle, pos, pos); 3297 3298 /* 3299 * Send an ACK back to peer if requested. 3300 */ 3301 if (ack_needed) { 3302 ack_needed = B_FALSE; 3303 3304 dring_pkt->start_idx = range_start; 3305 dring_pkt->end_idx = range_end; 3306 3307 DERR(vswp, "%s(%lld): processed %d %d, ACK" 3308 " requested", __func__, ldcp->ldc_id, 3309 dring_pkt->start_idx, dring_pkt->end_idx); 3310 3311 dring_pkt->dring_process_state = VIO_DP_ACTIVE; 3312 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3313 dring_pkt->tag.vio_sid = ldcp->local_session; 3314 3315 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3316 sizeof (vio_dring_msg_t), B_FALSE); 3317 3318 /* 3319 * Check if ACK was successfully sent. If not 3320 * we break and deal with that below. 3321 */ 3322 if (msg_rv != 0) 3323 break; 3324 3325 prev_desc_ack = B_TRUE; 3326 range_start = pos; 3327 } 3328 3329 /* next descriptor */ 3330 pos = (pos + 1) % len; 3331 cnt++; 3332 3333 /* 3334 * Break out of loop here and stop processing to 3335 * allow some other network device (or disk) to 3336 * get access to the cpu. 3337 */ 3338 if (chain > vsw_chain_len) { 3339 D3(vswp, "%s(%lld): switching chain of %d " 3340 "msgs", __func__, ldcp->ldc_id, chain); 3341 break; 3342 } 3343 } 3344 RW_EXIT(&ldcp->lane_in.dlistrw); 3345 3346 /* 3347 * If, when we attempted to send the ACK, we found that the 3348 * channel had been reset, handle that now. We deal with 3349 * it here because we cannot reset the channel while holding 3350 * the dlistrw lock, and we don't want to acquire/release 3351 * that lock continuously in the above loop, as a channel 3352 * reset should be a rare event.
3353 */ 3354 if (msg_rv == ECONNRESET) { 3355 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3356 break; 3357 } 3358 3359 /* send the chain of packets to be switched */ 3360 if (bp != NULL) { 3361 DTRACE_PROBE1(vsw_rcv_msgs, int, chain); 3362 D3(vswp, "%s(%lld): switching chain of %d msgs", 3363 __func__, ldcp->ldc_id, chain); 3364 vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT, 3365 ldcp->ldc_port, NULL); 3366 } 3367 3368 DTRACE_PROBE1(msg_cnt, int, cnt); 3369 3370 /* 3371 * We are now finished, so ACK back with the state 3372 * set to STOPPED so our peer knows we are finished. 3373 */ 3374 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3375 dring_pkt->tag.vio_sid = ldcp->local_session; 3376 3377 dring_pkt->dring_process_state = VIO_DP_STOPPED; 3378 3379 DTRACE_PROBE(stop_process_sent); 3380 3381 /* 3382 * We have not processed any more descriptors beyond 3383 * the last one we ACK'd. 3384 */ 3385 if (prev_desc_ack) 3386 range_start = range_end; 3387 3388 dring_pkt->start_idx = range_start; 3389 dring_pkt->end_idx = range_end; 3390 3391 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 3392 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3393 dring_pkt->end_idx); 3394 3395 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3396 sizeof (vio_dring_msg_t), B_TRUE); 3397 break; 3398 3399 case VIO_SUBTYPE_ACK: 3400 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 3401 /* 3402 * Verify that the relevant descriptors are all 3403 * marked as DONE. 3404 */ 3405 READ_ENTER(&ldcp->lane_out.dlistrw); 3406 if ((dp = vsw_ident2dring(&ldcp->lane_out, 3407 dring_pkt->dring_ident)) == NULL) { 3408 RW_EXIT(&ldcp->lane_out.dlistrw); 3409 DERR(vswp, "%s: unknown ident in ACK", __func__); 3410 return; 3411 } 3412 3414 start = dring_pkt->start_idx; 3415 end = dring_pkt->end_idx; 3416 len = dp->num_descriptors; 3417 3418 3419 mutex_enter(&dp->dlock); 3420 dp->last_ack_recv = end; 3421 ldcp->ldc_stats.dring_data_acks++; 3422 mutex_exit(&dp->dlock); 3423 3424 (void) vsw_reclaim_dring(dp, start); 3425 3426 /* 3427 * If our peer is stopping processing descriptors then 3428 * we check to make sure it has processed all the descriptors 3429 * we have updated. If not then we send it a new message 3430 * to prompt it to restart. 3431 */ 3432 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) { 3433 DTRACE_PROBE(stop_process_recv); 3434 D2(vswp, "%s(%lld): got stopping msg : %d : %d", 3435 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3436 dring_pkt->end_idx); 3437 3438 /* 3439 * Check next descriptor in public section of ring. 3440 * If its marked as READY then we need to prompt our 3441 * peer to start processing the ring again. 3442 */ 3443 i = (end + 1) % len; 3444 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 3445 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3446 3447 /* 3448 * Hold the restart lock across all of this to 3449 * make sure that it's not possible for us to 3450 * decide that a msg needs to be sent in the future 3451 * while the sending code, having already checked, 3452 * is about to exit.
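*
* Concretely: without restart_lock, the transmit side could mark
* a new descriptor READY and test restart_reqd just before we set
* it here, with neither side then sending the restart INFO msg;
* holding the lock in both places closes that window.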
3453 */ 3454 mutex_enter(&dp->restart_lock); 3455 ldcp->ldc_stats.dring_stopped_acks++; 3456 mutex_enter(&priv_addr->dstate_lock); 3457 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 3458 3459 mutex_exit(&priv_addr->dstate_lock); 3460 3461 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3462 dring_pkt->tag.vio_sid = ldcp->local_session; 3463 3464 dring_pkt->start_idx = (end + 1) % len; 3465 dring_pkt->end_idx = -1; 3466 3467 D2(vswp, "%s(%lld) : sending restart msg:" 3468 " %d : %d", __func__, ldcp->ldc_id, 3469 dring_pkt->start_idx, dring_pkt->end_idx); 3470 3471 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3472 sizeof (vio_dring_msg_t), B_FALSE); 3473 ldcp->ldc_stats.dring_data_msgs++; 3474 3475 } else { 3476 mutex_exit(&priv_addr->dstate_lock); 3477 dp->restart_reqd = B_TRUE; 3478 } 3479 mutex_exit(&dp->restart_lock); 3480 } 3481 RW_EXIT(&ldcp->lane_out.dlistrw); 3482 3483 /* only do channel reset after dropping dlistrw lock */ 3484 if (msg_rv == ECONNRESET) 3485 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3486 3487 break; 3488 3489 case VIO_SUBTYPE_NACK: 3490 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 3491 __func__, ldcp->ldc_id); 3492 /* 3493 * Something is badly wrong if we are getting NACK's 3494 * for our data pkts. So reset the channel. 3495 */ 3496 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3497 3498 break; 3499 3500 default: 3501 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3502 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 3503 } 3504 3505 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3506 } 3507 3508 /* 3509 * dummy pkt data handler function for vnet protocol version 1.0 3510 */ 3511 static void 3512 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 3513 { 3514 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 3515 } 3516 3517 /* 3518 * This function handles raw pkt data messages received over the channel. 3519 * Currently, only priority-eth-type frames are received through this mechanism. 3520 * In this case, the frame(data) is present within the message itself which 3521 * is copied into an mblk before switching it. 3522 */ 3523 static void 3524 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 3525 { 3526 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 3527 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 3528 uint32_t size; 3529 mblk_t *mp; 3530 vsw_t *vswp = ldcp->ldc_vswp; 3531 vgen_stats_t *statsp = &ldcp->ldc_stats; 3532 3533 size = msglen - VIO_PKT_DATA_HDRSIZE; 3534 if (size < ETHERMIN || size > ETHERMAX) { 3535 (void) atomic_inc_32(&statsp->rx_pri_fail); 3536 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3537 ldcp->ldc_id, size); 3538 return; 3539 } 3540 3541 mp = vio_multipool_allocb(&ldcp->vmp, size); 3542 if (mp == NULL) { 3543 mp = allocb(size, BPRI_MED); 3544 if (mp == NULL) { 3545 (void) atomic_inc_32(&statsp->rx_pri_fail); 3546 DWARN(vswp, "%s(%lld) allocb failure, " 3547 "unable to process priority frame\n", __func__, 3548 ldcp->ldc_id); 3549 return; 3550 } 3551 } 3552 3553 /* copy the frame from the payload of raw data msg into the mblk */ 3554 bcopy(dpkt->data, mp->b_rptr, size); 3555 mp->b_wptr = mp->b_rptr + size; 3556 3557 /* update stats */ 3558 (void) atomic_inc_64(&statsp->rx_pri_packets); 3559 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 3560 3561 /* switch the frame to destination */ 3562 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 3563 } 3564 3565 /* 3566 * Process an in-band descriptor message (most likely from 3567 * OBP). 
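*
* Unlike dring mode, an in-band (VIO_DESC_DATA) descriptor
* carries its memory cookies in the message itself: the frame is
* copied straight in from those cookies and each message is
* individually ACK'd, with no shared descriptor ring mapped in
* for this path.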
3568 */ 3569 static void 3570 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3571 { 3572 vnet_ibnd_desc_t *ibnd_desc; 3573 dring_info_t *dp = NULL; 3574 vsw_private_desc_t *priv_addr = NULL; 3575 vsw_t *vswp = ldcp->ldc_vswp; 3576 mblk_t *mp = NULL; 3577 size_t nbytes = 0; 3578 size_t off = 0; 3579 uint64_t idx = 0; 3580 uint32_t num = 1, len, datalen = 0; 3581 uint64_t ncookies = 0; 3582 int i, rv; 3583 int j = 0; 3584 3585 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3586 3587 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3588 3589 switch (ibnd_desc->hdr.tag.vio_subtype) { 3590 case VIO_SUBTYPE_INFO: 3591 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3592 3593 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3594 return; 3595 3596 /* 3597 * Data is padded to align on an 8 byte boundary; 3598 * nbytes is the actual data length, i.e. minus that 3599 * padding. 3600 */ 3601 datalen = ibnd_desc->nbytes; 3602 3603 D2(vswp, "%s(%lld): processing inband desc : " 3604 "datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 3605 3606 ncookies = ibnd_desc->ncookies; 3607 3608 /* 3609 * allocb(9F) returns an aligned data block. We 3610 * need to ensure that we ask ldc for an aligned 3611 * number of bytes also. 3612 */ 3613 nbytes = datalen; 3614 if (nbytes & 0x7) { 3615 off = 8 - (nbytes & 0x7); 3616 nbytes += off; 3617 } 3618 3619 /* allocate the padded length; ldc_mem_copy() below writes nbytes */ mp = allocb(nbytes, BPRI_MED); 3620 if (mp == NULL) { 3621 DERR(vswp, "%s(%lld): allocb failed", 3622 __func__, ldcp->ldc_id); 3623 ldcp->ldc_stats.rx_allocb_fail++; 3624 return; 3625 } 3626 3627 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 3628 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 3629 LDC_COPY_IN); 3630 3631 if (rv != 0) { 3632 DERR(vswp, "%s(%d): unable to copy in data from " 3633 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 3634 freemsg(mp); 3635 ldcp->ldc_stats.ierrors++; 3636 return; 3637 } 3638 3639 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 3640 __func__, ldcp->ldc_id, nbytes, ncookies); 3641 3642 /* point to the actual end of data */ 3643 mp->b_wptr = mp->b_rptr + datalen; 3644 ldcp->ldc_stats.ipackets++; 3645 ldcp->ldc_stats.rbytes += datalen; 3646 3647 /* 3648 * We ACK back every in-band descriptor message we process 3649 */ 3650 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 3651 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 3652 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 3653 sizeof (vnet_ibnd_desc_t), B_TRUE); 3654 3655 /* send the packet to be switched */ 3656 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 3657 ldcp->ldc_port, NULL); 3658 3659 break; 3660 3661 case VIO_SUBTYPE_ACK: 3662 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3663 3664 /* Verify the ACK is valid */ 3665 idx = ibnd_desc->hdr.desc_handle; 3666 3667 if (idx >= vsw_ntxds) { 3668 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 3669 "(idx %ld)", vswp->instance, idx); 3670 return; 3671 } 3672 3673 if ((dp = ldcp->lane_out.dringp) == NULL) { 3674 DERR(vswp, "%s: no dring found", __func__); 3675 return; 3676 } 3677 3678 len = dp->num_descriptors; 3679 /* 3680 * If the descriptor we are being ACK'ed for is not the 3681 * one we expected, then pkts were lost somewhere, either 3682 * when we tried to send a msg, or in a previous ACK msg from 3683 * our peer. In either case we now reclaim the descriptors 3684 * in the range from the last ACK we received up to the 3685 * current ACK.
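*
* Example: with len = 16 and last_ack_recv = 5, an ACK for
* idx = 9 gives num = 9 - 5 + 1 = 5 below (descriptors 5..9);
* an ACK that wrapped around to idx = 2 gives
* num = (16 - 5 + 1) + 2 = 14 (descriptors 5..15 and 0..2).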
3686 */ 3687 if (idx != dp->last_ack_recv) { 3688 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 3689 __func__, dp->last_ack_recv, idx); 3690 num = idx >= dp->last_ack_recv ? 3691 idx - dp->last_ack_recv + 1: 3692 (len - dp->last_ack_recv + 1) + idx; 3693 } 3694 3695 /* 3696 * When we sent the in-band message to our peer we 3697 * marked the copy in our private ring as READY. We now 3698 * check that the descriptor we are being ACK'ed for is in 3699 * fact READY, i.e. it is one we have shared with our peer. 3700 * 3701 * If it's not, we flag an error but still reset the 3702 * descriptor back to FREE. 3703 */ 3704 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 3705 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3706 mutex_enter(&priv_addr->dstate_lock); 3707 if (priv_addr->dstate != VIO_DESC_READY) { 3708 DERR(vswp, "%s: (%ld) desc at index %ld not " 3709 "READY (0x%lx)", __func__, 3710 ldcp->ldc_id, i, priv_addr->dstate); 3711 DERR(vswp, "%s: bound %d: ncookies %ld : " 3712 "datalen %ld", __func__, 3713 priv_addr->bound, priv_addr->ncookies, 3714 priv_addr->datalen); 3715 } 3716 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 3717 ldcp->ldc_id, i); 3718 /* release resources associated with sent msg */ 3719 priv_addr->datalen = 0; 3720 priv_addr->dstate = VIO_DESC_FREE; 3721 mutex_exit(&priv_addr->dstate_lock); 3722 } 3723 /* update to next expected value */ 3724 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 3725 3726 break; 3727 3728 case VIO_SUBTYPE_NACK: 3729 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3730 3731 /* 3732 * We should only get a NACK if our peer doesn't like 3733 * something about a message we have sent it. If this 3734 * happens we just release the resources associated with 3735 * the message. (We are relying on higher layers to decide 3736 * whether or not to resend.) 3737 */ 3738 3739 /* limit check */ 3740 idx = ibnd_desc->hdr.desc_handle; 3741 3742 if (idx >= vsw_ntxds) { 3743 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 3744 __func__, idx); 3745 return; 3746 } 3747 3748 if ((dp = ldcp->lane_out.dringp) == NULL) { 3749 DERR(vswp, "%s: no dring found", __func__); 3750 return; 3751 } 3752 3753 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 3754 3755 /* move to correct location in ring */ 3756 priv_addr += idx; 3757 3758 /* release resources associated with sent msg */ 3759 mutex_enter(&priv_addr->dstate_lock); 3760 priv_addr->datalen = 0; 3761 priv_addr->dstate = VIO_DESC_FREE; 3762 mutex_exit(&priv_addr->dstate_lock); 3763 3764 break; 3765 3766 default: 3767 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3768 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 3769 } 3770 3771 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3772 } 3773 3774 static void 3775 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp) 3776 { 3777 _NOTE(ARGUNUSED(epkt)) 3778 3779 vsw_t *vswp = ldcp->ldc_vswp; 3780 uint16_t env = tagp->vio_subtype_env; 3781 3782 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 3783 3784 /* 3785 * Error vio_subtypes have yet to be defined. So for 3786 * the moment we can't do anything.
3787 */ 3788 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 3789 3790 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 3791 } 3792 3793 /* transmit the packet over the given port */ 3794 int 3795 vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count) 3796 { 3797 vsw_ldc_list_t *ldcl = &port->p_ldclist; 3798 vsw_ldc_t *ldcp; 3799 int status = 0; 3800 3801 READ_ENTER(&ldcl->lockrw); 3802 /* 3803 * Note for now, we have a single channel. 3804 */ 3805 ldcp = ldcl->head; 3806 if (ldcp == NULL) { 3807 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 3808 freemsgchain(mp); 3809 RW_EXIT(&ldcl->lockrw); 3810 return (1); 3811 } 3812 3813 status = ldcp->tx(ldcp, mp, mpt, count); 3814 3815 RW_EXIT(&ldcl->lockrw); 3816 3817 return (status); 3818 } 3819 3820 /* 3821 * Break up frames into two separate chains: normal and 3822 * priority, based on the frame type. The number of 3823 * priority frames is also counted and returned. 3824 * 3825 * Params: 3826 * vswp: pointer to the instance of vsw 3827 * np: head of packet chain to be broken 3828 * npt: tail of packet chain to be broken 3829 * 3830 * Returns: 3831 * np: head of normal data packets 3832 * npt: tail of normal data packets 3833 * hp: head of high priority packets 3834 * hpt: tail of high priority packets 3835 */ 3836 static uint32_t 3837 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt, 3838 mblk_t **hp, mblk_t **hpt) 3839 { 3840 mblk_t *tmp = NULL; 3841 mblk_t *smp = NULL; 3842 mblk_t *hmp = NULL; /* high prio pkts head */ 3843 mblk_t *hmpt = NULL; /* high prio pkts tail */ 3844 mblk_t *nmp = NULL; /* normal pkts head */ 3845 mblk_t *nmpt = NULL; /* normal pkts tail */ 3846 uint32_t count = 0; 3847 int i; 3848 struct ether_header *ehp; 3849 uint32_t num_types; 3850 uint16_t *types; 3851 3852 tmp = *np; 3853 while (tmp != NULL) { 3854 3855 smp = tmp; 3856 tmp = tmp->b_next; 3857 smp->b_next = NULL; 3858 smp->b_prev = NULL; 3859 3860 ehp = (struct ether_header *)smp->b_rptr; 3861 num_types = vswp->pri_num_types; 3862 types = vswp->pri_types; 3863 for (i = 0; i < num_types; i++) { 3864 if (ehp->ether_type == types[i]) { 3865 /* high priority frame */ 3866 3867 if (hmp != NULL) { 3868 hmpt->b_next = smp; 3869 hmpt = smp; 3870 } else { 3871 hmp = hmpt = smp; 3872 } 3873 count++; 3874 break; 3875 } 3876 } 3877 if (i == num_types) { 3878 /* normal data frame */ 3879 3880 if (nmp != NULL) { 3881 nmpt->b_next = smp; 3882 nmpt = smp; 3883 } else { 3884 nmp = nmpt = smp; 3885 } 3886 } 3887 } 3888 3889 *hp = hmp; 3890 *hpt = hmpt; 3891 *np = nmp; 3892 *npt = nmpt; 3893 3894 return (count); 3895 } 3896 3897 /* 3898 * Wrapper function to transmit normal and/or priority frames over the channel.
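*
* For instance, given a chain of five frames of which two match an
* entry in vswp->pri_types, vsw_get_pri_packets() peels those two
* off to be sent via vsw_ldcsend_pkt(), and the remaining three are
* passed on to vsw_ldctx() with count adjusted from 5 to 3.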
3899 */ 3900 static int 3901 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 3902 { 3903 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 3904 mblk_t *tmp; 3905 mblk_t *smp; 3906 mblk_t *hmp; /* high prio pkts head */ 3907 mblk_t *hmpt; /* high prio pkts tail */ 3908 mblk_t *nmp; /* normal pkts head */ 3909 mblk_t *nmpt; /* normal pkts tail */ 3910 uint32_t n = 0; 3911 vsw_t *vswp = ldcp->ldc_vswp; 3912 3913 ASSERT(VSW_PRI_ETH_DEFINED(vswp)); 3914 ASSERT(count != 0); 3915 3916 nmp = mp; 3917 nmpt = mpt; 3918 3919 /* gather any priority frames from the chain of packets */ 3920 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt); 3921 3922 /* transmit priority frames */ 3923 tmp = hmp; 3924 while (tmp != NULL) { 3925 smp = tmp; 3926 tmp = tmp->b_next; 3927 smp->b_next = NULL; 3928 vsw_ldcsend_pkt(ldcp, smp); 3929 } 3930 3931 count -= n; 3932 3933 if (count == 0) { 3934 /* no normal data frames to process */ 3935 return (0); 3936 } 3937 3938 return (vsw_ldctx(ldcp, nmp, nmpt, count)); 3939 } 3940 3941 /* 3942 * Wrapper function to transmit normal frames over the channel. 3943 */ 3944 static int 3945 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 3946 { 3947 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 3948 mblk_t *tmp = NULL; 3949 3950 ASSERT(count != 0); 3951 /* 3952 * If the TX thread is enabled, then queue the 3953 * ordinary frames and signal the tx thread. 3954 */ 3955 if (ldcp->tx_thread != NULL) { 3956 3957 mutex_enter(&ldcp->tx_thr_lock); 3958 3959 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 3960 /* 3961 * If we reached queue limit, 3962 * do not queue new packets, 3963 * drop them. 3964 */ 3965 ldcp->ldc_stats.tx_qfull += count; 3966 mutex_exit(&ldcp->tx_thr_lock); 3967 freemsgchain(mp); 3968 goto exit; 3969 } 3970 if (ldcp->tx_mhead == NULL) { 3971 ldcp->tx_mhead = mp; 3972 ldcp->tx_mtail = mpt; 3973 cv_signal(&ldcp->tx_thr_cv); 3974 } else { 3975 ldcp->tx_mtail->b_next = mp; 3976 ldcp->tx_mtail = mpt; 3977 } 3978 ldcp->tx_cnt += count; 3979 mutex_exit(&ldcp->tx_thr_lock); 3980 } else { 3981 while (mp != NULL) { 3982 tmp = mp->b_next; 3983 mp->b_next = mp->b_prev = NULL; 3984 (void) vsw_ldcsend(ldcp, mp, 1); 3985 mp = tmp; 3986 } 3987 } 3988 3989 exit: 3990 return (0); 3991 } 3992 3993 /* 3994 * This function transmits the frame in the payload of a raw data 3995 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 3996 * send special frames with high priorities, without going through 3997 * the normal data path which uses descriptor ring mechanism. 3998 */ 3999 static void 4000 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp) 4001 { 4002 vio_raw_data_msg_t *pkt; 4003 mblk_t *bp; 4004 mblk_t *nmp = NULL; 4005 caddr_t dst; 4006 uint32_t mblksz; 4007 uint32_t size; 4008 uint32_t nbytes; 4009 int rv; 4010 vsw_t *vswp = ldcp->ldc_vswp; 4011 vgen_stats_t *statsp = &ldcp->ldc_stats; 4012 4013 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4014 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4015 (void) atomic_inc_32(&statsp->tx_pri_fail); 4016 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4017 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4018 ldcp->lane_out.lstate); 4019 goto send_pkt_exit; 4020 } 4021 4022 size = msgsize(mp); 4023 4024 /* frame size bigger than available payload len of raw data msg ? 
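(for example, if ldcp->msglen were 4096, an illustrative value and
not one taken from this file, anything over
4096 - VIO_PKT_DATA_HDRSIZE bytes would be dropped here and counted
in tx_pri_fail)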
	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__,
		    ldcp->ldc_id, size);
		goto send_pkt_exit;
	}

	if (size < ETHERMIN)
		size = ETHERMIN;

	/* alloc space for a raw data message */
	nmp = vio_allocb(vswp->pri_tx_vmp);
	if (nmp == NULL) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vswp, "vio_allocb failed\n");
		goto send_pkt_exit;
	}
	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;

	/* copy frame into the payload of raw data message */
	dst = (caddr_t)pkt->data;
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblksz = MBLKL(bp);
		bcopy(bp->b_rptr, dst, mblksz);
		dst += mblksz;
	}

	/* setup the raw data msg */
	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
	pkt->tag.vio_sid = ldcp->local_session;
	nbytes = VIO_PKT_DATA_HDRSIZE + size;

	/* send the msg over ldc */
	rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE);
	if (rv != 0) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vswp, "%s(%lld) Error sending priority frame\n",
		    __func__, ldcp->ldc_id);
		goto send_pkt_exit;
	}

	/* update stats */
	(void) atomic_inc_64(&statsp->tx_pri_packets);
	(void) atomic_add_64(&statsp->tx_pri_bytes, size);

send_pkt_exit:
	if (nmp != NULL)
		freemsg(nmp);
	freemsg(mp);
}

/*
 * Transmit the packet over the given LDC channel.
 *
 * The 'retries' argument indicates how many times a packet
 * is retried before it is dropped. Note, the retry is done
 * only for a resource related failure, for all other failures
 * the packet is dropped immediately.
 */
static int
vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries)
{
	int		i;
	int		rc;
	int		status = 0;
	vsw_port_t	*port = ldcp->ldc_port;
	dring_info_t	*dp = NULL;


	for (i = 0; i < retries; ) {
		/*
		 * Send the message out using the appropriate
		 * transmit function which will free mblock when it
		 * is finished with it.
		 */
		mutex_enter(&port->tx_lock);
		if (port->transmit != NULL) {
			status = (*port->transmit)(ldcp, mp);
		}
		if (status == LDC_TX_SUCCESS) {
			mutex_exit(&port->tx_lock);
			break;
		}
		i++;	/* increment the counter here */

		/* If it's the last retry, then update the oerrors count */
		if ((i == retries) && (status == LDC_TX_NORESOURCES)) {
			ldcp->ldc_stats.oerrors++;
		}
		mutex_exit(&port->tx_lock);

		if (status != LDC_TX_NORESOURCES) {
			/*
			 * No retrying required for errors un-related
			 * to resources.
			 */
			break;
		}
		READ_ENTER(&ldcp->lane_out.dlistrw);
		if (((dp = ldcp->lane_out.dringp) != NULL) &&
		    ((VSW_VER_EQ(ldcp, 1, 2) &&
		    (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    ((VSW_VER_LT(ldcp, 1, 2) &&
		    (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) {
			rc = vsw_reclaim_dring(dp, dp->end_idx);
		} else {
			/*
			 * If there is no dring or the xfer_mode is
			 * set to DESC_MODE (i.e., OBP), then simply break here.
			 */
			RW_EXIT(&ldcp->lane_out.dlistrw);
			break;
		}
		RW_EXIT(&ldcp->lane_out.dlistrw);

		/*
		 * Delay only if none were reclaimed
		 * and it's not the last retry.
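		 *
		 * To illustrate (with hypothetical tunable values): if
		 * vsw_ldc_tx_retries is 3 and vsw_ldc_tx_delay is 5000
		 * microseconds, a frame that keeps hitting
		 * LDC_TX_NORESOURCES is attempted three times, sleeping
		 * about 5 ms between attempts whenever the reclaim above
		 * freed nothing, and is then dropped.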
4145 */ 4146 if ((rc == 0) && (i < retries)) { 4147 delay(drv_usectohz(vsw_ldc_tx_delay)); 4148 } 4149 } 4150 freemsg(mp); 4151 return (status); 4152 } 4153 4154 /* 4155 * Send packet out via descriptor ring to a logical device. 4156 */ 4157 static int 4158 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 4159 { 4160 vio_dring_msg_t dring_pkt; 4161 dring_info_t *dp = NULL; 4162 vsw_private_desc_t *priv_desc = NULL; 4163 vnet_public_desc_t *pub = NULL; 4164 vsw_t *vswp = ldcp->ldc_vswp; 4165 mblk_t *bp; 4166 size_t n, size; 4167 caddr_t bufp; 4168 int idx; 4169 int status = LDC_TX_SUCCESS; 4170 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 4171 4172 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 4173 4174 /* TODO: make test a macro */ 4175 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4176 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4177 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4178 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4179 ldcp->lane_out.lstate); 4180 ldcp->ldc_stats.oerrors++; 4181 return (LDC_TX_FAILURE); 4182 } 4183 4184 /* 4185 * Note - using first ring only, this may change 4186 * in the future. 4187 */ 4188 READ_ENTER(&ldcp->lane_out.dlistrw); 4189 if ((dp = ldcp->lane_out.dringp) == NULL) { 4190 RW_EXIT(&ldcp->lane_out.dlistrw); 4191 DERR(vswp, "%s(%lld): no dring for outbound lane on" 4192 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 4193 ldcp->ldc_stats.oerrors++; 4194 return (LDC_TX_FAILURE); 4195 } 4196 4197 size = msgsize(mp); 4198 if (size > (size_t)ETHERMAX) { 4199 RW_EXIT(&ldcp->lane_out.dlistrw); 4200 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4201 ldcp->ldc_id, size); 4202 ldcp->ldc_stats.oerrors++; 4203 return (LDC_TX_FAILURE); 4204 } 4205 4206 /* 4207 * Find a free descriptor 4208 * 4209 * Note: for the moment we are assuming that we will only 4210 * have one dring going from the switch to each of its 4211 * peers. This may change in the future. 4212 */ 4213 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4214 D2(vswp, "%s(%lld): no descriptor available for ring " 4215 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4216 4217 /* nothing more we can do */ 4218 status = LDC_TX_NORESOURCES; 4219 ldcp->ldc_stats.tx_no_desc++; 4220 goto vsw_dringsend_free_exit; 4221 } else { 4222 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 4223 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 4224 } 4225 4226 /* copy data into the descriptor */ 4227 bufp = priv_desc->datap; 4228 bufp += VNET_IPALIGN; 4229 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4230 n = MBLKL(bp); 4231 bcopy(bp->b_rptr, bufp, n); 4232 bufp += n; 4233 } 4234 4235 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4236 4237 pub = priv_desc->descp; 4238 pub->nbytes = priv_desc->datalen; 4239 4240 /* update statistics */ 4241 if (IS_BROADCAST(ehp)) 4242 ldcp->ldc_stats.brdcstxmt++; 4243 else if (IS_MULTICAST(ehp)) 4244 ldcp->ldc_stats.multixmt++; 4245 ldcp->ldc_stats.opackets++; 4246 ldcp->ldc_stats.obytes += priv_desc->datalen; 4247 4248 mutex_enter(&priv_desc->dstate_lock); 4249 pub->hdr.dstate = VIO_DESC_READY; 4250 mutex_exit(&priv_desc->dstate_lock); 4251 4252 /* 4253 * Determine whether or not we need to send a message to our 4254 * peer prompting them to read our newly updated descriptor(s). 
4255 */ 4256 mutex_enter(&dp->restart_lock); 4257 if (dp->restart_reqd) { 4258 dp->restart_reqd = B_FALSE; 4259 ldcp->ldc_stats.dring_data_msgs++; 4260 mutex_exit(&dp->restart_lock); 4261 4262 /* 4263 * Send a vio_dring_msg to peer to prompt them to read 4264 * the updated descriptor ring. 4265 */ 4266 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 4267 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 4268 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 4269 dring_pkt.tag.vio_sid = ldcp->local_session; 4270 4271 /* Note - for now using first ring */ 4272 dring_pkt.dring_ident = dp->ident; 4273 4274 /* 4275 * If last_ack_recv is -1 then we know we've not 4276 * received any ack's yet, so this must be the first 4277 * msg sent, so set the start to the begining of the ring. 4278 */ 4279 mutex_enter(&dp->dlock); 4280 if (dp->last_ack_recv == -1) { 4281 dring_pkt.start_idx = 0; 4282 } else { 4283 dring_pkt.start_idx = 4284 (dp->last_ack_recv + 1) % dp->num_descriptors; 4285 } 4286 dring_pkt.end_idx = -1; 4287 mutex_exit(&dp->dlock); 4288 4289 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 4290 ldcp->ldc_id, dp, dring_pkt.dring_ident); 4291 D3(vswp, "%s(%lld): start %lld : end %lld :\n", 4292 __func__, ldcp->ldc_id, dring_pkt.start_idx, 4293 dring_pkt.end_idx); 4294 4295 RW_EXIT(&ldcp->lane_out.dlistrw); 4296 4297 (void) vsw_send_msg(ldcp, (void *)&dring_pkt, 4298 sizeof (vio_dring_msg_t), B_TRUE); 4299 4300 return (status); 4301 4302 } else { 4303 mutex_exit(&dp->restart_lock); 4304 D2(vswp, "%s(%lld): updating descp %d", __func__, 4305 ldcp->ldc_id, idx); 4306 } 4307 4308 vsw_dringsend_free_exit: 4309 4310 RW_EXIT(&ldcp->lane_out.dlistrw); 4311 4312 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 4313 return (status); 4314 } 4315 4316 /* 4317 * Send an in-band descriptor message over ldc. 4318 */ 4319 static int 4320 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 4321 { 4322 vsw_t *vswp = ldcp->ldc_vswp; 4323 vnet_ibnd_desc_t ibnd_msg; 4324 vsw_private_desc_t *priv_desc = NULL; 4325 dring_info_t *dp = NULL; 4326 size_t n, size = 0; 4327 caddr_t bufp; 4328 mblk_t *bp; 4329 int idx, i; 4330 int status = LDC_TX_SUCCESS; 4331 static int warn_msg = 1; 4332 4333 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4334 4335 ASSERT(mp != NULL); 4336 4337 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4338 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4339 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 4340 __func__, ldcp->ldc_id, ldcp->ldc_status, 4341 ldcp->lane_out.lstate); 4342 ldcp->ldc_stats.oerrors++; 4343 return (LDC_TX_FAILURE); 4344 } 4345 4346 /* 4347 * only expect single dring to exist, which we use 4348 * as an internal buffer, rather than a transfer channel. 
4349 */ 4350 READ_ENTER(&ldcp->lane_out.dlistrw); 4351 if ((dp = ldcp->lane_out.dringp) == NULL) { 4352 DERR(vswp, "%s(%lld): no dring for outbound lane", 4353 __func__, ldcp->ldc_id); 4354 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 4355 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 4356 RW_EXIT(&ldcp->lane_out.dlistrw); 4357 ldcp->ldc_stats.oerrors++; 4358 return (LDC_TX_FAILURE); 4359 } 4360 4361 size = msgsize(mp); 4362 if (size > (size_t)ETHERMAX) { 4363 RW_EXIT(&ldcp->lane_out.dlistrw); 4364 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4365 ldcp->ldc_id, size); 4366 ldcp->ldc_stats.oerrors++; 4367 return (LDC_TX_FAILURE); 4368 } 4369 4370 /* 4371 * Find a free descriptor in our buffer ring 4372 */ 4373 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4374 RW_EXIT(&ldcp->lane_out.dlistrw); 4375 if (warn_msg) { 4376 DERR(vswp, "%s(%lld): no descriptor available for ring " 4377 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4378 warn_msg = 0; 4379 } 4380 4381 /* nothing more we can do */ 4382 status = LDC_TX_NORESOURCES; 4383 goto vsw_descrsend_free_exit; 4384 } else { 4385 D2(vswp, "%s(%lld): free private descriptor found at pos " 4386 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4387 warn_msg = 1; 4388 } 4389 4390 /* copy data into the descriptor */ 4391 bufp = priv_desc->datap; 4392 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4393 n = MBLKL(bp); 4394 bcopy(bp->b_rptr, bufp, n); 4395 bufp += n; 4396 } 4397 4398 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4399 4400 /* create and send the in-band descp msg */ 4401 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4402 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4403 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4404 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4405 4406 /* 4407 * Copy the mem cookies describing the data from the 4408 * private region of the descriptor ring into the inband 4409 * descriptor. 
4410 */ 4411 for (i = 0; i < priv_desc->ncookies; i++) { 4412 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4413 sizeof (ldc_mem_cookie_t)); 4414 } 4415 4416 ibnd_msg.hdr.desc_handle = idx; 4417 ibnd_msg.ncookies = priv_desc->ncookies; 4418 ibnd_msg.nbytes = size; 4419 4420 ldcp->ldc_stats.opackets++; 4421 ldcp->ldc_stats.obytes += size; 4422 4423 RW_EXIT(&ldcp->lane_out.dlistrw); 4424 4425 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 4426 sizeof (vnet_ibnd_desc_t), B_TRUE); 4427 4428 vsw_descrsend_free_exit: 4429 4430 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4431 return (status); 4432 } 4433 4434 static void 4435 vsw_send_ver(void *arg) 4436 { 4437 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4438 vsw_t *vswp = ldcp->ldc_vswp; 4439 lane_t *lp = &ldcp->lane_out; 4440 vio_ver_msg_t ver_msg; 4441 4442 D1(vswp, "%s enter", __func__); 4443 4444 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4445 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4446 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 4447 ver_msg.tag.vio_sid = ldcp->local_session; 4448 4449 if (vsw_obp_ver_proto_workaround == B_FALSE) { 4450 ver_msg.ver_major = vsw_versions[0].ver_major; 4451 ver_msg.ver_minor = vsw_versions[0].ver_minor; 4452 } else { 4453 /* use the major,minor that we've ack'd */ 4454 lane_t *lpi = &ldcp->lane_in; 4455 ver_msg.ver_major = lpi->ver_major; 4456 ver_msg.ver_minor = lpi->ver_minor; 4457 } 4458 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 4459 4460 lp->lstate |= VSW_VER_INFO_SENT; 4461 lp->ver_major = ver_msg.ver_major; 4462 lp->ver_minor = ver_msg.ver_minor; 4463 4464 DUMP_TAG(ver_msg.tag); 4465 4466 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 4467 4468 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 4469 } 4470 4471 static void 4472 vsw_send_attr(vsw_ldc_t *ldcp) 4473 { 4474 vsw_t *vswp = ldcp->ldc_vswp; 4475 lane_t *lp = &ldcp->lane_out; 4476 vnet_attr_msg_t attr_msg; 4477 4478 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4479 4480 /* 4481 * Subtype is set to INFO by default 4482 */ 4483 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4484 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4485 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 4486 attr_msg.tag.vio_sid = ldcp->local_session; 4487 4488 /* payload copied from default settings for lane */ 4489 attr_msg.mtu = lp->mtu; 4490 attr_msg.addr_type = lp->addr_type; 4491 attr_msg.xfer_mode = lp->xfer_mode; 4492 attr_msg.ack_freq = lp->xfer_mode; 4493 4494 READ_ENTER(&vswp->if_lockrw); 4495 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet); 4496 RW_EXIT(&vswp->if_lockrw); 4497 4498 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 4499 4500 DUMP_TAG(attr_msg.tag); 4501 4502 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 4503 4504 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4505 } 4506 4507 /* 4508 * Create dring info msg (which also results in the creation of 4509 * a dring). 4510 */ 4511 static vio_dring_reg_msg_t * 4512 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 4513 { 4514 vio_dring_reg_msg_t *mp; 4515 dring_info_t *dp; 4516 vsw_t *vswp = ldcp->ldc_vswp; 4517 4518 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 4519 4520 /* 4521 * If we can't create a dring, obviously no point sending 4522 * a message. 
4523 */ 4524 if ((dp = vsw_create_dring(ldcp)) == NULL) 4525 return (NULL); 4526 4527 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 4528 4529 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 4530 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 4531 mp->tag.vio_subtype_env = VIO_DRING_REG; 4532 mp->tag.vio_sid = ldcp->local_session; 4533 4534 /* payload */ 4535 mp->num_descriptors = dp->num_descriptors; 4536 mp->descriptor_size = dp->descriptor_size; 4537 mp->options = dp->options; 4538 mp->ncookies = dp->ncookies; 4539 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 4540 4541 mp->dring_ident = 0; 4542 4543 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 4544 4545 return (mp); 4546 } 4547 4548 static void 4549 vsw_send_dring_info(vsw_ldc_t *ldcp) 4550 { 4551 vio_dring_reg_msg_t *dring_msg; 4552 vsw_t *vswp = ldcp->ldc_vswp; 4553 4554 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 4555 4556 dring_msg = vsw_create_dring_info_pkt(ldcp); 4557 if (dring_msg == NULL) { 4558 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 4559 vswp->instance, __func__); 4560 return; 4561 } 4562 4563 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 4564 4565 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 4566 4567 (void) vsw_send_msg(ldcp, dring_msg, 4568 sizeof (vio_dring_reg_msg_t), B_TRUE); 4569 4570 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 4571 4572 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 4573 } 4574 4575 static void 4576 vsw_send_rdx(vsw_ldc_t *ldcp) 4577 { 4578 vsw_t *vswp = ldcp->ldc_vswp; 4579 vio_rdx_msg_t rdx_msg; 4580 4581 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4582 4583 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4584 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4585 rdx_msg.tag.vio_subtype_env = VIO_RDX; 4586 rdx_msg.tag.vio_sid = ldcp->local_session; 4587 4588 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 4589 4590 DUMP_TAG(rdx_msg.tag); 4591 4592 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 4593 4594 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4595 } 4596 4597 /* 4598 * Generic routine to send message out over ldc channel. 4599 * 4600 * It is possible that when we attempt to write over the ldc channel 4601 * that we get notified that it has been reset. Depending on the value 4602 * of the handle_reset flag we either handle that event here or simply 4603 * notify the caller that the channel was reset. 
4604 */ 4605 static int 4606 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset) 4607 { 4608 int rv; 4609 size_t msglen = size; 4610 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 4611 vsw_t *vswp = ldcp->ldc_vswp; 4612 vio_dring_msg_t *dmsg; 4613 vio_raw_data_msg_t *rmsg; 4614 vnet_ibnd_desc_t *imsg; 4615 boolean_t data_msg = B_FALSE; 4616 4617 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 4618 ldcp->ldc_id, size); 4619 4620 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 4621 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 4622 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 4623 4624 mutex_enter(&ldcp->ldc_txlock); 4625 4626 if (tag->vio_subtype == VIO_SUBTYPE_INFO) { 4627 if (tag->vio_subtype_env == VIO_DRING_DATA) { 4628 dmsg = (vio_dring_msg_t *)tag; 4629 dmsg->seq_num = ldcp->lane_out.seq_num; 4630 data_msg = B_TRUE; 4631 } else if (tag->vio_subtype_env == VIO_PKT_DATA) { 4632 rmsg = (vio_raw_data_msg_t *)tag; 4633 rmsg->seq_num = ldcp->lane_out.seq_num; 4634 data_msg = B_TRUE; 4635 } else if (tag->vio_subtype_env == VIO_DESC_DATA) { 4636 imsg = (vnet_ibnd_desc_t *)tag; 4637 imsg->hdr.seq_num = ldcp->lane_out.seq_num; 4638 data_msg = B_TRUE; 4639 } 4640 } 4641 4642 do { 4643 msglen = size; 4644 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 4645 } while (rv == EWOULDBLOCK && --vsw_wretries > 0); 4646 4647 if (rv == 0 && data_msg == B_TRUE) { 4648 ldcp->lane_out.seq_num++; 4649 } 4650 4651 if ((rv != 0) || (msglen != size)) { 4652 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) " 4653 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen); 4654 ldcp->ldc_stats.oerrors++; 4655 } 4656 4657 mutex_exit(&ldcp->ldc_txlock); 4658 4659 /* 4660 * If channel has been reset we either handle it here or 4661 * simply report back that it has been reset and let caller 4662 * decide what to do. 4663 */ 4664 if (rv == ECONNRESET) { 4665 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 4666 4667 /* 4668 * N.B - must never be holding the dlistrw lock when 4669 * we do a reset of the channel. 4670 */ 4671 if (handle_reset) { 4672 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 4673 } 4674 } 4675 4676 return (rv); 4677 } 4678 4679 /* 4680 * Remove the specified address from the list of address maintained 4681 * in this port node. 
4682 */ 4683 mcst_addr_t * 4684 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 4685 { 4686 vsw_t *vswp = NULL; 4687 vsw_port_t *port = NULL; 4688 mcst_addr_t *prev_p = NULL; 4689 mcst_addr_t *curr_p = NULL; 4690 4691 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 4692 __func__, devtype, addr); 4693 4694 if (devtype == VSW_VNETPORT) { 4695 port = (vsw_port_t *)arg; 4696 mutex_enter(&port->mca_lock); 4697 prev_p = curr_p = port->mcap; 4698 } else { 4699 vswp = (vsw_t *)arg; 4700 mutex_enter(&vswp->mca_lock); 4701 prev_p = curr_p = vswp->mcap; 4702 } 4703 4704 while (curr_p != NULL) { 4705 if (curr_p->addr == addr) { 4706 D2(NULL, "%s: address found", __func__); 4707 /* match found */ 4708 if (prev_p == curr_p) { 4709 /* list head */ 4710 if (devtype == VSW_VNETPORT) 4711 port->mcap = curr_p->nextp; 4712 else 4713 vswp->mcap = curr_p->nextp; 4714 } else { 4715 prev_p->nextp = curr_p->nextp; 4716 } 4717 break; 4718 } else { 4719 prev_p = curr_p; 4720 curr_p = curr_p->nextp; 4721 } 4722 } 4723 4724 if (devtype == VSW_VNETPORT) 4725 mutex_exit(&port->mca_lock); 4726 else 4727 mutex_exit(&vswp->mca_lock); 4728 4729 D1(NULL, "%s: exit", __func__); 4730 4731 return (curr_p); 4732 } 4733 4734 /* 4735 * Creates a descriptor ring (dring) and links it into the 4736 * link of outbound drings for this channel. 4737 * 4738 * Returns NULL if creation failed. 4739 */ 4740 static dring_info_t * 4741 vsw_create_dring(vsw_ldc_t *ldcp) 4742 { 4743 vsw_private_desc_t *priv_addr = NULL; 4744 vsw_t *vswp = ldcp->ldc_vswp; 4745 ldc_mem_info_t minfo; 4746 dring_info_t *dp, *tp; 4747 int i; 4748 4749 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4750 4751 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4752 4753 /* create public section of ring */ 4754 if ((ldc_mem_dring_create(vsw_ntxds, 4755 VSW_PUB_SIZE, &dp->handle)) != 0) { 4756 4757 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 4758 "failed", ldcp->ldc_id); 4759 goto create_fail_exit; 4760 } 4761 4762 ASSERT(dp->handle != NULL); 4763 4764 /* 4765 * Get the base address of the public section of the ring. 4766 */ 4767 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 4768 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 4769 ldcp->ldc_id); 4770 goto dring_fail_exit; 4771 } else { 4772 ASSERT(minfo.vaddr != 0); 4773 dp->pub_addr = minfo.vaddr; 4774 } 4775 4776 dp->num_descriptors = vsw_ntxds; 4777 dp->descriptor_size = VSW_PUB_SIZE; 4778 dp->options = VIO_TX_DRING; 4779 dp->ncookies = 1; /* guaranteed by ldc */ 4780 4781 /* 4782 * create private portion of ring 4783 */ 4784 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 4785 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 4786 4787 if (vsw_setup_ring(ldcp, dp)) { 4788 DERR(vswp, "%s: unable to setup ring", __func__); 4789 goto dring_fail_exit; 4790 } 4791 4792 /* haven't used any descriptors yet */ 4793 dp->end_idx = 0; 4794 dp->last_ack_recv = -1; 4795 4796 /* bind dring to the channel */ 4797 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 4798 LDC_SHADOW_MAP, LDC_MEM_RW, 4799 &dp->cookie[0], &dp->ncookies)) != 0) { 4800 DERR(vswp, "vsw_create_dring: unable to bind to channel " 4801 "%lld", ldcp->ldc_id); 4802 goto dring_fail_exit; 4803 } 4804 4805 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4806 dp->restart_reqd = B_TRUE; 4807 4808 /* 4809 * Only ever create rings for outgoing lane. Link it onto 4810 * end of list. 
4811 */ 4812 WRITE_ENTER(&ldcp->lane_out.dlistrw); 4813 if (ldcp->lane_out.dringp == NULL) { 4814 D2(vswp, "vsw_create_dring: adding first outbound ring"); 4815 ldcp->lane_out.dringp = dp; 4816 } else { 4817 tp = ldcp->lane_out.dringp; 4818 while (tp->next != NULL) 4819 tp = tp->next; 4820 4821 tp->next = dp; 4822 } 4823 RW_EXIT(&ldcp->lane_out.dlistrw); 4824 4825 return (dp); 4826 4827 dring_fail_exit: 4828 (void) ldc_mem_dring_destroy(dp->handle); 4829 4830 create_fail_exit: 4831 if (dp->priv_addr != NULL) { 4832 priv_addr = dp->priv_addr; 4833 for (i = 0; i < vsw_ntxds; i++) { 4834 if (priv_addr->memhandle != NULL) 4835 (void) ldc_mem_free_handle( 4836 priv_addr->memhandle); 4837 priv_addr++; 4838 } 4839 kmem_free(dp->priv_addr, 4840 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 4841 } 4842 mutex_destroy(&dp->dlock); 4843 4844 kmem_free(dp, sizeof (dring_info_t)); 4845 return (NULL); 4846 } 4847 4848 /* 4849 * Create a ring consisting of just a private portion and link 4850 * it into the list of rings for the outbound lane. 4851 * 4852 * These type of rings are used primarily for temporary data 4853 * storage (i.e. as data buffers). 4854 */ 4855 void 4856 vsw_create_privring(vsw_ldc_t *ldcp) 4857 { 4858 dring_info_t *dp, *tp; 4859 vsw_t *vswp = ldcp->ldc_vswp; 4860 4861 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4862 4863 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4864 4865 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4866 4867 /* no public section */ 4868 dp->pub_addr = NULL; 4869 4870 dp->priv_addr = kmem_zalloc( 4871 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 4872 4873 dp->num_descriptors = vsw_ntxds; 4874 4875 if (vsw_setup_ring(ldcp, dp)) { 4876 DERR(vswp, "%s: setup of ring failed", __func__); 4877 kmem_free(dp->priv_addr, 4878 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 4879 mutex_destroy(&dp->dlock); 4880 kmem_free(dp, sizeof (dring_info_t)); 4881 return; 4882 } 4883 4884 /* haven't used any descriptors yet */ 4885 dp->end_idx = 0; 4886 4887 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4888 dp->restart_reqd = B_TRUE; 4889 4890 /* 4891 * Only ever create rings for outgoing lane. Link it onto 4892 * end of list. 4893 */ 4894 WRITE_ENTER(&ldcp->lane_out.dlistrw); 4895 if (ldcp->lane_out.dringp == NULL) { 4896 D2(vswp, "%s: adding first outbound privring", __func__); 4897 ldcp->lane_out.dringp = dp; 4898 } else { 4899 tp = ldcp->lane_out.dringp; 4900 while (tp->next != NULL) 4901 tp = tp->next; 4902 4903 tp->next = dp; 4904 } 4905 RW_EXIT(&ldcp->lane_out.dlistrw); 4906 4907 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4908 } 4909 4910 /* 4911 * Setup the descriptors in the dring. Returns 0 on success, 1 on 4912 * failure. 4913 */ 4914 int 4915 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 4916 { 4917 vnet_public_desc_t *pub_addr = NULL; 4918 vsw_private_desc_t *priv_addr = NULL; 4919 vsw_t *vswp = ldcp->ldc_vswp; 4920 uint64_t *tmpp; 4921 uint64_t offset = 0; 4922 uint32_t ncookies = 0; 4923 static char *name = "vsw_setup_ring"; 4924 int i, j, nc, rv; 4925 4926 priv_addr = dp->priv_addr; 4927 pub_addr = dp->pub_addr; 4928 4929 /* public section may be null but private should never be */ 4930 ASSERT(priv_addr != NULL); 4931 4932 /* 4933 * Allocate the region of memory which will be used to hold 4934 * the data the descriptors will refer to. 
4935 */ 4936 dp->data_sz = (vsw_ntxds * VSW_RING_EL_DATA_SZ); 4937 dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 4938 4939 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 4940 dp->data_sz, dp->data_addr); 4941 4942 tmpp = (uint64_t *)dp->data_addr; 4943 offset = VSW_RING_EL_DATA_SZ / sizeof (tmpp); 4944 4945 /* 4946 * Initialise some of the private and public (if they exist) 4947 * descriptor fields. 4948 */ 4949 for (i = 0; i < vsw_ntxds; i++) { 4950 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 4951 4952 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 4953 &priv_addr->memhandle)) != 0) { 4954 DERR(vswp, "%s: alloc mem handle failed", name); 4955 goto setup_ring_cleanup; 4956 } 4957 4958 priv_addr->datap = (void *)tmpp; 4959 4960 rv = ldc_mem_bind_handle(priv_addr->memhandle, 4961 (caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ, 4962 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 4963 &(priv_addr->memcookie[0]), &ncookies); 4964 if (rv != 0) { 4965 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 4966 "(rv %d)", name, ldcp->ldc_id, rv); 4967 goto setup_ring_cleanup; 4968 } 4969 priv_addr->bound = 1; 4970 4971 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 4972 name, i, priv_addr->memcookie[0].addr, 4973 priv_addr->memcookie[0].size); 4974 4975 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 4976 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 4977 "invalid num of cookies (%d) for size 0x%llx", 4978 name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ); 4979 4980 goto setup_ring_cleanup; 4981 } else { 4982 for (j = 1; j < ncookies; j++) { 4983 rv = ldc_mem_nextcookie(priv_addr->memhandle, 4984 &(priv_addr->memcookie[j])); 4985 if (rv != 0) { 4986 DERR(vswp, "%s: ldc_mem_nextcookie " 4987 "failed rv (%d)", name, rv); 4988 goto setup_ring_cleanup; 4989 } 4990 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 4991 "size 0x%llx", name, j, 4992 priv_addr->memcookie[j].addr, 4993 priv_addr->memcookie[j].size); 4994 } 4995 4996 } 4997 priv_addr->ncookies = ncookies; 4998 priv_addr->dstate = VIO_DESC_FREE; 4999 5000 if (pub_addr != NULL) { 5001 5002 /* link pub and private sides */ 5003 priv_addr->descp = pub_addr; 5004 5005 pub_addr->ncookies = priv_addr->ncookies; 5006 5007 for (nc = 0; nc < pub_addr->ncookies; nc++) { 5008 bcopy(&priv_addr->memcookie[nc], 5009 &pub_addr->memcookie[nc], 5010 sizeof (ldc_mem_cookie_t)); 5011 } 5012 5013 pub_addr->hdr.dstate = VIO_DESC_FREE; 5014 pub_addr++; 5015 } 5016 5017 /* 5018 * move to next element in the dring and the next 5019 * position in the data buffer. 5020 */ 5021 priv_addr++; 5022 tmpp += offset; 5023 } 5024 5025 return (0); 5026 5027 setup_ring_cleanup: 5028 priv_addr = dp->priv_addr; 5029 5030 for (j = 0; j < i; j++) { 5031 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 5032 (void) ldc_mem_free_handle(priv_addr->memhandle); 5033 5034 mutex_destroy(&priv_addr->dstate_lock); 5035 5036 priv_addr++; 5037 } 5038 kmem_free(dp->data_addr, dp->data_sz); 5039 5040 return (1); 5041 } 5042 5043 /* 5044 * Searches the private section of a ring for a free descriptor, 5045 * starting at the location of the last free descriptor found 5046 * previously. 5047 * 5048 * Returns 0 if free descriptor is available, and updates state 5049 * of private descriptor to VIO_DESC_READY, otherwise returns 1. 5050 * 5051 * FUTURE: might need to return contiguous range of descriptors 5052 * as dring info msg assumes all will be contiguous. 
5053 */ 5054 static int 5055 vsw_dring_find_free_desc(dring_info_t *dringp, 5056 vsw_private_desc_t **priv_p, int *idx) 5057 { 5058 vsw_private_desc_t *addr = NULL; 5059 int num = vsw_ntxds; 5060 int ret = 1; 5061 5062 D1(NULL, "%s enter\n", __func__); 5063 5064 ASSERT(dringp->priv_addr != NULL); 5065 5066 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 5067 __func__, dringp, dringp->end_idx); 5068 5069 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 5070 5071 mutex_enter(&addr->dstate_lock); 5072 if (addr->dstate == VIO_DESC_FREE) { 5073 addr->dstate = VIO_DESC_READY; 5074 *priv_p = addr; 5075 *idx = dringp->end_idx; 5076 dringp->end_idx = (dringp->end_idx + 1) % num; 5077 ret = 0; 5078 5079 } 5080 mutex_exit(&addr->dstate_lock); 5081 5082 /* ring full */ 5083 if (ret == 1) { 5084 D2(NULL, "%s: no desp free: started at %d", __func__, 5085 dringp->end_idx); 5086 } 5087 5088 D1(NULL, "%s: exit\n", __func__); 5089 5090 return (ret); 5091 } 5092 5093 /* 5094 * Map from a dring identifier to the ring itself. Returns 5095 * pointer to ring or NULL if no match found. 5096 * 5097 * Should be called with dlistrw rwlock held as reader. 5098 */ 5099 static dring_info_t * 5100 vsw_ident2dring(lane_t *lane, uint64_t ident) 5101 { 5102 dring_info_t *dp = NULL; 5103 5104 if ((dp = lane->dringp) == NULL) { 5105 return (NULL); 5106 } else { 5107 if (dp->ident == ident) 5108 return (dp); 5109 5110 while (dp != NULL) { 5111 if (dp->ident == ident) 5112 break; 5113 dp = dp->next; 5114 } 5115 } 5116 5117 return (dp); 5118 } 5119 5120 /* 5121 * Set the default lane attributes. These are copied into 5122 * the attr msg we send to our peer. If they are not acceptable 5123 * then (currently) the handshake ends. 5124 */ 5125 static void 5126 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 5127 { 5128 bzero(lp, sizeof (lane_t)); 5129 5130 READ_ENTER(&vswp->if_lockrw); 5131 ether_copy(&(vswp->if_addr), &(lp->addr)); 5132 RW_EXIT(&vswp->if_lockrw); 5133 5134 lp->mtu = VSW_MTU; 5135 lp->addr_type = ADDR_TYPE_MAC; 5136 lp->xfer_mode = VIO_DRING_MODE_V1_0; 5137 lp->ack_freq = 0; /* for shared mode */ 5138 lp->seq_num = VNET_ISS; 5139 } 5140 5141 /* 5142 * Verify that the attributes are acceptable. 5143 * 5144 * FUTURE: If some attributes are not acceptable, change them 5145 * our desired values. 5146 */ 5147 static int 5148 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_ldc_t *ldcp) 5149 { 5150 int ret = 0; 5151 struct ether_addr ea; 5152 vsw_port_t *port = ldcp->ldc_port; 5153 lane_t *lp = &ldcp->lane_out; 5154 5155 5156 D1(NULL, "vsw_check_attr enter\n"); 5157 5158 if ((pkt->xfer_mode != VIO_DESC_MODE) && 5159 (pkt->xfer_mode != lp->xfer_mode)) { 5160 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode); 5161 ret = 1; 5162 } 5163 5164 /* Only support MAC addresses at moment. */ 5165 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 5166 D2(NULL, "vsw_check_attr: invalid addr_type %x, " 5167 "or address 0x%llx\n", pkt->addr_type, pkt->addr); 5168 ret = 1; 5169 } 5170 5171 /* 5172 * MAC address supplied by device should match that stored 5173 * in the vsw-port OBP node. Need to decide what to do if they 5174 * don't match, for the moment just warn but don't fail. 
5175 */ 5176 vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet); 5177 if (ether_cmp(&ea, &port->p_macaddr) != 0) { 5178 DERR(NULL, "vsw_check_attr: device supplied address " 5179 "0x%llx doesn't match node address 0x%llx\n", 5180 pkt->addr, port->p_macaddr); 5181 } 5182 5183 /* 5184 * Ack freq only makes sense in pkt mode, in shared 5185 * mode the ring descriptors say whether or not to 5186 * send back an ACK. 5187 */ 5188 if ((VSW_VER_EQ(ldcp, 1, 2) && 5189 (pkt->xfer_mode & VIO_DRING_MODE_V1_2)) || 5190 (VSW_VER_LT(ldcp, 1, 2) && 5191 (pkt->xfer_mode == VIO_DRING_MODE_V1_0))) { 5192 if (pkt->ack_freq > 0) { 5193 D2(NULL, "vsw_check_attr: non zero ack freq " 5194 " in SHM mode\n"); 5195 ret = 1; 5196 } 5197 } 5198 5199 /* 5200 * Note: for the moment we only support ETHER 5201 * frames. This may change in the future. 5202 */ 5203 if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) { 5204 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n", 5205 pkt->mtu); 5206 ret = 1; 5207 } 5208 5209 D1(NULL, "vsw_check_attr exit\n"); 5210 5211 return (ret); 5212 } 5213 5214 /* 5215 * Returns 1 if there is a problem, 0 otherwise. 5216 */ 5217 static int 5218 vsw_check_dring_info(vio_dring_reg_msg_t *pkt) 5219 { 5220 _NOTE(ARGUNUSED(pkt)) 5221 5222 int ret = 0; 5223 5224 D1(NULL, "vsw_check_dring_info enter\n"); 5225 5226 if ((pkt->num_descriptors == 0) || 5227 (pkt->descriptor_size == 0) || 5228 (pkt->ncookies != 1)) { 5229 DERR(NULL, "vsw_check_dring_info: invalid dring msg"); 5230 ret = 1; 5231 } 5232 5233 D1(NULL, "vsw_check_dring_info exit\n"); 5234 5235 return (ret); 5236 } 5237 5238 /* 5239 * Returns 1 if two memory cookies match. Otherwise returns 0. 5240 */ 5241 static int 5242 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2) 5243 { 5244 if ((m1->addr != m2->addr) || 5245 (m2->size != m2->size)) { 5246 return (0); 5247 } else { 5248 return (1); 5249 } 5250 } 5251 5252 /* 5253 * Returns 1 if ring described in reg message matches that 5254 * described by dring_info structure. Otherwise returns 0. 5255 */ 5256 static int 5257 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 5258 { 5259 if ((msg->descriptor_size != dp->descriptor_size) || 5260 (msg->num_descriptors != dp->num_descriptors) || 5261 (msg->ncookies != dp->ncookies) || 5262 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 5263 return (0); 5264 } else { 5265 return (1); 5266 } 5267 5268 } 5269 5270 static caddr_t 5271 vsw_print_ethaddr(uint8_t *a, char *ebuf) 5272 { 5273 (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x", 5274 a[0], a[1], a[2], a[3], a[4], a[5]); 5275 return (ebuf); 5276 } 5277 5278 /* 5279 * Reset and free all the resources associated with 5280 * the channel. 
5281 */ 5282 static void 5283 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 5284 { 5285 dring_info_t *dp, *dpp; 5286 lane_t *lp = NULL; 5287 int rv = 0; 5288 5289 ASSERT(ldcp != NULL); 5290 5291 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 5292 5293 if (dir == INBOUND) { 5294 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 5295 " of channel %lld", __func__, ldcp->ldc_id); 5296 lp = &ldcp->lane_in; 5297 } else { 5298 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 5299 " of channel %lld", __func__, ldcp->ldc_id); 5300 lp = &ldcp->lane_out; 5301 } 5302 5303 lp->lstate = VSW_LANE_INACTIV; 5304 lp->seq_num = VNET_ISS; 5305 5306 if (lp->dringp) { 5307 if (dir == INBOUND) { 5308 WRITE_ENTER(&lp->dlistrw); 5309 dp = lp->dringp; 5310 while (dp != NULL) { 5311 dpp = dp->next; 5312 if (dp->handle != NULL) 5313 (void) ldc_mem_dring_unmap(dp->handle); 5314 kmem_free(dp, sizeof (dring_info_t)); 5315 dp = dpp; 5316 } 5317 RW_EXIT(&lp->dlistrw); 5318 } else { 5319 /* 5320 * unbind, destroy exported dring, free dring struct 5321 */ 5322 WRITE_ENTER(&lp->dlistrw); 5323 dp = lp->dringp; 5324 rv = vsw_free_ring(dp); 5325 RW_EXIT(&lp->dlistrw); 5326 } 5327 if (rv == 0) { 5328 lp->dringp = NULL; 5329 } 5330 } 5331 5332 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 5333 } 5334 5335 /* 5336 * Free ring and all associated resources. 5337 * 5338 * Should be called with dlistrw rwlock held as writer. 5339 */ 5340 static int 5341 vsw_free_ring(dring_info_t *dp) 5342 { 5343 vsw_private_desc_t *paddr = NULL; 5344 dring_info_t *dpp; 5345 int i, rv = 1; 5346 5347 while (dp != NULL) { 5348 mutex_enter(&dp->dlock); 5349 dpp = dp->next; 5350 if (dp->priv_addr != NULL) { 5351 /* 5352 * First unbind and free the memory handles 5353 * stored in each descriptor within the ring. 5354 */ 5355 for (i = 0; i < vsw_ntxds; i++) { 5356 paddr = (vsw_private_desc_t *) 5357 dp->priv_addr + i; 5358 if (paddr->memhandle != NULL) { 5359 if (paddr->bound == 1) { 5360 rv = ldc_mem_unbind_handle( 5361 paddr->memhandle); 5362 5363 if (rv != 0) { 5364 DERR(NULL, "error " 5365 "unbinding handle for " 5366 "ring 0x%llx at pos %d", 5367 dp, i); 5368 mutex_exit(&dp->dlock); 5369 return (rv); 5370 } 5371 paddr->bound = 0; 5372 } 5373 5374 rv = ldc_mem_free_handle( 5375 paddr->memhandle); 5376 if (rv != 0) { 5377 DERR(NULL, "error freeing " 5378 "handle for ring 0x%llx " 5379 "at pos %d", dp, i); 5380 mutex_exit(&dp->dlock); 5381 return (rv); 5382 } 5383 paddr->memhandle = NULL; 5384 } 5385 mutex_destroy(&paddr->dstate_lock); 5386 } 5387 kmem_free(dp->priv_addr, 5388 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5389 } 5390 5391 /* 5392 * Now unbind and destroy the ring itself. 5393 */ 5394 if (dp->handle != NULL) { 5395 (void) ldc_mem_dring_unbind(dp->handle); 5396 (void) ldc_mem_dring_destroy(dp->handle); 5397 } 5398 5399 if (dp->data_addr != NULL) { 5400 kmem_free(dp->data_addr, dp->data_sz); 5401 } 5402 5403 mutex_exit(&dp->dlock); 5404 mutex_destroy(&dp->dlock); 5405 mutex_destroy(&dp->restart_lock); 5406 kmem_free(dp, sizeof (dring_info_t)); 5407 5408 dp = dpp; 5409 } 5410 return (0); 5411 } 5412 5413 /* 5414 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 5415 * This thread is woken up by the LDC interrupt handler to process 5416 * LDC packets and receive data. 
5417 */ 5418 static void 5419 vsw_ldc_rx_worker(void *arg) 5420 { 5421 callb_cpr_t cprinfo; 5422 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5423 vsw_t *vswp = ldcp->ldc_vswp; 5424 5425 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5426 CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr, 5427 "vsw_rx_thread"); 5428 mutex_enter(&ldcp->rx_thr_lock); 5429 ldcp->rx_thr_flags |= VSW_WTHR_RUNNING; 5430 while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) { 5431 5432 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5433 /* 5434 * Wait until the data is received or a stop 5435 * request is received. 5436 */ 5437 while (!(ldcp->rx_thr_flags & 5438 (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) { 5439 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5440 } 5441 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock) 5442 5443 /* 5444 * First process the stop request. 5445 */ 5446 if (ldcp->rx_thr_flags & VSW_WTHR_STOP) { 5447 D2(vswp, "%s(%lld):Rx thread stopped\n", 5448 __func__, ldcp->ldc_id); 5449 break; 5450 } 5451 ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD; 5452 mutex_exit(&ldcp->rx_thr_lock); 5453 D1(vswp, "%s(%lld):calling vsw_process_pkt\n", 5454 __func__, ldcp->ldc_id); 5455 mutex_enter(&ldcp->ldc_cblock); 5456 vsw_process_pkt(ldcp); 5457 mutex_exit(&ldcp->ldc_cblock); 5458 mutex_enter(&ldcp->rx_thr_lock); 5459 } 5460 5461 /* 5462 * Update the run status and wakeup the thread that 5463 * has sent the stop request. 5464 */ 5465 ldcp->rx_thr_flags &= ~VSW_WTHR_RUNNING; 5466 cv_signal(&ldcp->rx_thr_cv); 5467 CALLB_CPR_EXIT(&cprinfo); 5468 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5469 thread_exit(); 5470 } 5471 5472 /* vsw_stop_rx_thread -- Co-ordinate with receive thread to stop it */ 5473 static void 5474 vsw_stop_rx_thread(vsw_ldc_t *ldcp) 5475 { 5476 vsw_t *vswp = ldcp->ldc_vswp; 5477 5478 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5479 /* 5480 * Send a stop request by setting the stop flag and 5481 * wait until the receive thread stops. 5482 */ 5483 mutex_enter(&ldcp->rx_thr_lock); 5484 if (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5485 ldcp->rx_thr_flags |= VSW_WTHR_STOP; 5486 cv_signal(&ldcp->rx_thr_cv); 5487 while (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5488 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5489 } 5490 } 5491 mutex_exit(&ldcp->rx_thr_lock); 5492 ldcp->rx_thread = NULL; 5493 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5494 } 5495 5496 /* 5497 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data. 5498 * This thread is woken up by the vsw_portsend to transmit 5499 * packets. 5500 */ 5501 static void 5502 vsw_ldc_tx_worker(void *arg) 5503 { 5504 callb_cpr_t cprinfo; 5505 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5506 vsw_t *vswp = ldcp->ldc_vswp; 5507 mblk_t *mp; 5508 mblk_t *tmp; 5509 5510 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5511 CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr, 5512 "vnet_tx_thread"); 5513 mutex_enter(&ldcp->tx_thr_lock); 5514 ldcp->tx_thr_flags |= VSW_WTHR_RUNNING; 5515 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) { 5516 5517 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5518 /* 5519 * Wait until the data is received or a stop 5520 * request is received. 5521 */ 5522 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) && 5523 (ldcp->tx_mhead == NULL)) { 5524 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5525 } 5526 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock) 5527 5528 /* 5529 * First process the stop request. 
5530 */ 5531 if (ldcp->tx_thr_flags & VSW_WTHR_STOP) { 5532 D2(vswp, "%s(%lld):tx thread stopped\n", 5533 __func__, ldcp->ldc_id); 5534 break; 5535 } 5536 mp = ldcp->tx_mhead; 5537 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 5538 ldcp->tx_cnt = 0; 5539 mutex_exit(&ldcp->tx_thr_lock); 5540 D2(vswp, "%s(%lld):calling vsw_ldcsend\n", 5541 __func__, ldcp->ldc_id); 5542 while (mp != NULL) { 5543 tmp = mp->b_next; 5544 mp->b_next = mp->b_prev = NULL; 5545 (void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries); 5546 mp = tmp; 5547 } 5548 mutex_enter(&ldcp->tx_thr_lock); 5549 } 5550 5551 /* 5552 * Update the run status and wakeup the thread that 5553 * has sent the stop request. 5554 */ 5555 ldcp->tx_thr_flags &= ~VSW_WTHR_RUNNING; 5556 cv_signal(&ldcp->tx_thr_cv); 5557 CALLB_CPR_EXIT(&cprinfo); 5558 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5559 thread_exit(); 5560 } 5561 5562 /* vsw_stop_tx_thread -- Co-ordinate with receive thread to stop it */ 5563 static void 5564 vsw_stop_tx_thread(vsw_ldc_t *ldcp) 5565 { 5566 vsw_t *vswp = ldcp->ldc_vswp; 5567 5568 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5569 /* 5570 * Send a stop request by setting the stop flag and 5571 * wait until the receive thread stops. 5572 */ 5573 mutex_enter(&ldcp->tx_thr_lock); 5574 if (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5575 ldcp->tx_thr_flags |= VSW_WTHR_STOP; 5576 cv_signal(&ldcp->tx_thr_cv); 5577 while (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5578 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5579 } 5580 } 5581 mutex_exit(&ldcp->tx_thr_lock); 5582 ldcp->tx_thread = NULL; 5583 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5584 } 5585 5586 /* vsw_reclaim_dring -- reclaim descriptors */ 5587 static int 5588 vsw_reclaim_dring(dring_info_t *dp, int start) 5589 { 5590 int i, j, len; 5591 vsw_private_desc_t *priv_addr; 5592 vnet_public_desc_t *pub_addr; 5593 5594 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 5595 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 5596 len = dp->num_descriptors; 5597 5598 D2(NULL, "%s: start index %ld\n", __func__, start); 5599 5600 j = 0; 5601 for (i = start; j < len; i = (i + 1) % len, j++) { 5602 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 5603 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 5604 5605 mutex_enter(&priv_addr->dstate_lock); 5606 if (pub_addr->hdr.dstate != VIO_DESC_DONE) { 5607 mutex_exit(&priv_addr->dstate_lock); 5608 break; 5609 } 5610 pub_addr->hdr.dstate = VIO_DESC_FREE; 5611 priv_addr->dstate = VIO_DESC_FREE; 5612 /* clear all the fields */ 5613 priv_addr->datalen = 0; 5614 pub_addr->hdr.ack = 0; 5615 mutex_exit(&priv_addr->dstate_lock); 5616 5617 D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx", 5618 i, pub_addr->hdr.dstate, priv_addr->dstate); 5619 } 5620 return (j); 5621 } 5622 5623 /* 5624 * Debugging routines 5625 */ 5626 static void 5627 display_state(void) 5628 { 5629 vsw_t *vswp; 5630 vsw_port_list_t *plist; 5631 vsw_port_t *port; 5632 vsw_ldc_list_t *ldcl; 5633 vsw_ldc_t *ldcp; 5634 extern vsw_t *vsw_head; 5635 5636 cmn_err(CE_NOTE, "***** system state *****"); 5637 5638 for (vswp = vsw_head; vswp; vswp = vswp->next) { 5639 plist = &vswp->plist; 5640 READ_ENTER(&plist->lockrw); 5641 cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n", 5642 vswp->instance, plist->num_ports); 5643 5644 for (port = plist->head; port != NULL; port = port->p_next) { 5645 ldcl = &port->p_ldclist; 5646 cmn_err(CE_CONT, "port %d : %d ldcs attached\n", 5647 port->p_instance, ldcl->num_ldcs); 5648 READ_ENTER(&ldcl->lockrw); 5649 ldcp 
			ldcp = ldcl->head;
			for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
				cmn_err(CE_CONT, "chan %lu : dev %d : "
				    "status %d : phase %u\n",
				    ldcp->ldc_id, ldcp->dev_class,
				    ldcp->ldc_status, ldcp->hphase);
				cmn_err(CE_CONT, "chan %lu : lsession %lu : "
				    "psession %lu\n", ldcp->ldc_id,
				    ldcp->local_session, ldcp->peer_session);

				cmn_err(CE_CONT, "Inbound lane:\n");
				display_lane(&ldcp->lane_in);
				cmn_err(CE_CONT, "Outbound lane:\n");
				display_lane(&ldcp->lane_out);
			}
			RW_EXIT(&ldcl->lockrw);
		}
		RW_EXIT(&plist->lockrw);
	}
	cmn_err(CE_NOTE, "***** system state *****");
}

static void
display_lane(lane_t *lp)
{
	dring_info_t	*drp;

	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
	    lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
	    lp->addr_type, lp->addr, lp->xfer_mode);
	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);

	cmn_err(CE_CONT, "Dring info:\n");
	for (drp = lp->dringp; drp != NULL; drp = drp->next) {
		cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
		    drp->num_descriptors, drp->descriptor_size);
		cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
		cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
		    (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
		cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
		    drp->ident, drp->end_idx);
		display_ring(drp);
	}
}

static void
display_ring(dring_info_t *dringp)
{
	uint64_t		i;
	uint64_t		priv_count = 0;
	uint64_t		pub_count = 0;
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;

	for (i = 0; i < vsw_ntxds; i++) {
		if (dringp->pub_addr != NULL) {
			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;

			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
				pub_count++;
		}

		if (dringp->priv_addr != NULL) {
			priv_addr =
			    (vsw_private_desc_t *)dringp->priv_addr + i;

			if (priv_addr->dstate == VIO_DESC_FREE)
				priv_count++;
		}
	}
	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
	    i, priv_count, pub_count);
}

static void
dump_flags(uint64_t state)
{
	int	i;

	typedef struct flag_name {
		int	flag_val;
		char	*flag_name;
	} flag_name_t;

	flag_name_t flags[] = {
		VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT",
		VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV",
		VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV",
		VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT",
		VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV",
		VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT",
		VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT",
		VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV",
		VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT",
		VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV",
		VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT",
		VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV",
		VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT",
		VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV",
		VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT",
		VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV",
		VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT",
		VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV",
		VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT",
		VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV",
		VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT",
		VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV",
		VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT",
		VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV",
		VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT",
		VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV",
		VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT",
		VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV",
		VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT",
		VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV",
		VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"};

	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
	for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
		if (state & flags[i].flag_val)
			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
	}
}
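
/*
 * Illustrative usage sketch (hypothetical debug calls): these routines
 * are intended for ad-hoc debugging, e.g.
 *
 *	display_state();
 *	dump_flags(ldcp->lane_out.lstate);
 *
 * display_state() walks every vsw instance, port and channel, while
 * dump_flags() decodes a lane's lstate bits by name.
 */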