/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mac.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/vlan.h>

/* Port add/deletion/etc routines */
static int vsw_port_delete(vsw_port_t *port);
static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_init_ldcs(vsw_port_t *port);
static int vsw_uninit_ldcs(vsw_port_t *port);
static int vsw_ldc_init(vsw_ldc_t *ldcp);
static int vsw_ldc_uninit(vsw_ldc_t *ldcp);
static int vsw_drain_ldcs(vsw_port_t *port);
static int vsw_drain_port_taskq(vsw_port_t *port);
static void vsw_marker_task(void *);
static int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
int vsw_detach_ports(vsw_t *vswp);
int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
int vsw_port_detach(vsw_t *vswp, int p_instance);
int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count);
int vsw_port_attach(vsw_port_t *portp);
vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);
void vsw_hio_port_reset(vsw_port_t *portp);

/* Interrupt routines */
static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);
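
/*
 * Note: vsw_ldc_cb() is registered with the LDC framework via
 * ldc_reg_callback() in vsw_ldc_attach() and is invoked for UP, DOWN,
 * RESET and READ events on the channel.
 */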

/* Handshake routines */
static void vsw_ldc_reinit(vsw_ldc_t *);
static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t);
static void vsw_conn_task(void *);
static int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static void vsw_next_milestone(vsw_ldc_t *);
static int vsw_supported_version(vio_ver_msg_t *);
static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp);
static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp);

/* Data processing routines */
static void vsw_process_pkt(void *);
static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *);
static void vsw_process_ctrl_pkt(void *);
static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *,
	uint32_t);
static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
static void vsw_process_pkt_data_nop(void *, void *, uint32_t);
static void vsw_process_pkt_data(void *, void *, uint32_t);
static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *);

/* Switching/data transmit routines */
static int vsw_dringsend(vsw_ldc_t *, mblk_t *);
static int vsw_descrsend(vsw_ldc_t *, mblk_t *);
static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp);
static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries);
static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);
static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);

/* Packet creation routines */
static void vsw_send_ver(void *);
static void vsw_send_attr(vsw_ldc_t *);
static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);

/* Dring routines */
static dring_info_t *vsw_create_dring(vsw_ldc_t *);
static void vsw_create_privring(vsw_ldc_t *);
static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
	int *);
static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);
static int vsw_reclaim_dring(dring_info_t *dp, int start);

static void vsw_set_lane_attr(vsw_t *, lane_t *);
static int vsw_check_attr(vnet_attr_msg_t *, vsw_ldc_t *);
static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
static int vsw_check_dring_info(vio_dring_reg_msg_t *);

/* Rcv/Tx thread routines */
static void vsw_stop_tx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_tx_worker(void *arg);
static void vsw_stop_rx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_rx_worker(void *arg);

/* Misc support routines */
static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static int vsw_free_ring(dring_info_t *);
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
static int vsw_get_same_dest_list(struct ether_header *ehp,
	mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);
static mblk_t *vsw_dupmsgchain(mblk_t *mp);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);

/*
 * Functions imported from other files.
 */
extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
extern void vsw_reconfig_hw(vsw_t *);
extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port);
extern void vsw_del_mcst_port(vsw_port_t *port);
extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern void vsw_fdbe_add(vsw_t *vswp, void *port);
extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
extern void vsw_create_vlans(void *arg, int type);
extern void vsw_destroy_vlans(void *arg, int type);
extern void vsw_vlan_add_ids(void *arg, int type);
extern void vsw_vlan_remove_ids(void *arg, int type);
extern boolean_t vsw_frame_lookup_vid(void *arg, int caller,
	struct ether_header *ehp, uint16_t *vidp);
extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
	mblk_t **npt);
extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
extern void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp);
extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp);
extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg);
extern void vsw_hio_stop_port(vsw_port_t *portp);

#define	VSW_NUM_VMPOOLS		3	/* number of vio mblk pools */

/*
 * Tunables used in this file.
 */
extern int vsw_num_handshakes;
extern int vsw_wretries;
extern int vsw_desc_delay;
extern int vsw_read_attempts;
extern int vsw_ldc_tx_delay;
extern int vsw_ldc_tx_retries;
extern boolean_t vsw_ldc_rxthr_enabled;
extern boolean_t vsw_ldc_txthr_enabled;
extern uint32_t vsw_ntxds;
extern uint32_t vsw_max_tx_qcount;
extern uint32_t vsw_chain_len;
extern uint32_t vsw_mblk_size1;
extern uint32_t vsw_mblk_size2;
extern uint32_t vsw_mblk_size3;
extern uint32_t vsw_num_mblks1;
extern uint32_t vsw_num_mblks2;
extern uint32_t vsw_num_mblks3;
extern boolean_t vsw_obp_ver_proto_workaround;

#define	LDC_ENTER_LOCK(ldcp)	\
				mutex_enter(&((ldcp)->ldc_cblock));\
				mutex_enter(&((ldcp)->ldc_rxlock));\
				mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
				mutex_exit(&((ldcp)->ldc_txlock));\
				mutex_exit(&((ldcp)->ldc_rxlock));\
				mutex_exit(&((ldcp)->ldc_cblock));

#define	VSW_VER_EQ(ldcp, major, minor)	\
	((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor == (minor))

#define	VSW_VER_LT(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major < (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor < (minor)))

#define	VSW_VER_GTEQ(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major > (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor >= (minor)))

/* supported versions */
static ver_sup_t vsw_versions[] = { {1, 3} };
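
/*
 * vsw_supported_version() scans this table when negotiating the protocol
 * version with a peer, clamping the peer's minor number or stepping down
 * to a lower major version as needed.
 */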

/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

#define	DUMP_TAG(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \
}

#define	DUMP_TAG_PTR(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \
}

#define	DUMP_FLAGS(flags) dump_flags(flags);
#define	DISPLAY_STATE()	display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */

/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_attach(vsw_port_t *port)
{
	vsw_t *vswp = port->p_vswp;
	vsw_port_list_t *plist = &vswp->plist;
	vsw_port_t *p, **pp;
	int i;
	int nids = port->num_ldcs;
	uint64_t *ldcids;

	D1(vswp, "%s: enter : port %d", __func__, port->p_instance);

	/* port already exists? */
	READ_ENTER(&plist->lockrw);
	for (p = plist->head; p != NULL; p = p->p_next) {
		if (p->p_instance == port->p_instance) {
			DWARN(vswp, "%s: port instance %d already attached",
			    __func__, p->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}
	}
	RW_EXIT(&plist->lockrw);

	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);

	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
	port->state = VSW_PORT_INIT;

	D2(vswp, "%s: %d nids", __func__, nids);
	ldcids = port->ldc_ids;
	for (i = 0; i < nids; i++) {
		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
			DERR(vswp, "%s: ldc_attach failed", __func__);

			rw_destroy(&port->p_ldclist.lockrw);

			cv_destroy(&port->state_cv);
			mutex_destroy(&port->state_lock);

			mutex_destroy(&port->tx_lock);
			mutex_destroy(&port->mca_lock);
			kmem_free(port, sizeof (vsw_port_t));
			return (1);
		}
	}

	if (vswp->switching_setup_done == B_TRUE) {
		/*
		 * If the underlying physical device has been setup,
		 * program the mac address of this port in it.
		 * Otherwise, port macaddr will be set after the physical
		 * device is successfully setup by the timeout handler.
		 */
		mutex_enter(&vswp->hw_lock);
		(void) vsw_set_hw(vswp, port, VSW_VNETPORT);
		mutex_exit(&vswp->hw_lock);
	}

	/* create the fdb entry for this port/mac address */
	vsw_fdbe_add(vswp, port);

	vsw_create_vlans(port, VSW_VNETPORT);

	WRITE_ENTER(&plist->lockrw);

	/* link it into the list of ports for this vsw instance */
	pp = (vsw_port_t **)(&plist->head);
	port->p_next = *pp;
	*pp = port;
	plist->num_ports++;

	RW_EXIT(&plist->lockrw);

	/*
	 * Initialise the port and any ldc's under it.
	 */
	(void) vsw_init_ldcs(port);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
	vsw_port_t *port = NULL;
	vsw_port_list_t *plist = &vswp->plist;

	D1(vswp, "%s: enter: port id %d", __func__, p_instance);

	WRITE_ENTER(&plist->lockrw);

	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	if (vsw_plist_del_node(vswp, port)) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	/* cleanup any HybridIO for this port */
	vsw_hio_stop_port(port);

	/*
	 * No longer need to hold writer lock on port list now
	 * that we have unlinked the target port from the list.
	 */
	RW_EXIT(&plist->lockrw);

	/* Remove the fdb entry for this port/mac address */
	vsw_fdbe_del(vswp, &(port->p_macaddr));
	vsw_destroy_vlans(port, VSW_VNETPORT);

	/* Remove any multicast addresses.. */
	vsw_del_mcst_port(port);

	/* Remove address if it was programmed into HW. */
	mutex_enter(&vswp->hw_lock);

	/*
	 * Port's address may not have been set in hardware. This could
	 * happen if the underlying physical device is not yet available and
	 * vsw_setup_switching_timeout() may be in progress.
	 * We remove its addr from hardware only if it has been set before.
	 */
	if (port->addr_set != VSW_ADDR_UNSET)
		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);

	if (vswp->recfg_reqd)
		vsw_reconfig_hw(vswp);

	mutex_exit(&vswp->hw_lock);

	if (vsw_port_delete(port)) {
		return (1);
	}

	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
	return (0);
}

/*
 * Detach all active ports.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_detach_ports(vsw_t *vswp)
{
	vsw_port_list_t *plist = &vswp->plist;
	vsw_port_t *port = NULL;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&plist->lockrw);

	while ((port = plist->head) != NULL) {
		if (vsw_plist_del_node(vswp, port)) {
			DERR(vswp, "%s: Error deleting port %d"
			    " from port list", __func__, port->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}

		/* Remove address if it was programmed into HW. */
		mutex_enter(&vswp->hw_lock);
		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);
		mutex_exit(&vswp->hw_lock);

		/* Remove the fdb entry for this port/mac address */
		vsw_fdbe_del(vswp, &(port->p_macaddr));
		vsw_destroy_vlans(port, VSW_VNETPORT);

		/* Remove any multicast addresses.. */
		vsw_del_mcst_port(port);

		/*
		 * No longer need to hold the lock on the port list
		 * now that we have unlinked the target port from the
		 * list.
		 */
		RW_EXIT(&plist->lockrw);
		if (vsw_port_delete(port)) {
			DERR(vswp, "%s: Error deleting port %d",
			    __func__, port->p_instance);
			return (1);
		}
		WRITE_ENTER(&plist->lockrw);
	}
	RW_EXIT(&plist->lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Delete the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_delete(vsw_port_t *port)
{
	vsw_ldc_list_t *ldcl;
	vsw_t *vswp = port->p_vswp;
	int num_ldcs;

	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

	(void) vsw_uninit_ldcs(port);

	/*
	 * Wait for any pending ctrl msg tasks which reference this
	 * port to finish.
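	 * This is done by dispatching a marker task to the port's taskq
	 * and waiting for it to run; see vsw_drain_port_taskq() below.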
	 */
	if (vsw_drain_port_taskq(port))
		return (1);

	/*
	 * Wait for any active callbacks to finish
	 */
	if (vsw_drain_ldcs(port))
		return (1);

	ldcl = &port->p_ldclist;
	num_ldcs = port->num_ldcs;
	WRITE_ENTER(&ldcl->lockrw);
	while (num_ldcs > 0) {
		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {
			cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld",
			    vswp->instance, ldcl->head->ldc_id);
			RW_EXIT(&ldcl->lockrw);
			port->num_ldcs = num_ldcs;
			return (1);
		}
		num_ldcs--;
	}
	RW_EXIT(&ldcl->lockrw);

	rw_destroy(&port->p_ldclist.lockrw);

	mutex_destroy(&port->mca_lock);
	mutex_destroy(&port->tx_lock);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	if (port->num_ldcs != 0) {
		kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t));
		port->num_ldcs = 0;
	}
	kmem_free(port, sizeof (vsw_port_t));

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t *vswp = port->p_vswp;
	vsw_ldc_list_t *ldcl = &port->p_ldclist;
	vsw_ldc_t *ldcp = NULL;
	ldc_attr_t attr;
	ldc_status_t istatus;
	int status = DDI_FAILURE;
	int rv;
	char kname[MAXNAMELEN];
	enum { PROG_init = 0x0, PROG_mblks = 0x1,
	    PROG_callback = 0x2, PROG_rx_thread = 0x4,
	    PROG_tx_thread = 0x8}
	    progress;

	progress = PROG_init;

	D1(vswp, "%s: enter", __func__);

	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		DERR(vswp, "%s: kmem_zalloc failed", __func__);
		return (1);
	}
	ldcp->ldc_id = ldc_id;

	/* Allocate pools of receive mblks */
	rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS,
	    vsw_mblk_size1, vsw_mblk_size2, vsw_mblk_size3,
	    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3);
	if (rv) {
		DWARN(vswp, "%s: unable to create free mblk pools for"
		    " channel %ld (rv %d)", __func__, ldc_id, rv);
		kmem_free(ldcp, sizeof (vsw_ldc_t));
		return (1);
	}

	progress |= PROG_mblks;

	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);
	rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL);
	rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL);

	/* required for handshake with peer */
	ldcp->local_session = (uint64_t)ddi_get_lbolt();
	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hss_id = 1;	/* Initial handshake session id */

	(void) atomic_swap_32(&port->p_hio_capable, B_FALSE);

	/* only set for outbound lane, inbound set by peer */
	vsw_set_lane_attr(vswp, &ldcp->lane_out);

	attr.devclass = LDC_DEV_NT_SVC;
	attr.instance = ddi_get_instance(vswp->dip);
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = VSW_LDC_MTU;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
		    __func__, ldc_id, status);
		goto ldc_attach_fail;
	}

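	/*
	 * Optionally create dedicated receive and transmit worker threads
	 * for this channel, controlled by the vsw_ldc_rxthr_enabled and
	 * vsw_ldc_txthr_enabled tunables. When the rx thread is disabled,
	 * inbound messages are processed directly from the LDC callback
	 * (see vsw_ldc_cb()).
	 */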
	if (vsw_ldc_rxthr_enabled) {
		ldcp->rx_thr_flags = 0;

		mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		progress |= PROG_rx_thread;
		if (ldcp->rx_thread == NULL) {
			DWARN(vswp, "%s(%lld): Failed to create worker thread",
			    __func__, ldc_id);
			goto ldc_attach_fail;
		}
	}

	if (vsw_ldc_txthr_enabled) {
		ldcp->tx_thr_flags = 0;
		ldcp->tx_mhead = ldcp->tx_mtail = NULL;

		mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		progress |= PROG_tx_thread;
		if (ldcp->tx_thread == NULL) {
			DWARN(vswp, "%s(%lld): Failed to create worker thread",
			    __func__, ldc_id);
			goto ldc_attach_fail;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
		    __func__, ldc_id, status);
		(void) ldc_fini(ldcp->ldc_handle);
		goto ldc_attach_fail;
	}
	/*
	 * allocate a message for ldc_read()s, big enough to hold ctrl and
	 * data msgs, including raw data msgs used to recv priority frames.
	 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);

	progress |= PROG_callback;

	mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: ldc_status failed", __func__);
		mutex_destroy(&ldcp->status_lock);
		goto ldc_attach_fail;
	}

	ldcp->ldc_status = istatus;
	ldcp->ldc_port = port;
	ldcp->ldc_vswp = vswp;

	vsw_reset_vnet_proto_ops(ldcp);

	(void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance,
	    kname, &ldcp->ldc_stats);
	if (ldcp->ksp == NULL) {
		DERR(vswp, "%s: kstats setup failed", __func__);
		goto ldc_attach_fail;
	}

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldcl->lockrw);
	ldcp->ldc_next = ldcl->head;
	ldcl->head = ldcp;
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);

ldc_attach_fail:

	if (progress & PROG_callback) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}

	if (progress & PROG_rx_thread) {
		if (ldcp->rx_thread != NULL) {
			vsw_stop_rx_thread(ldcp);
		}
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}

	if (progress & PROG_tx_thread) {
		if (ldcp->tx_thread != NULL) {
			vsw_stop_tx_thread(ldcp);
		}
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
	}
	if (ldcp->ksp != NULL) {
		vgen_destroy_kstats(ldcp->ksp);
	}
	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	mutex_destroy(&ldcp->drain_cv_lock);

	cv_destroy(&ldcp->drain_cv);

	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	if (progress & PROG_mblks) {
		vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);
	}
	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (1);
}

/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t *vswp = port->p_vswp;
	vsw_ldc_t *ldcp, *prev_ldcp;
	vsw_ldc_list_t *ldcl = &port->p_ldclist;
	int rv;

	prev_ldcp = ldcl->head;
	for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) {
		if (ldcp->ldc_id == ldc_id) {
			break;
		}
	}

	/* specified ldc id not found */
	if (ldcp == NULL) {
		DERR(vswp, "%s: ldcp = NULL", __func__);
		return (1);
	}

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/* Stop the receive thread */
	if (ldcp->rx_thread != NULL) {
		vsw_stop_rx_thread(ldcp);
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}
	kmem_free(ldcp->ldcmsg, ldcp->msglen);

	/* Stop the tx thread */
	if (ldcp->tx_thread != NULL) {
		vsw_stop_tx_thread(ldcp);
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
		if (ldcp->tx_mhead != NULL) {
			freemsgchain(ldcp->tx_mhead);
			ldcp->tx_mhead = ldcp->tx_mtail = NULL;
			ldcp->tx_cnt = 0;
		}
	}

	/* Destroy kstats */
	vgen_destroy_kstats(ldcp->ksp);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
	 */
	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	/*
	 * If the close fails we are in serious trouble, as we won't
	 * be able to delete the parent port.
	 */
	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
		DERR(vswp, "%s: error %d closing channel %lld",
		    __func__, rv, ldcp->ldc_id);
		return (1);
	}

	(void) ldc_fini(ldcp->ldc_handle);

	ldcp->ldc_status = LDC_INIT;
	ldcp->ldc_handle = NULL;
	ldcp->ldc_vswp = NULL;

	/*
	 * Most likely some mblks are still in use and
	 * have not been returned to the pool. These mblks are
	 * added to the pool that is maintained in the device instance.
	 * Another attempt will be made to destroy the pool
	 * when the device detaches.
	 */
	vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);

	/* unlink it from the list */
	prev_ldcp = ldcp->ldc_next;

	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	cv_destroy(&ldcp->drain_cv);
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->status_lock);
	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (0);
}

/*
 * Open and attempt to bring up the channel. Note that channel
 * can only be brought up if peer has also opened channel.
 *
 * Returns 0 if the channel can be opened and brought up, otherwise
 * returns 1.
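 *
 * A failure of ldc_up() here is not treated as fatal by the caller
 * (vsw_init_ldcs() ignores the return value); the channel is brought
 * up later, when the peer comes up, via the LDC_EVT_UP handling in
 * vsw_ldc_cb().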
 */
static int
vsw_ldc_init(vsw_ldc_t *ldcp)
{
	vsw_t *vswp = ldcp->ldc_vswp;
	ldc_status_t istatus = 0;
	int rv;

	D1(vswp, "%s: enter", __func__);

	LDC_ENTER_LOCK(ldcp);

	/* don't start at 0 in case clients don't like that */
	ldcp->next_ident = 1;

	rv = ldc_open(ldcp->ldc_handle);
	if (rv != 0) {
		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
		    __func__, ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
		    __func__, ldcp->ldc_id, istatus);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = istatus;
	mutex_exit(&ldcp->status_lock);

	rv = ldc_up(ldcp->ldc_handle);
	if (rv != 0) {
		/*
		 * Not a fatal error for ldc_up() to fail, as peer
		 * end point may simply not be ready yet.
		 */
		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
		    ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	/*
	 * ldc_up() call is non-blocking so need to explicitly
	 * check channel status to see if in fact the channel
	 * is UP.
	 */
	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	}

	if (ldcp->ldc_status == LDC_UP) {
		D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
		    ldcp->ldc_id, istatus);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		return (0);
	}

	mutex_exit(&ldcp->status_lock);
	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/* disable callbacks on the channel */
static int
vsw_ldc_uninit(vsw_ldc_t *ldcp)
{
	vsw_t *vswp = ldcp->ldc_vswp;
	int rv;

	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);

	LDC_ENTER_LOCK(ldcp);

	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		DERR(vswp, "vsw_ldc_uninit(%lld): error disabling "
		    "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = LDC_INIT;
	mutex_exit(&ldcp->status_lock);

	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);

	return (0);
}

static int
vsw_init_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t *ldcl = &port->p_ldclist;
	vsw_ldc_t *ldcp;

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_init(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	return (0);
}

static int
vsw_uninit_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t *ldcl = &port->p_ldclist;
	vsw_ldc_t *ldcp;

	D1(NULL, "vsw_uninit_ldcs: enter\n");

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_uninit(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	D1(NULL, "vsw_uninit_ldcs: exit\n");

	return (0);
}

/*
 * Wait until the callback(s) associated with the ldcs under the specified
 * port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 *
 * A short explanation of what we are doing below..
 *
 * The simplest approach would be to have a reference counter in
 * the ldc structure which is incremented/decremented by the callbacks as
 * they use the channel. The drain function could then simply disable any
 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
 * there is a tiny window here - before the callback is able to get the lock
 * on the channel it is interrupted and this function gets to execute. It
 * sees that the ref count is zero and believes it is free to delete the
 * associated data structures.
 *
 * We get around this by taking advantage of the fact that before the ldc
 * framework invokes a callback it sets a flag to indicate that there is a
 * callback active (or about to become active). If we attempt to
 * unregister a callback while this active flag is set then the unregister
 * will fail with EWOULDBLOCK.
 *
 * If the unregister fails we do a cv_timedwait. We will either be signaled
 * by the callback as it is exiting (note we have to wait a short period to
 * allow the callback to return fully to the ldc framework and it to clear
 * the active flag), or by the timer expiring. In either case we again attempt
 * the unregister. We repeat this until we can successfully unregister the
 * callback.
 *
 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
 * the case where the callback has finished but the ldc framework has not yet
 * cleared the active flag. In this case we would never get a cv_signal.
 */
static int
vsw_drain_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t *ldcl = &port->p_ldclist;
	vsw_ldc_t *ldcp;
	vsw_t *vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	READ_ENTER(&ldcl->lockrw);

	ldcp = ldcl->head;

	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		/*
		 * If we can unregister the channel callback then we
		 * know that there is no callback either running or
		 * scheduled to run for this channel so move on to next
		 * channel in the list.
		 */
		mutex_enter(&ldcp->drain_cv_lock);

		/* prompt active callbacks to quit */
		ldcp->drain_state = VSW_LDC_DRAINING;

		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
			D2(vswp, "%s: unreg callback for chan %ld", __func__,
			    ldcp->ldc_id);
			mutex_exit(&ldcp->drain_cv_lock);
			continue;
		} else {
			/*
			 * If we end up here we know that either 1) a callback
			 * is currently executing, 2) is about to start (i.e.
			 * the ldc framework has set the active flag but
			 * has not actually invoked the callback yet, or 3)
			 * has finished and has returned to the ldc framework
			 * but the ldc framework has not yet cleared the
			 * active bit.
			 *
			 * Wait for it to finish.
			 */
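			/*
			 * Retry roughly once a second (hz ticks) until the
			 * unregister succeeds.
			 */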
			while (ldc_unreg_callback(ldcp->ldc_handle)
			    == EWOULDBLOCK)
				(void) cv_timedwait(&ldcp->drain_cv,
				    &ldcp->drain_cv_lock, lbolt + hz);

			mutex_exit(&ldcp->drain_cv_lock);
			D2(vswp, "%s: unreg callback for chan %ld after "
			    "timeout", __func__, ldcp->ldc_id);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Wait until all tasks which reference this port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 */
static int
vsw_drain_port_taskq(vsw_port_t *port)
{
	vsw_t *vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Mark the port as in the process of being detached, and
	 * dispatch a marker task to the queue so we know when all
	 * relevant tasks have completed.
	 */
	mutex_enter(&port->state_lock);
	port->state = VSW_PORT_DETACHING;

	if ((vswp->taskq_p == NULL) ||
	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
	    port, DDI_NOSLEEP) != DDI_SUCCESS)) {
		DERR(vswp, "%s: unable to dispatch marker task",
		    __func__);
		mutex_exit(&port->state_lock);
		return (1);
	}

	/*
	 * Wait for the marker task to finish.
	 */
	while (port->state != VSW_PORT_DETACHABLE)
		cv_wait(&port->state_cv, &port->state_lock);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static void
vsw_marker_task(void *arg)
{
	vsw_port_t *port = arg;
	vsw_t *vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&port->state_lock);

	/*
	 * No further tasks should be dispatched which reference
	 * this port so ok to mark it as safe to detach.
	 */
	port->state = VSW_PORT_DETACHABLE;

	cv_signal(&port->state_cv);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);
}

vsw_port_t *
vsw_lookup_port(vsw_t *vswp, int p_instance)
{
	vsw_port_list_t *plist = &vswp->plist;
	vsw_port_t *port;

	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			D2(vswp, "vsw_lookup_port: found p_instance\n");
			return (port);
		}
	}

	return (NULL);
}

void
vsw_vlan_unaware_port_reset(vsw_port_t *portp)
{
	vsw_ldc_list_t *ldclp;
	vsw_ldc_t *ldcp;

	ldclp = &portp->p_ldclist;

	READ_ENTER(&ldclp->lockrw);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->head == NULL) {
		RW_EXIT(&ldclp->lockrw);
		return;
	}
	ldcp = ldclp->head;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
	 * the connection. See comments in vsw_set_vnet_proto_ops().
	 */
	if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) &&
	    portp->nvids != 0) {
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
	}

	mutex_exit(&ldcp->ldc_cblock);

	RW_EXIT(&ldclp->lockrw);
}

void
vsw_hio_port_reset(vsw_port_t *portp)
{
	vsw_ldc_list_t *ldclp;
	vsw_ldc_t *ldcp;

	ldclp = &portp->p_ldclist;

	READ_ENTER(&ldclp->lockrw);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->head == NULL) {
		RW_EXIT(&ldclp->lockrw);
		return;
	}
	ldcp = ldclp->head;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If the peer is HybridIO capable (ver >= 1.3), reset channel
	 * to trigger re-negotiation, which in turn triggers HybridIO
	 * setup/cleanup.
	 */
	if ((ldcp->hphase == VSW_MILESTONE4) &&
	    (portp->p_hio_capable == B_TRUE)) {
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
	}

	mutex_exit(&ldcp->ldc_cblock);

	RW_EXIT(&ldclp->lockrw);
}

/*
 * Search for and remove the specified port from the port
 * list. Returns 0 if able to locate and remove port, otherwise
 * returns 1.
 */
static int
vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
{
	vsw_port_list_t *plist = &vswp->plist;
	vsw_port_t *curr_p, *prev_p;

	if (plist->head == NULL)
		return (1);

	curr_p = prev_p = plist->head;

	while (curr_p != NULL) {
		if (curr_p == port) {
			if (prev_p == curr_p) {
				plist->head = curr_p->p_next;
			} else {
				prev_p->p_next = curr_p->p_next;
			}
			plist->num_ports--;
			break;
		} else {
			prev_p = curr_p;
			curr_p = curr_p->p_next;
		}
	}
	return (0);
}

/*
 * Interrupt handler for ldc messages.
 */
static uint_t
vsw_ldc_cb(uint64_t event, caddr_t arg)
{
	vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
	vsw_t *vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	mutex_enter(&ldcp->ldc_cblock);
	ldcp->ldc_stats.callbacks++;

	mutex_enter(&ldcp->status_lock);
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		mutex_exit(&ldcp->status_lock);
		mutex_exit(&ldcp->ldc_cblock);
		return (LDC_SUCCESS);
	}
	mutex_exit(&ldcp->status_lock);

	if (event & LDC_EVT_UP) {
		/*
		 * Channel has come up.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		/*
		 * Data available for reading.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) data READ",
		    __func__, ldcp->ldc_id, event);

		if (ldcp->rx_thread != NULL) {
			/*
			 * If the receive thread is enabled, then
			 * wakeup the receive thread to process the
			 * LDC messages.
			 */
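			/* drop the callback lock while waking the rx thread */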
			mutex_exit(&ldcp->ldc_cblock);
			mutex_enter(&ldcp->rx_thr_lock);
			if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) {
				ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD;
				cv_signal(&ldcp->rx_thr_cv);
			}
			mutex_exit(&ldcp->rx_thr_lock);
			mutex_enter(&ldcp->ldc_cblock);
		} else {
			vsw_process_pkt(ldcp);
		}

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		goto vsw_cb_exit;
	}

	if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) {
		D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
	}

	/*
	 * Catch either LDC_EVT_WRITE which we don't support or any
	 * unknown event.
	 */
	if (event &
	    ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) {
		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
	}

vsw_cb_exit:
	mutex_exit(&ldcp->ldc_cblock);

	/*
	 * Let the drain function know we are finishing if it
	 * is waiting.
	 */
	mutex_enter(&ldcp->drain_cv_lock);
	if (ldcp->drain_state == VSW_LDC_DRAINING)
		cv_signal(&ldcp->drain_cv);
	mutex_exit(&ldcp->drain_cv_lock);

	return (LDC_SUCCESS);
}

/*
 * Reinitialise data structures associated with the channel.
 */
static void
vsw_ldc_reinit(vsw_ldc_t *ldcp)
{
	vsw_t *vswp = ldcp->ldc_vswp;
	vsw_port_t *port;
	vsw_ldc_list_t *ldcl;

	D1(vswp, "%s: enter", __func__);

	port = ldcp->ldc_port;
	ldcl = &port->p_ldclist;

	READ_ENTER(&ldcl->lockrw);

	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
	    ldcp->lane_in.lstate, ldcp->lane_out.lstate);

	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);
	RW_EXIT(&ldcl->lockrw);

	ldcp->lane_in.lstate = 0;
	ldcp->lane_out.lstate = 0;

	/* Remove the fdb entry for this port/mac address */
	vsw_fdbe_del(vswp, &(port->p_macaddr));

	/* remove the port from vlans it has been assigned to */
	vsw_vlan_remove_ids(port, VSW_VNETPORT);

	/*
	 * Remove parent port from any multicast groups
	 * it may have registered with. Client must resend
	 * multicast add command after handshake completes.
	 */
	vsw_del_mcst_port(port);

	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hcnt = 0;
	ldcp->hphase = VSW_MILESTONE0;

	vsw_reset_vnet_proto_ops(ldcp);

	D1(vswp, "%s: exit", __func__);
}

/*
 * Process a connection event.
 *
 * Note - care must be taken to ensure that this function is
 * not called with the dlistrw lock held.
 */
static void
vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt)
{
	vsw_t *vswp = ldcp->ldc_vswp;
	vsw_conn_evt_t *conn = NULL;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Check if either a reset or restart event is pending
	 * or in progress. If so just return.
	 *
	 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT
	 * being received by the callback handler, or a ECONNRESET error
	 * code being returned from a ldc_read() or ldc_write() call.
	 *
	 * A VSW_CONN_RESTART event occurs when some error checking code
	 * decides that there is a problem with data from the channel,
	 * and that the handshake should be restarted.
	 */
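	/*
	 * ldstub() atomically sets reset_active and returns its previous
	 * value, so only the first of any concurrent reset/restart
	 * requests proceeds; later ones simply return here.
	 */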
	if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) &&
	    (ldstub((uint8_t *)&ldcp->reset_active)))
		return;

	/*
	 * If it is an LDC_UP event we first check the recorded
	 * state of the channel. If this is UP then we know that
	 * the channel moving to the UP state has already been dealt
	 * with and don't need to dispatch a new task.
	 *
	 * The reason for this check is that when we do a ldc_up(),
	 * depending on the state of the peer, we may or may not get
	 * a LDC_UP event. As we can't depend on getting a LDC_UP evt
	 * every time we do ldc_up() we explicitly check the channel
	 * status to see if it has come up (ldc_up() is asynch and will
	 * complete at some undefined time), and take the appropriate
	 * action.
	 *
	 * The flip side of this is that we may get a LDC_UP event
	 * when we have already seen that the channel is up and have
	 * dealt with that.
	 */
	mutex_enter(&ldcp->status_lock);
	if (evt == VSW_CONN_UP) {
		if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) {
			mutex_exit(&ldcp->status_lock);
			return;
		}
	}
	mutex_exit(&ldcp->status_lock);

	/*
	 * The transaction group id allows us to identify and discard
	 * any tasks which are still pending on the taskq and refer
	 * to the handshake session we are about to restart or reset.
	 * These stale messages no longer have any real meaning.
	 */
	(void) atomic_inc_32(&ldcp->hss_id);

	ASSERT(vswp->taskq_p != NULL);

	if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) {
		cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for"
		    " connection event", vswp->instance);
		goto err_exit;
	}

	conn->evt = evt;
	conn->ldcp = ldcp;

	if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn,
	    DDI_NOSLEEP) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task",
		    vswp->instance);

		kmem_free(conn, sizeof (vsw_conn_evt_t));
		goto err_exit;
	}

	D1(vswp, "%s: exit", __func__);
	return;

err_exit:
	/*
	 * Have most likely failed due to memory shortage. Clear the flag so
	 * that future requests will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;
}

/*
 * Deal with events relating to a connection. Invoked from a taskq.
 */
static void
vsw_conn_task(void *arg)
{
	vsw_conn_evt_t *conn = (vsw_conn_evt_t *)arg;
	vsw_ldc_t *ldcp = NULL;
	vsw_port_t *portp;
	vsw_t *vswp = NULL;
	uint16_t evt;
	ldc_status_t curr_status;

	ldcp = conn->ldcp;
	evt = conn->evt;
	vswp = ldcp->ldc_vswp;
	portp = ldcp->ldc_port;

	D1(vswp, "%s: enter", __func__);

	/* can safely free now have copied out data */
	kmem_free(conn, sizeof (vsw_conn_evt_t));

	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	/*
	 * If we wish to restart the handshake on this channel, then if
	 * the channel is UP we bring it DOWN to flush the underlying
	 * ldc queue.
	 */
	if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP))
		(void) ldc_down(ldcp->ldc_handle);

	if ((vswp->hio_capable) && (portp->p_hio_enabled)) {
		vsw_hio_stop(vswp, ldcp);
	}

	/*
	 * re-init all the associated data structures.
	 */
	vsw_ldc_reinit(ldcp);

	/*
	 * Bring the channel back up (note it does no harm to
	 * do this even if the channel is already UP, Just
	 * becomes effectively a no-op).
	 */
	(void) ldc_up(ldcp->ldc_handle);

	/*
	 * Check if channel is now UP. This will only happen if
	 * peer has also done a ldc_up().
	 */
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	ldcp->ldc_status = curr_status;

	/* channel UP so restart handshake by sending version info */
	if (curr_status == LDC_UP) {
		if (ldcp->hcnt++ > vsw_num_handshakes) {
			cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted"
			    " handshake attempts (%d) on channel %ld",
			    vswp->instance, ldcp->hcnt, ldcp->ldc_id);
			mutex_exit(&ldcp->status_lock);
			return;
		}

		if (vsw_obp_ver_proto_workaround == B_FALSE &&
		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
		    DDI_NOSLEEP) != DDI_SUCCESS)) {
			cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task",
			    vswp->instance);

			/*
			 * Don't count as valid restart attempt if couldn't
			 * send version msg.
			 */
			if (ldcp->hcnt > 0)
				ldcp->hcnt--;
		}
	}

	/*
	 * Mark that the process is complete by clearing the flag.
	 *
	 * Note it is possible that the taskq dispatch above may have failed,
	 * most likely due to memory shortage. We still clear the flag so
	 * future attempts will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;

	mutex_exit(&ldcp->status_lock);

	D1(vswp, "%s: exit", __func__);
}

/*
 * Returns 0 if it is legal for the event signified by flag to have
 * occurred at the time it did. Otherwise returns 1.
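 *
 * An out-of-order message also causes the handshake to be restarted
 * (via vsw_process_conn_evt(VSW_CONN_RESTART)) before 1 is returned.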
 */
int
vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
{
	vsw_t *vswp = ldcp->ldc_vswp;
	uint64_t state;
	uint64_t phase;

	if (dir == INBOUND)
		state = ldcp->lane_in.lstate;
	else
		state = ldcp->lane_out.lstate;

	phase = ldcp->hphase;

	switch (flag) {
	case VSW_VER_INFO_RECV:
		if (phase > VSW_MILESTONE0) {
			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_VER_ACK_RECV:
	case VSW_VER_NACK_RECV:
		if (!(state & VSW_VER_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or "
			    "VER_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_VER_INFO_SENT;
		break;

	case VSW_ATTR_INFO_RECV:
		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_ATTR_ACK_RECV:
	case VSW_ATTR_NACK_RECV:
		if (!(state & VSW_ATTR_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
			    " or ATTR_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_ATTR_INFO_SENT;
		break;

	case VSW_DRING_INFO_RECV:
		if (phase < VSW_MILESTONE1) {
			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_DRING_ACK_RECV:
	case VSW_DRING_NACK_RECV:
		if (!(state & VSW_DRING_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK "
			    " or DRING_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_DRING_INFO_SENT;
		break;

	case VSW_RDX_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_RDX_ACK_RECV:
	case VSW_RDX_NACK_RECV:
		if (!(state & VSW_RDX_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or "
			    "RDX_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_RDX_INFO_SENT;
		break;

	case VSW_MCST_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	default:
		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
		    ldcp->ldc_id, flag);
		return (1);
	}

	if (dir == INBOUND)
		ldcp->lane_in.lstate = state;
	else
		ldcp->lane_out.lstate = state;

	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);

	return (0);
}

void
vsw_next_milestone(vsw_ldc_t *ldcp)
{
	vsw_t *vswp = ldcp->ldc_vswp;
	vsw_port_t *portp = ldcp->ldc_port;

	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
	    ldcp->ldc_id, ldcp->hphase);

	DUMP_FLAGS(ldcp->lane_in.lstate);
	DUMP_FLAGS(ldcp->lane_out.lstate);

	switch (ldcp->hphase) {

	case VSW_MILESTONE0:
		/*
		 * If we haven't started to handshake with our peer,
		 * start to do so now.
		 */
		if (ldcp->lane_out.lstate == 0) {
			D2(vswp, "%s: (chan %lld) starting handshake "
			    "with peer", __func__, ldcp->ldc_id);
			vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		}

		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated version info.
		 */
		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
		    (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 0",
			    __func__, ldcp->ldc_id);

			vsw_set_vnet_proto_ops(ldcp);

			/*
			 * Next milestone is passed when attribute
			 * information has been successfully exchanged.
			 */
			ldcp->hphase = VSW_MILESTONE1;
			vsw_send_attr(ldcp);

		}
		break;

	case VSW_MILESTONE1:
		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated attribute information.
		 */
		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {

			ldcp->hphase = VSW_MILESTONE2;

			/*
			 * If the peer device has said it wishes to
			 * use descriptor rings then we send it our ring
			 * info, otherwise we just set up a private ring
			 * which uses an internal buffer.
			 */
			if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
			    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
			    (VSW_VER_LT(ldcp, 1, 2) &&
			    (ldcp->lane_in.xfer_mode ==
			    VIO_DRING_MODE_V1_0))) {
				vsw_send_dring_info(ldcp);
			}
		}
		break;

	case VSW_MILESTONE2:
		/*
		 * If peer has indicated in its attribute message that
		 * it wishes to use descriptor rings then the only way
		 * to pass this milestone is for us to have received
		 * valid dring info.
		 *
		 * If peer is not using descriptor rings then just fall
		 * through.
		 */
		if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
		    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    (VSW_VER_LT(ldcp, 1, 2) &&
		    (ldcp->lane_in.xfer_mode ==
		    VIO_DRING_MODE_V1_0))) {
			if (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))
				break;
		}

		D2(vswp, "%s: (chan %lld) leaving milestone 2",
		    __func__, ldcp->ldc_id);

		ldcp->hphase = VSW_MILESTONE3;
		vsw_send_rdx(ldcp);
		break;

	case VSW_MILESTONE3:
		/*
		 * Pass this milestone when all parameters have been
		 * successfully exchanged and RDX sent in both directions.
		 *
		 * Mark outbound lane as available to transmit data.
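		 * (VSW_LANE_ACTIVE), move the handshake to milestone 4 and,
		 * if HybridIO is enabled and the port is capable, start
		 * HybridIO setup.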
		 */
		if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) &&
		    (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 3",
			    __func__, ldcp->ldc_id);
			D2(vswp, "%s: ** handshake complete (0x%llx : "
			    "0x%llx) **", __func__, ldcp->lane_in.lstate,
			    ldcp->lane_out.lstate);
			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
			ldcp->hphase = VSW_MILESTONE4;
			ldcp->hcnt = 0;
			DISPLAY_STATE();
			/* Start HIO if enabled and capable */
			if ((portp->p_hio_enabled) && (portp->p_hio_capable)) {
				D2(vswp, "%s: start HybridIO setup", __func__);
				vsw_hio_start(vswp, ldcp);
			}
		} else {
			D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)",
			    __func__, ldcp->lane_in.lstate,
			    ldcp->lane_out.lstate);
		}
		break;

	case VSW_MILESTONE4:
		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
		    ldcp->ldc_id);
		break;

	default:
		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
		    ldcp->ldc_id, ldcp->hphase);
	}

	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
	    ldcp->hphase);
}

/*
 * Check if major version is supported.
 *
 * Returns 0 if it finds a supported major number, and if necessary
 * adjusts the minor field.
 *
 * Returns 1 if it can't match the major number exactly. Sets major/minor
 * to the next lowest supported values, or to zero if no other values are
 * possible.
 */
static int
vsw_supported_version(vio_ver_msg_t *vp)
{
	int i;

	D1(NULL, "vsw_supported_version: enter");

	for (i = 0; i < VSW_NUM_VER; i++) {
		if (vsw_versions[i].ver_major == vp->ver_major) {
			/*
			 * Matching or lower major version found. Update
			 * minor number if necessary.
			 */
			if (vp->ver_minor > vsw_versions[i].ver_minor) {
				D2(NULL, "%s: adjusting minor value from %d "
				    "to %d", __func__, vp->ver_minor,
				    vsw_versions[i].ver_minor);
				vp->ver_minor = vsw_versions[i].ver_minor;
			}

			return (0);
		}

		/*
		 * If the message contains a higher major version number, set
		 * the message's major/minor versions to the current values
		 * and return false, so this message will get resent with
		 * these values.
		 */
		if (vsw_versions[i].ver_major < vp->ver_major) {
			D2(NULL, "%s: adjusting major and minor "
			    "values to %d, %d\n",
			    __func__, vsw_versions[i].ver_major,
			    vsw_versions[i].ver_minor);
			vp->ver_major = vsw_versions[i].ver_major;
			vp->ver_minor = vsw_versions[i].ver_minor;
			return (1);
		}
	}

	/* No match was possible, zero out fields */
	vp->ver_major = 0;
	vp->ver_minor = 0;

	D1(NULL, "vsw_supported_version: exit");

	return (1);
}

/*
 * Set vnet-protocol-version dependent functions based on version.
 */
static void
vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp)
{
	vsw_t *vswp = ldcp->ldc_vswp;
	lane_t *lp = &ldcp->lane_out;

	if (VSW_VER_GTEQ(ldcp, 1, 3)) {
		/*
		 * If the version negotiated with peer is >= 1.3,
		 * set the mtu in our attributes to max_frame_size.
		 */
		lp->mtu = vswp->max_frame_size;
	} else {
		vsw_port_t *portp = ldcp->ldc_port;
		/*
		 * Pre-1.3 peers expect max frame size of ETHERMAX.
2002 * We can negotiate that size with those peers provided the 2003 * following conditions are true: 2004 * - Our max_frame_size is greater only by VLAN_TAGSZ (4). 2005 * - Only pvid is defined for our peer and there are no vids. 2006 * If the above conditions are true, then we can send/recv only 2007 * untagged frames of max size ETHERMAX. Note that pvid of the 2008 * peer can be different, as vsw has to serve the vnet in that 2009 * vlan even if itself is not assigned to that vlan. 2010 */ 2011 if ((vswp->max_frame_size == ETHERMAX + VLAN_TAGSZ) && 2012 portp->nvids == 0) { 2013 lp->mtu = ETHERMAX; 2014 } 2015 } 2016 2017 if (VSW_VER_GTEQ(ldcp, 1, 2)) { 2018 /* Versions >= 1.2 */ 2019 2020 if (VSW_PRI_ETH_DEFINED(vswp)) { 2021 /* 2022 * enable priority routines and pkt mode only if 2023 * at least one pri-eth-type is specified in MD. 2024 */ 2025 ldcp->tx = vsw_ldctx_pri; 2026 ldcp->rx_pktdata = vsw_process_pkt_data; 2027 2028 /* set xfer mode for vsw_send_attr() */ 2029 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2; 2030 } else { 2031 /* no priority eth types defined in MD */ 2032 2033 ldcp->tx = vsw_ldctx; 2034 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2035 2036 /* set xfer mode for vsw_send_attr() */ 2037 lp->xfer_mode = VIO_DRING_MODE_V1_2; 2038 } 2039 2040 } else { 2041 /* Versions prior to 1.2 */ 2042 2043 vsw_reset_vnet_proto_ops(ldcp); 2044 } 2045 } 2046 2047 /* 2048 * Reset vnet-protocol-version dependent functions to v1.0. 2049 */ 2050 static void 2051 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp) 2052 { 2053 lane_t *lp = &ldcp->lane_out; 2054 2055 ldcp->tx = vsw_ldctx; 2056 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2057 2058 /* set xfer mode for vsw_send_attr() */ 2059 lp->xfer_mode = VIO_DRING_MODE_V1_0; 2060 } 2061 2062 /* 2063 * Main routine for processing messages received over LDC. 2064 */ 2065 static void 2066 vsw_process_pkt(void *arg) 2067 { 2068 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2069 vsw_t *vswp = ldcp->ldc_vswp; 2070 size_t msglen; 2071 vio_msg_tag_t *tagp; 2072 uint64_t *ldcmsg; 2073 int rv = 0; 2074 2075 2076 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2077 2078 ASSERT(MUTEX_HELD(&ldcp->ldc_cblock)); 2079 2080 ldcmsg = ldcp->ldcmsg; 2081 /* 2082 * If channel is up read messages until channel is empty. 2083 */ 2084 do { 2085 msglen = ldcp->msglen; 2086 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen); 2087 2088 if (rv != 0) { 2089 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n", 2090 __func__, ldcp->ldc_id, rv, msglen); 2091 } 2092 2093 /* channel has been reset */ 2094 if (rv == ECONNRESET) { 2095 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 2096 break; 2097 } 2098 2099 if (msglen == 0) { 2100 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 2101 ldcp->ldc_id); 2102 break; 2103 } 2104 2105 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 2106 ldcp->ldc_id, msglen); 2107 2108 /* 2109 * Figure out what sort of packet we have gotten by 2110 * examining the msg tag, and then switch it appropriately. 
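	 *
	 * For reference, dispatch here and in the routines below keys
	 * off the vio_msg_tag_t at the head of the message: vio_msgtype
	 * selects CTRL/DATA/ERR handling, vio_subtype_env selects the
	 * specific control or data handler, vio_subtype distinguishes
	 * INFO/ACK/NACK within a handler, and vio_sid is checked
	 * against the recorded peer session id.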
2111 */ 2112 tagp = (vio_msg_tag_t *)ldcmsg; 2113 2114 switch (tagp->vio_msgtype) { 2115 case VIO_TYPE_CTRL: 2116 vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp); 2117 break; 2118 case VIO_TYPE_DATA: 2119 vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen); 2120 break; 2121 case VIO_TYPE_ERR: 2122 vsw_process_err_pkt(ldcp, ldcmsg, tagp); 2123 break; 2124 default: 2125 DERR(vswp, "%s: Unknown tag(%lx) ", __func__, 2126 "id(%lx)\n", tagp->vio_msgtype, ldcp->ldc_id); 2127 break; 2128 } 2129 } while (msglen); 2130 2131 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2132 } 2133 2134 /* 2135 * Dispatch a task to process a VIO control message. 2136 */ 2137 static void 2138 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp) 2139 { 2140 vsw_ctrl_task_t *ctaskp = NULL; 2141 vsw_port_t *port = ldcp->ldc_port; 2142 vsw_t *vswp = port->p_vswp; 2143 2144 D1(vswp, "%s: enter", __func__); 2145 2146 /* 2147 * We need to handle RDX ACK messages in-band as once they 2148 * are exchanged it is possible that we will get an 2149 * immediate (legitimate) data packet. 2150 */ 2151 if ((tagp->vio_subtype_env == VIO_RDX) && 2152 (tagp->vio_subtype == VIO_SUBTYPE_ACK)) { 2153 2154 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV)) 2155 return; 2156 2157 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV; 2158 D2(vswp, "%s (%ld) handling RDX_ACK in place " 2159 "(ostate 0x%llx : hphase %d)", __func__, 2160 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase); 2161 vsw_next_milestone(ldcp); 2162 return; 2163 } 2164 2165 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 2166 2167 if (ctaskp == NULL) { 2168 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__); 2169 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2170 return; 2171 } 2172 2173 ctaskp->ldcp = ldcp; 2174 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 2175 ctaskp->hss_id = ldcp->hss_id; 2176 2177 /* 2178 * Dispatch task to processing taskq if port is not in 2179 * the process of being detached. 2180 */ 2181 mutex_enter(&port->state_lock); 2182 if (port->state == VSW_PORT_INIT) { 2183 if ((vswp->taskq_p == NULL) || 2184 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt, 2185 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) { 2186 DERR(vswp, "%s: unable to dispatch task to taskq", 2187 __func__); 2188 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2189 mutex_exit(&port->state_lock); 2190 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2191 return; 2192 } 2193 } else { 2194 DWARN(vswp, "%s: port %d detaching, not dispatching " 2195 "task", __func__, port->p_instance); 2196 } 2197 2198 mutex_exit(&port->state_lock); 2199 2200 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 2201 ldcp->ldc_id); 2202 D1(vswp, "%s: exit", __func__); 2203 } 2204 2205 /* 2206 * Process a VIO ctrl message. Invoked from taskq. 
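 *
 * The task was queued by vsw_dispatch_ctrl_task(). Packets belonging
 * to an earlier handshake session (stale hss_id) are discarded, the
 * peer session id is validated, and the message is then handed to the
 * appropriate handler based on its vio_subtype_env. (Note that the
 * stale-packet early return below does not appear to free the task
 * structure.)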
2207 */ 2208 static void 2209 vsw_process_ctrl_pkt(void *arg) 2210 { 2211 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 2212 vsw_ldc_t *ldcp = ctaskp->ldcp; 2213 vsw_t *vswp = ldcp->ldc_vswp; 2214 vio_msg_tag_t tag; 2215 uint16_t env; 2216 2217 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2218 2219 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 2220 env = tag.vio_subtype_env; 2221 2222 /* stale pkt check */ 2223 if (ctaskp->hss_id < ldcp->hss_id) { 2224 DWARN(vswp, "%s: discarding stale packet belonging to earlier" 2225 " (%ld) handshake session", __func__, ctaskp->hss_id); 2226 return; 2227 } 2228 2229 /* session id check */ 2230 if (ldcp->session_status & VSW_PEER_SESSION) { 2231 if (ldcp->peer_session != tag.vio_sid) { 2232 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2233 __func__, ldcp->ldc_id, tag.vio_sid); 2234 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2235 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2236 return; 2237 } 2238 } 2239 2240 /* 2241 * Switch on vio_subtype envelope, then let lower routines 2242 * decide if its an INFO, ACK or NACK packet. 2243 */ 2244 switch (env) { 2245 case VIO_VER_INFO: 2246 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 2247 break; 2248 case VIO_DRING_REG: 2249 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 2250 break; 2251 case VIO_DRING_UNREG: 2252 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 2253 break; 2254 case VIO_ATTR_INFO: 2255 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 2256 break; 2257 case VNET_MCAST_INFO: 2258 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 2259 break; 2260 case VIO_RDX: 2261 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 2262 break; 2263 case VIO_DDS_INFO: 2264 vsw_process_dds_msg(vswp, ldcp, &ctaskp->pktp); 2265 break; 2266 default: 2267 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 2268 } 2269 2270 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2271 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2272 } 2273 2274 /* 2275 * Version negotiation. We can end up here either because our peer 2276 * has responded to a handshake message we have sent it, or our peer 2277 * has initiated a handshake with us. If its the former then can only 2278 * be ACK or NACK, if its the later can only be INFO. 2279 * 2280 * If its an ACK we move to the next stage of the handshake, namely 2281 * attribute exchange. If its a NACK we see if we can specify another 2282 * version, if we can't we stop. 2283 * 2284 * If it is an INFO we reset all params associated with communication 2285 * in that direction over this channel (remember connection is 2286 * essentially 2 independent simplex channels). 2287 */ 2288 void 2289 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 2290 { 2291 vio_ver_msg_t *ver_pkt; 2292 vsw_t *vswp = ldcp->ldc_vswp; 2293 2294 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2295 2296 /* 2297 * We know this is a ctrl/version packet so 2298 * cast it into the correct structure. 2299 */ 2300 ver_pkt = (vio_ver_msg_t *)pkt; 2301 2302 switch (ver_pkt->tag.vio_subtype) { 2303 case VIO_SUBTYPE_INFO: 2304 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 2305 2306 /* 2307 * Record the session id, which we will use from now 2308 * until we see another VER_INFO msg. Even then the 2309 * session id in most cases will be unchanged, execpt 2310 * if channel was reset. 
2311 */ 2312 if ((ldcp->session_status & VSW_PEER_SESSION) && 2313 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 2314 DERR(vswp, "%s: updating session id for chan %lld " 2315 "from %llx to %llx", __func__, ldcp->ldc_id, 2316 ldcp->peer_session, ver_pkt->tag.vio_sid); 2317 } 2318 2319 ldcp->peer_session = ver_pkt->tag.vio_sid; 2320 ldcp->session_status |= VSW_PEER_SESSION; 2321 2322 /* Legal message at this time ? */ 2323 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 2324 return; 2325 2326 /* 2327 * First check the device class. Currently only expect 2328 * to be talking to a network device. In the future may 2329 * also talk to another switch. 2330 */ 2331 if (ver_pkt->dev_class != VDEV_NETWORK) { 2332 DERR(vswp, "%s: illegal device class %d", __func__, 2333 ver_pkt->dev_class); 2334 2335 ver_pkt->tag.vio_sid = ldcp->local_session; 2336 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2337 2338 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2339 2340 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2341 sizeof (vio_ver_msg_t), B_TRUE); 2342 2343 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2344 vsw_next_milestone(ldcp); 2345 return; 2346 } else { 2347 ldcp->dev_class = ver_pkt->dev_class; 2348 } 2349 2350 /* 2351 * Now check the version. 2352 */ 2353 if (vsw_supported_version(ver_pkt) == 0) { 2354 /* 2355 * Support this major version and possibly 2356 * adjusted minor version. 2357 */ 2358 2359 D2(vswp, "%s: accepted ver %d:%d", __func__, 2360 ver_pkt->ver_major, ver_pkt->ver_minor); 2361 2362 /* Store accepted values */ 2363 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2364 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2365 2366 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2367 2368 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 2369 2370 if (vsw_obp_ver_proto_workaround == B_TRUE) { 2371 /* 2372 * Send a version info message 2373 * using the accepted version that 2374 * we are about to ack. Also note that 2375 * we send our ver info before we ack. 2376 * Otherwise, as soon as receiving the 2377 * ack, obp sends attr info msg, which 2378 * breaks vsw_check_flag() invoked 2379 * from vsw_process_ctrl_attr_pkt(); 2380 * as we also need VSW_VER_ACK_RECV to 2381 * be set in lane_out.lstate, before 2382 * we can receive attr info. 2383 */ 2384 vsw_send_ver(ldcp); 2385 } 2386 } else { 2387 /* 2388 * NACK back with the next lower major/minor 2389 * pairing we support (if don't suuport any more 2390 * versions then they will be set to zero. 
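			 * At this point vsw_supported_version() has
			 * already rewritten ver_pkt->ver_major/ver_minor
			 * to the next lower pairing we do support (or to
			 * zero), so we simply record those values and
			 * NACK with them.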
2391 */ 2392 2393 D2(vswp, "%s: replying with ver %d:%d", __func__, 2394 ver_pkt->ver_major, ver_pkt->ver_minor); 2395 2396 /* Store updated values */ 2397 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2398 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2399 2400 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2401 2402 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2403 } 2404 2405 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2406 ver_pkt->tag.vio_sid = ldcp->local_session; 2407 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2408 sizeof (vio_ver_msg_t), B_TRUE); 2409 2410 vsw_next_milestone(ldcp); 2411 break; 2412 2413 case VIO_SUBTYPE_ACK: 2414 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2415 2416 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2417 return; 2418 2419 /* Store updated values */ 2420 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2421 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2422 2423 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2424 vsw_next_milestone(ldcp); 2425 2426 break; 2427 2428 case VIO_SUBTYPE_NACK: 2429 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2430 2431 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2432 return; 2433 2434 /* 2435 * If our peer sent us a NACK with the ver fields set to 2436 * zero then there is nothing more we can do. Otherwise see 2437 * if we support either the version suggested, or a lesser 2438 * one. 2439 */ 2440 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2441 DERR(vswp, "%s: peer unable to negotiate any " 2442 "further.", __func__); 2443 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2444 vsw_next_milestone(ldcp); 2445 return; 2446 } 2447 2448 /* 2449 * Check to see if we support this major version or 2450 * a lower one. If we don't then maj/min will be set 2451 * to zero. 2452 */ 2453 (void) vsw_supported_version(ver_pkt); 2454 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2455 /* Nothing more we can do */ 2456 DERR(vswp, "%s: version negotiation failed.\n", 2457 __func__); 2458 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2459 vsw_next_milestone(ldcp); 2460 } else { 2461 /* found a supported major version */ 2462 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2463 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2464 2465 D2(vswp, "%s: resending with updated values (%x, %x)", 2466 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2467 2468 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2469 ver_pkt->tag.vio_sid = ldcp->local_session; 2470 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2471 2472 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2473 2474 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2475 sizeof (vio_ver_msg_t), B_TRUE); 2476 2477 vsw_next_milestone(ldcp); 2478 2479 } 2480 break; 2481 2482 default: 2483 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2484 ver_pkt->tag.vio_subtype); 2485 } 2486 2487 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2488 } 2489 2490 /* 2491 * Process an attribute packet. We can end up here either because our peer 2492 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2493 * peer has sent us an attribute INFO message 2494 * 2495 * If its an ACK we then move to the next stage of the handshake which 2496 * is to send our descriptor ring info to our peer. If its a NACK then 2497 * there is nothing more we can (currently) do. 2498 * 2499 * If we get a valid/acceptable INFO packet (and we have already negotiated 2500 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2501 * NACK back and reset channel state to INACTIV. 
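 *
 * Accepting an INFO involves more than just updating lane state: the
 * attributes are validated by vsw_check_attr(), the port's unicast
 * address is entered into the forwarding database (vsw_fdbe_add), the
 * port is added to its VLANs (vsw_vlan_add_ids), and a transmit
 * routine is chosen based on the negotiated xfer mode (descriptor
 * rings vs. in-band descriptors).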
2502 * 2503 * FUTURE: in time we will probably negotiate over attributes, but for 2504 * the moment unacceptable attributes are regarded as a fatal error. 2505 * 2506 */ 2507 void 2508 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 2509 { 2510 vnet_attr_msg_t *attr_pkt; 2511 vsw_t *vswp = ldcp->ldc_vswp; 2512 vsw_port_t *port = ldcp->ldc_port; 2513 uint64_t macaddr = 0; 2514 int i; 2515 2516 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2517 2518 /* 2519 * We know this is a ctrl/attr packet so 2520 * cast it into the correct structure. 2521 */ 2522 attr_pkt = (vnet_attr_msg_t *)pkt; 2523 2524 switch (attr_pkt->tag.vio_subtype) { 2525 case VIO_SUBTYPE_INFO: 2526 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2527 2528 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 2529 return; 2530 2531 /* 2532 * If the attributes are unacceptable then we NACK back. 2533 */ 2534 if (vsw_check_attr(attr_pkt, ldcp)) { 2535 2536 DERR(vswp, "%s (chan %d): invalid attributes", 2537 __func__, ldcp->ldc_id); 2538 2539 vsw_free_lane_resources(ldcp, INBOUND); 2540 2541 attr_pkt->tag.vio_sid = ldcp->local_session; 2542 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2543 2544 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2545 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 2546 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2547 sizeof (vnet_attr_msg_t), B_TRUE); 2548 2549 vsw_next_milestone(ldcp); 2550 return; 2551 } 2552 2553 /* 2554 * Otherwise store attributes for this lane and update 2555 * lane state. 2556 */ 2557 ldcp->lane_in.mtu = attr_pkt->mtu; 2558 ldcp->lane_in.addr = attr_pkt->addr; 2559 ldcp->lane_in.addr_type = attr_pkt->addr_type; 2560 ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode; 2561 ldcp->lane_in.ack_freq = attr_pkt->ack_freq; 2562 2563 macaddr = ldcp->lane_in.addr; 2564 for (i = ETHERADDRL - 1; i >= 0; i--) { 2565 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 2566 macaddr >>= 8; 2567 } 2568 2569 /* create the fdb entry for this port/mac address */ 2570 vsw_fdbe_add(vswp, port); 2571 2572 /* add the port to the specified vlans */ 2573 vsw_vlan_add_ids(port, VSW_VNETPORT); 2574 2575 /* setup device specifc xmit routines */ 2576 mutex_enter(&port->tx_lock); 2577 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2578 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) || 2579 (VSW_VER_LT(ldcp, 1, 2) && 2580 (ldcp->lane_in.xfer_mode == VIO_DRING_MODE_V1_0))) { 2581 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 2582 port->transmit = vsw_dringsend; 2583 } else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) { 2584 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 2585 vsw_create_privring(ldcp); 2586 port->transmit = vsw_descrsend; 2587 ldcp->lane_out.xfer_mode = VIO_DESC_MODE; 2588 } 2589 2590 /* 2591 * HybridIO is supported only vnet, not by OBP. 2592 * So, set hio_capable to true only when in DRING mode. 
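		 * Concretely, p_hio_capable is set only when the
		 * negotiated version is at least 1.3 and the peer is
		 * not using VIO_DESC_MODE (in-band descriptors, the
		 * mode used by OBP).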
2593 */ 2594 if (VSW_VER_GTEQ(ldcp, 1, 3) && 2595 (ldcp->lane_in.xfer_mode != VIO_DESC_MODE)) { 2596 (void) atomic_swap_32(&port->p_hio_capable, B_TRUE); 2597 } else { 2598 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 2599 } 2600 2601 mutex_exit(&port->tx_lock); 2602 2603 attr_pkt->tag.vio_sid = ldcp->local_session; 2604 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2605 2606 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2607 2608 ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT; 2609 2610 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2611 sizeof (vnet_attr_msg_t), B_TRUE); 2612 2613 vsw_next_milestone(ldcp); 2614 break; 2615 2616 case VIO_SUBTYPE_ACK: 2617 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2618 2619 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 2620 return; 2621 2622 ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV; 2623 vsw_next_milestone(ldcp); 2624 break; 2625 2626 case VIO_SUBTYPE_NACK: 2627 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2628 2629 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2630 return; 2631 2632 ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV; 2633 vsw_next_milestone(ldcp); 2634 break; 2635 2636 default: 2637 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2638 attr_pkt->tag.vio_subtype); 2639 } 2640 2641 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2642 } 2643 2644 /* 2645 * Process a dring info packet. We can end up here either because our peer 2646 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 2647 * peer has sent us a dring INFO message. 2648 * 2649 * If we get a valid/acceptable INFO packet (and we have already negotiated 2650 * a version) we ACK back and update the lane state, otherwise we NACK back. 2651 * 2652 * FUTURE: nothing to stop client from sending us info on multiple dring's 2653 * but for the moment we will just use the first one we are given. 2654 * 2655 */ 2656 void 2657 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 2658 { 2659 vio_dring_reg_msg_t *dring_pkt; 2660 vsw_t *vswp = ldcp->ldc_vswp; 2661 ldc_mem_info_t minfo; 2662 dring_info_t *dp, *dbp; 2663 int dring_found = 0; 2664 2665 /* 2666 * We know this is a ctrl/dring packet so 2667 * cast it into the correct structure. 2668 */ 2669 dring_pkt = (vio_dring_reg_msg_t *)pkt; 2670 2671 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2672 2673 switch (dring_pkt->tag.vio_subtype) { 2674 case VIO_SUBTYPE_INFO: 2675 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2676 2677 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 2678 return; 2679 2680 /* 2681 * If the dring params are unacceptable then we NACK back. 2682 */ 2683 if (vsw_check_dring_info(dring_pkt)) { 2684 2685 DERR(vswp, "%s (%lld): invalid dring info", 2686 __func__, ldcp->ldc_id); 2687 2688 vsw_free_lane_resources(ldcp, INBOUND); 2689 2690 dring_pkt->tag.vio_sid = ldcp->local_session; 2691 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2692 2693 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2694 2695 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2696 2697 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2698 sizeof (vio_dring_reg_msg_t), B_TRUE); 2699 2700 vsw_next_milestone(ldcp); 2701 return; 2702 } 2703 2704 /* 2705 * Otherwise, attempt to map in the dring using the 2706 * cookie. If that succeeds we send back a unique dring 2707 * identifier that the sending side will use in future 2708 * to refer to this descriptor ring. 
2709 */ 2710 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 2711 2712 dp->num_descriptors = dring_pkt->num_descriptors; 2713 dp->descriptor_size = dring_pkt->descriptor_size; 2714 dp->options = dring_pkt->options; 2715 dp->ncookies = dring_pkt->ncookies; 2716 2717 /* 2718 * Note: should only get one cookie. Enforced in 2719 * the ldc layer. 2720 */ 2721 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 2722 sizeof (ldc_mem_cookie_t)); 2723 2724 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 2725 dp->num_descriptors, dp->descriptor_size); 2726 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 2727 dp->options, dp->ncookies); 2728 2729 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 2730 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 2731 LDC_SHADOW_MAP, &(dp->handle))) != 0) { 2732 2733 DERR(vswp, "%s: dring_map failed\n", __func__); 2734 2735 kmem_free(dp, sizeof (dring_info_t)); 2736 vsw_free_lane_resources(ldcp, INBOUND); 2737 2738 dring_pkt->tag.vio_sid = ldcp->local_session; 2739 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2740 2741 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2742 2743 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2744 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2745 sizeof (vio_dring_reg_msg_t), B_TRUE); 2746 2747 vsw_next_milestone(ldcp); 2748 return; 2749 } 2750 2751 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 2752 2753 DERR(vswp, "%s: dring_addr failed\n", __func__); 2754 2755 kmem_free(dp, sizeof (dring_info_t)); 2756 vsw_free_lane_resources(ldcp, INBOUND); 2757 2758 dring_pkt->tag.vio_sid = ldcp->local_session; 2759 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2760 2761 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2762 2763 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2764 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2765 sizeof (vio_dring_reg_msg_t), B_TRUE); 2766 2767 vsw_next_milestone(ldcp); 2768 return; 2769 } else { 2770 /* store the address of the pub part of ring */ 2771 dp->pub_addr = minfo.vaddr; 2772 } 2773 2774 /* no private section as we are importing */ 2775 dp->priv_addr = NULL; 2776 2777 /* 2778 * Using simple mono increasing int for ident at 2779 * the moment. 2780 */ 2781 dp->ident = ldcp->next_ident; 2782 ldcp->next_ident++; 2783 2784 dp->end_idx = 0; 2785 dp->next = NULL; 2786 2787 /* 2788 * Link it onto the end of the list of drings 2789 * for this lane. 2790 */ 2791 if (ldcp->lane_in.dringp == NULL) { 2792 D2(vswp, "%s: adding first INBOUND dring", __func__); 2793 ldcp->lane_in.dringp = dp; 2794 } else { 2795 dbp = ldcp->lane_in.dringp; 2796 2797 while (dbp->next != NULL) 2798 dbp = dbp->next; 2799 2800 dbp->next = dp; 2801 } 2802 2803 /* acknowledge it */ 2804 dring_pkt->tag.vio_sid = ldcp->local_session; 2805 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2806 dring_pkt->dring_ident = dp->ident; 2807 2808 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2809 sizeof (vio_dring_reg_msg_t), B_TRUE); 2810 2811 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 2812 vsw_next_milestone(ldcp); 2813 break; 2814 2815 case VIO_SUBTYPE_ACK: 2816 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2817 2818 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 2819 return; 2820 2821 /* 2822 * Peer is acknowledging our dring info and will have 2823 * sent us a dring identifier which we will use to 2824 * refer to this ring w.r.t. our peer. 2825 */ 2826 dp = ldcp->lane_out.dringp; 2827 if (dp != NULL) { 2828 /* 2829 * Find the ring this ident should be associated 2830 * with. 
2831 */ 2832 if (vsw_dring_match(dp, dring_pkt)) { 2833 dring_found = 1; 2834 2835 } else while (dp != NULL) { 2836 if (vsw_dring_match(dp, dring_pkt)) { 2837 dring_found = 1; 2838 break; 2839 } 2840 dp = dp->next; 2841 } 2842 2843 if (dring_found == 0) { 2844 DERR(NULL, "%s: unrecognised ring cookie", 2845 __func__); 2846 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2847 return; 2848 } 2849 2850 } else { 2851 DERR(vswp, "%s: DRING ACK received but no drings " 2852 "allocated", __func__); 2853 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2854 return; 2855 } 2856 2857 /* store ident */ 2858 dp->ident = dring_pkt->dring_ident; 2859 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 2860 vsw_next_milestone(ldcp); 2861 break; 2862 2863 case VIO_SUBTYPE_NACK: 2864 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2865 2866 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 2867 return; 2868 2869 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 2870 vsw_next_milestone(ldcp); 2871 break; 2872 2873 default: 2874 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2875 dring_pkt->tag.vio_subtype); 2876 } 2877 2878 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2879 } 2880 2881 /* 2882 * Process a request from peer to unregister a dring. 2883 * 2884 * For the moment we just restart the handshake if our 2885 * peer endpoint attempts to unregister a dring. 2886 */ 2887 void 2888 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 2889 { 2890 vsw_t *vswp = ldcp->ldc_vswp; 2891 vio_dring_unreg_msg_t *dring_pkt; 2892 2893 /* 2894 * We know this is a ctrl/dring packet so 2895 * cast it into the correct structure. 2896 */ 2897 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 2898 2899 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2900 2901 switch (dring_pkt->tag.vio_subtype) { 2902 case VIO_SUBTYPE_INFO: 2903 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2904 2905 DWARN(vswp, "%s: restarting handshake..", __func__); 2906 break; 2907 2908 case VIO_SUBTYPE_ACK: 2909 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2910 2911 DWARN(vswp, "%s: restarting handshake..", __func__); 2912 break; 2913 2914 case VIO_SUBTYPE_NACK: 2915 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2916 2917 DWARN(vswp, "%s: restarting handshake..", __func__); 2918 break; 2919 2920 default: 2921 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2922 dring_pkt->tag.vio_subtype); 2923 } 2924 2925 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2926 2927 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2928 } 2929 2930 #define SND_MCST_NACK(ldcp, pkt) \ 2931 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 2932 pkt->tag.vio_sid = ldcp->local_session; \ 2933 (void) vsw_send_msg(ldcp, (void *)pkt, \ 2934 sizeof (vnet_mcast_msg_t), B_TRUE); 2935 2936 /* 2937 * Process a multicast request from a vnet. 2938 * 2939 * Vnet's specify a multicast address that they are interested in. This 2940 * address is used as a key into the hash table which forms the multicast 2941 * forwarding database (mFDB). 2942 * 2943 * The table keys are the multicast addresses, while the table entries 2944 * are pointers to lists of ports which wish to receive packets for the 2945 * specified multicast address. 2946 * 2947 * When a multicast packet is being switched we use the address as a key 2948 * into the hash table, and then walk the appropriate port list forwarding 2949 * the pkt to each port in turn. 
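 *
 * A minimal sketch of that switching-path lookup (the hash-table and
 * entry names here are illustrative only; the real walk lives in the
 * switching code):
 *
 *	mod_hash_val_t	val;
 *	mfdb_ent_t	*entp;
 *
 *	if (mod_hash_find(mfdb, (mod_hash_key_t)mcst_key, &val) == 0) {
 *		for (entp = (mfdb_ent_t *)val; entp != NULL;
 *		    entp = entp->nextp)
 *			(forward a copy of the pkt to entp's port)
 *	}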
2950 * 2951 * If a vnet is no longer interested in a particular multicast grouping 2952 * we simply find the correct location in the hash table and then delete 2953 * the relevant port from the port list. 2954 * 2955 * To deal with the case whereby a port is being deleted without first 2956 * removing itself from the lists in the hash table, we maintain a list 2957 * of multicast addresses the port has registered an interest in, within 2958 * the port structure itself. We then simply walk that list of addresses 2959 * using them as keys into the hash table and remove the port from the 2960 * appropriate lists. 2961 */ 2962 static void 2963 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 2964 { 2965 vnet_mcast_msg_t *mcst_pkt; 2966 vsw_port_t *port = ldcp->ldc_port; 2967 vsw_t *vswp = ldcp->ldc_vswp; 2968 int i; 2969 2970 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2971 2972 /* 2973 * We know this is a ctrl/mcast packet so 2974 * cast it into the correct structure. 2975 */ 2976 mcst_pkt = (vnet_mcast_msg_t *)pkt; 2977 2978 switch (mcst_pkt->tag.vio_subtype) { 2979 case VIO_SUBTYPE_INFO: 2980 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2981 2982 /* 2983 * Check if in correct state to receive a multicast 2984 * message (i.e. handshake complete). If not reset 2985 * the handshake. 2986 */ 2987 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 2988 return; 2989 2990 /* 2991 * Before attempting to add or remove address check 2992 * that they are valid multicast addresses. 2993 * If not, then NACK back. 2994 */ 2995 for (i = 0; i < mcst_pkt->count; i++) { 2996 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 2997 DERR(vswp, "%s: invalid multicast address", 2998 __func__); 2999 SND_MCST_NACK(ldcp, mcst_pkt); 3000 return; 3001 } 3002 } 3003 3004 /* 3005 * Now add/remove the addresses. If this fails we 3006 * NACK back. 3007 */ 3008 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3009 SND_MCST_NACK(ldcp, mcst_pkt); 3010 return; 3011 } 3012 3013 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3014 mcst_pkt->tag.vio_sid = ldcp->local_session; 3015 3016 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3017 3018 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 3019 sizeof (vnet_mcast_msg_t), B_TRUE); 3020 break; 3021 3022 case VIO_SUBTYPE_ACK: 3023 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3024 3025 /* 3026 * We shouldn't ever get a multicast ACK message as 3027 * at the moment we never request multicast addresses 3028 * to be set on some other device. This may change in 3029 * the future if we have cascading switches. 3030 */ 3031 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3032 return; 3033 3034 /* Do nothing */ 3035 break; 3036 3037 case VIO_SUBTYPE_NACK: 3038 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3039 3040 /* 3041 * We shouldn't get a multicast NACK packet for the 3042 * same reasons as we shouldn't get a ACK packet. 3043 */ 3044 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3045 return; 3046 3047 /* Do nothing */ 3048 break; 3049 3050 default: 3051 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3052 mcst_pkt->tag.vio_subtype); 3053 } 3054 3055 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3056 } 3057 3058 static void 3059 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 3060 { 3061 vio_rdx_msg_t *rdx_pkt; 3062 vsw_t *vswp = ldcp->ldc_vswp; 3063 3064 /* 3065 * We know this is a ctrl/rdx packet so 3066 * cast it into the correct structure. 
3067 */ 3068 rdx_pkt = (vio_rdx_msg_t *)pkt; 3069 3070 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3071 3072 switch (rdx_pkt->tag.vio_subtype) { 3073 case VIO_SUBTYPE_INFO: 3074 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3075 3076 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 3077 return; 3078 3079 rdx_pkt->tag.vio_sid = ldcp->local_session; 3080 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3081 3082 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3083 3084 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 3085 3086 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 3087 sizeof (vio_rdx_msg_t), B_TRUE); 3088 3089 vsw_next_milestone(ldcp); 3090 break; 3091 3092 case VIO_SUBTYPE_ACK: 3093 /* 3094 * Should be handled in-band by callback handler. 3095 */ 3096 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 3097 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3098 break; 3099 3100 case VIO_SUBTYPE_NACK: 3101 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3102 3103 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 3104 return; 3105 3106 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 3107 vsw_next_milestone(ldcp); 3108 break; 3109 3110 default: 3111 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3112 rdx_pkt->tag.vio_subtype); 3113 } 3114 3115 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3116 } 3117 3118 static void 3119 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp, 3120 uint32_t msglen) 3121 { 3122 uint16_t env = tagp->vio_subtype_env; 3123 vsw_t *vswp = ldcp->ldc_vswp; 3124 3125 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3126 3127 /* session id check */ 3128 if (ldcp->session_status & VSW_PEER_SESSION) { 3129 if (ldcp->peer_session != tagp->vio_sid) { 3130 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3131 __func__, ldcp->ldc_id, tagp->vio_sid); 3132 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3133 return; 3134 } 3135 } 3136 3137 /* 3138 * It is an error for us to be getting data packets 3139 * before the handshake has completed. 3140 */ 3141 if (ldcp->hphase != VSW_MILESTONE4) { 3142 DERR(vswp, "%s: got data packet before handshake complete " 3143 "hphase %d (%x: %x)", __func__, ldcp->hphase, 3144 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3145 DUMP_FLAGS(ldcp->lane_in.lstate); 3146 DUMP_FLAGS(ldcp->lane_out.lstate); 3147 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3148 return; 3149 } 3150 3151 /* 3152 * To reduce the locking contention, release the 3153 * ldc_cblock here and re-acquire it once we are done 3154 * receiving packets. 3155 */ 3156 mutex_exit(&ldcp->ldc_cblock); 3157 mutex_enter(&ldcp->ldc_rxlock); 3158 3159 /* 3160 * Switch on vio_subtype envelope, then let lower routines 3161 * decide if its an INFO, ACK or NACK packet. 
3162 */ 3163 if (env == VIO_DRING_DATA) { 3164 vsw_process_data_dring_pkt(ldcp, dpkt); 3165 } else if (env == VIO_PKT_DATA) { 3166 ldcp->rx_pktdata(ldcp, dpkt, msglen); 3167 } else if (env == VIO_DESC_DATA) { 3168 vsw_process_data_ibnd_pkt(ldcp, dpkt); 3169 } else { 3170 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 3171 } 3172 3173 mutex_exit(&ldcp->ldc_rxlock); 3174 mutex_enter(&ldcp->ldc_cblock); 3175 3176 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3177 } 3178 3179 #define SND_DRING_NACK(ldcp, pkt) \ 3180 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3181 pkt->tag.vio_sid = ldcp->local_session; \ 3182 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3183 sizeof (vio_dring_msg_t), B_TRUE); 3184 3185 static void 3186 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 3187 { 3188 vio_dring_msg_t *dring_pkt; 3189 vnet_public_desc_t *pub_addr = NULL; 3190 vsw_private_desc_t *priv_addr = NULL; 3191 dring_info_t *dp = NULL; 3192 vsw_t *vswp = ldcp->ldc_vswp; 3193 mblk_t *mp = NULL; 3194 mblk_t *bp = NULL; 3195 mblk_t *bpt = NULL; 3196 size_t nbytes = 0; 3197 uint64_t ncookies = 0; 3198 uint64_t chain = 0; 3199 uint64_t len; 3200 uint32_t pos, start, datalen; 3201 uint32_t range_start, range_end; 3202 int32_t end, num, cnt = 0; 3203 int i, rv, msg_rv = 0; 3204 boolean_t ack_needed = B_FALSE; 3205 boolean_t prev_desc_ack = B_FALSE; 3206 int read_attempts = 0; 3207 struct ether_header *ehp; 3208 3209 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3210 3211 /* 3212 * We know this is a data/dring packet so 3213 * cast it into the correct structure. 3214 */ 3215 dring_pkt = (vio_dring_msg_t *)dpkt; 3216 3217 /* 3218 * Switch on the vio_subtype. If its INFO then we need to 3219 * process the data. If its an ACK we need to make sure 3220 * it makes sense (i.e did we send an earlier data/info), 3221 * and if its a NACK then we maybe attempt a retry. 3222 */ 3223 switch (dring_pkt->tag.vio_subtype) { 3224 case VIO_SUBTYPE_INFO: 3225 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 3226 3227 READ_ENTER(&ldcp->lane_in.dlistrw); 3228 if ((dp = vsw_ident2dring(&ldcp->lane_in, 3229 dring_pkt->dring_ident)) == NULL) { 3230 RW_EXIT(&ldcp->lane_in.dlistrw); 3231 3232 DERR(vswp, "%s(%lld): unable to find dring from " 3233 "ident 0x%llx", __func__, ldcp->ldc_id, 3234 dring_pkt->dring_ident); 3235 3236 SND_DRING_NACK(ldcp, dring_pkt); 3237 return; 3238 } 3239 3240 start = pos = dring_pkt->start_idx; 3241 end = dring_pkt->end_idx; 3242 len = dp->num_descriptors; 3243 3244 range_start = range_end = pos; 3245 3246 D2(vswp, "%s(%lld): start index %ld : end %ld\n", 3247 __func__, ldcp->ldc_id, start, end); 3248 3249 if (end == -1) { 3250 num = -1; 3251 } else if (end >= 0) { 3252 num = end >= pos ? 
end - pos + 1: (len - pos + 1) + end; 3253 3254 /* basic sanity check */ 3255 if (end > len) { 3256 RW_EXIT(&ldcp->lane_in.dlistrw); 3257 DERR(vswp, "%s(%lld): endpoint %lld outside " 3258 "ring length %lld", __func__, 3259 ldcp->ldc_id, end, len); 3260 3261 SND_DRING_NACK(ldcp, dring_pkt); 3262 return; 3263 } 3264 } else { 3265 RW_EXIT(&ldcp->lane_in.dlistrw); 3266 DERR(vswp, "%s(%lld): invalid endpoint %lld", 3267 __func__, ldcp->ldc_id, end); 3268 SND_DRING_NACK(ldcp, dring_pkt); 3269 return; 3270 } 3271 3272 while (cnt != num) { 3273 vsw_recheck_desc: 3274 if ((rv = ldc_mem_dring_acquire(dp->handle, 3275 pos, pos)) != 0) { 3276 RW_EXIT(&ldcp->lane_in.dlistrw); 3277 DERR(vswp, "%s(%lld): unable to acquire " 3278 "descriptor at pos %d: err %d", 3279 __func__, pos, ldcp->ldc_id, rv); 3280 SND_DRING_NACK(ldcp, dring_pkt); 3281 ldcp->ldc_stats.ierrors++; 3282 return; 3283 } 3284 3285 pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos; 3286 3287 /* 3288 * When given a bounded range of descriptors 3289 * to process, its an error to hit a descriptor 3290 * which is not ready. In the non-bounded case 3291 * (end_idx == -1) this simply indicates we have 3292 * reached the end of the current active range. 3293 */ 3294 if (pub_addr->hdr.dstate != VIO_DESC_READY) { 3295 /* unbound - no error */ 3296 if (end == -1) { 3297 if (read_attempts == vsw_read_attempts) 3298 break; 3299 3300 delay(drv_usectohz(vsw_desc_delay)); 3301 read_attempts++; 3302 goto vsw_recheck_desc; 3303 } 3304 3305 /* bounded - error - so NACK back */ 3306 RW_EXIT(&ldcp->lane_in.dlistrw); 3307 DERR(vswp, "%s(%lld): descriptor not READY " 3308 "(%d)", __func__, ldcp->ldc_id, 3309 pub_addr->hdr.dstate); 3310 SND_DRING_NACK(ldcp, dring_pkt); 3311 return; 3312 } 3313 3314 DTRACE_PROBE1(read_attempts, int, read_attempts); 3315 3316 range_end = pos; 3317 3318 /* 3319 * If we ACK'd the previous descriptor then now 3320 * record the new range start position for later 3321 * ACK's. 3322 */ 3323 if (prev_desc_ack) { 3324 range_start = pos; 3325 3326 D2(vswp, "%s(%lld): updating range start to be " 3327 "%d", __func__, ldcp->ldc_id, range_start); 3328 3329 prev_desc_ack = B_FALSE; 3330 } 3331 3332 /* 3333 * Data is padded to align on 8 byte boundary, 3334 * datalen is actual data length, i.e. minus that 3335 * padding. 3336 */ 3337 datalen = pub_addr->nbytes; 3338 3339 /* 3340 * Does peer wish us to ACK when we have finished 3341 * with this descriptor ? 3342 */ 3343 if (pub_addr->hdr.ack) 3344 ack_needed = B_TRUE; 3345 3346 D2(vswp, "%s(%lld): processing desc %lld at pos" 3347 " 0x%llx : dstate 0x%lx : datalen 0x%lx", 3348 __func__, ldcp->ldc_id, pos, pub_addr, 3349 pub_addr->hdr.dstate, datalen); 3350 3351 /* 3352 * Mark that we are starting to process descriptor. 3353 */ 3354 pub_addr->hdr.dstate = VIO_DESC_ACCEPTED; 3355 3356 /* 3357 * Ensure that we ask ldc for an aligned 3358 * number of bytes. 3359 */ 3360 nbytes = (datalen + VNET_IPALIGN + 7) & ~7; 3361 3362 mp = vio_multipool_allocb(&ldcp->vmp, nbytes); 3363 if (mp == NULL) { 3364 ldcp->ldc_stats.rx_vio_allocb_fail++; 3365 /* 3366 * No free receive buffers available, so 3367 * fallback onto allocb(9F). Make sure that 3368 * we get a data buffer which is a multiple 3369 * of 8 as this is required by ldc_mem_copy. 
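				 * For example (assuming VNET_IPALIGN is
				 * 6): a 1500 byte frame needs
				 * 1500 + 6 = 1506 bytes, and
				 * (1506 + 7) & ~7 rounds that up to 1512.
				 * The allocb() of
				 * datalen + VNET_IPALIGN + 8 below always
				 * leaves at least that much room.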
3370 */ 3371 DTRACE_PROBE(allocb); 3372 if ((mp = allocb(datalen + VNET_IPALIGN + 8, 3373 BPRI_MED)) == NULL) { 3374 DERR(vswp, "%s(%ld): allocb failed", 3375 __func__, ldcp->ldc_id); 3376 pub_addr->hdr.dstate = VIO_DESC_DONE; 3377 (void) ldc_mem_dring_release(dp->handle, 3378 pos, pos); 3379 ldcp->ldc_stats.ierrors++; 3380 ldcp->ldc_stats.rx_allocb_fail++; 3381 break; 3382 } 3383 } 3384 3385 ncookies = pub_addr->ncookies; 3386 rv = ldc_mem_copy(ldcp->ldc_handle, 3387 (caddr_t)mp->b_rptr, 0, &nbytes, 3388 pub_addr->memcookie, ncookies, LDC_COPY_IN); 3389 3390 if (rv != 0) { 3391 DERR(vswp, "%s(%d): unable to copy in data " 3392 "from %d cookies in desc %d (rv %d)", 3393 __func__, ldcp->ldc_id, ncookies, pos, rv); 3394 freemsg(mp); 3395 3396 pub_addr->hdr.dstate = VIO_DESC_DONE; 3397 (void) ldc_mem_dring_release(dp->handle, 3398 pos, pos); 3399 ldcp->ldc_stats.ierrors++; 3400 break; 3401 } else { 3402 D2(vswp, "%s(%d): copied in %ld bytes" 3403 " using %d cookies", __func__, 3404 ldcp->ldc_id, nbytes, ncookies); 3405 } 3406 3407 /* adjust the read pointer to skip over the padding */ 3408 mp->b_rptr += VNET_IPALIGN; 3409 3410 /* point to the actual end of data */ 3411 mp->b_wptr = mp->b_rptr + datalen; 3412 3413 /* update statistics */ 3414 ehp = (struct ether_header *)mp->b_rptr; 3415 if (IS_BROADCAST(ehp)) 3416 ldcp->ldc_stats.brdcstrcv++; 3417 else if (IS_MULTICAST(ehp)) 3418 ldcp->ldc_stats.multircv++; 3419 3420 ldcp->ldc_stats.ipackets++; 3421 ldcp->ldc_stats.rbytes += datalen; 3422 3423 /* 3424 * IPALIGN space can be used for VLAN_TAG 3425 */ 3426 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, 3427 VSW_VNETPORT, mp); 3428 3429 /* build a chain of received packets */ 3430 if (bp == NULL) { 3431 /* first pkt */ 3432 bp = mp; 3433 bp->b_next = bp->b_prev = NULL; 3434 bpt = bp; 3435 chain = 1; 3436 } else { 3437 mp->b_next = mp->b_prev = NULL; 3438 bpt->b_next = mp; 3439 bpt = mp; 3440 chain++; 3441 } 3442 3443 /* mark we are finished with this descriptor */ 3444 pub_addr->hdr.dstate = VIO_DESC_DONE; 3445 3446 (void) ldc_mem_dring_release(dp->handle, pos, pos); 3447 3448 /* 3449 * Send an ACK back to peer if requested. 3450 */ 3451 if (ack_needed) { 3452 ack_needed = B_FALSE; 3453 3454 dring_pkt->start_idx = range_start; 3455 dring_pkt->end_idx = range_end; 3456 3457 DERR(vswp, "%s(%lld): processed %d %d, ACK" 3458 " requested", __func__, ldcp->ldc_id, 3459 dring_pkt->start_idx, dring_pkt->end_idx); 3460 3461 dring_pkt->dring_process_state = VIO_DP_ACTIVE; 3462 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3463 dring_pkt->tag.vio_sid = ldcp->local_session; 3464 3465 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3466 sizeof (vio_dring_msg_t), B_FALSE); 3467 3468 /* 3469 * Check if ACK was successfully sent. If not 3470 * we break and deal with that below. 3471 */ 3472 if (msg_rv != 0) 3473 break; 3474 3475 prev_desc_ack = B_TRUE; 3476 range_start = pos; 3477 } 3478 3479 /* next descriptor */ 3480 pos = (pos + 1) % len; 3481 cnt++; 3482 3483 /* 3484 * Break out of loop here and stop processing to 3485 * allow some other network device (or disk) to 3486 * get access to the cpu. 3487 */ 3488 if (chain > vsw_chain_len) { 3489 D3(vswp, "%s(%lld): switching chain of %d " 3490 "msgs", __func__, ldcp->ldc_id, chain); 3491 break; 3492 } 3493 } 3494 RW_EXIT(&ldcp->lane_in.dlistrw); 3495 3496 /* 3497 * If when we attempted to send the ACK we found that the 3498 * channel had been reset then now handle this. 
We deal with 3499 * it here as we cannot reset the channel while holding the 3500 * dlistrw lock, and we don't want to acquire/release it 3501 * continuously in the above loop, as a channel reset should 3502 * be a rare event. 3503 */ 3504 if (msg_rv == ECONNRESET) { 3505 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3506 break; 3507 } 3508 3509 /* send the chain of packets to be switched */ 3510 if (bp != NULL) { 3511 DTRACE_PROBE1(vsw_rcv_msgs, int, chain); 3512 D3(vswp, "%s(%lld): switching chain of %d msgs", 3513 __func__, ldcp->ldc_id, chain); 3514 vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT, 3515 ldcp->ldc_port, NULL); 3516 } 3517 3518 DTRACE_PROBE1(msg_cnt, int, cnt); 3519 3520 /* 3521 * We are now finished so ACK back with the state 3522 * set to STOPPING so our peer knows we are finished 3523 */ 3524 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3525 dring_pkt->tag.vio_sid = ldcp->local_session; 3526 3527 dring_pkt->dring_process_state = VIO_DP_STOPPED; 3528 3529 DTRACE_PROBE(stop_process_sent); 3530 3531 /* 3532 * We have not processed any more descriptors beyond 3533 * the last one we ACK'd. 3534 */ 3535 if (prev_desc_ack) 3536 range_start = range_end; 3537 3538 dring_pkt->start_idx = range_start; 3539 dring_pkt->end_idx = range_end; 3540 3541 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 3542 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3543 dring_pkt->end_idx); 3544 3545 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3546 sizeof (vio_dring_msg_t), B_TRUE); 3547 break; 3548 3549 case VIO_SUBTYPE_ACK: 3550 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 3551 /* 3552 * Verify that the relevant descriptors are all 3553 * marked as DONE 3554 */ 3555 READ_ENTER(&ldcp->lane_out.dlistrw); 3556 if ((dp = vsw_ident2dring(&ldcp->lane_out, 3557 dring_pkt->dring_ident)) == NULL) { 3558 RW_EXIT(&ldcp->lane_out.dlistrw); 3559 DERR(vswp, "%s: unknown ident in ACK", __func__); 3560 return; 3561 } 3562 3563 start = end = 0; 3564 start = dring_pkt->start_idx; 3565 end = dring_pkt->end_idx; 3566 len = dp->num_descriptors; 3567 3568 3569 mutex_enter(&dp->dlock); 3570 dp->last_ack_recv = end; 3571 ldcp->ldc_stats.dring_data_acks++; 3572 mutex_exit(&dp->dlock); 3573 3574 (void) vsw_reclaim_dring(dp, start); 3575 3576 /* 3577 * If our peer is stopping processing descriptors then 3578 * we check to make sure it has processed all the descriptors 3579 * we have updated. If not then we send it a new message 3580 * to prompt it to restart. 3581 */ 3582 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) { 3583 DTRACE_PROBE(stop_process_recv); 3584 D2(vswp, "%s(%lld): got stopping msg : %d : %d", 3585 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3586 dring_pkt->end_idx); 3587 3588 /* 3589 * Check next descriptor in public section of ring. 3590 * If its marked as READY then we need to prompt our 3591 * peer to start processing the ring again. 3592 */ 3593 i = (end + 1) % len; 3594 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 3595 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3596 3597 /* 3598 * Hold the restart lock across all of this to 3599 * make sure that its not possible for us to 3600 * decide that a msg needs to be sent in the future 3601 * but the sending code having already checked is 3602 * about to exit. 
3603 */ 3604 mutex_enter(&dp->restart_lock); 3605 ldcp->ldc_stats.dring_stopped_acks++; 3606 mutex_enter(&priv_addr->dstate_lock); 3607 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 3608 3609 mutex_exit(&priv_addr->dstate_lock); 3610 3611 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3612 dring_pkt->tag.vio_sid = ldcp->local_session; 3613 3614 dring_pkt->start_idx = (end + 1) % len; 3615 dring_pkt->end_idx = -1; 3616 3617 D2(vswp, "%s(%lld) : sending restart msg:" 3618 " %d : %d", __func__, ldcp->ldc_id, 3619 dring_pkt->start_idx, dring_pkt->end_idx); 3620 3621 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3622 sizeof (vio_dring_msg_t), B_FALSE); 3623 ldcp->ldc_stats.dring_data_msgs++; 3624 3625 } else { 3626 mutex_exit(&priv_addr->dstate_lock); 3627 dp->restart_reqd = B_TRUE; 3628 } 3629 mutex_exit(&dp->restart_lock); 3630 } 3631 RW_EXIT(&ldcp->lane_out.dlistrw); 3632 3633 /* only do channel reset after dropping dlistrw lock */ 3634 if (msg_rv == ECONNRESET) 3635 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3636 3637 break; 3638 3639 case VIO_SUBTYPE_NACK: 3640 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 3641 __func__, ldcp->ldc_id); 3642 /* 3643 * Something is badly wrong if we are getting NACK's 3644 * for our data pkts. So reset the channel. 3645 */ 3646 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3647 3648 break; 3649 3650 default: 3651 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3652 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 3653 } 3654 3655 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3656 } 3657 3658 /* 3659 * dummy pkt data handler function for vnet protocol version 1.0 3660 */ 3661 static void 3662 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 3663 { 3664 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 3665 } 3666 3667 /* 3668 * This function handles raw pkt data messages received over the channel. 3669 * Currently, only priority-eth-type frames are received through this mechanism. 3670 * In this case, the frame(data) is present within the message itself which 3671 * is copied into an mblk before switching it. 3672 */ 3673 static void 3674 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 3675 { 3676 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 3677 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 3678 uint32_t size; 3679 mblk_t *mp; 3680 vsw_t *vswp = ldcp->ldc_vswp; 3681 vgen_stats_t *statsp = &ldcp->ldc_stats; 3682 lane_t *lp = &ldcp->lane_out; 3683 3684 size = msglen - VIO_PKT_DATA_HDRSIZE; 3685 if (size < ETHERMIN || size > lp->mtu) { 3686 (void) atomic_inc_32(&statsp->rx_pri_fail); 3687 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3688 ldcp->ldc_id, size); 3689 return; 3690 } 3691 3692 mp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 3693 if (mp == NULL) { 3694 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 3695 if (mp == NULL) { 3696 (void) atomic_inc_32(&statsp->rx_pri_fail); 3697 DWARN(vswp, "%s(%lld) allocb failure, " 3698 "unable to process priority frame\n", __func__, 3699 ldcp->ldc_id); 3700 return; 3701 } 3702 } 3703 3704 /* skip over the extra space for vlan tag */ 3705 mp->b_rptr += VLAN_TAGSZ; 3706 3707 /* copy the frame from the payload of raw data msg into the mblk */ 3708 bcopy(dpkt->data, mp->b_rptr, size); 3709 mp->b_wptr = mp->b_rptr + size; 3710 3711 /* update stats */ 3712 (void) atomic_inc_64(&statsp->rx_pri_packets); 3713 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 3714 3715 /* 3716 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 
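	 * The mblk was allocated with VLAN_TAGSZ (4 bytes) of headroom
	 * and b_rptr advanced past it above, so the pretag routine can
	 * prepend a VLAN tag in place without reallocating; untagged
	 * traffic simply leaves the headroom unused.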
3717 */ 3718 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3719 3720 /* switch the frame to destination */ 3721 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 3722 } 3723 3724 /* 3725 * Process an in-band descriptor message (most likely from 3726 * OBP). 3727 */ 3728 static void 3729 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3730 { 3731 vnet_ibnd_desc_t *ibnd_desc; 3732 dring_info_t *dp = NULL; 3733 vsw_private_desc_t *priv_addr = NULL; 3734 vsw_t *vswp = ldcp->ldc_vswp; 3735 mblk_t *mp = NULL; 3736 size_t nbytes = 0; 3737 size_t off = 0; 3738 uint64_t idx = 0; 3739 uint32_t num = 1, len, datalen = 0; 3740 uint64_t ncookies = 0; 3741 int i, rv; 3742 int j = 0; 3743 3744 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3745 3746 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3747 3748 switch (ibnd_desc->hdr.tag.vio_subtype) { 3749 case VIO_SUBTYPE_INFO: 3750 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3751 3752 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3753 return; 3754 3755 /* 3756 * Data is padded to align on a 8 byte boundary, 3757 * nbytes is actual data length, i.e. minus that 3758 * padding. 3759 */ 3760 datalen = ibnd_desc->nbytes; 3761 3762 D2(vswp, "%s(%lld): processing inband desc : " 3763 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 3764 3765 ncookies = ibnd_desc->ncookies; 3766 3767 /* 3768 * allocb(9F) returns an aligned data block. We 3769 * need to ensure that we ask ldc for an aligned 3770 * number of bytes also. 3771 */ 3772 nbytes = datalen; 3773 if (nbytes & 0x7) { 3774 off = 8 - (nbytes & 0x7); 3775 nbytes += off; 3776 } 3777 3778 /* alloc extra space for VLAN_TAG */ 3779 mp = allocb(datalen + 8, BPRI_MED); 3780 if (mp == NULL) { 3781 DERR(vswp, "%s(%lld): allocb failed", 3782 __func__, ldcp->ldc_id); 3783 ldcp->ldc_stats.rx_allocb_fail++; 3784 return; 3785 } 3786 3787 /* skip over the extra space for VLAN_TAG */ 3788 mp->b_rptr += 8; 3789 3790 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 3791 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 3792 LDC_COPY_IN); 3793 3794 if (rv != 0) { 3795 DERR(vswp, "%s(%d): unable to copy in data from " 3796 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 3797 freemsg(mp); 3798 ldcp->ldc_stats.ierrors++; 3799 return; 3800 } 3801 3802 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 3803 __func__, ldcp->ldc_id, nbytes, ncookies); 3804 3805 /* point to the actual end of data */ 3806 mp->b_wptr = mp->b_rptr + datalen; 3807 ldcp->ldc_stats.ipackets++; 3808 ldcp->ldc_stats.rbytes += datalen; 3809 3810 /* 3811 * We ACK back every in-band descriptor message we process 3812 */ 3813 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 3814 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 3815 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 3816 sizeof (vnet_ibnd_desc_t), B_TRUE); 3817 3818 /* 3819 * there is extra space alloc'd for VLAN_TAG 3820 */ 3821 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3822 3823 /* send the packet to be switched */ 3824 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 3825 ldcp->ldc_port, NULL); 3826 3827 break; 3828 3829 case VIO_SUBTYPE_ACK: 3830 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3831 3832 /* Verify the ACK is valid */ 3833 idx = ibnd_desc->hdr.desc_handle; 3834 3835 if (idx >= vsw_ntxds) { 3836 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 3837 "(idx %ld)", vswp->instance, idx); 3838 return; 3839 } 3840 3841 if ((dp = ldcp->lane_out.dringp) == NULL) { 3842 DERR(vswp, "%s: no dring found", 
__func__); 3843 return; 3844 } 3845 3846 len = dp->num_descriptors; 3847 /* 3848 * If the descriptor we are being ACK'ed for is not the 3849 * one we expected, then pkts were lost somwhere, either 3850 * when we tried to send a msg, or a previous ACK msg from 3851 * our peer. In either case we now reclaim the descriptors 3852 * in the range from the last ACK we received up to the 3853 * current ACK. 3854 */ 3855 if (idx != dp->last_ack_recv) { 3856 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 3857 __func__, dp->last_ack_recv, idx); 3858 num = idx >= dp->last_ack_recv ? 3859 idx - dp->last_ack_recv + 1: 3860 (len - dp->last_ack_recv + 1) + idx; 3861 } 3862 3863 /* 3864 * When we sent the in-band message to our peer we 3865 * marked the copy in our private ring as READY. We now 3866 * check that the descriptor we are being ACK'ed for is in 3867 * fact READY, i.e. it is one we have shared with our peer. 3868 * 3869 * If its not we flag an error, but still reset the descr 3870 * back to FREE. 3871 */ 3872 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 3873 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3874 mutex_enter(&priv_addr->dstate_lock); 3875 if (priv_addr->dstate != VIO_DESC_READY) { 3876 DERR(vswp, "%s: (%ld) desc at index %ld not " 3877 "READY (0x%lx)", __func__, 3878 ldcp->ldc_id, idx, priv_addr->dstate); 3879 DERR(vswp, "%s: bound %d: ncookies %ld : " 3880 "datalen %ld", __func__, 3881 priv_addr->bound, priv_addr->ncookies, 3882 priv_addr->datalen); 3883 } 3884 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 3885 ldcp->ldc_id, idx); 3886 /* release resources associated with sent msg */ 3887 priv_addr->datalen = 0; 3888 priv_addr->dstate = VIO_DESC_FREE; 3889 mutex_exit(&priv_addr->dstate_lock); 3890 } 3891 /* update to next expected value */ 3892 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 3893 3894 break; 3895 3896 case VIO_SUBTYPE_NACK: 3897 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3898 3899 /* 3900 * We should only get a NACK if our peer doesn't like 3901 * something about a message we have sent it. If this 3902 * happens we just release the resources associated with 3903 * the message. (We are relying on higher layers to decide 3904 * whether or not to resend. 3905 */ 3906 3907 /* limit check */ 3908 idx = ibnd_desc->hdr.desc_handle; 3909 3910 if (idx >= vsw_ntxds) { 3911 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 3912 __func__, idx); 3913 return; 3914 } 3915 3916 if ((dp = ldcp->lane_out.dringp) == NULL) { 3917 DERR(vswp, "%s: no dring found", __func__); 3918 return; 3919 } 3920 3921 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 3922 3923 /* move to correct location in ring */ 3924 priv_addr += idx; 3925 3926 /* release resources associated with sent msg */ 3927 mutex_enter(&priv_addr->dstate_lock); 3928 priv_addr->datalen = 0; 3929 priv_addr->dstate = VIO_DESC_FREE; 3930 mutex_exit(&priv_addr->dstate_lock); 3931 3932 break; 3933 3934 default: 3935 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3936 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 3937 } 3938 3939 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3940 } 3941 3942 static void 3943 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp) 3944 { 3945 _NOTE(ARGUNUSED(epkt)) 3946 3947 vsw_t *vswp = ldcp->ldc_vswp; 3948 uint16_t env = tagp->vio_subtype_env; 3949 3950 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 3951 3952 /* 3953 * Error vio_subtypes have yet to be defined. So for 3954 * the moment we can't do anything. 
3955 */ 3956 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 3957 3958 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 3959 } 3960 3961 /* transmit the packet over the given port */ 3962 int 3963 vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count) 3964 { 3965 vsw_ldc_list_t *ldcl = &port->p_ldclist; 3966 vsw_ldc_t *ldcp; 3967 int status = 0; 3968 uint32_t n; 3969 3970 READ_ENTER(&ldcl->lockrw); 3971 /* 3972 * Note for now, we have a single channel. 3973 */ 3974 ldcp = ldcl->head; 3975 if (ldcp == NULL) { 3976 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 3977 freemsgchain(mp); 3978 RW_EXIT(&ldcl->lockrw); 3979 return (1); 3980 } 3981 3982 n = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt); 3983 3984 count -= n; 3985 if (count == 0) { 3986 goto vsw_portsend_exit; 3987 } 3988 3989 status = ldcp->tx(ldcp, mp, mpt, count); 3990 3991 vsw_portsend_exit: 3992 RW_EXIT(&ldcl->lockrw); 3993 3994 return (status); 3995 } 3996 3997 /* 3998 * Break up frames into 2 seperate chains: normal and 3999 * priority, based on the frame type. The number of 4000 * priority frames is also counted and returned. 4001 * 4002 * Params: 4003 * vswp: pointer to the instance of vsw 4004 * np: head of packet chain to be broken 4005 * npt: tail of packet chain to be broken 4006 * 4007 * Returns: 4008 * np: head of normal data packets 4009 * npt: tail of normal data packets 4010 * hp: head of high priority packets 4011 * hpt: tail of high priority packets 4012 */ 4013 static uint32_t 4014 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt, 4015 mblk_t **hp, mblk_t **hpt) 4016 { 4017 mblk_t *tmp = NULL; 4018 mblk_t *smp = NULL; 4019 mblk_t *hmp = NULL; /* high prio pkts head */ 4020 mblk_t *hmpt = NULL; /* high prio pkts tail */ 4021 mblk_t *nmp = NULL; /* normal pkts head */ 4022 mblk_t *nmpt = NULL; /* normal pkts tail */ 4023 uint32_t count = 0; 4024 int i; 4025 struct ether_header *ehp; 4026 uint32_t num_types; 4027 uint16_t *types; 4028 4029 tmp = *np; 4030 while (tmp != NULL) { 4031 4032 smp = tmp; 4033 tmp = tmp->b_next; 4034 smp->b_next = NULL; 4035 smp->b_prev = NULL; 4036 4037 ehp = (struct ether_header *)smp->b_rptr; 4038 num_types = vswp->pri_num_types; 4039 types = vswp->pri_types; 4040 for (i = 0; i < num_types; i++) { 4041 if (ehp->ether_type == types[i]) { 4042 /* high priority frame */ 4043 4044 if (hmp != NULL) { 4045 hmpt->b_next = smp; 4046 hmpt = smp; 4047 } else { 4048 hmp = hmpt = smp; 4049 } 4050 count++; 4051 break; 4052 } 4053 } 4054 if (i == num_types) { 4055 /* normal data frame */ 4056 4057 if (nmp != NULL) { 4058 nmpt->b_next = smp; 4059 nmpt = smp; 4060 } else { 4061 nmp = nmpt = smp; 4062 } 4063 } 4064 } 4065 4066 *hp = hmp; 4067 *hpt = hmpt; 4068 *np = nmp; 4069 *npt = nmpt; 4070 4071 return (count); 4072 } 4073 4074 /* 4075 * Wrapper function to transmit normal and/or priority frames over the channel. 
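* Priority frames (those whose ethertype matches one of the configured priority types) are peeled off the chain and sent immediately as raw VIO_PKT_DATA messages via vsw_ldcsend_pkt(); any remaining ordinary frames are handed to vsw_ldctx() for the normal descriptor ring path.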
4076 */ 4077 static int 4078 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4079 { 4080 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4081 mblk_t *tmp; 4082 mblk_t *smp; 4083 mblk_t *hmp; /* high prio pkts head */ 4084 mblk_t *hmpt; /* high prio pkts tail */ 4085 mblk_t *nmp; /* normal pkts head */ 4086 mblk_t *nmpt; /* normal pkts tail */ 4087 uint32_t n = 0; 4088 vsw_t *vswp = ldcp->ldc_vswp; 4089 4090 ASSERT(VSW_PRI_ETH_DEFINED(vswp)); 4091 ASSERT(count != 0); 4092 4093 nmp = mp; 4094 nmpt = mpt; 4095 4096 /* gather any priority frames from the chain of packets */ 4097 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt); 4098 4099 /* transmit priority frames */ 4100 tmp = hmp; 4101 while (tmp != NULL) { 4102 smp = tmp; 4103 tmp = tmp->b_next; 4104 smp->b_next = NULL; 4105 vsw_ldcsend_pkt(ldcp, smp); 4106 } 4107 4108 count -= n; 4109 4110 if (count == 0) { 4111 /* no normal data frames to process */ 4112 return (0); 4113 } 4114 4115 return (vsw_ldctx(ldcp, nmp, nmpt, count)); 4116 } 4117 4118 /* 4119 * Wrapper function to transmit normal frames over the channel. 4120 */ 4121 static int 4122 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4123 { 4124 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4125 mblk_t *tmp = NULL; 4126 4127 ASSERT(count != 0); 4128 /* 4129 * If the TX thread is enabled, then queue the 4130 * ordinary frames and signal the tx thread. 4131 */ 4132 if (ldcp->tx_thread != NULL) { 4133 4134 mutex_enter(&ldcp->tx_thr_lock); 4135 4136 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 4137 /* 4138 * If we reached queue limit, 4139 * do not queue new packets, 4140 * drop them. 4141 */ 4142 ldcp->ldc_stats.tx_qfull += count; 4143 mutex_exit(&ldcp->tx_thr_lock); 4144 freemsgchain(mp); 4145 goto exit; 4146 } 4147 if (ldcp->tx_mhead == NULL) { 4148 ldcp->tx_mhead = mp; 4149 ldcp->tx_mtail = mpt; 4150 cv_signal(&ldcp->tx_thr_cv); 4151 } else { 4152 ldcp->tx_mtail->b_next = mp; 4153 ldcp->tx_mtail = mpt; 4154 } 4155 ldcp->tx_cnt += count; 4156 mutex_exit(&ldcp->tx_thr_lock); 4157 } else { 4158 while (mp != NULL) { 4159 tmp = mp->b_next; 4160 mp->b_next = mp->b_prev = NULL; 4161 (void) vsw_ldcsend(ldcp, mp, 1); 4162 mp = tmp; 4163 } 4164 } 4165 4166 exit: 4167 return (0); 4168 } 4169 4170 /* 4171 * This function transmits the frame in the payload of a raw data 4172 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 4173 * send special frames with high priorities, without going through 4174 * the normal data path which uses descriptor ring mechanism. 4175 */ 4176 static void 4177 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp) 4178 { 4179 vio_raw_data_msg_t *pkt; 4180 mblk_t *bp; 4181 mblk_t *nmp = NULL; 4182 caddr_t dst; 4183 uint32_t mblksz; 4184 uint32_t size; 4185 uint32_t nbytes; 4186 int rv; 4187 vsw_t *vswp = ldcp->ldc_vswp; 4188 vgen_stats_t *statsp = &ldcp->ldc_stats; 4189 4190 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4191 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4192 (void) atomic_inc_32(&statsp->tx_pri_fail); 4193 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4194 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4195 ldcp->lane_out.lstate); 4196 goto send_pkt_exit; 4197 } 4198 4199 size = msgsize(mp); 4200 4201 /* frame size bigger than available payload len of raw data msg ? 
*/ 4202 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) { 4203 (void) atomic_inc_32(&statsp->tx_pri_fail); 4204 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 4205 ldcp->ldc_id, size); 4206 goto send_pkt_exit; 4207 } 4208 4209 if (size < ETHERMIN) 4210 size = ETHERMIN; 4211 4212 /* alloc space for a raw data message */ 4213 nmp = vio_allocb(vswp->pri_tx_vmp); 4214 if (nmp == NULL) { 4215 (void) atomic_inc_32(&statsp->tx_pri_fail); 4216 DWARN(vswp, "vio_allocb failed\n"); 4217 goto send_pkt_exit; 4218 } 4219 pkt = (vio_raw_data_msg_t *)nmp->b_rptr; 4220 4221 /* copy frame into the payload of raw data message */ 4222 dst = (caddr_t)pkt->data; 4223 for (bp = mp; bp != NULL; bp = bp->b_cont) { 4224 mblksz = MBLKL(bp); 4225 bcopy(bp->b_rptr, dst, mblksz); 4226 dst += mblksz; 4227 } 4228 4229 /* setup the raw data msg */ 4230 pkt->tag.vio_msgtype = VIO_TYPE_DATA; 4231 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 4232 pkt->tag.vio_subtype_env = VIO_PKT_DATA; 4233 pkt->tag.vio_sid = ldcp->local_session; 4234 nbytes = VIO_PKT_DATA_HDRSIZE + size; 4235 4236 /* send the msg over ldc */ 4237 rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE); 4238 if (rv != 0) { 4239 (void) atomic_inc_32(&statsp->tx_pri_fail); 4240 DWARN(vswp, "%s(%lld) Error sending priority frame\n", __func__, 4241 ldcp->ldc_id); 4242 goto send_pkt_exit; 4243 } 4244 4245 /* update stats */ 4246 (void) atomic_inc_64(&statsp->tx_pri_packets); 4247 (void) atomic_add_64(&statsp->tx_pri_bytes, size); 4248 4249 send_pkt_exit: 4250 if (nmp != NULL) 4251 freemsg(nmp); 4252 freemsg(mp); 4253 } 4254 4255 /* 4256 * Transmit the packet over the given LDC channel. 4257 * 4258 * The 'retries' argument indicates how many times a packet 4259 * is retried before it is dropped. Note, the retry is done 4260 * only for a resource related failure, for all other failures 4261 * the packet is dropped immediately. 4262 */ 4263 static int 4264 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries) 4265 { 4266 int i; 4267 int rc; 4268 int status = 0; 4269 vsw_port_t *port = ldcp->ldc_port; 4270 dring_info_t *dp = NULL; 4271 4272 4273 for (i = 0; i < retries; ) { 4274 /* 4275 * Send the message out using the appropriate 4276 * transmit function which will free mblock when it 4277 * is finished with it. 4278 */ 4279 mutex_enter(&port->tx_lock); 4280 if (port->transmit != NULL) { 4281 status = (*port->transmit)(ldcp, mp); 4282 } 4283 if (status == LDC_TX_SUCCESS) { 4284 mutex_exit(&port->tx_lock); 4285 break; 4286 } 4287 i++; /* increment the counter here */ 4288 4289 /* If it's the last retry, then update the oerrors */ 4290 if ((i == retries) && (status == LDC_TX_NORESOURCES)) { 4291 ldcp->ldc_stats.oerrors++; 4292 } 4293 mutex_exit(&port->tx_lock); 4294 4295 if (status != LDC_TX_NORESOURCES) { 4296 /* 4297 * No retrying required for errors unrelated 4298 * to resources. 4299 */ 4300 break; 4301 } 4302 READ_ENTER(&ldcp->lane_out.dlistrw); 4303 if (((dp = ldcp->lane_out.dringp) != NULL) && 4304 ((VSW_VER_GTEQ(ldcp, 1, 2) && 4305 (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) || 4306 ((VSW_VER_LT(ldcp, 1, 2) && 4307 (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) { 4308 rc = vsw_reclaim_dring(dp, dp->end_idx); 4309 } else { 4310 /* 4311 * If there is no dring or the xfer_mode is 4312 * set to DESC_MODE (i.e., OBP), then simply break here. 4313 */ 4314 RW_EXIT(&ldcp->lane_out.dlistrw); 4315 break; 4316 } 4317 RW_EXIT(&ldcp->lane_out.dlistrw); 4318 4319 /* 4320 * Delay only if none were reclaimed 4321 * and it's not the last retry.
4322 */ 4323 if ((rc == 0) && (i < retries)) { 4324 delay(drv_usectohz(vsw_ldc_tx_delay)); 4325 } 4326 } 4327 freemsg(mp); 4328 return (status); 4329 } 4330 4331 /* 4332 * Send packet out via descriptor ring to a logical device. 4333 */ 4334 static int 4335 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 4336 { 4337 vio_dring_msg_t dring_pkt; 4338 dring_info_t *dp = NULL; 4339 vsw_private_desc_t *priv_desc = NULL; 4340 vnet_public_desc_t *pub = NULL; 4341 vsw_t *vswp = ldcp->ldc_vswp; 4342 mblk_t *bp; 4343 size_t n, size; 4344 caddr_t bufp; 4345 int idx; 4346 int status = LDC_TX_SUCCESS; 4347 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 4348 lane_t *lp = &ldcp->lane_out; 4349 4350 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 4351 4352 /* TODO: make test a macro */ 4353 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4354 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4355 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4356 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4357 ldcp->lane_out.lstate); 4358 ldcp->ldc_stats.oerrors++; 4359 return (LDC_TX_FAILURE); 4360 } 4361 4362 /* 4363 * Note - using first ring only, this may change 4364 * in the future. 4365 */ 4366 READ_ENTER(&ldcp->lane_out.dlistrw); 4367 if ((dp = ldcp->lane_out.dringp) == NULL) { 4368 RW_EXIT(&ldcp->lane_out.dlistrw); 4369 DERR(vswp, "%s(%lld): no dring for outbound lane on" 4370 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 4371 ldcp->ldc_stats.oerrors++; 4372 return (LDC_TX_FAILURE); 4373 } 4374 4375 size = msgsize(mp); 4376 if (size > (size_t)lp->mtu) { 4377 RW_EXIT(&ldcp->lane_out.dlistrw); 4378 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4379 ldcp->ldc_id, size); 4380 ldcp->ldc_stats.oerrors++; 4381 return (LDC_TX_FAILURE); 4382 } 4383 4384 /* 4385 * Find a free descriptor 4386 * 4387 * Note: for the moment we are assuming that we will only 4388 * have one dring going from the switch to each of its 4389 * peers. This may change in the future. 4390 */ 4391 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4392 D2(vswp, "%s(%lld): no descriptor available for ring " 4393 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4394 4395 /* nothing more we can do */ 4396 status = LDC_TX_NORESOURCES; 4397 ldcp->ldc_stats.tx_no_desc++; 4398 goto vsw_dringsend_free_exit; 4399 } else { 4400 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 4401 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 4402 } 4403 4404 /* copy data into the descriptor */ 4405 bufp = priv_desc->datap; 4406 bufp += VNET_IPALIGN; 4407 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4408 n = MBLKL(bp); 4409 bcopy(bp->b_rptr, bufp, n); 4410 bufp += n; 4411 } 4412 4413 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4414 4415 pub = priv_desc->descp; 4416 pub->nbytes = priv_desc->datalen; 4417 4418 /* update statistics */ 4419 if (IS_BROADCAST(ehp)) 4420 ldcp->ldc_stats.brdcstxmt++; 4421 else if (IS_MULTICAST(ehp)) 4422 ldcp->ldc_stats.multixmt++; 4423 ldcp->ldc_stats.opackets++; 4424 ldcp->ldc_stats.obytes += priv_desc->datalen; 4425 4426 mutex_enter(&priv_desc->dstate_lock); 4427 pub->hdr.dstate = VIO_DESC_READY; 4428 mutex_exit(&priv_desc->dstate_lock); 4429 4430 /* 4431 * Determine whether or not we need to send a message to our 4432 * peer prompting them to read our newly updated descriptor(s). 
4433 */ 4434 mutex_enter(&dp->restart_lock); 4435 if (dp->restart_reqd) { 4436 dp->restart_reqd = B_FALSE; 4437 ldcp->ldc_stats.dring_data_msgs++; 4438 mutex_exit(&dp->restart_lock); 4439 4440 /* 4441 * Send a vio_dring_msg to peer to prompt them to read 4442 * the updated descriptor ring. 4443 */ 4444 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 4445 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 4446 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 4447 dring_pkt.tag.vio_sid = ldcp->local_session; 4448 4449 /* Note - for now using first ring */ 4450 dring_pkt.dring_ident = dp->ident; 4451 4452 /* 4453 * If last_ack_recv is -1 then we know we've not 4454 * received any ack's yet, so this must be the first 4455 * msg sent, so set the start to the begining of the ring. 4456 */ 4457 mutex_enter(&dp->dlock); 4458 if (dp->last_ack_recv == -1) { 4459 dring_pkt.start_idx = 0; 4460 } else { 4461 dring_pkt.start_idx = 4462 (dp->last_ack_recv + 1) % dp->num_descriptors; 4463 } 4464 dring_pkt.end_idx = -1; 4465 mutex_exit(&dp->dlock); 4466 4467 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 4468 ldcp->ldc_id, dp, dring_pkt.dring_ident); 4469 D3(vswp, "%s(%lld): start %lld : end %lld :\n", 4470 __func__, ldcp->ldc_id, dring_pkt.start_idx, 4471 dring_pkt.end_idx); 4472 4473 RW_EXIT(&ldcp->lane_out.dlistrw); 4474 4475 (void) vsw_send_msg(ldcp, (void *)&dring_pkt, 4476 sizeof (vio_dring_msg_t), B_TRUE); 4477 4478 return (status); 4479 4480 } else { 4481 mutex_exit(&dp->restart_lock); 4482 D2(vswp, "%s(%lld): updating descp %d", __func__, 4483 ldcp->ldc_id, idx); 4484 } 4485 4486 vsw_dringsend_free_exit: 4487 4488 RW_EXIT(&ldcp->lane_out.dlistrw); 4489 4490 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 4491 return (status); 4492 } 4493 4494 /* 4495 * Send an in-band descriptor message over ldc. 4496 */ 4497 static int 4498 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 4499 { 4500 vsw_t *vswp = ldcp->ldc_vswp; 4501 vnet_ibnd_desc_t ibnd_msg; 4502 vsw_private_desc_t *priv_desc = NULL; 4503 dring_info_t *dp = NULL; 4504 size_t n, size = 0; 4505 caddr_t bufp; 4506 mblk_t *bp; 4507 int idx, i; 4508 int status = LDC_TX_SUCCESS; 4509 static int warn_msg = 1; 4510 lane_t *lp = &ldcp->lane_out; 4511 4512 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4513 4514 ASSERT(mp != NULL); 4515 4516 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4517 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4518 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 4519 __func__, ldcp->ldc_id, ldcp->ldc_status, 4520 ldcp->lane_out.lstate); 4521 ldcp->ldc_stats.oerrors++; 4522 return (LDC_TX_FAILURE); 4523 } 4524 4525 /* 4526 * only expect single dring to exist, which we use 4527 * as an internal buffer, rather than a transfer channel. 
4528 */ 4529 READ_ENTER(&ldcp->lane_out.dlistrw); 4530 if ((dp = ldcp->lane_out.dringp) == NULL) { 4531 DERR(vswp, "%s(%lld): no dring for outbound lane", 4532 __func__, ldcp->ldc_id); 4533 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 4534 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 4535 RW_EXIT(&ldcp->lane_out.dlistrw); 4536 ldcp->ldc_stats.oerrors++; 4537 return (LDC_TX_FAILURE); 4538 } 4539 4540 size = msgsize(mp); 4541 if (size > (size_t)lp->mtu) { 4542 RW_EXIT(&ldcp->lane_out.dlistrw); 4543 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4544 ldcp->ldc_id, size); 4545 ldcp->ldc_stats.oerrors++; 4546 return (LDC_TX_FAILURE); 4547 } 4548 4549 /* 4550 * Find a free descriptor in our buffer ring 4551 */ 4552 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4553 RW_EXIT(&ldcp->lane_out.dlistrw); 4554 if (warn_msg) { 4555 DERR(vswp, "%s(%lld): no descriptor available for ring " 4556 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4557 warn_msg = 0; 4558 } 4559 4560 /* nothing more we can do */ 4561 status = LDC_TX_NORESOURCES; 4562 goto vsw_descrsend_free_exit; 4563 } else { 4564 D2(vswp, "%s(%lld): free private descriptor found at pos " 4565 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4566 warn_msg = 1; 4567 } 4568 4569 /* copy data into the descriptor */ 4570 bufp = priv_desc->datap; 4571 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4572 n = MBLKL(bp); 4573 bcopy(bp->b_rptr, bufp, n); 4574 bufp += n; 4575 } 4576 4577 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4578 4579 /* create and send the in-band descp msg */ 4580 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4581 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4582 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4583 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4584 4585 /* 4586 * Copy the mem cookies describing the data from the 4587 * private region of the descriptor ring into the inband 4588 * descriptor. 
4589 */ 4590 for (i = 0; i < priv_desc->ncookies; i++) { 4591 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4592 sizeof (ldc_mem_cookie_t)); 4593 } 4594 4595 ibnd_msg.hdr.desc_handle = idx; 4596 ibnd_msg.ncookies = priv_desc->ncookies; 4597 ibnd_msg.nbytes = size; 4598 4599 ldcp->ldc_stats.opackets++; 4600 ldcp->ldc_stats.obytes += size; 4601 4602 RW_EXIT(&ldcp->lane_out.dlistrw); 4603 4604 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 4605 sizeof (vnet_ibnd_desc_t), B_TRUE); 4606 4607 vsw_descrsend_free_exit: 4608 4609 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4610 return (status); 4611 } 4612 4613 static void 4614 vsw_send_ver(void *arg) 4615 { 4616 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4617 vsw_t *vswp = ldcp->ldc_vswp; 4618 lane_t *lp = &ldcp->lane_out; 4619 vio_ver_msg_t ver_msg; 4620 4621 D1(vswp, "%s enter", __func__); 4622 4623 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4624 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4625 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 4626 ver_msg.tag.vio_sid = ldcp->local_session; 4627 4628 if (vsw_obp_ver_proto_workaround == B_FALSE) { 4629 ver_msg.ver_major = vsw_versions[0].ver_major; 4630 ver_msg.ver_minor = vsw_versions[0].ver_minor; 4631 } else { 4632 /* use the major,minor that we've ack'd */ 4633 lane_t *lpi = &ldcp->lane_in; 4634 ver_msg.ver_major = lpi->ver_major; 4635 ver_msg.ver_minor = lpi->ver_minor; 4636 } 4637 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 4638 4639 lp->lstate |= VSW_VER_INFO_SENT; 4640 lp->ver_major = ver_msg.ver_major; 4641 lp->ver_minor = ver_msg.ver_minor; 4642 4643 DUMP_TAG(ver_msg.tag); 4644 4645 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 4646 4647 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 4648 } 4649 4650 static void 4651 vsw_send_attr(vsw_ldc_t *ldcp) 4652 { 4653 vsw_t *vswp = ldcp->ldc_vswp; 4654 lane_t *lp = &ldcp->lane_out; 4655 vnet_attr_msg_t attr_msg; 4656 4657 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4658 4659 /* 4660 * Subtype is set to INFO by default 4661 */ 4662 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4663 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4664 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 4665 attr_msg.tag.vio_sid = ldcp->local_session; 4666 4667 /* payload copied from default settings for lane */ 4668 attr_msg.mtu = lp->mtu; 4669 attr_msg.addr_type = lp->addr_type; 4670 attr_msg.xfer_mode = lp->xfer_mode; 4671 attr_msg.ack_freq = lp->xfer_mode; 4672 4673 READ_ENTER(&vswp->if_lockrw); 4674 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet); 4675 RW_EXIT(&vswp->if_lockrw); 4676 4677 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 4678 4679 DUMP_TAG(attr_msg.tag); 4680 4681 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 4682 4683 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4684 } 4685 4686 /* 4687 * Create dring info msg (which also results in the creation of 4688 * a dring). 4689 */ 4690 static vio_dring_reg_msg_t * 4691 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 4692 { 4693 vio_dring_reg_msg_t *mp; 4694 dring_info_t *dp; 4695 vsw_t *vswp = ldcp->ldc_vswp; 4696 4697 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 4698 4699 /* 4700 * If we can't create a dring, obviously no point sending 4701 * a message. 
4702 */ 4703 if ((dp = vsw_create_dring(ldcp)) == NULL) 4704 return (NULL); 4705 4706 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 4707 4708 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 4709 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 4710 mp->tag.vio_subtype_env = VIO_DRING_REG; 4711 mp->tag.vio_sid = ldcp->local_session; 4712 4713 /* payload */ 4714 mp->num_descriptors = dp->num_descriptors; 4715 mp->descriptor_size = dp->descriptor_size; 4716 mp->options = dp->options; 4717 mp->ncookies = dp->ncookies; 4718 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 4719 4720 mp->dring_ident = 0; 4721 4722 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 4723 4724 return (mp); 4725 } 4726 4727 static void 4728 vsw_send_dring_info(vsw_ldc_t *ldcp) 4729 { 4730 vio_dring_reg_msg_t *dring_msg; 4731 vsw_t *vswp = ldcp->ldc_vswp; 4732 4733 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 4734 4735 dring_msg = vsw_create_dring_info_pkt(ldcp); 4736 if (dring_msg == NULL) { 4737 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 4738 vswp->instance, __func__); 4739 return; 4740 } 4741 4742 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 4743 4744 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 4745 4746 (void) vsw_send_msg(ldcp, dring_msg, 4747 sizeof (vio_dring_reg_msg_t), B_TRUE); 4748 4749 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 4750 4751 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 4752 } 4753 4754 static void 4755 vsw_send_rdx(vsw_ldc_t *ldcp) 4756 { 4757 vsw_t *vswp = ldcp->ldc_vswp; 4758 vio_rdx_msg_t rdx_msg; 4759 4760 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4761 4762 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4763 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4764 rdx_msg.tag.vio_subtype_env = VIO_RDX; 4765 rdx_msg.tag.vio_sid = ldcp->local_session; 4766 4767 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 4768 4769 DUMP_TAG(rdx_msg.tag); 4770 4771 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 4772 4773 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4774 } 4775 4776 /* 4777 * Generic routine to send message out over ldc channel. 4778 * 4779 * It is possible that when we attempt to write over the ldc channel 4780 * that we get notified that it has been reset. Depending on the value 4781 * of the handle_reset flag we either handle that event here or simply 4782 * notify the caller that the channel was reset. 
4783 */ 4784 int 4785 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset) 4786 { 4787 int rv; 4788 size_t msglen = size; 4789 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 4790 vsw_t *vswp = ldcp->ldc_vswp; 4791 vio_dring_msg_t *dmsg; 4792 vio_raw_data_msg_t *rmsg; 4793 vnet_ibnd_desc_t *imsg; 4794 boolean_t data_msg = B_FALSE; 4795 4796 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 4797 ldcp->ldc_id, size); 4798 4799 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 4800 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 4801 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 4802 4803 mutex_enter(&ldcp->ldc_txlock); 4804 4805 if (tag->vio_subtype == VIO_SUBTYPE_INFO) { 4806 if (tag->vio_subtype_env == VIO_DRING_DATA) { 4807 dmsg = (vio_dring_msg_t *)tag; 4808 dmsg->seq_num = ldcp->lane_out.seq_num; 4809 data_msg = B_TRUE; 4810 } else if (tag->vio_subtype_env == VIO_PKT_DATA) { 4811 rmsg = (vio_raw_data_msg_t *)tag; 4812 rmsg->seq_num = ldcp->lane_out.seq_num; 4813 data_msg = B_TRUE; 4814 } else if (tag->vio_subtype_env == VIO_DESC_DATA) { 4815 imsg = (vnet_ibnd_desc_t *)tag; 4816 imsg->hdr.seq_num = ldcp->lane_out.seq_num; 4817 data_msg = B_TRUE; 4818 } 4819 } 4820 4821 do { 4822 msglen = size; 4823 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 4824 } while (rv == EWOULDBLOCK && --vsw_wretries > 0); 4825 4826 if (rv == 0 && data_msg == B_TRUE) { 4827 ldcp->lane_out.seq_num++; 4828 } 4829 4830 if ((rv != 0) || (msglen != size)) { 4831 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) " 4832 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen); 4833 ldcp->ldc_stats.oerrors++; 4834 } 4835 4836 mutex_exit(&ldcp->ldc_txlock); 4837 4838 /* 4839 * If channel has been reset we either handle it here or 4840 * simply report back that it has been reset and let caller 4841 * decide what to do. 4842 */ 4843 if (rv == ECONNRESET) { 4844 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 4845 4846 /* 4847 * N.B - must never be holding the dlistrw lock when 4848 * we do a reset of the channel. 4849 */ 4850 if (handle_reset) { 4851 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 4852 } 4853 } 4854 4855 return (rv); 4856 } 4857 4858 /* 4859 * Remove the specified address from the list of address maintained 4860 * in this port node. 
4861 */ 4862 mcst_addr_t * 4863 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 4864 { 4865 vsw_t *vswp = NULL; 4866 vsw_port_t *port = NULL; 4867 mcst_addr_t *prev_p = NULL; 4868 mcst_addr_t *curr_p = NULL; 4869 4870 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 4871 __func__, devtype, addr); 4872 4873 if (devtype == VSW_VNETPORT) { 4874 port = (vsw_port_t *)arg; 4875 mutex_enter(&port->mca_lock); 4876 prev_p = curr_p = port->mcap; 4877 } else { 4878 vswp = (vsw_t *)arg; 4879 mutex_enter(&vswp->mca_lock); 4880 prev_p = curr_p = vswp->mcap; 4881 } 4882 4883 while (curr_p != NULL) { 4884 if (curr_p->addr == addr) { 4885 D2(NULL, "%s: address found", __func__); 4886 /* match found */ 4887 if (prev_p == curr_p) { 4888 /* list head */ 4889 if (devtype == VSW_VNETPORT) 4890 port->mcap = curr_p->nextp; 4891 else 4892 vswp->mcap = curr_p->nextp; 4893 } else { 4894 prev_p->nextp = curr_p->nextp; 4895 } 4896 break; 4897 } else { 4898 prev_p = curr_p; 4899 curr_p = curr_p->nextp; 4900 } 4901 } 4902 4903 if (devtype == VSW_VNETPORT) 4904 mutex_exit(&port->mca_lock); 4905 else 4906 mutex_exit(&vswp->mca_lock); 4907 4908 D1(NULL, "%s: exit", __func__); 4909 4910 return (curr_p); 4911 } 4912 4913 /* 4914 * Creates a descriptor ring (dring) and links it into the 4915 * link of outbound drings for this channel. 4916 * 4917 * Returns NULL if creation failed. 4918 */ 4919 static dring_info_t * 4920 vsw_create_dring(vsw_ldc_t *ldcp) 4921 { 4922 vsw_private_desc_t *priv_addr = NULL; 4923 vsw_t *vswp = ldcp->ldc_vswp; 4924 ldc_mem_info_t minfo; 4925 dring_info_t *dp, *tp; 4926 int i; 4927 4928 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4929 4930 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4931 4932 /* create public section of ring */ 4933 if ((ldc_mem_dring_create(vsw_ntxds, 4934 VSW_PUB_SIZE, &dp->handle)) != 0) { 4935 4936 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 4937 "failed", ldcp->ldc_id); 4938 goto create_fail_exit; 4939 } 4940 4941 ASSERT(dp->handle != NULL); 4942 4943 /* 4944 * Get the base address of the public section of the ring. 4945 */ 4946 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 4947 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 4948 ldcp->ldc_id); 4949 goto dring_fail_exit; 4950 } else { 4951 ASSERT(minfo.vaddr != 0); 4952 dp->pub_addr = minfo.vaddr; 4953 } 4954 4955 dp->num_descriptors = vsw_ntxds; 4956 dp->descriptor_size = VSW_PUB_SIZE; 4957 dp->options = VIO_TX_DRING; 4958 dp->ncookies = 1; /* guaranteed by ldc */ 4959 4960 /* 4961 * create private portion of ring 4962 */ 4963 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 4964 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 4965 4966 if (vsw_setup_ring(ldcp, dp)) { 4967 DERR(vswp, "%s: unable to setup ring", __func__); 4968 goto dring_fail_exit; 4969 } 4970 4971 /* haven't used any descriptors yet */ 4972 dp->end_idx = 0; 4973 dp->last_ack_recv = -1; 4974 4975 /* bind dring to the channel */ 4976 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 4977 LDC_SHADOW_MAP, LDC_MEM_RW, 4978 &dp->cookie[0], &dp->ncookies)) != 0) { 4979 DERR(vswp, "vsw_create_dring: unable to bind to channel " 4980 "%lld", ldcp->ldc_id); 4981 goto dring_fail_exit; 4982 } 4983 4984 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4985 dp->restart_reqd = B_TRUE; 4986 4987 /* 4988 * Only ever create rings for outgoing lane. Link it onto 4989 * end of list. 
4990 */ 4991 WRITE_ENTER(&ldcp->lane_out.dlistrw); 4992 if (ldcp->lane_out.dringp == NULL) { 4993 D2(vswp, "vsw_create_dring: adding first outbound ring"); 4994 ldcp->lane_out.dringp = dp; 4995 } else { 4996 tp = ldcp->lane_out.dringp; 4997 while (tp->next != NULL) 4998 tp = tp->next; 4999 5000 tp->next = dp; 5001 } 5002 RW_EXIT(&ldcp->lane_out.dlistrw); 5003 5004 return (dp); 5005 5006 dring_fail_exit: 5007 (void) ldc_mem_dring_destroy(dp->handle); 5008 5009 create_fail_exit: 5010 if (dp->priv_addr != NULL) { 5011 priv_addr = dp->priv_addr; 5012 for (i = 0; i < vsw_ntxds; i++) { 5013 if (priv_addr->memhandle != NULL) 5014 (void) ldc_mem_free_handle( 5015 priv_addr->memhandle); 5016 priv_addr++; 5017 } 5018 kmem_free(dp->priv_addr, 5019 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5020 } 5021 mutex_destroy(&dp->dlock); 5022 5023 kmem_free(dp, sizeof (dring_info_t)); 5024 return (NULL); 5025 } 5026 5027 /* 5028 * Create a ring consisting of just a private portion and link 5029 * it into the list of rings for the outbound lane. 5030 * 5031 * These type of rings are used primarily for temporary data 5032 * storage (i.e. as data buffers). 5033 */ 5034 void 5035 vsw_create_privring(vsw_ldc_t *ldcp) 5036 { 5037 dring_info_t *dp, *tp; 5038 vsw_t *vswp = ldcp->ldc_vswp; 5039 5040 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5041 5042 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5043 5044 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5045 5046 /* no public section */ 5047 dp->pub_addr = NULL; 5048 5049 dp->priv_addr = kmem_zalloc( 5050 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5051 5052 dp->num_descriptors = vsw_ntxds; 5053 5054 if (vsw_setup_ring(ldcp, dp)) { 5055 DERR(vswp, "%s: setup of ring failed", __func__); 5056 kmem_free(dp->priv_addr, 5057 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5058 mutex_destroy(&dp->dlock); 5059 kmem_free(dp, sizeof (dring_info_t)); 5060 return; 5061 } 5062 5063 /* haven't used any descriptors yet */ 5064 dp->end_idx = 0; 5065 5066 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5067 dp->restart_reqd = B_TRUE; 5068 5069 /* 5070 * Only ever create rings for outgoing lane. Link it onto 5071 * end of list. 5072 */ 5073 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5074 if (ldcp->lane_out.dringp == NULL) { 5075 D2(vswp, "%s: adding first outbound privring", __func__); 5076 ldcp->lane_out.dringp = dp; 5077 } else { 5078 tp = ldcp->lane_out.dringp; 5079 while (tp->next != NULL) 5080 tp = tp->next; 5081 5082 tp->next = dp; 5083 } 5084 RW_EXIT(&ldcp->lane_out.dlistrw); 5085 5086 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 5087 } 5088 5089 /* 5090 * Setup the descriptors in the dring. Returns 0 on success, 1 on 5091 * failure. 5092 */ 5093 int 5094 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 5095 { 5096 vnet_public_desc_t *pub_addr = NULL; 5097 vsw_private_desc_t *priv_addr = NULL; 5098 vsw_t *vswp = ldcp->ldc_vswp; 5099 uint64_t *tmpp; 5100 uint64_t offset = 0; 5101 uint32_t ncookies = 0; 5102 static char *name = "vsw_setup_ring"; 5103 int i, j, nc, rv; 5104 size_t data_sz; 5105 5106 priv_addr = dp->priv_addr; 5107 pub_addr = dp->pub_addr; 5108 5109 /* public section may be null but private should never be */ 5110 ASSERT(priv_addr != NULL); 5111 5112 /* 5113 * Allocate the region of memory which will be used to hold 5114 * the data the descriptors will refer to. 
5115 */ 5116 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 5117 data_sz = VNET_ROUNDUP_2K(data_sz); 5118 dp->desc_data_sz = data_sz; 5119 dp->data_sz = vsw_ntxds * data_sz; 5120 dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 5121 5122 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 5123 dp->data_sz, dp->data_addr); 5124 5125 tmpp = (uint64_t *)dp->data_addr; 5126 offset = dp->desc_data_sz/sizeof (tmpp); 5127 5128 /* 5129 * Initialise some of the private and public (if they exist) 5130 * descriptor fields. 5131 */ 5132 for (i = 0; i < vsw_ntxds; i++) { 5133 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 5134 5135 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 5136 &priv_addr->memhandle)) != 0) { 5137 DERR(vswp, "%s: alloc mem handle failed", name); 5138 goto setup_ring_cleanup; 5139 } 5140 5141 priv_addr->datap = (void *)tmpp; 5142 5143 rv = ldc_mem_bind_handle(priv_addr->memhandle, 5144 (caddr_t)priv_addr->datap, dp->desc_data_sz, 5145 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 5146 &(priv_addr->memcookie[0]), &ncookies); 5147 if (rv != 0) { 5148 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 5149 "(rv %d)", name, ldcp->ldc_id, rv); 5150 goto setup_ring_cleanup; 5151 } 5152 priv_addr->bound = 1; 5153 5154 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 5155 name, i, priv_addr->memcookie[0].addr, 5156 priv_addr->memcookie[0].size); 5157 5158 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 5159 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 5160 "invalid num of cookies (%d) for size 0x%llx", 5161 name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ); 5162 5163 goto setup_ring_cleanup; 5164 } else { 5165 for (j = 1; j < ncookies; j++) { 5166 rv = ldc_mem_nextcookie(priv_addr->memhandle, 5167 &(priv_addr->memcookie[j])); 5168 if (rv != 0) { 5169 DERR(vswp, "%s: ldc_mem_nextcookie " 5170 "failed rv (%d)", name, rv); 5171 goto setup_ring_cleanup; 5172 } 5173 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 5174 "size 0x%llx", name, j, 5175 priv_addr->memcookie[j].addr, 5176 priv_addr->memcookie[j].size); 5177 } 5178 5179 } 5180 priv_addr->ncookies = ncookies; 5181 priv_addr->dstate = VIO_DESC_FREE; 5182 5183 if (pub_addr != NULL) { 5184 5185 /* link pub and private sides */ 5186 priv_addr->descp = pub_addr; 5187 5188 pub_addr->ncookies = priv_addr->ncookies; 5189 5190 for (nc = 0; nc < pub_addr->ncookies; nc++) { 5191 bcopy(&priv_addr->memcookie[nc], 5192 &pub_addr->memcookie[nc], 5193 sizeof (ldc_mem_cookie_t)); 5194 } 5195 5196 pub_addr->hdr.dstate = VIO_DESC_FREE; 5197 pub_addr++; 5198 } 5199 5200 /* 5201 * move to next element in the dring and the next 5202 * position in the data buffer. 5203 */ 5204 priv_addr++; 5205 tmpp += offset; 5206 } 5207 5208 return (0); 5209 5210 setup_ring_cleanup: 5211 priv_addr = dp->priv_addr; 5212 5213 for (j = 0; j < i; j++) { 5214 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 5215 (void) ldc_mem_free_handle(priv_addr->memhandle); 5216 5217 mutex_destroy(&priv_addr->dstate_lock); 5218 5219 priv_addr++; 5220 } 5221 kmem_free(dp->data_addr, dp->data_sz); 5222 5223 return (1); 5224 } 5225 5226 /* 5227 * Searches the private section of a ring for a free descriptor, 5228 * starting at the location of the last free descriptor found 5229 * previously. 5230 * 5231 * Returns 0 if free descriptor is available, and updates state 5232 * of private descriptor to VIO_DESC_READY, otherwise returns 1. 
5233 * 5234 * FUTURE: might need to return contiguous range of descriptors 5235 * as dring info msg assumes all will be contiguous. 5236 */ 5237 static int 5238 vsw_dring_find_free_desc(dring_info_t *dringp, 5239 vsw_private_desc_t **priv_p, int *idx) 5240 { 5241 vsw_private_desc_t *addr = NULL; 5242 int num = vsw_ntxds; 5243 int ret = 1; 5244 5245 D1(NULL, "%s enter\n", __func__); 5246 5247 ASSERT(dringp->priv_addr != NULL); 5248 5249 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 5250 __func__, dringp, dringp->end_idx); 5251 5252 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 5253 5254 mutex_enter(&addr->dstate_lock); 5255 if (addr->dstate == VIO_DESC_FREE) { 5256 addr->dstate = VIO_DESC_READY; 5257 *priv_p = addr; 5258 *idx = dringp->end_idx; 5259 dringp->end_idx = (dringp->end_idx + 1) % num; 5260 ret = 0; 5261 5262 } 5263 mutex_exit(&addr->dstate_lock); 5264 5265 /* ring full */ 5266 if (ret == 1) { 5267 D2(NULL, "%s: no desp free: started at %d", __func__, 5268 dringp->end_idx); 5269 } 5270 5271 D1(NULL, "%s: exit\n", __func__); 5272 5273 return (ret); 5274 } 5275 5276 /* 5277 * Map from a dring identifier to the ring itself. Returns 5278 * pointer to ring or NULL if no match found. 5279 * 5280 * Should be called with dlistrw rwlock held as reader. 5281 */ 5282 static dring_info_t * 5283 vsw_ident2dring(lane_t *lane, uint64_t ident) 5284 { 5285 dring_info_t *dp = NULL; 5286 5287 if ((dp = lane->dringp) == NULL) { 5288 return (NULL); 5289 } else { 5290 if (dp->ident == ident) 5291 return (dp); 5292 5293 while (dp != NULL) { 5294 if (dp->ident == ident) 5295 break; 5296 dp = dp->next; 5297 } 5298 } 5299 5300 return (dp); 5301 } 5302 5303 /* 5304 * Set the default lane attributes. These are copied into 5305 * the attr msg we send to our peer. If they are not acceptable 5306 * then (currently) the handshake ends. 5307 */ 5308 static void 5309 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 5310 { 5311 bzero(lp, sizeof (lane_t)); 5312 5313 READ_ENTER(&vswp->if_lockrw); 5314 ether_copy(&(vswp->if_addr), &(lp->addr)); 5315 RW_EXIT(&vswp->if_lockrw); 5316 5317 lp->mtu = vswp->max_frame_size; 5318 lp->addr_type = ADDR_TYPE_MAC; 5319 lp->xfer_mode = VIO_DRING_MODE_V1_0; 5320 lp->ack_freq = 0; /* for shared mode */ 5321 lp->seq_num = VNET_ISS; 5322 } 5323 5324 /* 5325 * Verify that the attributes are acceptable. 5326 * 5327 * FUTURE: If some attributes are not acceptable, change them 5328 * our desired values. 5329 */ 5330 static int 5331 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_ldc_t *ldcp) 5332 { 5333 int ret = 0; 5334 struct ether_addr ea; 5335 vsw_port_t *port = ldcp->ldc_port; 5336 lane_t *lp = &ldcp->lane_out; 5337 5338 D1(NULL, "vsw_check_attr enter\n"); 5339 5340 if ((pkt->xfer_mode != VIO_DESC_MODE) && 5341 (pkt->xfer_mode != lp->xfer_mode)) { 5342 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode); 5343 ret = 1; 5344 } 5345 5346 /* Only support MAC addresses at moment. */ 5347 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 5348 D2(NULL, "vsw_check_attr: invalid addr_type %x, " 5349 "or address 0x%llx\n", pkt->addr_type, pkt->addr); 5350 ret = 1; 5351 } 5352 5353 /* 5354 * MAC address supplied by device should match that stored 5355 * in the vsw-port OBP node. Need to decide what to do if they 5356 * don't match, for the moment just warn but don't fail. 
5357 */ 5358 vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet); 5359 if (ether_cmp(&ea, &port->p_macaddr) != 0) { 5360 DERR(NULL, "vsw_check_attr: device supplied address " 5361 "0x%llx doesn't match node address 0x%llx\n", 5362 pkt->addr, port->p_macaddr); 5363 } 5364 5365 /* 5366 * Ack freq only makes sense in pkt mode, in shared 5367 * mode the ring descriptors say whether or not to 5368 * send back an ACK. 5369 */ 5370 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 5371 (pkt->xfer_mode & VIO_DRING_MODE_V1_2)) || 5372 (VSW_VER_LT(ldcp, 1, 2) && 5373 (pkt->xfer_mode == VIO_DRING_MODE_V1_0))) { 5374 if (pkt->ack_freq > 0) { 5375 D2(NULL, "vsw_check_attr: non zero ack freq " 5376 " in SHM mode\n"); 5377 ret = 1; 5378 } 5379 } 5380 5381 /* 5382 * Note: for the moment we only support ETHER 5383 * frames. This may change in the future. 5384 */ 5385 if ((pkt->mtu > lp->mtu) || (pkt->mtu <= 0)) { 5386 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n", 5387 pkt->mtu); 5388 ret = 1; 5389 } 5390 5391 D1(NULL, "vsw_check_attr exit\n"); 5392 5393 return (ret); 5394 } 5395 5396 /* 5397 * Returns 1 if there is a problem, 0 otherwise. 5398 */ 5399 static int 5400 vsw_check_dring_info(vio_dring_reg_msg_t *pkt) 5401 { 5402 _NOTE(ARGUNUSED(pkt)) 5403 5404 int ret = 0; 5405 5406 D1(NULL, "vsw_check_dring_info enter\n"); 5407 5408 if ((pkt->num_descriptors == 0) || 5409 (pkt->descriptor_size == 0) || 5410 (pkt->ncookies != 1)) { 5411 DERR(NULL, "vsw_check_dring_info: invalid dring msg"); 5412 ret = 1; 5413 } 5414 5415 D1(NULL, "vsw_check_dring_info exit\n"); 5416 5417 return (ret); 5418 } 5419 5420 /* 5421 * Returns 1 if two memory cookies match. Otherwise returns 0. 5422 */ 5423 static int 5424 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2) 5425 { 5426 if ((m1->addr != m2->addr) || 5427 (m2->size != m2->size)) { 5428 return (0); 5429 } else { 5430 return (1); 5431 } 5432 } 5433 5434 /* 5435 * Returns 1 if ring described in reg message matches that 5436 * described by dring_info structure. Otherwise returns 0. 5437 */ 5438 static int 5439 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 5440 { 5441 if ((msg->descriptor_size != dp->descriptor_size) || 5442 (msg->num_descriptors != dp->num_descriptors) || 5443 (msg->ncookies != dp->ncookies) || 5444 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 5445 return (0); 5446 } else { 5447 return (1); 5448 } 5449 5450 } 5451 5452 static caddr_t 5453 vsw_print_ethaddr(uint8_t *a, char *ebuf) 5454 { 5455 (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x", 5456 a[0], a[1], a[2], a[3], a[4], a[5]); 5457 return (ebuf); 5458 } 5459 5460 /* 5461 * Reset and free all the resources associated with 5462 * the channel. 
5463 */ 5464 static void 5465 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 5466 { 5467 dring_info_t *dp, *dpp; 5468 lane_t *lp = NULL; 5469 int rv = 0; 5470 5471 ASSERT(ldcp != NULL); 5472 5473 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 5474 5475 if (dir == INBOUND) { 5476 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 5477 " of channel %lld", __func__, ldcp->ldc_id); 5478 lp = &ldcp->lane_in; 5479 } else { 5480 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 5481 " of channel %lld", __func__, ldcp->ldc_id); 5482 lp = &ldcp->lane_out; 5483 } 5484 5485 lp->lstate = VSW_LANE_INACTIV; 5486 lp->seq_num = VNET_ISS; 5487 5488 if (lp->dringp) { 5489 if (dir == INBOUND) { 5490 WRITE_ENTER(&lp->dlistrw); 5491 dp = lp->dringp; 5492 while (dp != NULL) { 5493 dpp = dp->next; 5494 if (dp->handle != NULL) 5495 (void) ldc_mem_dring_unmap(dp->handle); 5496 kmem_free(dp, sizeof (dring_info_t)); 5497 dp = dpp; 5498 } 5499 RW_EXIT(&lp->dlistrw); 5500 } else { 5501 /* 5502 * unbind, destroy exported dring, free dring struct 5503 */ 5504 WRITE_ENTER(&lp->dlistrw); 5505 dp = lp->dringp; 5506 rv = vsw_free_ring(dp); 5507 RW_EXIT(&lp->dlistrw); 5508 } 5509 if (rv == 0) { 5510 lp->dringp = NULL; 5511 } 5512 } 5513 5514 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 5515 } 5516 5517 /* 5518 * Free ring and all associated resources. 5519 * 5520 * Should be called with dlistrw rwlock held as writer. 5521 */ 5522 static int 5523 vsw_free_ring(dring_info_t *dp) 5524 { 5525 vsw_private_desc_t *paddr = NULL; 5526 dring_info_t *dpp; 5527 int i, rv = 1; 5528 5529 while (dp != NULL) { 5530 mutex_enter(&dp->dlock); 5531 dpp = dp->next; 5532 if (dp->priv_addr != NULL) { 5533 /* 5534 * First unbind and free the memory handles 5535 * stored in each descriptor within the ring. 5536 */ 5537 for (i = 0; i < vsw_ntxds; i++) { 5538 paddr = (vsw_private_desc_t *) 5539 dp->priv_addr + i; 5540 if (paddr->memhandle != NULL) { 5541 if (paddr->bound == 1) { 5542 rv = ldc_mem_unbind_handle( 5543 paddr->memhandle); 5544 5545 if (rv != 0) { 5546 DERR(NULL, "error " 5547 "unbinding handle for " 5548 "ring 0x%llx at pos %d", 5549 dp, i); 5550 mutex_exit(&dp->dlock); 5551 return (rv); 5552 } 5553 paddr->bound = 0; 5554 } 5555 5556 rv = ldc_mem_free_handle( 5557 paddr->memhandle); 5558 if (rv != 0) { 5559 DERR(NULL, "error freeing " 5560 "handle for ring 0x%llx " 5561 "at pos %d", dp, i); 5562 mutex_exit(&dp->dlock); 5563 return (rv); 5564 } 5565 paddr->memhandle = NULL; 5566 } 5567 mutex_destroy(&paddr->dstate_lock); 5568 } 5569 kmem_free(dp->priv_addr, 5570 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5571 } 5572 5573 /* 5574 * Now unbind and destroy the ring itself. 5575 */ 5576 if (dp->handle != NULL) { 5577 (void) ldc_mem_dring_unbind(dp->handle); 5578 (void) ldc_mem_dring_destroy(dp->handle); 5579 } 5580 5581 if (dp->data_addr != NULL) { 5582 kmem_free(dp->data_addr, dp->data_sz); 5583 } 5584 5585 mutex_exit(&dp->dlock); 5586 mutex_destroy(&dp->dlock); 5587 mutex_destroy(&dp->restart_lock); 5588 kmem_free(dp, sizeof (dring_info_t)); 5589 5590 dp = dpp; 5591 } 5592 return (0); 5593 } 5594 5595 /* 5596 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 5597 * This thread is woken up by the LDC interrupt handler to process 5598 * LDC packets and receive data. 
5599 */ 5600 static void 5601 vsw_ldc_rx_worker(void *arg) 5602 { 5603 callb_cpr_t cprinfo; 5604 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5605 vsw_t *vswp = ldcp->ldc_vswp; 5606 5607 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5608 CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr, 5609 "vsw_rx_thread"); 5610 mutex_enter(&ldcp->rx_thr_lock); 5611 ldcp->rx_thr_flags |= VSW_WTHR_RUNNING; 5612 while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) { 5613 5614 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5615 /* 5616 * Wait until the data is received or a stop 5617 * request is received. 5618 */ 5619 while (!(ldcp->rx_thr_flags & 5620 (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) { 5621 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5622 } 5623 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock) 5624 5625 /* 5626 * First process the stop request. 5627 */ 5628 if (ldcp->rx_thr_flags & VSW_WTHR_STOP) { 5629 D2(vswp, "%s(%lld):Rx thread stopped\n", 5630 __func__, ldcp->ldc_id); 5631 break; 5632 } 5633 ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD; 5634 mutex_exit(&ldcp->rx_thr_lock); 5635 D1(vswp, "%s(%lld):calling vsw_process_pkt\n", 5636 __func__, ldcp->ldc_id); 5637 mutex_enter(&ldcp->ldc_cblock); 5638 vsw_process_pkt(ldcp); 5639 mutex_exit(&ldcp->ldc_cblock); 5640 mutex_enter(&ldcp->rx_thr_lock); 5641 } 5642 5643 /* 5644 * Update the run status and wakeup the thread that 5645 * has sent the stop request. 5646 */ 5647 ldcp->rx_thr_flags &= ~VSW_WTHR_RUNNING; 5648 cv_signal(&ldcp->rx_thr_cv); 5649 CALLB_CPR_EXIT(&cprinfo); 5650 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5651 thread_exit(); 5652 } 5653 5654 /* vsw_stop_rx_thread -- Co-ordinate with receive thread to stop it */ 5655 static void 5656 vsw_stop_rx_thread(vsw_ldc_t *ldcp) 5657 { 5658 vsw_t *vswp = ldcp->ldc_vswp; 5659 5660 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5661 /* 5662 * Send a stop request by setting the stop flag and 5663 * wait until the receive thread stops. 5664 */ 5665 mutex_enter(&ldcp->rx_thr_lock); 5666 if (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5667 ldcp->rx_thr_flags |= VSW_WTHR_STOP; 5668 cv_signal(&ldcp->rx_thr_cv); 5669 while (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5670 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5671 } 5672 } 5673 mutex_exit(&ldcp->rx_thr_lock); 5674 ldcp->rx_thread = NULL; 5675 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5676 } 5677 5678 /* 5679 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data. 5680 * This thread is woken up by the vsw_portsend to transmit 5681 * packets. 5682 */ 5683 static void 5684 vsw_ldc_tx_worker(void *arg) 5685 { 5686 callb_cpr_t cprinfo; 5687 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5688 vsw_t *vswp = ldcp->ldc_vswp; 5689 mblk_t *mp; 5690 mblk_t *tmp; 5691 5692 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5693 CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr, 5694 "vnet_tx_thread"); 5695 mutex_enter(&ldcp->tx_thr_lock); 5696 ldcp->tx_thr_flags |= VSW_WTHR_RUNNING; 5697 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) { 5698 5699 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5700 /* 5701 * Wait until the data is received or a stop 5702 * request is received. 5703 */ 5704 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) && 5705 (ldcp->tx_mhead == NULL)) { 5706 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5707 } 5708 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock) 5709 5710 /* 5711 * First process the stop request. 
5712 */ 5713 if (ldcp->tx_thr_flags & VSW_WTHR_STOP) { 5714 D2(vswp, "%s(%lld):tx thread stopped\n", 5715 __func__, ldcp->ldc_id); 5716 break; 5717 } 5718 mp = ldcp->tx_mhead; 5719 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 5720 ldcp->tx_cnt = 0; 5721 mutex_exit(&ldcp->tx_thr_lock); 5722 D2(vswp, "%s(%lld):calling vsw_ldcsend\n", 5723 __func__, ldcp->ldc_id); 5724 while (mp != NULL) { 5725 tmp = mp->b_next; 5726 mp->b_next = mp->b_prev = NULL; 5727 (void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries); 5728 mp = tmp; 5729 } 5730 mutex_enter(&ldcp->tx_thr_lock); 5731 } 5732 5733 /* 5734 * Update the run status and wakeup the thread that 5735 * has sent the stop request. 5736 */ 5737 ldcp->tx_thr_flags &= ~VSW_WTHR_RUNNING; 5738 cv_signal(&ldcp->tx_thr_cv); 5739 CALLB_CPR_EXIT(&cprinfo); 5740 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5741 thread_exit(); 5742 } 5743 5744 /* vsw_stop_tx_thread -- Co-ordinate with receive thread to stop it */ 5745 static void 5746 vsw_stop_tx_thread(vsw_ldc_t *ldcp) 5747 { 5748 vsw_t *vswp = ldcp->ldc_vswp; 5749 5750 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5751 /* 5752 * Send a stop request by setting the stop flag and 5753 * wait until the receive thread stops. 5754 */ 5755 mutex_enter(&ldcp->tx_thr_lock); 5756 if (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5757 ldcp->tx_thr_flags |= VSW_WTHR_STOP; 5758 cv_signal(&ldcp->tx_thr_cv); 5759 while (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5760 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5761 } 5762 } 5763 mutex_exit(&ldcp->tx_thr_lock); 5764 ldcp->tx_thread = NULL; 5765 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5766 } 5767 5768 /* vsw_reclaim_dring -- reclaim descriptors */ 5769 static int 5770 vsw_reclaim_dring(dring_info_t *dp, int start) 5771 { 5772 int i, j, len; 5773 vsw_private_desc_t *priv_addr; 5774 vnet_public_desc_t *pub_addr; 5775 5776 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 5777 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 5778 len = dp->num_descriptors; 5779 5780 D2(NULL, "%s: start index %ld\n", __func__, start); 5781 5782 j = 0; 5783 for (i = start; j < len; i = (i + 1) % len, j++) { 5784 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 5785 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 5786 5787 mutex_enter(&priv_addr->dstate_lock); 5788 if (pub_addr->hdr.dstate != VIO_DESC_DONE) { 5789 mutex_exit(&priv_addr->dstate_lock); 5790 break; 5791 } 5792 pub_addr->hdr.dstate = VIO_DESC_FREE; 5793 priv_addr->dstate = VIO_DESC_FREE; 5794 /* clear all the fields */ 5795 priv_addr->datalen = 0; 5796 pub_addr->hdr.ack = 0; 5797 mutex_exit(&priv_addr->dstate_lock); 5798 5799 D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx", 5800 i, pub_addr->hdr.dstate, priv_addr->dstate); 5801 } 5802 return (j); 5803 } 5804 5805 /* 5806 * Debugging routines 5807 */ 5808 static void 5809 display_state(void) 5810 { 5811 vsw_t *vswp; 5812 vsw_port_list_t *plist; 5813 vsw_port_t *port; 5814 vsw_ldc_list_t *ldcl; 5815 vsw_ldc_t *ldcp; 5816 extern vsw_t *vsw_head; 5817 5818 cmn_err(CE_NOTE, "***** system state *****"); 5819 5820 for (vswp = vsw_head; vswp; vswp = vswp->next) { 5821 plist = &vswp->plist; 5822 READ_ENTER(&plist->lockrw); 5823 cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n", 5824 vswp->instance, plist->num_ports); 5825 5826 for (port = plist->head; port != NULL; port = port->p_next) { 5827 ldcl = &port->p_ldclist; 5828 cmn_err(CE_CONT, "port %d : %d ldcs attached\n", 5829 port->p_instance, port->num_ldcs); 5830 READ_ENTER(&ldcl->lockrw); 5831 ldcp 
= ldcl->head; 5832 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 5833 cmn_err(CE_CONT, "chan %lu : dev %d : " 5834 "status %d : phase %u\n", 5835 ldcp->ldc_id, ldcp->dev_class, 5836 ldcp->ldc_status, ldcp->hphase); 5837 cmn_err(CE_CONT, "chan %lu : lsession %lu : " 5838 "psession %lu\n", ldcp->ldc_id, 5839 ldcp->local_session, ldcp->peer_session); 5840 5841 cmn_err(CE_CONT, "Inbound lane:\n"); 5842 display_lane(&ldcp->lane_in); 5843 cmn_err(CE_CONT, "Outbound lane:\n"); 5844 display_lane(&ldcp->lane_out); 5845 } 5846 RW_EXIT(&ldcl->lockrw); 5847 } 5848 RW_EXIT(&plist->lockrw); 5849 } 5850 cmn_err(CE_NOTE, "***** system state *****"); 5851 } 5852 5853 static void 5854 display_lane(lane_t *lp) 5855 { 5856 dring_info_t *drp; 5857 5858 cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n", 5859 lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu); 5860 cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n", 5861 lp->addr_type, lp->addr, lp->xfer_mode); 5862 cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp); 5863 5864 cmn_err(CE_CONT, "Dring info:\n"); 5865 for (drp = lp->dringp; drp != NULL; drp = drp->next) { 5866 cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n", 5867 drp->num_descriptors, drp->descriptor_size); 5868 cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle); 5869 cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n", 5870 (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr); 5871 cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n", 5872 drp->ident, drp->end_idx); 5873 display_ring(drp); 5874 } 5875 } 5876 5877 static void 5878 display_ring(dring_info_t *dringp) 5879 { 5880 uint64_t i; 5881 uint64_t priv_count = 0; 5882 uint64_t pub_count = 0; 5883 vnet_public_desc_t *pub_addr = NULL; 5884 vsw_private_desc_t *priv_addr = NULL; 5885 5886 for (i = 0; i < vsw_ntxds; i++) { 5887 if (dringp->pub_addr != NULL) { 5888 pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i; 5889 5890 if (pub_addr->hdr.dstate == VIO_DESC_FREE) 5891 pub_count++; 5892 } 5893 5894 if (dringp->priv_addr != NULL) { 5895 priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i; 5896 5897 if (priv_addr->dstate == VIO_DESC_FREE) 5898 priv_count++; 5899 } 5900 } 5901 cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n", 5902 i, priv_count, pub_count); 5903 } 5904 5905 static void 5906 dump_flags(uint64_t state) 5907 { 5908 int i; 5909 5910 typedef struct flag_name { 5911 int flag_val; 5912 char *flag_name; 5913 } flag_name_t; 5914 5915 flag_name_t flags[] = { 5916 VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT", 5917 VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV", 5918 VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV", 5919 VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT", 5920 VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV", 5921 VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT", 5922 VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT", 5923 VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV", 5924 VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT", 5925 VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV", 5926 VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT", 5927 VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV", 5928 VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT", 5929 VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV", 5930 VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT", 5931 VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV", 5932 VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT", 5933 VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV", 5934 VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT", 5935 VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV", 5936 VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT", 5937 VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV", 5938 VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT", 5939 
VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV", 5940 VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT", 5941 VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV", 5942 VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT", 5943 VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV", 5944 VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT", 5945 VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV", 5946 VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"}; 5947 5948 DERR(NULL, "DUMP_FLAGS: %llx\n", state); 5949 for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) { 5950 if (state & flags[i].flag_val) 5951 DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name); 5952 } 5953 } 5954