1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/ldc.h> 66 #include <sys/vsw_fdb.h> 67 #include <sys/vsw.h> 68 #include <sys/vio_mailbox.h> 69 #include <sys/vnet_mailbox.h> 70 #include <sys/vnet_common.h> 71 #include <sys/vio_util.h> 72 #include <sys/sdt.h> 73 #include <sys/atomic.h> 74 #include <sys/callb.h> 75 #include <sys/vlan.h> 76 77 /* Port add/deletion/etc routines */ 78 static int vsw_port_delete(vsw_port_t *port); 79 static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id); 80 static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id); 81 static int vsw_init_ldcs(vsw_port_t *port); 82 static int vsw_uninit_ldcs(vsw_port_t *port); 83 static int vsw_ldc_init(vsw_ldc_t *ldcp); 84 static int vsw_ldc_uninit(vsw_ldc_t *ldcp); 85 static int vsw_drain_ldcs(vsw_port_t *port); 86 static int vsw_drain_port_taskq(vsw_port_t *port); 87 static void vsw_marker_task(void *); 88 static int vsw_plist_del_node(vsw_t *, vsw_port_t *port); 89 int vsw_detach_ports(vsw_t *vswp); 90 int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 91 mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 92 int vsw_port_detach(vsw_t *vswp, int p_instance); 93 int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count); 94 int vsw_port_attach(vsw_port_t *portp); 95 vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 96 void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 97 int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t); 98 void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate); 99 100 /* Interrupt routines */ 101 static uint_t vsw_ldc_cb(uint64_t cb, 
caddr_t arg); 102 103 /* Handshake routines */ 104 static void vsw_ldc_reinit(vsw_ldc_t *); 105 static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t); 106 static void vsw_conn_task(void *); 107 static int vsw_check_flag(vsw_ldc_t *, int, uint64_t); 108 static void vsw_next_milestone(vsw_ldc_t *); 109 static int vsw_supported_version(vio_ver_msg_t *); 110 static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp); 111 static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp); 112 113 /* Data processing routines */ 114 static void vsw_process_pkt(void *); 115 static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *); 116 static void vsw_process_ctrl_pkt(void *); 117 static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *); 118 static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *); 119 static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *); 120 static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *); 121 static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *); 122 static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *); 123 static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *, 124 uint32_t); 125 static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *); 126 static void vsw_process_pkt_data_nop(void *, void *, uint32_t); 127 static void vsw_process_pkt_data(void *, void *, uint32_t); 128 static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *); 129 static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *); 130 131 /* Switching/data transmit routines */ 132 static int vsw_dringsend(vsw_ldc_t *, mblk_t *); 133 static int vsw_descrsend(vsw_ldc_t *, mblk_t *); 134 static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp); 135 static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries); 136 static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 137 static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 138 139 /* Packet creation routines */ 140 static void vsw_send_ver(void *); 141 static void vsw_send_attr(vsw_ldc_t *); 142 static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *); 143 static void vsw_send_dring_info(vsw_ldc_t *); 144 static void vsw_send_rdx(vsw_ldc_t *); 145 146 /* Dring routines */ 147 static dring_info_t *vsw_create_dring(vsw_ldc_t *); 148 static void vsw_create_privring(vsw_ldc_t *); 149 static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp); 150 static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **, 151 int *); 152 static dring_info_t *vsw_ident2dring(lane_t *, uint64_t); 153 static int vsw_reclaim_dring(dring_info_t *dp, int start); 154 155 static void vsw_set_lane_attr(vsw_t *, lane_t *); 156 static int vsw_check_attr(vnet_attr_msg_t *, vsw_ldc_t *); 157 static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg); 158 static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *); 159 static int vsw_check_dring_info(vio_dring_reg_msg_t *); 160 161 /* Rcv/Tx thread routines */ 162 static void vsw_stop_tx_thread(vsw_ldc_t *ldcp); 163 static void vsw_ldc_tx_worker(void *arg); 164 static void vsw_stop_rx_thread(vsw_ldc_t *ldcp); 165 static void vsw_ldc_rx_worker(void *arg); 166 167 /* Misc support routines */ 168 static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf); 169 static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t); 170 static int vsw_free_ring(dring_info_t *); 171 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 172 static int vsw_get_same_dest_list(struct ether_header 
*ehp, 173 mblk_t **rhead, mblk_t **rtail, mblk_t **mpp); 174 static mblk_t *vsw_dupmsgchain(mblk_t *mp); 175 176 /* Debugging routines */ 177 static void dump_flags(uint64_t); 178 static void display_state(void); 179 static void display_lane(lane_t *); 180 static void display_ring(dring_info_t *); 181 182 /* 183 * Functions imported from other files. 184 */ 185 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 186 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 187 extern void vsw_reconfig_hw(vsw_t *); 188 extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port); 189 extern void vsw_del_mcst_port(vsw_port_t *port); 190 extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 191 extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 192 extern void vsw_fdbe_add(vsw_t *vswp, void *port); 193 extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr); 194 extern void vsw_create_vlans(void *arg, int type); 195 extern void vsw_destroy_vlans(void *arg, int type); 196 extern void vsw_vlan_add_ids(void *arg, int type); 197 extern void vsw_vlan_remove_ids(void *arg, int type); 198 extern boolean_t vsw_frame_lookup_vid(void *arg, int caller, 199 struct ether_header *ehp, uint16_t *vidp); 200 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 201 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 202 mblk_t **npt); 203 extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 204 extern void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp); 205 extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp); 206 extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg); 207 extern void vsw_hio_stop_port(vsw_port_t *portp); 208 209 #define VSW_NUM_VMPOOLS 3 /* number of vio mblk pools */ 210 211 /* 212 * Tunables used in this file. 
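 *
 * These variables are defined elsewhere in the driver (note the extern
 * declarations that follow); they are listed here so this file's uses are
 * easy to find. Purely as an illustration (values are hypothetical, not
 * recommendations), a kernel tunable of this kind can typically be
 * overridden at boot time from /etc/system:
 *
 *	set vsw:vsw_ldc_rxthr_enabled = 0
 *	set vsw:vsw_chain_len = 16
 *
 * The authoritative defaults live where the variables are defined.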
213 */ 214 extern int vsw_num_handshakes; 215 extern int vsw_wretries; 216 extern int vsw_desc_delay; 217 extern int vsw_read_attempts; 218 extern int vsw_ldc_tx_delay; 219 extern int vsw_ldc_tx_retries; 220 extern boolean_t vsw_ldc_rxthr_enabled; 221 extern boolean_t vsw_ldc_txthr_enabled; 222 extern uint32_t vsw_ntxds; 223 extern uint32_t vsw_max_tx_qcount; 224 extern uint32_t vsw_chain_len; 225 extern uint32_t vsw_mblk_size1; 226 extern uint32_t vsw_mblk_size2; 227 extern uint32_t vsw_mblk_size3; 228 extern uint32_t vsw_num_mblks1; 229 extern uint32_t vsw_num_mblks2; 230 extern uint32_t vsw_num_mblks3; 231 extern boolean_t vsw_obp_ver_proto_workaround; 232 233 #define LDC_ENTER_LOCK(ldcp) \ 234 mutex_enter(&((ldcp)->ldc_cblock));\ 235 mutex_enter(&((ldcp)->ldc_rxlock));\ 236 mutex_enter(&((ldcp)->ldc_txlock)); 237 #define LDC_EXIT_LOCK(ldcp) \ 238 mutex_exit(&((ldcp)->ldc_txlock));\ 239 mutex_exit(&((ldcp)->ldc_rxlock));\ 240 mutex_exit(&((ldcp)->ldc_cblock)); 241 242 #define VSW_VER_EQ(ldcp, major, minor) \ 243 ((ldcp)->lane_out.ver_major == (major) && \ 244 (ldcp)->lane_out.ver_minor == (minor)) 245 246 #define VSW_VER_LT(ldcp, major, minor) \ 247 (((ldcp)->lane_out.ver_major < (major)) || \ 248 ((ldcp)->lane_out.ver_major == (major) && \ 249 (ldcp)->lane_out.ver_minor < (minor))) 250 251 #define VSW_VER_GTEQ(ldcp, major, minor) \ 252 (((ldcp)->lane_out.ver_major > (major)) || \ 253 ((ldcp)->lane_out.ver_major == (major) && \ 254 (ldcp)->lane_out.ver_minor >= (minor))) 255 256 /* supported versions */ 257 static ver_sup_t vsw_versions[] = { {1, 3} }; 258 259 /* 260 * For the moment the state dump routines have their own 261 * private flag. 262 */ 263 #define DUMP_STATE 0 264 265 #if DUMP_STATE 266 267 #define DUMP_TAG(tag) \ 268 { \ 269 D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \ 270 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \ 271 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \ 272 } 273 274 #define DUMP_TAG_PTR(tag) \ 275 { \ 276 D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \ 277 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \ 278 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \ 279 } 280 281 #define DUMP_FLAGS(flags) dump_flags(flags); 282 #define DISPLAY_STATE() display_state() 283 284 #else 285 286 #define DUMP_TAG(tag) 287 #define DUMP_TAG_PTR(tag) 288 #define DUMP_FLAGS(state) 289 #define DISPLAY_STATE() 290 291 #endif /* DUMP_STATE */ 292 293 /* 294 * Attach the specified port. 295 * 296 * Returns 0 on success, 1 on failure. 297 */ 298 int 299 vsw_port_attach(vsw_port_t *port) 300 { 301 vsw_t *vswp = port->p_vswp; 302 vsw_port_list_t *plist = &vswp->plist; 303 vsw_port_t *p, **pp; 304 int i; 305 int nids = port->num_ldcs; 306 uint64_t *ldcids; 307 308 D1(vswp, "%s: enter : port %d", __func__, port->p_instance); 309 310 /* port already exists? 
*/ 311 READ_ENTER(&plist->lockrw); 312 for (p = plist->head; p != NULL; p = p->p_next) { 313 if (p->p_instance == port->p_instance) { 314 DWARN(vswp, "%s: port instance %d already attached", 315 __func__, p->p_instance); 316 RW_EXIT(&plist->lockrw); 317 return (1); 318 } 319 } 320 RW_EXIT(&plist->lockrw); 321 322 rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL); 323 324 mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL); 325 mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL); 326 327 mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL); 328 cv_init(&port->state_cv, NULL, CV_DRIVER, NULL); 329 port->state = VSW_PORT_INIT; 330 331 D2(vswp, "%s: %d nids", __func__, nids); 332 ldcids = port->ldc_ids; 333 for (i = 0; i < nids; i++) { 334 D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]); 335 if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) { 336 DERR(vswp, "%s: ldc_attach failed", __func__); 337 338 rw_destroy(&port->p_ldclist.lockrw); 339 340 cv_destroy(&port->state_cv); 341 mutex_destroy(&port->state_lock); 342 343 mutex_destroy(&port->tx_lock); 344 mutex_destroy(&port->mca_lock); 345 kmem_free(port, sizeof (vsw_port_t)); 346 return (1); 347 } 348 } 349 350 if (vswp->switching_setup_done == B_TRUE) { 351 /* 352 * If the underlying physical device has been setup, 353 * program the mac address of this port in it. 354 * Otherwise, port macaddr will be set after the physical 355 * device is successfully setup by the timeout handler. 356 */ 357 mutex_enter(&vswp->hw_lock); 358 (void) vsw_set_hw(vswp, port, VSW_VNETPORT); 359 mutex_exit(&vswp->hw_lock); 360 } 361 362 /* create the fdb entry for this port/mac address */ 363 vsw_fdbe_add(vswp, port); 364 365 vsw_create_vlans(port, VSW_VNETPORT); 366 367 WRITE_ENTER(&plist->lockrw); 368 369 /* link it into the list of ports for this vsw instance */ 370 pp = (vsw_port_t **)(&plist->head); 371 port->p_next = *pp; 372 *pp = port; 373 plist->num_ports++; 374 375 RW_EXIT(&plist->lockrw); 376 377 /* 378 * Initialise the port and any ldc's under it. 379 */ 380 (void) vsw_init_ldcs(port); 381 382 D1(vswp, "%s: exit", __func__); 383 return (0); 384 } 385 386 /* 387 * Detach the specified port. 388 * 389 * Returns 0 on success, 1 on failure. 390 */ 391 int 392 vsw_port_detach(vsw_t *vswp, int p_instance) 393 { 394 vsw_port_t *port = NULL; 395 vsw_port_list_t *plist = &vswp->plist; 396 397 D1(vswp, "%s: enter: port id %d", __func__, p_instance); 398 399 WRITE_ENTER(&plist->lockrw); 400 401 if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) { 402 RW_EXIT(&plist->lockrw); 403 return (1); 404 } 405 406 if (vsw_plist_del_node(vswp, port)) { 407 RW_EXIT(&plist->lockrw); 408 return (1); 409 } 410 411 /* cleanup any HybridIO for this port */ 412 vsw_hio_stop_port(port); 413 414 /* 415 * No longer need to hold writer lock on port list now 416 * that we have unlinked the target port from the list. 417 */ 418 RW_EXIT(&plist->lockrw); 419 420 /* Remove the fdb entry for this port/mac address */ 421 vsw_fdbe_del(vswp, &(port->p_macaddr)); 422 vsw_destroy_vlans(port, VSW_VNETPORT); 423 424 /* Remove any multicast addresses.. */ 425 vsw_del_mcst_port(port); 426 427 /* Remove address if was programmed into HW. */ 428 mutex_enter(&vswp->hw_lock); 429 430 /* 431 * Port's address may not have been set in hardware. This could 432 * happen if the underlying physical device is not yet available and 433 * vsw_setup_switching_timeout() may be in progress. 434 * We remove its addr from hardware only if it has been set before. 
435 */ 436 if (port->addr_set != VSW_ADDR_UNSET) 437 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 438 439 if (vswp->recfg_reqd) 440 vsw_reconfig_hw(vswp); 441 442 mutex_exit(&vswp->hw_lock); 443 444 if (vsw_port_delete(port)) { 445 return (1); 446 } 447 448 D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance); 449 return (0); 450 } 451 452 /* 453 * Detach all active ports. 454 * 455 * Returns 0 on success, 1 on failure. 456 */ 457 int 458 vsw_detach_ports(vsw_t *vswp) 459 { 460 vsw_port_list_t *plist = &vswp->plist; 461 vsw_port_t *port = NULL; 462 463 D1(vswp, "%s: enter", __func__); 464 465 WRITE_ENTER(&plist->lockrw); 466 467 while ((port = plist->head) != NULL) { 468 if (vsw_plist_del_node(vswp, port)) { 469 DERR(vswp, "%s: Error deleting port %d" 470 " from port list", __func__, port->p_instance); 471 RW_EXIT(&plist->lockrw); 472 return (1); 473 } 474 475 /* Remove address if was programmed into HW. */ 476 mutex_enter(&vswp->hw_lock); 477 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 478 mutex_exit(&vswp->hw_lock); 479 480 /* Remove the fdb entry for this port/mac address */ 481 vsw_fdbe_del(vswp, &(port->p_macaddr)); 482 vsw_destroy_vlans(port, VSW_VNETPORT); 483 484 /* Remove any multicast addresses.. */ 485 vsw_del_mcst_port(port); 486 487 /* 488 * No longer need to hold the lock on the port list 489 * now that we have unlinked the target port from the 490 * list. 491 */ 492 RW_EXIT(&plist->lockrw); 493 if (vsw_port_delete(port)) { 494 DERR(vswp, "%s: Error deleting port %d", 495 __func__, port->p_instance); 496 return (1); 497 } 498 WRITE_ENTER(&plist->lockrw); 499 } 500 RW_EXIT(&plist->lockrw); 501 502 D1(vswp, "%s: exit", __func__); 503 504 return (0); 505 } 506 507 /* 508 * Delete the specified port. 509 * 510 * Returns 0 on success, 1 on failure. 511 */ 512 static int 513 vsw_port_delete(vsw_port_t *port) 514 { 515 vsw_ldc_list_t *ldcl; 516 vsw_t *vswp = port->p_vswp; 517 int num_ldcs; 518 519 D1(vswp, "%s: enter : port id %d", __func__, port->p_instance); 520 521 (void) vsw_uninit_ldcs(port); 522 523 /* 524 * Wait for any pending ctrl msg tasks which reference this 525 * port to finish. 526 */ 527 if (vsw_drain_port_taskq(port)) 528 return (1); 529 530 /* 531 * Wait for any active callbacks to finish 532 */ 533 if (vsw_drain_ldcs(port)) 534 return (1); 535 536 ldcl = &port->p_ldclist; 537 num_ldcs = port->num_ldcs; 538 WRITE_ENTER(&ldcl->lockrw); 539 while (num_ldcs > 0) { 540 if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) { 541 cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld", 542 vswp->instance, ldcl->head->ldc_id); 543 RW_EXIT(&ldcl->lockrw); 544 port->num_ldcs = num_ldcs; 545 return (1); 546 } 547 num_ldcs--; 548 } 549 RW_EXIT(&ldcl->lockrw); 550 551 rw_destroy(&port->p_ldclist.lockrw); 552 553 mutex_destroy(&port->mca_lock); 554 mutex_destroy(&port->tx_lock); 555 556 cv_destroy(&port->state_cv); 557 mutex_destroy(&port->state_lock); 558 559 if (port->num_ldcs != 0) { 560 kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t)); 561 port->num_ldcs = 0; 562 } 563 kmem_free(port, sizeof (vsw_port_t)); 564 565 D1(vswp, "%s: exit", __func__); 566 567 return (0); 568 } 569 570 /* 571 * Attach a logical domain channel (ldc) under a specified port. 572 * 573 * Returns 0 on success, 1 on failure. 
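 *
 * Setup below is unwound with a "progress" bitmask: each resource that is
 * successfully created ORs a PROG_* bit into 'progress', and the
 * ldc_attach_fail path tears down only what those bits record. A minimal
 * sketch of the idiom (illustrative only, see the actual code below):
 *
 *	progress |= PROG_mblks;
 *	...
 *	ldc_attach_fail:
 *		if (progress & PROG_mblks)
 *			vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);
 *		kmem_free(ldcp, sizeof (vsw_ldc_t));
 *		return (1);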
574 */ 575 static int 576 vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id) 577 { 578 vsw_t *vswp = port->p_vswp; 579 vsw_ldc_list_t *ldcl = &port->p_ldclist; 580 vsw_ldc_t *ldcp = NULL; 581 ldc_attr_t attr; 582 ldc_status_t istatus; 583 int status = DDI_FAILURE; 584 int rv; 585 char kname[MAXNAMELEN]; 586 enum { PROG_init = 0x0, PROG_mblks = 0x1, 587 PROG_callback = 0x2, PROG_rx_thread = 0x4, 588 PROG_tx_thread = 0x8} 589 progress; 590 591 progress = PROG_init; 592 593 D1(vswp, "%s: enter", __func__); 594 595 ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP); 596 if (ldcp == NULL) { 597 DERR(vswp, "%s: kmem_zalloc failed", __func__); 598 return (1); 599 } 600 ldcp->ldc_id = ldc_id; 601 602 /* Allocate pools of receive mblks */ 603 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS, 604 vsw_mblk_size1, vsw_mblk_size2, vsw_mblk_size3, 605 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3); 606 if (rv) { 607 DWARN(vswp, "%s: unable to create free mblk pools for" 608 " channel %ld (rv %d)", __func__, ldc_id, rv); 609 kmem_free(ldcp, sizeof (vsw_ldc_t)); 610 return (1); 611 } 612 613 progress |= PROG_mblks; 614 615 mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL); 616 mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL); 617 mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL); 618 mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL); 619 cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL); 620 rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL); 621 rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL); 622 623 /* required for handshake with peer */ 624 ldcp->local_session = (uint64_t)ddi_get_lbolt(); 625 ldcp->peer_session = 0; 626 ldcp->session_status = 0; 627 ldcp->hss_id = 1; /* Initial handshake session id */ 628 629 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 630 631 /* only set for outbound lane, inbound set by peer */ 632 vsw_set_lane_attr(vswp, &ldcp->lane_out); 633 634 attr.devclass = LDC_DEV_NT_SVC; 635 attr.instance = ddi_get_instance(vswp->dip); 636 attr.mode = LDC_MODE_UNRELIABLE; 637 attr.mtu = VSW_LDC_MTU; 638 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); 639 if (status != 0) { 640 DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)", 641 __func__, ldc_id, status); 642 goto ldc_attach_fail; 643 } 644 645 if (vsw_ldc_rxthr_enabled) { 646 ldcp->rx_thr_flags = 0; 647 648 mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL); 649 cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL); 650 ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 651 vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 652 653 progress |= PROG_rx_thread; 654 if (ldcp->rx_thread == NULL) { 655 DWARN(vswp, "%s(%lld): Failed to create worker thread", 656 __func__, ldc_id); 657 goto ldc_attach_fail; 658 } 659 } 660 661 if (vsw_ldc_txthr_enabled) { 662 ldcp->tx_thr_flags = 0; 663 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 664 665 mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL); 666 cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL); 667 ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 668 vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 669 670 progress |= PROG_tx_thread; 671 if (ldcp->tx_thread == NULL) { 672 DWARN(vswp, "%s(%lld): Failed to create worker thread", 673 __func__, ldc_id); 674 goto ldc_attach_fail; 675 } 676 } 677 678 status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp); 679 if (status != 0) { 680 DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)", 681 __func__, ldc_id, status); 682 (void) ldc_fini(ldcp->ldc_handle); 683 
goto ldc_attach_fail; 684 } 685 /* 686 * allocate a message for ldc_read()s, big enough to hold ctrl and 687 * data msgs, including raw data msgs used to recv priority frames. 688 */ 689 ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size; 690 ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP); 691 692 progress |= PROG_callback; 693 694 mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL); 695 696 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 697 DERR(vswp, "%s: ldc_status failed", __func__); 698 mutex_destroy(&ldcp->status_lock); 699 goto ldc_attach_fail; 700 } 701 702 ldcp->ldc_status = istatus; 703 ldcp->ldc_port = port; 704 ldcp->ldc_vswp = vswp; 705 706 vsw_reset_vnet_proto_ops(ldcp); 707 708 (void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id); 709 ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance, 710 kname, &ldcp->ldc_stats); 711 if (ldcp->ksp == NULL) { 712 DERR(vswp, "%s: kstats setup failed", __func__); 713 goto ldc_attach_fail; 714 } 715 716 /* link it into the list of channels for this port */ 717 WRITE_ENTER(&ldcl->lockrw); 718 ldcp->ldc_next = ldcl->head; 719 ldcl->head = ldcp; 720 RW_EXIT(&ldcl->lockrw); 721 722 D1(vswp, "%s: exit", __func__); 723 return (0); 724 725 ldc_attach_fail: 726 727 if (progress & PROG_callback) { 728 (void) ldc_unreg_callback(ldcp->ldc_handle); 729 kmem_free(ldcp->ldcmsg, ldcp->msglen); 730 } 731 732 if (progress & PROG_rx_thread) { 733 if (ldcp->rx_thread != NULL) { 734 vsw_stop_rx_thread(ldcp); 735 } 736 mutex_destroy(&ldcp->rx_thr_lock); 737 cv_destroy(&ldcp->rx_thr_cv); 738 } 739 740 if (progress & PROG_tx_thread) { 741 if (ldcp->tx_thread != NULL) { 742 vsw_stop_tx_thread(ldcp); 743 } 744 mutex_destroy(&ldcp->tx_thr_lock); 745 cv_destroy(&ldcp->tx_thr_cv); 746 } 747 if (ldcp->ksp != NULL) { 748 vgen_destroy_kstats(ldcp->ksp); 749 } 750 mutex_destroy(&ldcp->ldc_txlock); 751 mutex_destroy(&ldcp->ldc_rxlock); 752 mutex_destroy(&ldcp->ldc_cblock); 753 mutex_destroy(&ldcp->drain_cv_lock); 754 755 cv_destroy(&ldcp->drain_cv); 756 757 rw_destroy(&ldcp->lane_in.dlistrw); 758 rw_destroy(&ldcp->lane_out.dlistrw); 759 760 if (progress & PROG_mblks) { 761 vio_destroy_multipools(&ldcp->vmp, &vswp->rxh); 762 } 763 kmem_free(ldcp, sizeof (vsw_ldc_t)); 764 765 return (1); 766 } 767 768 /* 769 * Detach a logical domain channel (ldc) belonging to a 770 * particular port. 771 * 772 * Returns 0 on success, 1 on failure. 
 */
static int
vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_t	*ldcp, *prev_ldcp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	int		rv;

	prev_ldcp = ldcl->head;
	for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) {
		if (ldcp->ldc_id == ldc_id) {
			break;
		}
	}

	/* specified ldc id not found */
	if (ldcp == NULL) {
		DERR(vswp, "%s: ldcp = NULL", __func__);
		return (1);
	}

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/* Stop the receive thread */
	if (ldcp->rx_thread != NULL) {
		vsw_stop_rx_thread(ldcp);
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}
	kmem_free(ldcp->ldcmsg, ldcp->msglen);

	/* Stop the tx thread */
	if (ldcp->tx_thread != NULL) {
		vsw_stop_tx_thread(ldcp);
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
		if (ldcp->tx_mhead != NULL) {
			freemsgchain(ldcp->tx_mhead);
			ldcp->tx_mhead = ldcp->tx_mtail = NULL;
			ldcp->tx_cnt = 0;
		}
	}

	/* Destroy kstats */
	vgen_destroy_kstats(ldcp->ksp);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
	 */
	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	/*
	 * If the close fails we are in serious trouble, as we won't
	 * be able to delete the parent port.
	 */
	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
		DERR(vswp, "%s: error %d closing channel %lld",
		    __func__, rv, ldcp->ldc_id);
		return (1);
	}

	(void) ldc_fini(ldcp->ldc_handle);

	ldcp->ldc_status = LDC_INIT;
	ldcp->ldc_handle = NULL;
	ldcp->ldc_vswp = NULL;

	/*
	 * Most likely some mblks are still in use and
	 * have not been returned to the pool. These mblks are
	 * added to the pool that is maintained in the device instance.
	 * Another attempt will be made to destroy the pool
	 * when the device detaches.
	 */
	vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);

	/* unlink it from the list */
	prev_ldcp = ldcp->ldc_next;

	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	cv_destroy(&ldcp->drain_cv);
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->status_lock);
	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (0);
}

/*
 * Open and attempt to bring up the channel. Note that the channel
 * can only be brought up if the peer has also opened the channel.
 *
 * Returns 0 if it can open and bring up the channel, otherwise
 * returns 1.
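 *
 * In outline, the bring-up sequence below is (sketch of the code that
 * follows, not a substitute for it):
 *
 *	ldc_open(handle);
 *	ldc_status(handle, &istatus);	must report LDC_OPEN or LDC_READY
 *	ldc_up(handle);			asynchronous; may fail harmlessly
 *					if the peer is not ready yet
 *	ldc_status(handle, &status);	only if this now reports LDC_UP do
 *					we raise VSW_CONN_UP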
876 */ 877 static int 878 vsw_ldc_init(vsw_ldc_t *ldcp) 879 { 880 vsw_t *vswp = ldcp->ldc_vswp; 881 ldc_status_t istatus = 0; 882 int rv; 883 884 D1(vswp, "%s: enter", __func__); 885 886 LDC_ENTER_LOCK(ldcp); 887 888 /* don't start at 0 in case clients don't like that */ 889 ldcp->next_ident = 1; 890 891 rv = ldc_open(ldcp->ldc_handle); 892 if (rv != 0) { 893 DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)", 894 __func__, ldcp->ldc_id, rv); 895 LDC_EXIT_LOCK(ldcp); 896 return (1); 897 } 898 899 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 900 DERR(vswp, "%s: unable to get status", __func__); 901 LDC_EXIT_LOCK(ldcp); 902 return (1); 903 904 } else if (istatus != LDC_OPEN && istatus != LDC_READY) { 905 DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY", 906 __func__, ldcp->ldc_id, istatus); 907 LDC_EXIT_LOCK(ldcp); 908 return (1); 909 } 910 911 mutex_enter(&ldcp->status_lock); 912 ldcp->ldc_status = istatus; 913 mutex_exit(&ldcp->status_lock); 914 915 rv = ldc_up(ldcp->ldc_handle); 916 if (rv != 0) { 917 /* 918 * Not a fatal error for ldc_up() to fail, as peer 919 * end point may simply not be ready yet. 920 */ 921 D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__, 922 ldcp->ldc_id, rv); 923 LDC_EXIT_LOCK(ldcp); 924 return (1); 925 } 926 927 /* 928 * ldc_up() call is non-blocking so need to explicitly 929 * check channel status to see if in fact the channel 930 * is UP. 931 */ 932 mutex_enter(&ldcp->status_lock); 933 if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) { 934 DERR(vswp, "%s: unable to get status", __func__); 935 mutex_exit(&ldcp->status_lock); 936 LDC_EXIT_LOCK(ldcp); 937 return (1); 938 939 } 940 941 if (ldcp->ldc_status == LDC_UP) { 942 D2(vswp, "%s: channel %ld now UP (%ld)", __func__, 943 ldcp->ldc_id, istatus); 944 mutex_exit(&ldcp->status_lock); 945 LDC_EXIT_LOCK(ldcp); 946 947 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 948 return (0); 949 } 950 951 mutex_exit(&ldcp->status_lock); 952 LDC_EXIT_LOCK(ldcp); 953 954 D1(vswp, "%s: exit", __func__); 955 return (0); 956 } 957 958 /* disable callbacks on the channel */ 959 static int 960 vsw_ldc_uninit(vsw_ldc_t *ldcp) 961 { 962 vsw_t *vswp = ldcp->ldc_vswp; 963 int rv; 964 965 D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id); 966 967 LDC_ENTER_LOCK(ldcp); 968 969 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 970 if (rv != 0) { 971 DERR(vswp, "vsw_ldc_uninit(%lld): error disabling " 972 "interrupts (rv = %d)\n", ldcp->ldc_id, rv); 973 LDC_EXIT_LOCK(ldcp); 974 return (1); 975 } 976 977 mutex_enter(&ldcp->status_lock); 978 ldcp->ldc_status = LDC_INIT; 979 mutex_exit(&ldcp->status_lock); 980 981 LDC_EXIT_LOCK(ldcp); 982 983 D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id); 984 985 return (0); 986 } 987 988 static int 989 vsw_init_ldcs(vsw_port_t *port) 990 { 991 vsw_ldc_list_t *ldcl = &port->p_ldclist; 992 vsw_ldc_t *ldcp; 993 994 READ_ENTER(&ldcl->lockrw); 995 ldcp = ldcl->head; 996 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 997 (void) vsw_ldc_init(ldcp); 998 } 999 RW_EXIT(&ldcl->lockrw); 1000 1001 return (0); 1002 } 1003 1004 static int 1005 vsw_uninit_ldcs(vsw_port_t *port) 1006 { 1007 vsw_ldc_list_t *ldcl = &port->p_ldclist; 1008 vsw_ldc_t *ldcp; 1009 1010 D1(NULL, "vsw_uninit_ldcs: enter\n"); 1011 1012 READ_ENTER(&ldcl->lockrw); 1013 ldcp = ldcl->head; 1014 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 1015 (void) vsw_ldc_uninit(ldcp); 1016 } 1017 RW_EXIT(&ldcl->lockrw); 1018 1019 D1(NULL, "vsw_uninit_ldcs: exit\n"); 1020 1021 return (0); 1022 } 1023 1024 /* 1025 * Wait until 
 * the callback(s) associated with the ldcs under the specified
 * port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 *
 * A short explanation of what we are doing below..
 *
 * The simplest approach would be to have a reference counter in
 * the ldc structure which is incremented/decremented by the callbacks as
 * they use the channel. The drain function could then simply disable any
 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
 * there is a tiny window here - before the callback is able to get the lock
 * on the channel it is interrupted and this function gets to execute. It
 * sees that the ref count is zero and believes it's free to delete the
 * associated data structures.
 *
 * We get around this by taking advantage of the fact that before the ldc
 * framework invokes a callback it sets a flag to indicate that there is a
 * callback active (or about to become active). If we attempt to
 * unregister a callback while this active flag is set then the unregister
 * will fail with EWOULDBLOCK.
 *
 * If the unregister fails we do a cv_timedwait. We will either be signaled
 * by the callback as it is exiting (note we have to wait a short period to
 * allow the callback to return fully to the ldc framework and for it to
 * clear the active flag), or by the timer expiring. In either case we again
 * attempt the unregister. We repeat this until we can successfully
 * unregister the callback.
 *
 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
 * the case where the callback has finished but the ldc framework has not yet
 * cleared the active flag. In this case we would never get a cv_signal.
 */
static int
vsw_drain_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	READ_ENTER(&ldcl->lockrw);

	ldcp = ldcl->head;

	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		/*
		 * If we can unregister the channel callback then we
		 * know that there is no callback either running or
		 * scheduled to run for this channel so move on to next
		 * channel in the list.
		 */
		mutex_enter(&ldcp->drain_cv_lock);

		/* prompt active callbacks to quit */
		ldcp->drain_state = VSW_LDC_DRAINING;

		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
			D2(vswp, "%s: unreg callback for chan %ld", __func__,
			    ldcp->ldc_id);
			mutex_exit(&ldcp->drain_cv_lock);
			continue;
		} else {
			/*
			 * If we end up here we know that either 1) a callback
			 * is currently executing, 2) is about to start (i.e.
			 * the ldc framework has set the active flag but
			 * has not actually invoked the callback yet), or 3)
			 * has finished and has returned to the ldc framework
			 * but the ldc framework has not yet cleared the
			 * active bit.
			 *
			 * Wait for it to finish.
1100 */ 1101 while (ldc_unreg_callback(ldcp->ldc_handle) 1102 == EWOULDBLOCK) 1103 (void) cv_timedwait(&ldcp->drain_cv, 1104 &ldcp->drain_cv_lock, lbolt + hz); 1105 1106 mutex_exit(&ldcp->drain_cv_lock); 1107 D2(vswp, "%s: unreg callback for chan %ld after " 1108 "timeout", __func__, ldcp->ldc_id); 1109 } 1110 } 1111 RW_EXIT(&ldcl->lockrw); 1112 1113 D1(vswp, "%s: exit", __func__); 1114 return (0); 1115 } 1116 1117 /* 1118 * Wait until all tasks which reference this port have completed. 1119 * 1120 * Prior to this function being invoked each channel under this port 1121 * should have been quiesced via ldc_set_cb_mode(DISABLE). 1122 */ 1123 static int 1124 vsw_drain_port_taskq(vsw_port_t *port) 1125 { 1126 vsw_t *vswp = port->p_vswp; 1127 1128 D1(vswp, "%s: enter", __func__); 1129 1130 /* 1131 * Mark the port as in the process of being detached, and 1132 * dispatch a marker task to the queue so we know when all 1133 * relevant tasks have completed. 1134 */ 1135 mutex_enter(&port->state_lock); 1136 port->state = VSW_PORT_DETACHING; 1137 1138 if ((vswp->taskq_p == NULL) || 1139 (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task, 1140 port, DDI_NOSLEEP) != DDI_SUCCESS)) { 1141 DERR(vswp, "%s: unable to dispatch marker task", 1142 __func__); 1143 mutex_exit(&port->state_lock); 1144 return (1); 1145 } 1146 1147 /* 1148 * Wait for the marker task to finish. 1149 */ 1150 while (port->state != VSW_PORT_DETACHABLE) 1151 cv_wait(&port->state_cv, &port->state_lock); 1152 1153 mutex_exit(&port->state_lock); 1154 1155 D1(vswp, "%s: exit", __func__); 1156 1157 return (0); 1158 } 1159 1160 static void 1161 vsw_marker_task(void *arg) 1162 { 1163 vsw_port_t *port = arg; 1164 vsw_t *vswp = port->p_vswp; 1165 1166 D1(vswp, "%s: enter", __func__); 1167 1168 mutex_enter(&port->state_lock); 1169 1170 /* 1171 * No further tasks should be dispatched which reference 1172 * this port so ok to mark it as safe to detach. 1173 */ 1174 port->state = VSW_PORT_DETACHABLE; 1175 1176 cv_signal(&port->state_cv); 1177 1178 mutex_exit(&port->state_lock); 1179 1180 D1(vswp, "%s: exit", __func__); 1181 } 1182 1183 vsw_port_t * 1184 vsw_lookup_port(vsw_t *vswp, int p_instance) 1185 { 1186 vsw_port_list_t *plist = &vswp->plist; 1187 vsw_port_t *port; 1188 1189 for (port = plist->head; port != NULL; port = port->p_next) { 1190 if (port->p_instance == p_instance) { 1191 D2(vswp, "vsw_lookup_port: found p_instance\n"); 1192 return (port); 1193 } 1194 } 1195 1196 return (NULL); 1197 } 1198 1199 void 1200 vsw_vlan_unaware_port_reset(vsw_port_t *portp) 1201 { 1202 vsw_ldc_list_t *ldclp; 1203 vsw_ldc_t *ldcp; 1204 1205 ldclp = &portp->p_ldclist; 1206 1207 READ_ENTER(&ldclp->lockrw); 1208 1209 /* 1210 * NOTE: for now, we will assume we have a single channel. 1211 */ 1212 if (ldclp->head == NULL) { 1213 RW_EXIT(&ldclp->lockrw); 1214 return; 1215 } 1216 ldcp = ldclp->head; 1217 1218 mutex_enter(&ldcp->ldc_cblock); 1219 1220 /* 1221 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate 1222 * the connection. See comments in vsw_set_vnet_proto_ops(). 
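 *
 * For reference, the check below expands (per the VSW_VER_LT macro defined
 * near the top of this file) to:
 *
 *	(lane_out.ver_major < 1) ||
 *	(lane_out.ver_major == 1 && lane_out.ver_minor < 3)
 *
 * i.e. it is true for peers that negotiated a protocol version below 1.3
 * and therefore have no knowledge of VLANs.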
1223 */ 1224 if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) && 1225 portp->nvids != 0) { 1226 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1227 } 1228 1229 mutex_exit(&ldcp->ldc_cblock); 1230 1231 RW_EXIT(&ldclp->lockrw); 1232 } 1233 1234 void 1235 vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate) 1236 { 1237 vsw_ldc_list_t *ldclp; 1238 vsw_ldc_t *ldcp; 1239 1240 ldclp = &portp->p_ldclist; 1241 1242 READ_ENTER(&ldclp->lockrw); 1243 1244 /* 1245 * NOTE: for now, we will assume we have a single channel. 1246 */ 1247 if (ldclp->head == NULL) { 1248 RW_EXIT(&ldclp->lockrw); 1249 return; 1250 } 1251 ldcp = ldclp->head; 1252 1253 mutex_enter(&ldcp->ldc_cblock); 1254 1255 /* 1256 * If the peer is HybridIO capable (ver >= 1.3), reset channel 1257 * to trigger re-negotiation, which inturn trigger HybridIO 1258 * setup/cleanup. 1259 */ 1260 if ((ldcp->hphase == VSW_MILESTONE4) && 1261 (portp->p_hio_capable == B_TRUE)) { 1262 if (immediate == B_TRUE) { 1263 (void) ldc_down(ldcp->ldc_handle); 1264 } else { 1265 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1266 } 1267 } 1268 1269 mutex_exit(&ldcp->ldc_cblock); 1270 1271 RW_EXIT(&ldclp->lockrw); 1272 } 1273 1274 /* 1275 * Search for and remove the specified port from the port 1276 * list. Returns 0 if able to locate and remove port, otherwise 1277 * returns 1. 1278 */ 1279 static int 1280 vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port) 1281 { 1282 vsw_port_list_t *plist = &vswp->plist; 1283 vsw_port_t *curr_p, *prev_p; 1284 1285 if (plist->head == NULL) 1286 return (1); 1287 1288 curr_p = prev_p = plist->head; 1289 1290 while (curr_p != NULL) { 1291 if (curr_p == port) { 1292 if (prev_p == curr_p) { 1293 plist->head = curr_p->p_next; 1294 } else { 1295 prev_p->p_next = curr_p->p_next; 1296 } 1297 plist->num_ports--; 1298 break; 1299 } else { 1300 prev_p = curr_p; 1301 curr_p = curr_p->p_next; 1302 } 1303 } 1304 return (0); 1305 } 1306 1307 /* 1308 * Interrupt handler for ldc messages. 1309 */ 1310 static uint_t 1311 vsw_ldc_cb(uint64_t event, caddr_t arg) 1312 { 1313 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 1314 vsw_t *vswp = ldcp->ldc_vswp; 1315 1316 D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 1317 1318 mutex_enter(&ldcp->ldc_cblock); 1319 ldcp->ldc_stats.callbacks++; 1320 1321 mutex_enter(&ldcp->status_lock); 1322 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 1323 mutex_exit(&ldcp->status_lock); 1324 mutex_exit(&ldcp->ldc_cblock); 1325 return (LDC_SUCCESS); 1326 } 1327 mutex_exit(&ldcp->status_lock); 1328 1329 if (event & LDC_EVT_UP) { 1330 /* 1331 * Channel has come up. 1332 */ 1333 D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)", 1334 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1335 1336 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1337 1338 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1339 } 1340 1341 if (event & LDC_EVT_READ) { 1342 /* 1343 * Data available for reading. 1344 */ 1345 D2(vswp, "%s: id(ld) event(%llx) data READ", 1346 __func__, ldcp->ldc_id, event); 1347 1348 if (ldcp->rx_thread != NULL) { 1349 /* 1350 * If the receive thread is enabled, then 1351 * wakeup the receive thread to process the 1352 * LDC messages. 
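 *
 * The worker side of this handoff is not part of this excerpt; a rough
 * sketch of what vsw_ldc_rx_worker() is assumed to do with these flags
 * (illustrative only) is:
 *
 *	mutex_enter(&ldcp->rx_thr_lock);
 *	while (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD))
 *		cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock);
 *	ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD;
 *	mutex_exit(&ldcp->rx_thr_lock);
 *	vsw_process_pkt(ldcp);
 *
 * The real worker also handles the stop/exit handshake used by
 * vsw_stop_rx_thread().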
1353 */ 1354 mutex_exit(&ldcp->ldc_cblock); 1355 mutex_enter(&ldcp->rx_thr_lock); 1356 if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) { 1357 ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD; 1358 cv_signal(&ldcp->rx_thr_cv); 1359 } 1360 mutex_exit(&ldcp->rx_thr_lock); 1361 mutex_enter(&ldcp->ldc_cblock); 1362 } else { 1363 vsw_process_pkt(ldcp); 1364 } 1365 1366 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1367 1368 goto vsw_cb_exit; 1369 } 1370 1371 if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) { 1372 D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)", 1373 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1374 1375 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 1376 } 1377 1378 /* 1379 * Catch either LDC_EVT_WRITE which we don't support or any 1380 * unknown event. 1381 */ 1382 if (event & 1383 ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) { 1384 DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 1385 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1386 } 1387 1388 vsw_cb_exit: 1389 mutex_exit(&ldcp->ldc_cblock); 1390 1391 /* 1392 * Let the drain function know we are finishing if it 1393 * is waiting. 1394 */ 1395 mutex_enter(&ldcp->drain_cv_lock); 1396 if (ldcp->drain_state == VSW_LDC_DRAINING) 1397 cv_signal(&ldcp->drain_cv); 1398 mutex_exit(&ldcp->drain_cv_lock); 1399 1400 return (LDC_SUCCESS); 1401 } 1402 1403 /* 1404 * Reinitialise data structures associated with the channel. 1405 */ 1406 static void 1407 vsw_ldc_reinit(vsw_ldc_t *ldcp) 1408 { 1409 vsw_t *vswp = ldcp->ldc_vswp; 1410 vsw_port_t *port; 1411 vsw_ldc_list_t *ldcl; 1412 1413 D1(vswp, "%s: enter", __func__); 1414 1415 port = ldcp->ldc_port; 1416 ldcl = &port->p_ldclist; 1417 1418 READ_ENTER(&ldcl->lockrw); 1419 1420 D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 1421 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 1422 1423 vsw_free_lane_resources(ldcp, INBOUND); 1424 vsw_free_lane_resources(ldcp, OUTBOUND); 1425 RW_EXIT(&ldcl->lockrw); 1426 1427 ldcp->lane_in.lstate = 0; 1428 ldcp->lane_out.lstate = 0; 1429 1430 /* Remove the fdb entry for this port/mac address */ 1431 vsw_fdbe_del(vswp, &(port->p_macaddr)); 1432 1433 /* remove the port from vlans it has been assigned to */ 1434 vsw_vlan_remove_ids(port, VSW_VNETPORT); 1435 1436 /* 1437 * Remove parent port from any multicast groups 1438 * it may have registered with. Client must resend 1439 * multicast add command after handshake completes. 1440 */ 1441 vsw_del_mcst_port(port); 1442 1443 ldcp->peer_session = 0; 1444 ldcp->session_status = 0; 1445 ldcp->hcnt = 0; 1446 ldcp->hphase = VSW_MILESTONE0; 1447 1448 vsw_reset_vnet_proto_ops(ldcp); 1449 1450 D1(vswp, "%s: exit", __func__); 1451 } 1452 1453 /* 1454 * Process a connection event. 1455 * 1456 * Note - care must be taken to ensure that this function is 1457 * not called with the dlistrw lock held. 1458 */ 1459 static void 1460 vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt) 1461 { 1462 vsw_t *vswp = ldcp->ldc_vswp; 1463 vsw_conn_evt_t *conn = NULL; 1464 1465 D1(vswp, "%s: enter", __func__); 1466 1467 /* 1468 * Check if either a reset or restart event is pending 1469 * or in progress. If so just return. 1470 * 1471 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT 1472 * being received by the callback handler, or a ECONNRESET error 1473 * code being returned from a ldc_read() or ldc_write() call. 
1474 * 1475 * A VSW_CONN_RESTART event occurs when some error checking code 1476 * decides that there is a problem with data from the channel, 1477 * and that the handshake should be restarted. 1478 */ 1479 if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) && 1480 (ldstub((uint8_t *)&ldcp->reset_active))) 1481 return; 1482 1483 /* 1484 * If it is an LDC_UP event we first check the recorded 1485 * state of the channel. If this is UP then we know that 1486 * the channel moving to the UP state has already been dealt 1487 * with and don't need to dispatch a new task. 1488 * 1489 * The reason for this check is that when we do a ldc_up(), 1490 * depending on the state of the peer, we may or may not get 1491 * a LDC_UP event. As we can't depend on getting a LDC_UP evt 1492 * every time we do ldc_up() we explicitly check the channel 1493 * status to see has it come up (ldc_up() is asynch and will 1494 * complete at some undefined time), and take the appropriate 1495 * action. 1496 * 1497 * The flip side of this is that we may get a LDC_UP event 1498 * when we have already seen that the channel is up and have 1499 * dealt with that. 1500 */ 1501 mutex_enter(&ldcp->status_lock); 1502 if (evt == VSW_CONN_UP) { 1503 if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) { 1504 mutex_exit(&ldcp->status_lock); 1505 return; 1506 } 1507 } 1508 mutex_exit(&ldcp->status_lock); 1509 1510 /* 1511 * The transaction group id allows us to identify and discard 1512 * any tasks which are still pending on the taskq and refer 1513 * to the handshake session we are about to restart or reset. 1514 * These stale messages no longer have any real meaning. 1515 */ 1516 (void) atomic_inc_32(&ldcp->hss_id); 1517 1518 ASSERT(vswp->taskq_p != NULL); 1519 1520 if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) { 1521 cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for" 1522 " connection event", vswp->instance); 1523 goto err_exit; 1524 } 1525 1526 conn->evt = evt; 1527 conn->ldcp = ldcp; 1528 1529 if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn, 1530 DDI_NOSLEEP) != DDI_SUCCESS) { 1531 cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task", 1532 vswp->instance); 1533 1534 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1535 goto err_exit; 1536 } 1537 1538 D1(vswp, "%s: exit", __func__); 1539 return; 1540 1541 err_exit: 1542 /* 1543 * Have mostly likely failed due to memory shortage. Clear the flag so 1544 * that future requests will at least be attempted and will hopefully 1545 * succeed. 1546 */ 1547 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1548 ldcp->reset_active = 0; 1549 } 1550 1551 /* 1552 * Deal with events relating to a connection. Invoked from a taskq. 
1553 */ 1554 static void 1555 vsw_conn_task(void *arg) 1556 { 1557 vsw_conn_evt_t *conn = (vsw_conn_evt_t *)arg; 1558 vsw_ldc_t *ldcp = NULL; 1559 vsw_port_t *portp; 1560 vsw_t *vswp = NULL; 1561 uint16_t evt; 1562 ldc_status_t curr_status; 1563 1564 ldcp = conn->ldcp; 1565 evt = conn->evt; 1566 vswp = ldcp->ldc_vswp; 1567 portp = ldcp->ldc_port; 1568 1569 D1(vswp, "%s: enter", __func__); 1570 1571 /* can safely free now have copied out data */ 1572 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1573 1574 mutex_enter(&ldcp->status_lock); 1575 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1576 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1577 "channel %ld", vswp->instance, ldcp->ldc_id); 1578 mutex_exit(&ldcp->status_lock); 1579 return; 1580 } 1581 1582 /* 1583 * If we wish to restart the handshake on this channel, then if 1584 * the channel is UP we bring it DOWN to flush the underlying 1585 * ldc queue. 1586 */ 1587 if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP)) 1588 (void) ldc_down(ldcp->ldc_handle); 1589 1590 if ((portp->p_hio_capable) && (portp->p_hio_enabled)) { 1591 vsw_hio_stop(vswp, ldcp); 1592 } 1593 1594 /* 1595 * re-init all the associated data structures. 1596 */ 1597 vsw_ldc_reinit(ldcp); 1598 1599 /* 1600 * Bring the channel back up (note it does no harm to 1601 * do this even if the channel is already UP, Just 1602 * becomes effectively a no-op). 1603 */ 1604 (void) ldc_up(ldcp->ldc_handle); 1605 1606 /* 1607 * Check if channel is now UP. This will only happen if 1608 * peer has also done a ldc_up(). 1609 */ 1610 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1611 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1612 "channel %ld", vswp->instance, ldcp->ldc_id); 1613 mutex_exit(&ldcp->status_lock); 1614 return; 1615 } 1616 1617 ldcp->ldc_status = curr_status; 1618 1619 /* channel UP so restart handshake by sending version info */ 1620 if (curr_status == LDC_UP) { 1621 if (ldcp->hcnt++ > vsw_num_handshakes) { 1622 cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted" 1623 " handshake attempts (%d) on channel %ld", 1624 vswp->instance, ldcp->hcnt, ldcp->ldc_id); 1625 mutex_exit(&ldcp->status_lock); 1626 return; 1627 } 1628 1629 if (vsw_obp_ver_proto_workaround == B_FALSE && 1630 (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp, 1631 DDI_NOSLEEP) != DDI_SUCCESS)) { 1632 cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task", 1633 vswp->instance); 1634 1635 /* 1636 * Don't count as valid restart attempt if couldn't 1637 * send version msg. 1638 */ 1639 if (ldcp->hcnt > 0) 1640 ldcp->hcnt--; 1641 } 1642 } 1643 1644 /* 1645 * Mark that the process is complete by clearing the flag. 1646 * 1647 * Note is it possible that the taskq dispatch above may have failed, 1648 * most likely due to memory shortage. We still clear the flag so 1649 * future attempts will at least be attempted and will hopefully 1650 * succeed. 1651 */ 1652 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1653 ldcp->reset_active = 0; 1654 1655 mutex_exit(&ldcp->status_lock); 1656 1657 D1(vswp, "%s: exit", __func__); 1658 } 1659 1660 /* 1661 * returns 0 if legal for event signified by flag to have 1662 * occured at the time it did. Otherwise returns 1. 
1663 */ 1664 int 1665 vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag) 1666 { 1667 vsw_t *vswp = ldcp->ldc_vswp; 1668 uint64_t state; 1669 uint64_t phase; 1670 1671 if (dir == INBOUND) 1672 state = ldcp->lane_in.lstate; 1673 else 1674 state = ldcp->lane_out.lstate; 1675 1676 phase = ldcp->hphase; 1677 1678 switch (flag) { 1679 case VSW_VER_INFO_RECV: 1680 if (phase > VSW_MILESTONE0) { 1681 DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV" 1682 " when in state %d\n", ldcp->ldc_id, phase); 1683 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1684 return (1); 1685 } 1686 break; 1687 1688 case VSW_VER_ACK_RECV: 1689 case VSW_VER_NACK_RECV: 1690 if (!(state & VSW_VER_INFO_SENT)) { 1691 DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or " 1692 "VER_NACK when in state %d\n", ldcp->ldc_id, phase); 1693 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1694 return (1); 1695 } else 1696 state &= ~VSW_VER_INFO_SENT; 1697 break; 1698 1699 case VSW_ATTR_INFO_RECV: 1700 if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) { 1701 DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV" 1702 " when in state %d\n", ldcp->ldc_id, phase); 1703 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1704 return (1); 1705 } 1706 break; 1707 1708 case VSW_ATTR_ACK_RECV: 1709 case VSW_ATTR_NACK_RECV: 1710 if (!(state & VSW_ATTR_INFO_SENT)) { 1711 DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK" 1712 " or ATTR_NACK when in state %d\n", 1713 ldcp->ldc_id, phase); 1714 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1715 return (1); 1716 } else 1717 state &= ~VSW_ATTR_INFO_SENT; 1718 break; 1719 1720 case VSW_DRING_INFO_RECV: 1721 if (phase < VSW_MILESTONE1) { 1722 DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV" 1723 " when in state %d\n", ldcp->ldc_id, phase); 1724 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1725 return (1); 1726 } 1727 break; 1728 1729 case VSW_DRING_ACK_RECV: 1730 case VSW_DRING_NACK_RECV: 1731 if (!(state & VSW_DRING_INFO_SENT)) { 1732 DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK " 1733 " or DRING_NACK when in state %d\n", 1734 ldcp->ldc_id, phase); 1735 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1736 return (1); 1737 } else 1738 state &= ~VSW_DRING_INFO_SENT; 1739 break; 1740 1741 case VSW_RDX_INFO_RECV: 1742 if (phase < VSW_MILESTONE3) { 1743 DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV" 1744 " when in state %d\n", ldcp->ldc_id, phase); 1745 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1746 return (1); 1747 } 1748 break; 1749 1750 case VSW_RDX_ACK_RECV: 1751 case VSW_RDX_NACK_RECV: 1752 if (!(state & VSW_RDX_INFO_SENT)) { 1753 DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or " 1754 "RDX_NACK when in state %d\n", ldcp->ldc_id, phase); 1755 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1756 return (1); 1757 } else 1758 state &= ~VSW_RDX_INFO_SENT; 1759 break; 1760 1761 case VSW_MCST_INFO_RECV: 1762 if (phase < VSW_MILESTONE3) { 1763 DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV" 1764 " when in state %d\n", ldcp->ldc_id, phase); 1765 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1766 return (1); 1767 } 1768 break; 1769 1770 default: 1771 DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)", 1772 ldcp->ldc_id, flag); 1773 return (1); 1774 } 1775 1776 if (dir == INBOUND) 1777 ldcp->lane_in.lstate = state; 1778 else 1779 ldcp->lane_out.lstate = state; 1780 1781 D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id); 1782 1783 return (0); 1784 } 1785 1786 void 1787 vsw_next_milestone(vsw_ldc_t *ldcp) 1788 { 1789 vsw_t *vswp = ldcp->ldc_vswp; 1790 vsw_port_t *portp = 
ldcp->ldc_port;

	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
	    ldcp->ldc_id, ldcp->hphase);

	DUMP_FLAGS(ldcp->lane_in.lstate);
	DUMP_FLAGS(ldcp->lane_out.lstate);

	switch (ldcp->hphase) {

	case VSW_MILESTONE0:
		/*
		 * If we haven't started to handshake with our peer,
		 * start to do so now.
		 */
		if (ldcp->lane_out.lstate == 0) {
			D2(vswp, "%s: (chan %lld) starting handshake "
			    "with peer", __func__, ldcp->ldc_id);
			vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		}

		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated version info.
		 */
		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
		    (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 0",
			    __func__, ldcp->ldc_id);

			vsw_set_vnet_proto_ops(ldcp);

			/*
			 * Next milestone is passed when attribute
			 * information has been successfully exchanged.
			 */
			ldcp->hphase = VSW_MILESTONE1;
			vsw_send_attr(ldcp);

		}
		break;

	case VSW_MILESTONE1:
		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated attribute information.
		 */
		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {

			ldcp->hphase = VSW_MILESTONE2;

			/*
			 * If the peer device has said it wishes to
			 * use descriptor rings then we send it our ring
			 * info, otherwise we just set up a private ring
			 * for which we use an internal buffer.
			 */
			if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
			    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
			    (VSW_VER_LT(ldcp, 1, 2) &&
			    (ldcp->lane_in.xfer_mode ==
			    VIO_DRING_MODE_V1_0))) {
				vsw_send_dring_info(ldcp);
			}
		}
		break;

	case VSW_MILESTONE2:
		/*
		 * If peer has indicated in its attribute message that
		 * it wishes to use descriptor rings then the only way
		 * to pass this milestone is for us to have received
		 * valid dring info.
		 *
		 * If peer is not using descriptor rings then just fall
		 * through.
		 */
		if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
		    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    (VSW_VER_LT(ldcp, 1, 2) &&
		    (ldcp->lane_in.xfer_mode ==
		    VIO_DRING_MODE_V1_0))) {
			if (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))
				break;
		}

		D2(vswp, "%s: (chan %lld) leaving milestone 2",
		    __func__, ldcp->ldc_id);

		ldcp->hphase = VSW_MILESTONE3;
		vsw_send_rdx(ldcp);
		break;

	case VSW_MILESTONE3:
		/*
		 * Pass this milestone when all parameters have been
		 * successfully exchanged and RDX sent in both directions.
		 *
		 * Mark outbound lane as available to transmit data.
		 */
		if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) &&
		    (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 3",
			    __func__, ldcp->ldc_id);
			D2(vswp, "%s: ** handshake complete (0x%llx : "
			    "0x%llx) **", __func__, ldcp->lane_in.lstate,
			    ldcp->lane_out.lstate);
			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
			ldcp->hphase = VSW_MILESTONE4;
			ldcp->hcnt = 0;
			DISPLAY_STATE();
			/* Start HIO if enabled and capable */
			if ((portp->p_hio_enabled) && (portp->p_hio_capable)) {
				D2(vswp, "%s: start HybridIO setup", __func__);
				vsw_hio_start(vswp, ldcp);
			}
		} else {
			D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)",
			    __func__, ldcp->lane_in.lstate,
			    ldcp->lane_out.lstate);
		}
		break;

	case VSW_MILESTONE4:
		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
		    ldcp->ldc_id);
		break;

	default:
		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
		    ldcp->ldc_id, ldcp->hphase);
	}

	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
	    ldcp->hphase);
}

/*
 * Check if major version is supported.
 *
 * Returns 0 if it finds a supported major number, and if necessary
 * adjusts the minor field.
 *
 * Returns 1 if it can't match the major number exactly. Sets major/minor
 * to the next lowest supported values, or to zero if no other values are
 * possible.
 */
static int
vsw_supported_version(vio_ver_msg_t *vp)
{
	int	i;

	D1(NULL, "vsw_supported_version: enter");

	for (i = 0; i < VSW_NUM_VER; i++) {
		if (vsw_versions[i].ver_major == vp->ver_major) {
			/*
			 * Matching or lower major version found. Update
			 * minor number if necessary.
			 */
			if (vp->ver_minor > vsw_versions[i].ver_minor) {
				D2(NULL, "%s: adjusting minor value from %d "
				    "to %d", __func__, vp->ver_minor,
				    vsw_versions[i].ver_minor);
				vp->ver_minor = vsw_versions[i].ver_minor;
			}

			return (0);
		}

		/*
		 * If the message contains a higher major version number, set
		 * the message's major/minor versions to the current values
		 * and return 1, so this message will get resent with
		 * these values.
		 */
		if (vsw_versions[i].ver_major < vp->ver_major) {
			D2(NULL, "%s: adjusting major and minor "
			    "values to %d, %d\n",
			    __func__, vsw_versions[i].ver_major,
			    vsw_versions[i].ver_minor);
			vp->ver_major = vsw_versions[i].ver_major;
			vp->ver_minor = vsw_versions[i].ver_minor;
			return (1);
		}
	}

	/* No match was possible, zero out fields */
	vp->ver_major = 0;
	vp->ver_minor = 0;

	D1(NULL, "vsw_supported_version: exit");

	return (1);
}

/*
 * Set vnet-protocol-version dependent functions based on version.
 */
static void
vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	lane_t	*lp = &ldcp->lane_out;

	if (VSW_VER_GTEQ(ldcp, 1, 3)) {
		/*
		 * If the version negotiated with peer is >= 1.3,
		 * set the mtu in our attributes to max_frame_size.
		 */
		lp->mtu = vswp->max_frame_size;
	} else {
		vsw_port_t	*portp = ldcp->ldc_port;
		/*
		 * Pre-1.3 peers expect max frame size of ETHERMAX.
2006 * We can negotiate that size with those peers provided the 2007 * following conditions are true: 2008 * - Our max_frame_size is greater only by VLAN_TAGSZ (4). 2009 * - Only pvid is defined for our peer and there are no vids. 2010 * If the above conditions are true, then we can send/recv only 2011 * untagged frames of max size ETHERMAX. Note that pvid of the 2012 * peer can be different, as vsw has to serve the vnet in that 2013 * vlan even if itself is not assigned to that vlan. 2014 */ 2015 if ((vswp->max_frame_size == ETHERMAX + VLAN_TAGSZ) && 2016 portp->nvids == 0) { 2017 lp->mtu = ETHERMAX; 2018 } 2019 } 2020 2021 if (VSW_VER_GTEQ(ldcp, 1, 2)) { 2022 /* Versions >= 1.2 */ 2023 2024 if (VSW_PRI_ETH_DEFINED(vswp)) { 2025 /* 2026 * enable priority routines and pkt mode only if 2027 * at least one pri-eth-type is specified in MD. 2028 */ 2029 ldcp->tx = vsw_ldctx_pri; 2030 ldcp->rx_pktdata = vsw_process_pkt_data; 2031 2032 /* set xfer mode for vsw_send_attr() */ 2033 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2; 2034 } else { 2035 /* no priority eth types defined in MD */ 2036 2037 ldcp->tx = vsw_ldctx; 2038 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2039 2040 /* set xfer mode for vsw_send_attr() */ 2041 lp->xfer_mode = VIO_DRING_MODE_V1_2; 2042 } 2043 2044 } else { 2045 /* Versions prior to 1.2 */ 2046 2047 vsw_reset_vnet_proto_ops(ldcp); 2048 } 2049 } 2050 2051 /* 2052 * Reset vnet-protocol-version dependent functions to v1.0. 2053 */ 2054 static void 2055 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp) 2056 { 2057 lane_t *lp = &ldcp->lane_out; 2058 2059 ldcp->tx = vsw_ldctx; 2060 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2061 2062 /* set xfer mode for vsw_send_attr() */ 2063 lp->xfer_mode = VIO_DRING_MODE_V1_0; 2064 } 2065 2066 /* 2067 * Main routine for processing messages received over LDC. 2068 */ 2069 static void 2070 vsw_process_pkt(void *arg) 2071 { 2072 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2073 vsw_t *vswp = ldcp->ldc_vswp; 2074 size_t msglen; 2075 vio_msg_tag_t *tagp; 2076 uint64_t *ldcmsg; 2077 int rv = 0; 2078 2079 2080 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2081 2082 ASSERT(MUTEX_HELD(&ldcp->ldc_cblock)); 2083 2084 ldcmsg = ldcp->ldcmsg; 2085 /* 2086 * If channel is up read messages until channel is empty. 2087 */ 2088 do { 2089 msglen = ldcp->msglen; 2090 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen); 2091 2092 if (rv != 0) { 2093 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n", 2094 __func__, ldcp->ldc_id, rv, msglen); 2095 } 2096 2097 /* channel has been reset */ 2098 if (rv == ECONNRESET) { 2099 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 2100 break; 2101 } 2102 2103 if (msglen == 0) { 2104 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 2105 ldcp->ldc_id); 2106 break; 2107 } 2108 2109 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 2110 ldcp->ldc_id, msglen); 2111 2112 /* 2113 * Figure out what sort of packet we have gotten by 2114 * examining the msg tag, and then switch it appropriately. 
2115 */ 2116 tagp = (vio_msg_tag_t *)ldcmsg; 2117 2118 switch (tagp->vio_msgtype) { 2119 case VIO_TYPE_CTRL: 2120 vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp); 2121 break; 2122 case VIO_TYPE_DATA: 2123 vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen); 2124 break; 2125 case VIO_TYPE_ERR: 2126 vsw_process_err_pkt(ldcp, ldcmsg, tagp); 2127 break; 2128 default: 2129 DERR(vswp, "%s: Unknown tag(%lx) ", __func__, 2130 "id(%lx)\n", tagp->vio_msgtype, ldcp->ldc_id); 2131 break; 2132 } 2133 } while (msglen); 2134 2135 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2136 } 2137 2138 /* 2139 * Dispatch a task to process a VIO control message. 2140 */ 2141 static void 2142 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp) 2143 { 2144 vsw_ctrl_task_t *ctaskp = NULL; 2145 vsw_port_t *port = ldcp->ldc_port; 2146 vsw_t *vswp = port->p_vswp; 2147 2148 D1(vswp, "%s: enter", __func__); 2149 2150 /* 2151 * We need to handle RDX ACK messages in-band as once they 2152 * are exchanged it is possible that we will get an 2153 * immediate (legitimate) data packet. 2154 */ 2155 if ((tagp->vio_subtype_env == VIO_RDX) && 2156 (tagp->vio_subtype == VIO_SUBTYPE_ACK)) { 2157 2158 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV)) 2159 return; 2160 2161 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV; 2162 D2(vswp, "%s (%ld) handling RDX_ACK in place " 2163 "(ostate 0x%llx : hphase %d)", __func__, 2164 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase); 2165 vsw_next_milestone(ldcp); 2166 return; 2167 } 2168 2169 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 2170 2171 if (ctaskp == NULL) { 2172 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__); 2173 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2174 return; 2175 } 2176 2177 ctaskp->ldcp = ldcp; 2178 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 2179 ctaskp->hss_id = ldcp->hss_id; 2180 2181 /* 2182 * Dispatch task to processing taskq if port is not in 2183 * the process of being detached. 2184 */ 2185 mutex_enter(&port->state_lock); 2186 if (port->state == VSW_PORT_INIT) { 2187 if ((vswp->taskq_p == NULL) || 2188 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt, 2189 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) { 2190 DERR(vswp, "%s: unable to dispatch task to taskq", 2191 __func__); 2192 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2193 mutex_exit(&port->state_lock); 2194 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2195 return; 2196 } 2197 } else { 2198 DWARN(vswp, "%s: port %d detaching, not dispatching " 2199 "task", __func__, port->p_instance); 2200 } 2201 2202 mutex_exit(&port->state_lock); 2203 2204 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 2205 ldcp->ldc_id); 2206 D1(vswp, "%s: exit", __func__); 2207 } 2208 2209 /* 2210 * Process a VIO ctrl message. Invoked from taskq. 
2211 */ 2212 static void 2213 vsw_process_ctrl_pkt(void *arg) 2214 { 2215 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 2216 vsw_ldc_t *ldcp = ctaskp->ldcp; 2217 vsw_t *vswp = ldcp->ldc_vswp; 2218 vio_msg_tag_t tag; 2219 uint16_t env; 2220 2221 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2222 2223 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 2224 env = tag.vio_subtype_env; 2225 2226 /* stale pkt check */ 2227 if (ctaskp->hss_id < ldcp->hss_id) { 2228 DWARN(vswp, "%s: discarding stale packet belonging to earlier" 2229 " (%ld) handshake session", __func__, ctaskp->hss_id); 2230 return; 2231 } 2232 2233 /* session id check */ 2234 if (ldcp->session_status & VSW_PEER_SESSION) { 2235 if (ldcp->peer_session != tag.vio_sid) { 2236 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2237 __func__, ldcp->ldc_id, tag.vio_sid); 2238 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2239 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2240 return; 2241 } 2242 } 2243 2244 /* 2245 * Switch on vio_subtype envelope, then let lower routines 2246 * decide if its an INFO, ACK or NACK packet. 2247 */ 2248 switch (env) { 2249 case VIO_VER_INFO: 2250 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 2251 break; 2252 case VIO_DRING_REG: 2253 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 2254 break; 2255 case VIO_DRING_UNREG: 2256 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 2257 break; 2258 case VIO_ATTR_INFO: 2259 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 2260 break; 2261 case VNET_MCAST_INFO: 2262 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 2263 break; 2264 case VIO_RDX: 2265 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 2266 break; 2267 case VIO_DDS_INFO: 2268 vsw_process_dds_msg(vswp, ldcp, &ctaskp->pktp); 2269 break; 2270 default: 2271 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 2272 } 2273 2274 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2275 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2276 } 2277 2278 /* 2279 * Version negotiation. We can end up here either because our peer 2280 * has responded to a handshake message we have sent it, or our peer 2281 * has initiated a handshake with us. If its the former then can only 2282 * be ACK or NACK, if its the later can only be INFO. 2283 * 2284 * If its an ACK we move to the next stage of the handshake, namely 2285 * attribute exchange. If its a NACK we see if we can specify another 2286 * version, if we can't we stop. 2287 * 2288 * If it is an INFO we reset all params associated with communication 2289 * in that direction over this channel (remember connection is 2290 * essentially 2 independent simplex channels). 2291 */ 2292 void 2293 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 2294 { 2295 vio_ver_msg_t *ver_pkt; 2296 vsw_t *vswp = ldcp->ldc_vswp; 2297 2298 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2299 2300 /* 2301 * We know this is a ctrl/version packet so 2302 * cast it into the correct structure. 2303 */ 2304 ver_pkt = (vio_ver_msg_t *)pkt; 2305 2306 switch (ver_pkt->tag.vio_subtype) { 2307 case VIO_SUBTYPE_INFO: 2308 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 2309 2310 /* 2311 * Record the session id, which we will use from now 2312 * until we see another VER_INFO msg. Even then the 2313 * session id in most cases will be unchanged, execpt 2314 * if channel was reset. 
2315 */ 2316 if ((ldcp->session_status & VSW_PEER_SESSION) && 2317 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 2318 DERR(vswp, "%s: updating session id for chan %lld " 2319 "from %llx to %llx", __func__, ldcp->ldc_id, 2320 ldcp->peer_session, ver_pkt->tag.vio_sid); 2321 } 2322 2323 ldcp->peer_session = ver_pkt->tag.vio_sid; 2324 ldcp->session_status |= VSW_PEER_SESSION; 2325 2326 /* Legal message at this time ? */ 2327 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 2328 return; 2329 2330 /* 2331 * First check the device class. Currently only expect 2332 * to be talking to a network device. In the future may 2333 * also talk to another switch. 2334 */ 2335 if (ver_pkt->dev_class != VDEV_NETWORK) { 2336 DERR(vswp, "%s: illegal device class %d", __func__, 2337 ver_pkt->dev_class); 2338 2339 ver_pkt->tag.vio_sid = ldcp->local_session; 2340 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2341 2342 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2343 2344 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2345 sizeof (vio_ver_msg_t), B_TRUE); 2346 2347 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2348 vsw_next_milestone(ldcp); 2349 return; 2350 } else { 2351 ldcp->dev_class = ver_pkt->dev_class; 2352 } 2353 2354 /* 2355 * Now check the version. 2356 */ 2357 if (vsw_supported_version(ver_pkt) == 0) { 2358 /* 2359 * Support this major version and possibly 2360 * adjusted minor version. 2361 */ 2362 2363 D2(vswp, "%s: accepted ver %d:%d", __func__, 2364 ver_pkt->ver_major, ver_pkt->ver_minor); 2365 2366 /* Store accepted values */ 2367 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2368 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2369 2370 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2371 2372 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 2373 2374 if (vsw_obp_ver_proto_workaround == B_TRUE) { 2375 /* 2376 * Send a version info message 2377 * using the accepted version that 2378 * we are about to ack. Also note that 2379 * we send our ver info before we ack. 2380 * Otherwise, as soon as receiving the 2381 * ack, obp sends attr info msg, which 2382 * breaks vsw_check_flag() invoked 2383 * from vsw_process_ctrl_attr_pkt(); 2384 * as we also need VSW_VER_ACK_RECV to 2385 * be set in lane_out.lstate, before 2386 * we can receive attr info. 2387 */ 2388 vsw_send_ver(ldcp); 2389 } 2390 } else { 2391 /* 2392 * NACK back with the next lower major/minor 2393 * pairing we support (if don't suuport any more 2394 * versions then they will be set to zero. 
2395 */ 2396 2397 D2(vswp, "%s: replying with ver %d:%d", __func__, 2398 ver_pkt->ver_major, ver_pkt->ver_minor); 2399 2400 /* Store updated values */ 2401 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2402 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2403 2404 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2405 2406 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2407 } 2408 2409 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2410 ver_pkt->tag.vio_sid = ldcp->local_session; 2411 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2412 sizeof (vio_ver_msg_t), B_TRUE); 2413 2414 vsw_next_milestone(ldcp); 2415 break; 2416 2417 case VIO_SUBTYPE_ACK: 2418 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2419 2420 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2421 return; 2422 2423 /* Store updated values */ 2424 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2425 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2426 2427 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2428 vsw_next_milestone(ldcp); 2429 2430 break; 2431 2432 case VIO_SUBTYPE_NACK: 2433 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2434 2435 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2436 return; 2437 2438 /* 2439 * If our peer sent us a NACK with the ver fields set to 2440 * zero then there is nothing more we can do. Otherwise see 2441 * if we support either the version suggested, or a lesser 2442 * one. 2443 */ 2444 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2445 DERR(vswp, "%s: peer unable to negotiate any " 2446 "further.", __func__); 2447 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2448 vsw_next_milestone(ldcp); 2449 return; 2450 } 2451 2452 /* 2453 * Check to see if we support this major version or 2454 * a lower one. If we don't then maj/min will be set 2455 * to zero. 2456 */ 2457 (void) vsw_supported_version(ver_pkt); 2458 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2459 /* Nothing more we can do */ 2460 DERR(vswp, "%s: version negotiation failed.\n", 2461 __func__); 2462 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2463 vsw_next_milestone(ldcp); 2464 } else { 2465 /* found a supported major version */ 2466 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2467 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2468 2469 D2(vswp, "%s: resending with updated values (%x, %x)", 2470 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2471 2472 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2473 ver_pkt->tag.vio_sid = ldcp->local_session; 2474 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2475 2476 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2477 2478 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2479 sizeof (vio_ver_msg_t), B_TRUE); 2480 2481 vsw_next_milestone(ldcp); 2482 2483 } 2484 break; 2485 2486 default: 2487 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2488 ver_pkt->tag.vio_subtype); 2489 } 2490 2491 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2492 } 2493 2494 /* 2495 * Process an attribute packet. We can end up here either because our peer 2496 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2497 * peer has sent us an attribute INFO message 2498 * 2499 * If its an ACK we then move to the next stage of the handshake which 2500 * is to send our descriptor ring info to our peer. If its a NACK then 2501 * there is nothing more we can (currently) do. 2502 * 2503 * If we get a valid/acceptable INFO packet (and we have already negotiated 2504 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2505 * NACK back and reset channel state to INACTIV. 
2506 * 2507 * FUTURE: in time we will probably negotiate over attributes, but for 2508 * the moment unacceptable attributes are regarded as a fatal error. 2509 * 2510 */ 2511 void 2512 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 2513 { 2514 vnet_attr_msg_t *attr_pkt; 2515 vsw_t *vswp = ldcp->ldc_vswp; 2516 vsw_port_t *port = ldcp->ldc_port; 2517 uint64_t macaddr = 0; 2518 int i; 2519 2520 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2521 2522 /* 2523 * We know this is a ctrl/attr packet so 2524 * cast it into the correct structure. 2525 */ 2526 attr_pkt = (vnet_attr_msg_t *)pkt; 2527 2528 switch (attr_pkt->tag.vio_subtype) { 2529 case VIO_SUBTYPE_INFO: 2530 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2531 2532 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 2533 return; 2534 2535 /* 2536 * If the attributes are unacceptable then we NACK back. 2537 */ 2538 if (vsw_check_attr(attr_pkt, ldcp)) { 2539 2540 DERR(vswp, "%s (chan %d): invalid attributes", 2541 __func__, ldcp->ldc_id); 2542 2543 vsw_free_lane_resources(ldcp, INBOUND); 2544 2545 attr_pkt->tag.vio_sid = ldcp->local_session; 2546 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2547 2548 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2549 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 2550 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2551 sizeof (vnet_attr_msg_t), B_TRUE); 2552 2553 vsw_next_milestone(ldcp); 2554 return; 2555 } 2556 2557 /* 2558 * Otherwise store attributes for this lane and update 2559 * lane state. 2560 */ 2561 ldcp->lane_in.mtu = attr_pkt->mtu; 2562 ldcp->lane_in.addr = attr_pkt->addr; 2563 ldcp->lane_in.addr_type = attr_pkt->addr_type; 2564 ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode; 2565 ldcp->lane_in.ack_freq = attr_pkt->ack_freq; 2566 2567 macaddr = ldcp->lane_in.addr; 2568 for (i = ETHERADDRL - 1; i >= 0; i--) { 2569 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 2570 macaddr >>= 8; 2571 } 2572 2573 /* create the fdb entry for this port/mac address */ 2574 vsw_fdbe_add(vswp, port); 2575 2576 /* add the port to the specified vlans */ 2577 vsw_vlan_add_ids(port, VSW_VNETPORT); 2578 2579 /* setup device specifc xmit routines */ 2580 mutex_enter(&port->tx_lock); 2581 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2582 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) || 2583 (VSW_VER_LT(ldcp, 1, 2) && 2584 (ldcp->lane_in.xfer_mode == VIO_DRING_MODE_V1_0))) { 2585 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 2586 port->transmit = vsw_dringsend; 2587 } else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) { 2588 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 2589 vsw_create_privring(ldcp); 2590 port->transmit = vsw_descrsend; 2591 ldcp->lane_out.xfer_mode = VIO_DESC_MODE; 2592 } 2593 2594 /* 2595 * HybridIO is supported only vnet, not by OBP. 2596 * So, set hio_capable to true only when in DRING mode. 
2597 */ 2598 if (VSW_VER_GTEQ(ldcp, 1, 3) && 2599 (ldcp->lane_in.xfer_mode != VIO_DESC_MODE)) { 2600 (void) atomic_swap_32(&port->p_hio_capable, B_TRUE); 2601 } else { 2602 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 2603 } 2604 2605 mutex_exit(&port->tx_lock); 2606 2607 attr_pkt->tag.vio_sid = ldcp->local_session; 2608 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2609 2610 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2611 2612 ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT; 2613 2614 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2615 sizeof (vnet_attr_msg_t), B_TRUE); 2616 2617 vsw_next_milestone(ldcp); 2618 break; 2619 2620 case VIO_SUBTYPE_ACK: 2621 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2622 2623 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 2624 return; 2625 2626 ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV; 2627 vsw_next_milestone(ldcp); 2628 break; 2629 2630 case VIO_SUBTYPE_NACK: 2631 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2632 2633 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2634 return; 2635 2636 ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV; 2637 vsw_next_milestone(ldcp); 2638 break; 2639 2640 default: 2641 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2642 attr_pkt->tag.vio_subtype); 2643 } 2644 2645 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2646 } 2647 2648 /* 2649 * Process a dring info packet. We can end up here either because our peer 2650 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 2651 * peer has sent us a dring INFO message. 2652 * 2653 * If we get a valid/acceptable INFO packet (and we have already negotiated 2654 * a version) we ACK back and update the lane state, otherwise we NACK back. 2655 * 2656 * FUTURE: nothing to stop client from sending us info on multiple dring's 2657 * but for the moment we will just use the first one we are given. 2658 * 2659 */ 2660 void 2661 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 2662 { 2663 vio_dring_reg_msg_t *dring_pkt; 2664 vsw_t *vswp = ldcp->ldc_vswp; 2665 ldc_mem_info_t minfo; 2666 dring_info_t *dp, *dbp; 2667 int dring_found = 0; 2668 2669 /* 2670 * We know this is a ctrl/dring packet so 2671 * cast it into the correct structure. 2672 */ 2673 dring_pkt = (vio_dring_reg_msg_t *)pkt; 2674 2675 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2676 2677 switch (dring_pkt->tag.vio_subtype) { 2678 case VIO_SUBTYPE_INFO: 2679 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2680 2681 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 2682 return; 2683 2684 /* 2685 * If the dring params are unacceptable then we NACK back. 2686 */ 2687 if (vsw_check_dring_info(dring_pkt)) { 2688 2689 DERR(vswp, "%s (%lld): invalid dring info", 2690 __func__, ldcp->ldc_id); 2691 2692 vsw_free_lane_resources(ldcp, INBOUND); 2693 2694 dring_pkt->tag.vio_sid = ldcp->local_session; 2695 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2696 2697 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2698 2699 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2700 2701 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2702 sizeof (vio_dring_reg_msg_t), B_TRUE); 2703 2704 vsw_next_milestone(ldcp); 2705 return; 2706 } 2707 2708 /* 2709 * Otherwise, attempt to map in the dring using the 2710 * cookie. If that succeeds we send back a unique dring 2711 * identifier that the sending side will use in future 2712 * to refer to this descriptor ring. 
2713 */ 2714 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 2715 2716 dp->num_descriptors = dring_pkt->num_descriptors; 2717 dp->descriptor_size = dring_pkt->descriptor_size; 2718 dp->options = dring_pkt->options; 2719 dp->ncookies = dring_pkt->ncookies; 2720 2721 /* 2722 * Note: should only get one cookie. Enforced in 2723 * the ldc layer. 2724 */ 2725 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 2726 sizeof (ldc_mem_cookie_t)); 2727 2728 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 2729 dp->num_descriptors, dp->descriptor_size); 2730 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 2731 dp->options, dp->ncookies); 2732 2733 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 2734 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 2735 LDC_SHADOW_MAP, &(dp->handle))) != 0) { 2736 2737 DERR(vswp, "%s: dring_map failed\n", __func__); 2738 2739 kmem_free(dp, sizeof (dring_info_t)); 2740 vsw_free_lane_resources(ldcp, INBOUND); 2741 2742 dring_pkt->tag.vio_sid = ldcp->local_session; 2743 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2744 2745 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2746 2747 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2748 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2749 sizeof (vio_dring_reg_msg_t), B_TRUE); 2750 2751 vsw_next_milestone(ldcp); 2752 return; 2753 } 2754 2755 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 2756 2757 DERR(vswp, "%s: dring_addr failed\n", __func__); 2758 2759 kmem_free(dp, sizeof (dring_info_t)); 2760 vsw_free_lane_resources(ldcp, INBOUND); 2761 2762 dring_pkt->tag.vio_sid = ldcp->local_session; 2763 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2764 2765 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2766 2767 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2768 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2769 sizeof (vio_dring_reg_msg_t), B_TRUE); 2770 2771 vsw_next_milestone(ldcp); 2772 return; 2773 } else { 2774 /* store the address of the pub part of ring */ 2775 dp->pub_addr = minfo.vaddr; 2776 } 2777 2778 /* no private section as we are importing */ 2779 dp->priv_addr = NULL; 2780 2781 /* 2782 * Using simple mono increasing int for ident at 2783 * the moment. 2784 */ 2785 dp->ident = ldcp->next_ident; 2786 ldcp->next_ident++; 2787 2788 dp->end_idx = 0; 2789 dp->next = NULL; 2790 2791 /* 2792 * Link it onto the end of the list of drings 2793 * for this lane. 2794 */ 2795 if (ldcp->lane_in.dringp == NULL) { 2796 D2(vswp, "%s: adding first INBOUND dring", __func__); 2797 ldcp->lane_in.dringp = dp; 2798 } else { 2799 dbp = ldcp->lane_in.dringp; 2800 2801 while (dbp->next != NULL) 2802 dbp = dbp->next; 2803 2804 dbp->next = dp; 2805 } 2806 2807 /* acknowledge it */ 2808 dring_pkt->tag.vio_sid = ldcp->local_session; 2809 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2810 dring_pkt->dring_ident = dp->ident; 2811 2812 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2813 sizeof (vio_dring_reg_msg_t), B_TRUE); 2814 2815 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 2816 vsw_next_milestone(ldcp); 2817 break; 2818 2819 case VIO_SUBTYPE_ACK: 2820 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2821 2822 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 2823 return; 2824 2825 /* 2826 * Peer is acknowledging our dring info and will have 2827 * sent us a dring identifier which we will use to 2828 * refer to this ring w.r.t. our peer. 2829 */ 2830 dp = ldcp->lane_out.dringp; 2831 if (dp != NULL) { 2832 /* 2833 * Find the ring this ident should be associated 2834 * with. 
2835 */ 2836 if (vsw_dring_match(dp, dring_pkt)) { 2837 dring_found = 1; 2838 2839 } else while (dp != NULL) { 2840 if (vsw_dring_match(dp, dring_pkt)) { 2841 dring_found = 1; 2842 break; 2843 } 2844 dp = dp->next; 2845 } 2846 2847 if (dring_found == 0) { 2848 DERR(NULL, "%s: unrecognised ring cookie", 2849 __func__); 2850 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2851 return; 2852 } 2853 2854 } else { 2855 DERR(vswp, "%s: DRING ACK received but no drings " 2856 "allocated", __func__); 2857 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2858 return; 2859 } 2860 2861 /* store ident */ 2862 dp->ident = dring_pkt->dring_ident; 2863 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 2864 vsw_next_milestone(ldcp); 2865 break; 2866 2867 case VIO_SUBTYPE_NACK: 2868 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2869 2870 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 2871 return; 2872 2873 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 2874 vsw_next_milestone(ldcp); 2875 break; 2876 2877 default: 2878 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2879 dring_pkt->tag.vio_subtype); 2880 } 2881 2882 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2883 } 2884 2885 /* 2886 * Process a request from peer to unregister a dring. 2887 * 2888 * For the moment we just restart the handshake if our 2889 * peer endpoint attempts to unregister a dring. 2890 */ 2891 void 2892 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 2893 { 2894 vsw_t *vswp = ldcp->ldc_vswp; 2895 vio_dring_unreg_msg_t *dring_pkt; 2896 2897 /* 2898 * We know this is a ctrl/dring packet so 2899 * cast it into the correct structure. 2900 */ 2901 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 2902 2903 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2904 2905 switch (dring_pkt->tag.vio_subtype) { 2906 case VIO_SUBTYPE_INFO: 2907 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2908 2909 DWARN(vswp, "%s: restarting handshake..", __func__); 2910 break; 2911 2912 case VIO_SUBTYPE_ACK: 2913 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2914 2915 DWARN(vswp, "%s: restarting handshake..", __func__); 2916 break; 2917 2918 case VIO_SUBTYPE_NACK: 2919 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2920 2921 DWARN(vswp, "%s: restarting handshake..", __func__); 2922 break; 2923 2924 default: 2925 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2926 dring_pkt->tag.vio_subtype); 2927 } 2928 2929 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2930 2931 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2932 } 2933 2934 #define SND_MCST_NACK(ldcp, pkt) \ 2935 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 2936 pkt->tag.vio_sid = ldcp->local_session; \ 2937 (void) vsw_send_msg(ldcp, (void *)pkt, \ 2938 sizeof (vnet_mcast_msg_t), B_TRUE); 2939 2940 /* 2941 * Process a multicast request from a vnet. 2942 * 2943 * Vnet's specify a multicast address that they are interested in. This 2944 * address is used as a key into the hash table which forms the multicast 2945 * forwarding database (mFDB). 2946 * 2947 * The table keys are the multicast addresses, while the table entries 2948 * are pointers to lists of ports which wish to receive packets for the 2949 * specified multicast address. 2950 * 2951 * When a multicast packet is being switched we use the address as a key 2952 * into the hash table, and then walk the appropriate port list forwarding 2953 * the pkt to each port in turn. 
2954 * 2955 * If a vnet is no longer interested in a particular multicast grouping 2956 * we simply find the correct location in the hash table and then delete 2957 * the relevant port from the port list. 2958 * 2959 * To deal with the case whereby a port is being deleted without first 2960 * removing itself from the lists in the hash table, we maintain a list 2961 * of multicast addresses the port has registered an interest in, within 2962 * the port structure itself. We then simply walk that list of addresses 2963 * using them as keys into the hash table and remove the port from the 2964 * appropriate lists. 2965 */ 2966 static void 2967 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 2968 { 2969 vnet_mcast_msg_t *mcst_pkt; 2970 vsw_port_t *port = ldcp->ldc_port; 2971 vsw_t *vswp = ldcp->ldc_vswp; 2972 int i; 2973 2974 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2975 2976 /* 2977 * We know this is a ctrl/mcast packet so 2978 * cast it into the correct structure. 2979 */ 2980 mcst_pkt = (vnet_mcast_msg_t *)pkt; 2981 2982 switch (mcst_pkt->tag.vio_subtype) { 2983 case VIO_SUBTYPE_INFO: 2984 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2985 2986 /* 2987 * Check if in correct state to receive a multicast 2988 * message (i.e. handshake complete). If not reset 2989 * the handshake. 2990 */ 2991 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 2992 return; 2993 2994 /* 2995 * Before attempting to add or remove address check 2996 * that they are valid multicast addresses. 2997 * If not, then NACK back. 2998 */ 2999 for (i = 0; i < mcst_pkt->count; i++) { 3000 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3001 DERR(vswp, "%s: invalid multicast address", 3002 __func__); 3003 SND_MCST_NACK(ldcp, mcst_pkt); 3004 return; 3005 } 3006 } 3007 3008 /* 3009 * Now add/remove the addresses. If this fails we 3010 * NACK back. 3011 */ 3012 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3013 SND_MCST_NACK(ldcp, mcst_pkt); 3014 return; 3015 } 3016 3017 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3018 mcst_pkt->tag.vio_sid = ldcp->local_session; 3019 3020 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3021 3022 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 3023 sizeof (vnet_mcast_msg_t), B_TRUE); 3024 break; 3025 3026 case VIO_SUBTYPE_ACK: 3027 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3028 3029 /* 3030 * We shouldn't ever get a multicast ACK message as 3031 * at the moment we never request multicast addresses 3032 * to be set on some other device. This may change in 3033 * the future if we have cascading switches. 3034 */ 3035 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3036 return; 3037 3038 /* Do nothing */ 3039 break; 3040 3041 case VIO_SUBTYPE_NACK: 3042 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3043 3044 /* 3045 * We shouldn't get a multicast NACK packet for the 3046 * same reasons as we shouldn't get a ACK packet. 3047 */ 3048 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3049 return; 3050 3051 /* Do nothing */ 3052 break; 3053 3054 default: 3055 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3056 mcst_pkt->tag.vio_subtype); 3057 } 3058 3059 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3060 } 3061 3062 static void 3063 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 3064 { 3065 vio_rdx_msg_t *rdx_pkt; 3066 vsw_t *vswp = ldcp->ldc_vswp; 3067 3068 /* 3069 * We know this is a ctrl/rdx packet so 3070 * cast it into the correct structure. 
3071 */ 3072 rdx_pkt = (vio_rdx_msg_t *)pkt; 3073 3074 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3075 3076 switch (rdx_pkt->tag.vio_subtype) { 3077 case VIO_SUBTYPE_INFO: 3078 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3079 3080 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 3081 return; 3082 3083 rdx_pkt->tag.vio_sid = ldcp->local_session; 3084 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3085 3086 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3087 3088 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 3089 3090 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 3091 sizeof (vio_rdx_msg_t), B_TRUE); 3092 3093 vsw_next_milestone(ldcp); 3094 break; 3095 3096 case VIO_SUBTYPE_ACK: 3097 /* 3098 * Should be handled in-band by callback handler. 3099 */ 3100 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 3101 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3102 break; 3103 3104 case VIO_SUBTYPE_NACK: 3105 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3106 3107 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 3108 return; 3109 3110 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 3111 vsw_next_milestone(ldcp); 3112 break; 3113 3114 default: 3115 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3116 rdx_pkt->tag.vio_subtype); 3117 } 3118 3119 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3120 } 3121 3122 static void 3123 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp, 3124 uint32_t msglen) 3125 { 3126 uint16_t env = tagp->vio_subtype_env; 3127 vsw_t *vswp = ldcp->ldc_vswp; 3128 3129 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3130 3131 /* session id check */ 3132 if (ldcp->session_status & VSW_PEER_SESSION) { 3133 if (ldcp->peer_session != tagp->vio_sid) { 3134 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3135 __func__, ldcp->ldc_id, tagp->vio_sid); 3136 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3137 return; 3138 } 3139 } 3140 3141 /* 3142 * It is an error for us to be getting data packets 3143 * before the handshake has completed. 3144 */ 3145 if (ldcp->hphase != VSW_MILESTONE4) { 3146 DERR(vswp, "%s: got data packet before handshake complete " 3147 "hphase %d (%x: %x)", __func__, ldcp->hphase, 3148 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3149 DUMP_FLAGS(ldcp->lane_in.lstate); 3150 DUMP_FLAGS(ldcp->lane_out.lstate); 3151 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3152 return; 3153 } 3154 3155 /* 3156 * To reduce the locking contention, release the 3157 * ldc_cblock here and re-acquire it once we are done 3158 * receiving packets. 3159 */ 3160 mutex_exit(&ldcp->ldc_cblock); 3161 mutex_enter(&ldcp->ldc_rxlock); 3162 3163 /* 3164 * Switch on vio_subtype envelope, then let lower routines 3165 * decide if its an INFO, ACK or NACK packet. 
3166 */ 3167 if (env == VIO_DRING_DATA) { 3168 vsw_process_data_dring_pkt(ldcp, dpkt); 3169 } else if (env == VIO_PKT_DATA) { 3170 ldcp->rx_pktdata(ldcp, dpkt, msglen); 3171 } else if (env == VIO_DESC_DATA) { 3172 vsw_process_data_ibnd_pkt(ldcp, dpkt); 3173 } else { 3174 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 3175 } 3176 3177 mutex_exit(&ldcp->ldc_rxlock); 3178 mutex_enter(&ldcp->ldc_cblock); 3179 3180 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3181 } 3182 3183 #define SND_DRING_NACK(ldcp, pkt) \ 3184 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3185 pkt->tag.vio_sid = ldcp->local_session; \ 3186 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3187 sizeof (vio_dring_msg_t), B_TRUE); 3188 3189 static void 3190 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 3191 { 3192 vio_dring_msg_t *dring_pkt; 3193 vnet_public_desc_t *pub_addr = NULL; 3194 vsw_private_desc_t *priv_addr = NULL; 3195 dring_info_t *dp = NULL; 3196 vsw_t *vswp = ldcp->ldc_vswp; 3197 mblk_t *mp = NULL; 3198 mblk_t *bp = NULL; 3199 mblk_t *bpt = NULL; 3200 size_t nbytes = 0; 3201 uint64_t ncookies = 0; 3202 uint64_t chain = 0; 3203 uint64_t len; 3204 uint32_t pos, start, datalen; 3205 uint32_t range_start, range_end; 3206 int32_t end, num, cnt = 0; 3207 int i, rv, msg_rv = 0; 3208 boolean_t ack_needed = B_FALSE; 3209 boolean_t prev_desc_ack = B_FALSE; 3210 int read_attempts = 0; 3211 struct ether_header *ehp; 3212 3213 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3214 3215 /* 3216 * We know this is a data/dring packet so 3217 * cast it into the correct structure. 3218 */ 3219 dring_pkt = (vio_dring_msg_t *)dpkt; 3220 3221 /* 3222 * Switch on the vio_subtype. If its INFO then we need to 3223 * process the data. If its an ACK we need to make sure 3224 * it makes sense (i.e did we send an earlier data/info), 3225 * and if its a NACK then we maybe attempt a retry. 3226 */ 3227 switch (dring_pkt->tag.vio_subtype) { 3228 case VIO_SUBTYPE_INFO: 3229 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 3230 3231 READ_ENTER(&ldcp->lane_in.dlistrw); 3232 if ((dp = vsw_ident2dring(&ldcp->lane_in, 3233 dring_pkt->dring_ident)) == NULL) { 3234 RW_EXIT(&ldcp->lane_in.dlistrw); 3235 3236 DERR(vswp, "%s(%lld): unable to find dring from " 3237 "ident 0x%llx", __func__, ldcp->ldc_id, 3238 dring_pkt->dring_ident); 3239 3240 SND_DRING_NACK(ldcp, dring_pkt); 3241 return; 3242 } 3243 3244 start = pos = dring_pkt->start_idx; 3245 end = dring_pkt->end_idx; 3246 len = dp->num_descriptors; 3247 3248 range_start = range_end = pos; 3249 3250 D2(vswp, "%s(%lld): start index %ld : end %ld\n", 3251 __func__, ldcp->ldc_id, start, end); 3252 3253 if (end == -1) { 3254 num = -1; 3255 } else if (end >= 0) { 3256 num = end >= pos ? 
end - pos + 1: (len - pos + 1) + end; 3257 3258 /* basic sanity check */ 3259 if (end > len) { 3260 RW_EXIT(&ldcp->lane_in.dlistrw); 3261 DERR(vswp, "%s(%lld): endpoint %lld outside " 3262 "ring length %lld", __func__, 3263 ldcp->ldc_id, end, len); 3264 3265 SND_DRING_NACK(ldcp, dring_pkt); 3266 return; 3267 } 3268 } else { 3269 RW_EXIT(&ldcp->lane_in.dlistrw); 3270 DERR(vswp, "%s(%lld): invalid endpoint %lld", 3271 __func__, ldcp->ldc_id, end); 3272 SND_DRING_NACK(ldcp, dring_pkt); 3273 return; 3274 } 3275 3276 while (cnt != num) { 3277 vsw_recheck_desc: 3278 if ((rv = ldc_mem_dring_acquire(dp->handle, 3279 pos, pos)) != 0) { 3280 RW_EXIT(&ldcp->lane_in.dlistrw); 3281 DERR(vswp, "%s(%lld): unable to acquire " 3282 "descriptor at pos %d: err %d", 3283 __func__, pos, ldcp->ldc_id, rv); 3284 SND_DRING_NACK(ldcp, dring_pkt); 3285 ldcp->ldc_stats.ierrors++; 3286 return; 3287 } 3288 3289 pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos; 3290 3291 /* 3292 * When given a bounded range of descriptors 3293 * to process, its an error to hit a descriptor 3294 * which is not ready. In the non-bounded case 3295 * (end_idx == -1) this simply indicates we have 3296 * reached the end of the current active range. 3297 */ 3298 if (pub_addr->hdr.dstate != VIO_DESC_READY) { 3299 /* unbound - no error */ 3300 if (end == -1) { 3301 if (read_attempts == vsw_read_attempts) 3302 break; 3303 3304 delay(drv_usectohz(vsw_desc_delay)); 3305 read_attempts++; 3306 goto vsw_recheck_desc; 3307 } 3308 3309 /* bounded - error - so NACK back */ 3310 RW_EXIT(&ldcp->lane_in.dlistrw); 3311 DERR(vswp, "%s(%lld): descriptor not READY " 3312 "(%d)", __func__, ldcp->ldc_id, 3313 pub_addr->hdr.dstate); 3314 SND_DRING_NACK(ldcp, dring_pkt); 3315 return; 3316 } 3317 3318 DTRACE_PROBE1(read_attempts, int, read_attempts); 3319 3320 range_end = pos; 3321 3322 /* 3323 * If we ACK'd the previous descriptor then now 3324 * record the new range start position for later 3325 * ACK's. 3326 */ 3327 if (prev_desc_ack) { 3328 range_start = pos; 3329 3330 D2(vswp, "%s(%lld): updating range start to be " 3331 "%d", __func__, ldcp->ldc_id, range_start); 3332 3333 prev_desc_ack = B_FALSE; 3334 } 3335 3336 /* 3337 * Data is padded to align on 8 byte boundary, 3338 * datalen is actual data length, i.e. minus that 3339 * padding. 3340 */ 3341 datalen = pub_addr->nbytes; 3342 3343 /* 3344 * Does peer wish us to ACK when we have finished 3345 * with this descriptor ? 3346 */ 3347 if (pub_addr->hdr.ack) 3348 ack_needed = B_TRUE; 3349 3350 D2(vswp, "%s(%lld): processing desc %lld at pos" 3351 " 0x%llx : dstate 0x%lx : datalen 0x%lx", 3352 __func__, ldcp->ldc_id, pos, pub_addr, 3353 pub_addr->hdr.dstate, datalen); 3354 3355 /* 3356 * Mark that we are starting to process descriptor. 3357 */ 3358 pub_addr->hdr.dstate = VIO_DESC_ACCEPTED; 3359 3360 /* 3361 * Ensure that we ask ldc for an aligned 3362 * number of bytes. 3363 */ 3364 nbytes = (datalen + VNET_IPALIGN + 7) & ~7; 3365 3366 mp = vio_multipool_allocb(&ldcp->vmp, nbytes); 3367 if (mp == NULL) { 3368 ldcp->ldc_stats.rx_vio_allocb_fail++; 3369 /* 3370 * No free receive buffers available, so 3371 * fallback onto allocb(9F). Make sure that 3372 * we get a data buffer which is a multiple 3373 * of 8 as this is required by ldc_mem_copy. 
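 * (The earlier nbytes calculation rounds up for the same reason;
 * e.g. assuming VNET_IPALIGN is 6, a 1514 byte frame gives
 * nbytes = (1514 + 6 + 7) & ~7 = 1520.)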
3374 */ 3375 DTRACE_PROBE(allocb); 3376 if ((mp = allocb(datalen + VNET_IPALIGN + 8, 3377 BPRI_MED)) == NULL) { 3378 DERR(vswp, "%s(%ld): allocb failed", 3379 __func__, ldcp->ldc_id); 3380 pub_addr->hdr.dstate = VIO_DESC_DONE; 3381 (void) ldc_mem_dring_release(dp->handle, 3382 pos, pos); 3383 ldcp->ldc_stats.ierrors++; 3384 ldcp->ldc_stats.rx_allocb_fail++; 3385 break; 3386 } 3387 } 3388 3389 ncookies = pub_addr->ncookies; 3390 rv = ldc_mem_copy(ldcp->ldc_handle, 3391 (caddr_t)mp->b_rptr, 0, &nbytes, 3392 pub_addr->memcookie, ncookies, LDC_COPY_IN); 3393 3394 if (rv != 0) { 3395 DERR(vswp, "%s(%d): unable to copy in data " 3396 "from %d cookies in desc %d (rv %d)", 3397 __func__, ldcp->ldc_id, ncookies, pos, rv); 3398 freemsg(mp); 3399 3400 pub_addr->hdr.dstate = VIO_DESC_DONE; 3401 (void) ldc_mem_dring_release(dp->handle, 3402 pos, pos); 3403 ldcp->ldc_stats.ierrors++; 3404 break; 3405 } else { 3406 D2(vswp, "%s(%d): copied in %ld bytes" 3407 " using %d cookies", __func__, 3408 ldcp->ldc_id, nbytes, ncookies); 3409 } 3410 3411 /* adjust the read pointer to skip over the padding */ 3412 mp->b_rptr += VNET_IPALIGN; 3413 3414 /* point to the actual end of data */ 3415 mp->b_wptr = mp->b_rptr + datalen; 3416 3417 /* update statistics */ 3418 ehp = (struct ether_header *)mp->b_rptr; 3419 if (IS_BROADCAST(ehp)) 3420 ldcp->ldc_stats.brdcstrcv++; 3421 else if (IS_MULTICAST(ehp)) 3422 ldcp->ldc_stats.multircv++; 3423 3424 ldcp->ldc_stats.ipackets++; 3425 ldcp->ldc_stats.rbytes += datalen; 3426 3427 /* 3428 * IPALIGN space can be used for VLAN_TAG 3429 */ 3430 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, 3431 VSW_VNETPORT, mp); 3432 3433 /* build a chain of received packets */ 3434 if (bp == NULL) { 3435 /* first pkt */ 3436 bp = mp; 3437 bp->b_next = bp->b_prev = NULL; 3438 bpt = bp; 3439 chain = 1; 3440 } else { 3441 mp->b_next = mp->b_prev = NULL; 3442 bpt->b_next = mp; 3443 bpt = mp; 3444 chain++; 3445 } 3446 3447 /* mark we are finished with this descriptor */ 3448 pub_addr->hdr.dstate = VIO_DESC_DONE; 3449 3450 (void) ldc_mem_dring_release(dp->handle, pos, pos); 3451 3452 /* 3453 * Send an ACK back to peer if requested. 3454 */ 3455 if (ack_needed) { 3456 ack_needed = B_FALSE; 3457 3458 dring_pkt->start_idx = range_start; 3459 dring_pkt->end_idx = range_end; 3460 3461 DERR(vswp, "%s(%lld): processed %d %d, ACK" 3462 " requested", __func__, ldcp->ldc_id, 3463 dring_pkt->start_idx, dring_pkt->end_idx); 3464 3465 dring_pkt->dring_process_state = VIO_DP_ACTIVE; 3466 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3467 dring_pkt->tag.vio_sid = ldcp->local_session; 3468 3469 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3470 sizeof (vio_dring_msg_t), B_FALSE); 3471 3472 /* 3473 * Check if ACK was successfully sent. If not 3474 * we break and deal with that below. 3475 */ 3476 if (msg_rv != 0) 3477 break; 3478 3479 prev_desc_ack = B_TRUE; 3480 range_start = pos; 3481 } 3482 3483 /* next descriptor */ 3484 pos = (pos + 1) % len; 3485 cnt++; 3486 3487 /* 3488 * Break out of loop here and stop processing to 3489 * allow some other network device (or disk) to 3490 * get access to the cpu. 3491 */ 3492 if (chain > vsw_chain_len) { 3493 D3(vswp, "%s(%lld): switching chain of %d " 3494 "msgs", __func__, ldcp->ldc_id, chain); 3495 break; 3496 } 3497 } 3498 RW_EXIT(&ldcp->lane_in.dlistrw); 3499 3500 /* 3501 * If when we attempted to send the ACK we found that the 3502 * channel had been reset then now handle this. 
We deal with 3503 * it here as we cannot reset the channel while holding the 3504 * dlistrw lock, and we don't want to acquire/release it 3505 * continuously in the above loop, as a channel reset should 3506 * be a rare event. 3507 */ 3508 if (msg_rv == ECONNRESET) { 3509 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3510 break; 3511 } 3512 3513 /* send the chain of packets to be switched */ 3514 if (bp != NULL) { 3515 DTRACE_PROBE1(vsw_rcv_msgs, int, chain); 3516 D3(vswp, "%s(%lld): switching chain of %d msgs", 3517 __func__, ldcp->ldc_id, chain); 3518 vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT, 3519 ldcp->ldc_port, NULL); 3520 } 3521 3522 DTRACE_PROBE1(msg_cnt, int, cnt); 3523 3524 /* 3525 * We are now finished so ACK back with the state 3526 * set to STOPPING so our peer knows we are finished 3527 */ 3528 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3529 dring_pkt->tag.vio_sid = ldcp->local_session; 3530 3531 dring_pkt->dring_process_state = VIO_DP_STOPPED; 3532 3533 DTRACE_PROBE(stop_process_sent); 3534 3535 /* 3536 * We have not processed any more descriptors beyond 3537 * the last one we ACK'd. 3538 */ 3539 if (prev_desc_ack) 3540 range_start = range_end; 3541 3542 dring_pkt->start_idx = range_start; 3543 dring_pkt->end_idx = range_end; 3544 3545 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 3546 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3547 dring_pkt->end_idx); 3548 3549 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3550 sizeof (vio_dring_msg_t), B_TRUE); 3551 break; 3552 3553 case VIO_SUBTYPE_ACK: 3554 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 3555 /* 3556 * Verify that the relevant descriptors are all 3557 * marked as DONE 3558 */ 3559 READ_ENTER(&ldcp->lane_out.dlistrw); 3560 if ((dp = vsw_ident2dring(&ldcp->lane_out, 3561 dring_pkt->dring_ident)) == NULL) { 3562 RW_EXIT(&ldcp->lane_out.dlistrw); 3563 DERR(vswp, "%s: unknown ident in ACK", __func__); 3564 return; 3565 } 3566 3567 start = end = 0; 3568 start = dring_pkt->start_idx; 3569 end = dring_pkt->end_idx; 3570 len = dp->num_descriptors; 3571 3572 3573 mutex_enter(&dp->dlock); 3574 dp->last_ack_recv = end; 3575 ldcp->ldc_stats.dring_data_acks++; 3576 mutex_exit(&dp->dlock); 3577 3578 (void) vsw_reclaim_dring(dp, start); 3579 3580 /* 3581 * If our peer is stopping processing descriptors then 3582 * we check to make sure it has processed all the descriptors 3583 * we have updated. If not then we send it a new message 3584 * to prompt it to restart. 3585 */ 3586 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) { 3587 DTRACE_PROBE(stop_process_recv); 3588 D2(vswp, "%s(%lld): got stopping msg : %d : %d", 3589 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3590 dring_pkt->end_idx); 3591 3592 /* 3593 * Check next descriptor in public section of ring. 3594 * If its marked as READY then we need to prompt our 3595 * peer to start processing the ring again. 3596 */ 3597 i = (end + 1) % len; 3598 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 3599 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3600 3601 /* 3602 * Hold the restart lock across all of this to 3603 * make sure that its not possible for us to 3604 * decide that a msg needs to be sent in the future 3605 * but the sending code having already checked is 3606 * about to exit. 
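 *
 * If the descriptor at (end + 1) is already READY we immediately
 * send a fresh DRING_DATA INFO message starting at that index with
 * an unbounded end_idx of -1; otherwise we just set restart_reqd so
 * that a restart message can be sent later.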
3607 */ 3608 mutex_enter(&dp->restart_lock); 3609 ldcp->ldc_stats.dring_stopped_acks++; 3610 mutex_enter(&priv_addr->dstate_lock); 3611 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 3612 3613 mutex_exit(&priv_addr->dstate_lock); 3614 3615 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3616 dring_pkt->tag.vio_sid = ldcp->local_session; 3617 3618 dring_pkt->start_idx = (end + 1) % len; 3619 dring_pkt->end_idx = -1; 3620 3621 D2(vswp, "%s(%lld) : sending restart msg:" 3622 " %d : %d", __func__, ldcp->ldc_id, 3623 dring_pkt->start_idx, dring_pkt->end_idx); 3624 3625 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3626 sizeof (vio_dring_msg_t), B_FALSE); 3627 ldcp->ldc_stats.dring_data_msgs++; 3628 3629 } else { 3630 mutex_exit(&priv_addr->dstate_lock); 3631 dp->restart_reqd = B_TRUE; 3632 } 3633 mutex_exit(&dp->restart_lock); 3634 } 3635 RW_EXIT(&ldcp->lane_out.dlistrw); 3636 3637 /* only do channel reset after dropping dlistrw lock */ 3638 if (msg_rv == ECONNRESET) 3639 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3640 3641 break; 3642 3643 case VIO_SUBTYPE_NACK: 3644 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 3645 __func__, ldcp->ldc_id); 3646 /* 3647 * Something is badly wrong if we are getting NACK's 3648 * for our data pkts. So reset the channel. 3649 */ 3650 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3651 3652 break; 3653 3654 default: 3655 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3656 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 3657 } 3658 3659 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3660 } 3661 3662 /* 3663 * dummy pkt data handler function for vnet protocol version 1.0 3664 */ 3665 static void 3666 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 3667 { 3668 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 3669 } 3670 3671 /* 3672 * This function handles raw pkt data messages received over the channel. 3673 * Currently, only priority-eth-type frames are received through this mechanism. 3674 * In this case, the frame(data) is present within the message itself which 3675 * is copied into an mblk before switching it. 3676 */ 3677 static void 3678 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 3679 { 3680 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 3681 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 3682 uint32_t size; 3683 mblk_t *mp; 3684 vsw_t *vswp = ldcp->ldc_vswp; 3685 vgen_stats_t *statsp = &ldcp->ldc_stats; 3686 lane_t *lp = &ldcp->lane_out; 3687 3688 size = msglen - VIO_PKT_DATA_HDRSIZE; 3689 if (size < ETHERMIN || size > lp->mtu) { 3690 (void) atomic_inc_32(&statsp->rx_pri_fail); 3691 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3692 ldcp->ldc_id, size); 3693 return; 3694 } 3695 3696 mp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 3697 if (mp == NULL) { 3698 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 3699 if (mp == NULL) { 3700 (void) atomic_inc_32(&statsp->rx_pri_fail); 3701 DWARN(vswp, "%s(%lld) allocb failure, " 3702 "unable to process priority frame\n", __func__, 3703 ldcp->ldc_id); 3704 return; 3705 } 3706 } 3707 3708 /* skip over the extra space for vlan tag */ 3709 mp->b_rptr += VLAN_TAGSZ; 3710 3711 /* copy the frame from the payload of raw data msg into the mblk */ 3712 bcopy(dpkt->data, mp->b_rptr, size); 3713 mp->b_wptr = mp->b_rptr + size; 3714 3715 /* update stats */ 3716 (void) atomic_inc_64(&statsp->rx_pri_packets); 3717 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 3718 3719 /* 3720 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 
3721 */ 3722 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3723 3724 /* switch the frame to destination */ 3725 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 3726 } 3727 3728 /* 3729 * Process an in-band descriptor message (most likely from 3730 * OBP). 3731 */ 3732 static void 3733 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3734 { 3735 vnet_ibnd_desc_t *ibnd_desc; 3736 dring_info_t *dp = NULL; 3737 vsw_private_desc_t *priv_addr = NULL; 3738 vsw_t *vswp = ldcp->ldc_vswp; 3739 mblk_t *mp = NULL; 3740 size_t nbytes = 0; 3741 size_t off = 0; 3742 uint64_t idx = 0; 3743 uint32_t num = 1, len, datalen = 0; 3744 uint64_t ncookies = 0; 3745 int i, rv; 3746 int j = 0; 3747 3748 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3749 3750 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3751 3752 switch (ibnd_desc->hdr.tag.vio_subtype) { 3753 case VIO_SUBTYPE_INFO: 3754 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3755 3756 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3757 return; 3758 3759 /* 3760 * Data is padded to align on a 8 byte boundary, 3761 * nbytes is actual data length, i.e. minus that 3762 * padding. 3763 */ 3764 datalen = ibnd_desc->nbytes; 3765 3766 D2(vswp, "%s(%lld): processing inband desc : " 3767 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 3768 3769 ncookies = ibnd_desc->ncookies; 3770 3771 /* 3772 * allocb(9F) returns an aligned data block. We 3773 * need to ensure that we ask ldc for an aligned 3774 * number of bytes also. 3775 */ 3776 nbytes = datalen; 3777 if (nbytes & 0x7) { 3778 off = 8 - (nbytes & 0x7); 3779 nbytes += off; 3780 } 3781 3782 /* alloc extra space for VLAN_TAG */ 3783 mp = allocb(datalen + 8, BPRI_MED); 3784 if (mp == NULL) { 3785 DERR(vswp, "%s(%lld): allocb failed", 3786 __func__, ldcp->ldc_id); 3787 ldcp->ldc_stats.rx_allocb_fail++; 3788 return; 3789 } 3790 3791 /* skip over the extra space for VLAN_TAG */ 3792 mp->b_rptr += 8; 3793 3794 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 3795 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 3796 LDC_COPY_IN); 3797 3798 if (rv != 0) { 3799 DERR(vswp, "%s(%d): unable to copy in data from " 3800 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 3801 freemsg(mp); 3802 ldcp->ldc_stats.ierrors++; 3803 return; 3804 } 3805 3806 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 3807 __func__, ldcp->ldc_id, nbytes, ncookies); 3808 3809 /* point to the actual end of data */ 3810 mp->b_wptr = mp->b_rptr + datalen; 3811 ldcp->ldc_stats.ipackets++; 3812 ldcp->ldc_stats.rbytes += datalen; 3813 3814 /* 3815 * We ACK back every in-band descriptor message we process 3816 */ 3817 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 3818 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 3819 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 3820 sizeof (vnet_ibnd_desc_t), B_TRUE); 3821 3822 /* 3823 * there is extra space alloc'd for VLAN_TAG 3824 */ 3825 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3826 3827 /* send the packet to be switched */ 3828 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 3829 ldcp->ldc_port, NULL); 3830 3831 break; 3832 3833 case VIO_SUBTYPE_ACK: 3834 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3835 3836 /* Verify the ACK is valid */ 3837 idx = ibnd_desc->hdr.desc_handle; 3838 3839 if (idx >= vsw_ntxds) { 3840 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 3841 "(idx %ld)", vswp->instance, idx); 3842 return; 3843 } 3844 3845 if ((dp = ldcp->lane_out.dringp) == NULL) { 3846 DERR(vswp, "%s: no dring found", 
__func__); 3847 return; 3848 } 3849 3850 len = dp->num_descriptors; 3851 /* 3852 * If the descriptor we are being ACK'ed for is not the 3853 * one we expected, then pkts were lost somwhere, either 3854 * when we tried to send a msg, or a previous ACK msg from 3855 * our peer. In either case we now reclaim the descriptors 3856 * in the range from the last ACK we received up to the 3857 * current ACK. 3858 */ 3859 if (idx != dp->last_ack_recv) { 3860 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 3861 __func__, dp->last_ack_recv, idx); 3862 num = idx >= dp->last_ack_recv ? 3863 idx - dp->last_ack_recv + 1: 3864 (len - dp->last_ack_recv + 1) + idx; 3865 } 3866 3867 /* 3868 * When we sent the in-band message to our peer we 3869 * marked the copy in our private ring as READY. We now 3870 * check that the descriptor we are being ACK'ed for is in 3871 * fact READY, i.e. it is one we have shared with our peer. 3872 * 3873 * If its not we flag an error, but still reset the descr 3874 * back to FREE. 3875 */ 3876 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 3877 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3878 mutex_enter(&priv_addr->dstate_lock); 3879 if (priv_addr->dstate != VIO_DESC_READY) { 3880 DERR(vswp, "%s: (%ld) desc at index %ld not " 3881 "READY (0x%lx)", __func__, 3882 ldcp->ldc_id, idx, priv_addr->dstate); 3883 DERR(vswp, "%s: bound %d: ncookies %ld : " 3884 "datalen %ld", __func__, 3885 priv_addr->bound, priv_addr->ncookies, 3886 priv_addr->datalen); 3887 } 3888 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 3889 ldcp->ldc_id, idx); 3890 /* release resources associated with sent msg */ 3891 priv_addr->datalen = 0; 3892 priv_addr->dstate = VIO_DESC_FREE; 3893 mutex_exit(&priv_addr->dstate_lock); 3894 } 3895 /* update to next expected value */ 3896 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 3897 3898 break; 3899 3900 case VIO_SUBTYPE_NACK: 3901 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3902 3903 /* 3904 * We should only get a NACK if our peer doesn't like 3905 * something about a message we have sent it. If this 3906 * happens we just release the resources associated with 3907 * the message. (We are relying on higher layers to decide 3908 * whether or not to resend. 3909 */ 3910 3911 /* limit check */ 3912 idx = ibnd_desc->hdr.desc_handle; 3913 3914 if (idx >= vsw_ntxds) { 3915 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 3916 __func__, idx); 3917 return; 3918 } 3919 3920 if ((dp = ldcp->lane_out.dringp) == NULL) { 3921 DERR(vswp, "%s: no dring found", __func__); 3922 return; 3923 } 3924 3925 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 3926 3927 /* move to correct location in ring */ 3928 priv_addr += idx; 3929 3930 /* release resources associated with sent msg */ 3931 mutex_enter(&priv_addr->dstate_lock); 3932 priv_addr->datalen = 0; 3933 priv_addr->dstate = VIO_DESC_FREE; 3934 mutex_exit(&priv_addr->dstate_lock); 3935 3936 break; 3937 3938 default: 3939 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3940 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 3941 } 3942 3943 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3944 } 3945 3946 static void 3947 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp) 3948 { 3949 _NOTE(ARGUNUSED(epkt)) 3950 3951 vsw_t *vswp = ldcp->ldc_vswp; 3952 uint16_t env = tagp->vio_subtype_env; 3953 3954 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 3955 3956 /* 3957 * Error vio_subtypes have yet to be defined. So for 3958 * the moment we can't do anything. 
3959 */ 3960 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 3961 3962 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 3963 } 3964 3965 /* transmit the packet over the given port */ 3966 int 3967 vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count) 3968 { 3969 vsw_ldc_list_t *ldcl = &port->p_ldclist; 3970 vsw_ldc_t *ldcp; 3971 int status = 0; 3972 uint32_t n; 3973 3974 READ_ENTER(&ldcl->lockrw); 3975 /* 3976 * Note: for now, we have a single channel. 3977 */ 3978 ldcp = ldcl->head; 3979 if (ldcp == NULL) { 3980 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 3981 freemsgchain(mp); 3982 RW_EXIT(&ldcl->lockrw); 3983 return (1); 3984 } 3985 3986 n = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt); 3987 3988 count -= n; 3989 if (count == 0) { 3990 goto vsw_portsend_exit; 3991 } 3992 3993 status = ldcp->tx(ldcp, mp, mpt, count); 3994 3995 vsw_portsend_exit: 3996 RW_EXIT(&ldcl->lockrw); 3997 3998 return (status); 3999 } 4000 4001 /* 4002 * Break up frames into two separate chains: normal and 4003 * priority, based on the frame type. The number of 4004 * priority frames is also counted and returned. 4005 * 4006 * Params: 4007 * vswp: pointer to the instance of vsw 4008 * np: head of packet chain to be broken 4009 * npt: tail of packet chain to be broken 4010 * 4011 * Returns: 4012 * np: head of normal data packets 4013 * npt: tail of normal data packets 4014 * hp: head of high priority packets 4015 * hpt: tail of high priority packets 4016 */ 4017 static uint32_t 4018 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt, 4019 mblk_t **hp, mblk_t **hpt) 4020 { 4021 mblk_t *tmp = NULL; 4022 mblk_t *smp = NULL; 4023 mblk_t *hmp = NULL; /* high prio pkts head */ 4024 mblk_t *hmpt = NULL; /* high prio pkts tail */ 4025 mblk_t *nmp = NULL; /* normal pkts head */ 4026 mblk_t *nmpt = NULL; /* normal pkts tail */ 4027 uint32_t count = 0; 4028 int i; 4029 struct ether_header *ehp; 4030 uint32_t num_types; 4031 uint16_t *types; 4032 4033 tmp = *np; 4034 while (tmp != NULL) { 4035 4036 smp = tmp; 4037 tmp = tmp->b_next; 4038 smp->b_next = NULL; 4039 smp->b_prev = NULL; 4040 4041 ehp = (struct ether_header *)smp->b_rptr; 4042 num_types = vswp->pri_num_types; 4043 types = vswp->pri_types; 4044 for (i = 0; i < num_types; i++) { 4045 if (ehp->ether_type == types[i]) { 4046 /* high priority frame */ 4047 4048 if (hmp != NULL) { 4049 hmpt->b_next = smp; 4050 hmpt = smp; 4051 } else { 4052 hmp = hmpt = smp; 4053 } 4054 count++; 4055 break; 4056 } 4057 } 4058 if (i == num_types) { 4059 /* normal data frame */ 4060 4061 if (nmp != NULL) { 4062 nmpt->b_next = smp; 4063 nmpt = smp; 4064 } else { 4065 nmp = nmpt = smp; 4066 } 4067 } 4068 } 4069 4070 *hp = hmp; 4071 *hpt = hmpt; 4072 *np = nmp; 4073 *npt = nmpt; 4074 4075 return (count); 4076 } 4077 4078 /* 4079 * Wrapper function to transmit normal and/or priority frames over the channel.
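 * Frames whose ether_type matches one of the configured priority types
 * (vswp->pri_types) are split off by vsw_get_pri_packets() and sent
 * immediately as raw VIO_PKT_DATA messages via vsw_ldcsend_pkt(); the
 * remaining ordinary frames are passed on to vsw_ldctx().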
4080 */ 4081 static int 4082 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4083 { 4084 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4085 mblk_t *tmp; 4086 mblk_t *smp; 4087 mblk_t *hmp; /* high prio pkts head */ 4088 mblk_t *hmpt; /* high prio pkts tail */ 4089 mblk_t *nmp; /* normal pkts head */ 4090 mblk_t *nmpt; /* normal pkts tail */ 4091 uint32_t n = 0; 4092 vsw_t *vswp = ldcp->ldc_vswp; 4093 4094 ASSERT(VSW_PRI_ETH_DEFINED(vswp)); 4095 ASSERT(count != 0); 4096 4097 nmp = mp; 4098 nmpt = mpt; 4099 4100 /* gather any priority frames from the chain of packets */ 4101 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt); 4102 4103 /* transmit priority frames */ 4104 tmp = hmp; 4105 while (tmp != NULL) { 4106 smp = tmp; 4107 tmp = tmp->b_next; 4108 smp->b_next = NULL; 4109 vsw_ldcsend_pkt(ldcp, smp); 4110 } 4111 4112 count -= n; 4113 4114 if (count == 0) { 4115 /* no normal data frames to process */ 4116 return (0); 4117 } 4118 4119 return (vsw_ldctx(ldcp, nmp, nmpt, count)); 4120 } 4121 4122 /* 4123 * Wrapper function to transmit normal frames over the channel. 4124 */ 4125 static int 4126 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4127 { 4128 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4129 mblk_t *tmp = NULL; 4130 4131 ASSERT(count != 0); 4132 /* 4133 * If the TX thread is enabled, then queue the 4134 * ordinary frames and signal the tx thread. 4135 */ 4136 if (ldcp->tx_thread != NULL) { 4137 4138 mutex_enter(&ldcp->tx_thr_lock); 4139 4140 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 4141 /* 4142 * If we reached queue limit, 4143 * do not queue new packets, 4144 * drop them. 4145 */ 4146 ldcp->ldc_stats.tx_qfull += count; 4147 mutex_exit(&ldcp->tx_thr_lock); 4148 freemsgchain(mp); 4149 goto exit; 4150 } 4151 if (ldcp->tx_mhead == NULL) { 4152 ldcp->tx_mhead = mp; 4153 ldcp->tx_mtail = mpt; 4154 cv_signal(&ldcp->tx_thr_cv); 4155 } else { 4156 ldcp->tx_mtail->b_next = mp; 4157 ldcp->tx_mtail = mpt; 4158 } 4159 ldcp->tx_cnt += count; 4160 mutex_exit(&ldcp->tx_thr_lock); 4161 } else { 4162 while (mp != NULL) { 4163 tmp = mp->b_next; 4164 mp->b_next = mp->b_prev = NULL; 4165 (void) vsw_ldcsend(ldcp, mp, 1); 4166 mp = tmp; 4167 } 4168 } 4169 4170 exit: 4171 return (0); 4172 } 4173 4174 /* 4175 * This function transmits the frame in the payload of a raw data 4176 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 4177 * send special frames with high priorities, without going through 4178 * the normal data path which uses descriptor ring mechanism. 4179 */ 4180 static void 4181 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp) 4182 { 4183 vio_raw_data_msg_t *pkt; 4184 mblk_t *bp; 4185 mblk_t *nmp = NULL; 4186 caddr_t dst; 4187 uint32_t mblksz; 4188 uint32_t size; 4189 uint32_t nbytes; 4190 int rv; 4191 vsw_t *vswp = ldcp->ldc_vswp; 4192 vgen_stats_t *statsp = &ldcp->ldc_stats; 4193 4194 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4195 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4196 (void) atomic_inc_32(&statsp->tx_pri_fail); 4197 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4198 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4199 ldcp->lane_out.lstate); 4200 goto send_pkt_exit; 4201 } 4202 4203 size = msgsize(mp); 4204 4205 /* frame size bigger than available payload len of raw data msg ? 
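 * (i.e. the frame must fit into ldcp->msglen less the VIO_PKT_DATA_HDRSIZE
 * header, since the whole frame is copied into the message payload below)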
*/ 4206 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) { 4207 (void) atomic_inc_32(&statsp->tx_pri_fail); 4208 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 4209 ldcp->ldc_id, size); 4210 goto send_pkt_exit; 4211 } 4212 4213 if (size < ETHERMIN) 4214 size = ETHERMIN; 4215 4216 /* alloc space for a raw data message */ 4217 nmp = vio_allocb(vswp->pri_tx_vmp); 4218 if (nmp == NULL) { 4219 (void) atomic_inc_32(&statsp->tx_pri_fail); 4220 DWARN(vswp, "vio_allocb failed\n"); 4221 goto send_pkt_exit; 4222 } 4223 pkt = (vio_raw_data_msg_t *)nmp->b_rptr; 4224 4225 /* copy frame into the payload of raw data message */ 4226 dst = (caddr_t)pkt->data; 4227 for (bp = mp; bp != NULL; bp = bp->b_cont) { 4228 mblksz = MBLKL(bp); 4229 bcopy(bp->b_rptr, dst, mblksz); 4230 dst += mblksz; 4231 } 4232 4233 /* setup the raw data msg */ 4234 pkt->tag.vio_msgtype = VIO_TYPE_DATA; 4235 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 4236 pkt->tag.vio_subtype_env = VIO_PKT_DATA; 4237 pkt->tag.vio_sid = ldcp->local_session; 4238 nbytes = VIO_PKT_DATA_HDRSIZE + size; 4239 4240 /* send the msg over ldc */ 4241 rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE); 4242 if (rv != 0) { 4243 (void) atomic_inc_32(&statsp->tx_pri_fail); 4244 DWARN(vswp, "%s(%lld) Error sending priority frame\n", __func__, 4245 ldcp->ldc_id); 4246 goto send_pkt_exit; 4247 } 4248 4249 /* update stats */ 4250 (void) atomic_inc_64(&statsp->tx_pri_packets); 4251 (void) atomic_add_64(&statsp->tx_pri_bytes, size); 4252 4253 send_pkt_exit: 4254 if (nmp != NULL) 4255 freemsg(nmp); 4256 freemsg(mp); 4257 } 4258 4259 /* 4260 * Transmit the packet over the given LDC channel. 4261 * 4262 * The 'retries' argument indicates how many times a packet 4263 * is retried before it is dropped. Note, the retry is done 4264 * only for a resource related failure, for all other failures 4265 * the packet is dropped immediately. 4266 */ 4267 static int 4268 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries) 4269 { 4270 int i; 4271 int rc; 4272 int status = 0; 4273 vsw_port_t *port = ldcp->ldc_port; 4274 dring_info_t *dp = NULL; 4275 4276 4277 for (i = 0; i < retries; ) { 4278 /* 4279 * Send the message out using the appropriate 4280 * transmit function which will free the mblk when it 4281 * is finished with it. 4282 */ 4283 mutex_enter(&port->tx_lock); 4284 if (port->transmit != NULL) { 4285 status = (*port->transmit)(ldcp, mp); 4286 } 4287 if (status == LDC_TX_SUCCESS) { 4288 mutex_exit(&port->tx_lock); 4289 break; 4290 } 4291 i++; /* increment the counter here */ 4292 4293 /* If it's the last retry, then update the oerrors */ 4294 if ((i == retries) && (status == LDC_TX_NORESOURCES)) { 4295 ldcp->ldc_stats.oerrors++; 4296 } 4297 mutex_exit(&port->tx_lock); 4298 4299 if (status != LDC_TX_NORESOURCES) { 4300 /* 4301 * No retrying required for errors unrelated 4302 * to resources. 4303 */ 4304 break; 4305 } 4306 READ_ENTER(&ldcp->lane_out.dlistrw); 4307 if (((dp = ldcp->lane_out.dringp) != NULL) && 4308 ((VSW_VER_GTEQ(ldcp, 1, 2) && 4309 (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) || 4310 ((VSW_VER_LT(ldcp, 1, 2) && 4311 (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) { 4312 rc = vsw_reclaim_dring(dp, dp->end_idx); 4313 } else { 4314 /* 4315 * If there is no dring or the xfer_mode is 4316 * set to DESC_MODE (i.e., OBP), then simply break here. 4317 */ 4318 RW_EXIT(&ldcp->lane_out.dlistrw); 4319 break; 4320 } 4321 RW_EXIT(&ldcp->lane_out.dlistrw); 4322 4323 /* 4324 * Delay only if none were reclaimed 4325 * and it's not the last retry.
4326 */ 4327 if ((rc == 0) && (i < retries)) { 4328 delay(drv_usectohz(vsw_ldc_tx_delay)); 4329 } 4330 } 4331 freemsg(mp); 4332 return (status); 4333 } 4334 4335 /* 4336 * Send packet out via descriptor ring to a logical device. 4337 */ 4338 static int 4339 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 4340 { 4341 vio_dring_msg_t dring_pkt; 4342 dring_info_t *dp = NULL; 4343 vsw_private_desc_t *priv_desc = NULL; 4344 vnet_public_desc_t *pub = NULL; 4345 vsw_t *vswp = ldcp->ldc_vswp; 4346 mblk_t *bp; 4347 size_t n, size; 4348 caddr_t bufp; 4349 int idx; 4350 int status = LDC_TX_SUCCESS; 4351 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 4352 lane_t *lp = &ldcp->lane_out; 4353 4354 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 4355 4356 /* TODO: make test a macro */ 4357 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4358 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4359 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4360 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4361 ldcp->lane_out.lstate); 4362 ldcp->ldc_stats.oerrors++; 4363 return (LDC_TX_FAILURE); 4364 } 4365 4366 /* 4367 * Note - using first ring only, this may change 4368 * in the future. 4369 */ 4370 READ_ENTER(&ldcp->lane_out.dlistrw); 4371 if ((dp = ldcp->lane_out.dringp) == NULL) { 4372 RW_EXIT(&ldcp->lane_out.dlistrw); 4373 DERR(vswp, "%s(%lld): no dring for outbound lane on" 4374 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 4375 ldcp->ldc_stats.oerrors++; 4376 return (LDC_TX_FAILURE); 4377 } 4378 4379 size = msgsize(mp); 4380 if (size > (size_t)lp->mtu) { 4381 RW_EXIT(&ldcp->lane_out.dlistrw); 4382 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4383 ldcp->ldc_id, size); 4384 ldcp->ldc_stats.oerrors++; 4385 return (LDC_TX_FAILURE); 4386 } 4387 4388 /* 4389 * Find a free descriptor 4390 * 4391 * Note: for the moment we are assuming that we will only 4392 * have one dring going from the switch to each of its 4393 * peers. This may change in the future. 4394 */ 4395 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4396 D2(vswp, "%s(%lld): no descriptor available for ring " 4397 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4398 4399 /* nothing more we can do */ 4400 status = LDC_TX_NORESOURCES; 4401 ldcp->ldc_stats.tx_no_desc++; 4402 goto vsw_dringsend_free_exit; 4403 } else { 4404 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 4405 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 4406 } 4407 4408 /* copy data into the descriptor */ 4409 bufp = priv_desc->datap; 4410 bufp += VNET_IPALIGN; 4411 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4412 n = MBLKL(bp); 4413 bcopy(bp->b_rptr, bufp, n); 4414 bufp += n; 4415 } 4416 4417 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4418 4419 pub = priv_desc->descp; 4420 pub->nbytes = priv_desc->datalen; 4421 4422 /* update statistics */ 4423 if (IS_BROADCAST(ehp)) 4424 ldcp->ldc_stats.brdcstxmt++; 4425 else if (IS_MULTICAST(ehp)) 4426 ldcp->ldc_stats.multixmt++; 4427 ldcp->ldc_stats.opackets++; 4428 ldcp->ldc_stats.obytes += priv_desc->datalen; 4429 4430 mutex_enter(&priv_desc->dstate_lock); 4431 pub->hdr.dstate = VIO_DESC_READY; 4432 mutex_exit(&priv_desc->dstate_lock); 4433 4434 /* 4435 * Determine whether or not we need to send a message to our 4436 * peer prompting them to read our newly updated descriptor(s). 
4437 */ 4438 mutex_enter(&dp->restart_lock); 4439 if (dp->restart_reqd) { 4440 dp->restart_reqd = B_FALSE; 4441 ldcp->ldc_stats.dring_data_msgs++; 4442 mutex_exit(&dp->restart_lock); 4443 4444 /* 4445 * Send a vio_dring_msg to peer to prompt them to read 4446 * the updated descriptor ring. 4447 */ 4448 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 4449 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 4450 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 4451 dring_pkt.tag.vio_sid = ldcp->local_session; 4452 4453 /* Note - for now using first ring */ 4454 dring_pkt.dring_ident = dp->ident; 4455 4456 /* 4457 * If last_ack_recv is -1 then we know we've not 4458 * received any ack's yet, so this must be the first 4459 * msg sent, so set the start to the begining of the ring. 4460 */ 4461 mutex_enter(&dp->dlock); 4462 if (dp->last_ack_recv == -1) { 4463 dring_pkt.start_idx = 0; 4464 } else { 4465 dring_pkt.start_idx = 4466 (dp->last_ack_recv + 1) % dp->num_descriptors; 4467 } 4468 dring_pkt.end_idx = -1; 4469 mutex_exit(&dp->dlock); 4470 4471 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 4472 ldcp->ldc_id, dp, dring_pkt.dring_ident); 4473 D3(vswp, "%s(%lld): start %lld : end %lld :\n", 4474 __func__, ldcp->ldc_id, dring_pkt.start_idx, 4475 dring_pkt.end_idx); 4476 4477 RW_EXIT(&ldcp->lane_out.dlistrw); 4478 4479 (void) vsw_send_msg(ldcp, (void *)&dring_pkt, 4480 sizeof (vio_dring_msg_t), B_TRUE); 4481 4482 return (status); 4483 4484 } else { 4485 mutex_exit(&dp->restart_lock); 4486 D2(vswp, "%s(%lld): updating descp %d", __func__, 4487 ldcp->ldc_id, idx); 4488 } 4489 4490 vsw_dringsend_free_exit: 4491 4492 RW_EXIT(&ldcp->lane_out.dlistrw); 4493 4494 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 4495 return (status); 4496 } 4497 4498 /* 4499 * Send an in-band descriptor message over ldc. 4500 */ 4501 static int 4502 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 4503 { 4504 vsw_t *vswp = ldcp->ldc_vswp; 4505 vnet_ibnd_desc_t ibnd_msg; 4506 vsw_private_desc_t *priv_desc = NULL; 4507 dring_info_t *dp = NULL; 4508 size_t n, size = 0; 4509 caddr_t bufp; 4510 mblk_t *bp; 4511 int idx, i; 4512 int status = LDC_TX_SUCCESS; 4513 static int warn_msg = 1; 4514 lane_t *lp = &ldcp->lane_out; 4515 4516 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4517 4518 ASSERT(mp != NULL); 4519 4520 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4521 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4522 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 4523 __func__, ldcp->ldc_id, ldcp->ldc_status, 4524 ldcp->lane_out.lstate); 4525 ldcp->ldc_stats.oerrors++; 4526 return (LDC_TX_FAILURE); 4527 } 4528 4529 /* 4530 * only expect single dring to exist, which we use 4531 * as an internal buffer, rather than a transfer channel. 
4532 */ 4533 READ_ENTER(&ldcp->lane_out.dlistrw); 4534 if ((dp = ldcp->lane_out.dringp) == NULL) { 4535 DERR(vswp, "%s(%lld): no dring for outbound lane", 4536 __func__, ldcp->ldc_id); 4537 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 4538 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 4539 RW_EXIT(&ldcp->lane_out.dlistrw); 4540 ldcp->ldc_stats.oerrors++; 4541 return (LDC_TX_FAILURE); 4542 } 4543 4544 size = msgsize(mp); 4545 if (size > (size_t)lp->mtu) { 4546 RW_EXIT(&ldcp->lane_out.dlistrw); 4547 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4548 ldcp->ldc_id, size); 4549 ldcp->ldc_stats.oerrors++; 4550 return (LDC_TX_FAILURE); 4551 } 4552 4553 /* 4554 * Find a free descriptor in our buffer ring 4555 */ 4556 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4557 RW_EXIT(&ldcp->lane_out.dlistrw); 4558 if (warn_msg) { 4559 DERR(vswp, "%s(%lld): no descriptor available for ring " 4560 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4561 warn_msg = 0; 4562 } 4563 4564 /* nothing more we can do */ 4565 status = LDC_TX_NORESOURCES; 4566 goto vsw_descrsend_free_exit; 4567 } else { 4568 D2(vswp, "%s(%lld): free private descriptor found at pos " 4569 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4570 warn_msg = 1; 4571 } 4572 4573 /* copy data into the descriptor */ 4574 bufp = priv_desc->datap; 4575 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4576 n = MBLKL(bp); 4577 bcopy(bp->b_rptr, bufp, n); 4578 bufp += n; 4579 } 4580 4581 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4582 4583 /* create and send the in-band descp msg */ 4584 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4585 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4586 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4587 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4588 4589 /* 4590 * Copy the mem cookies describing the data from the 4591 * private region of the descriptor ring into the inband 4592 * descriptor. 
4593 */ 4594 for (i = 0; i < priv_desc->ncookies; i++) { 4595 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4596 sizeof (ldc_mem_cookie_t)); 4597 } 4598 4599 ibnd_msg.hdr.desc_handle = idx; 4600 ibnd_msg.ncookies = priv_desc->ncookies; 4601 ibnd_msg.nbytes = size; 4602 4603 ldcp->ldc_stats.opackets++; 4604 ldcp->ldc_stats.obytes += size; 4605 4606 RW_EXIT(&ldcp->lane_out.dlistrw); 4607 4608 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 4609 sizeof (vnet_ibnd_desc_t), B_TRUE); 4610 4611 vsw_descrsend_free_exit: 4612 4613 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4614 return (status); 4615 } 4616 4617 static void 4618 vsw_send_ver(void *arg) 4619 { 4620 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4621 vsw_t *vswp = ldcp->ldc_vswp; 4622 lane_t *lp = &ldcp->lane_out; 4623 vio_ver_msg_t ver_msg; 4624 4625 D1(vswp, "%s enter", __func__); 4626 4627 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4628 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4629 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 4630 ver_msg.tag.vio_sid = ldcp->local_session; 4631 4632 if (vsw_obp_ver_proto_workaround == B_FALSE) { 4633 ver_msg.ver_major = vsw_versions[0].ver_major; 4634 ver_msg.ver_minor = vsw_versions[0].ver_minor; 4635 } else { 4636 /* use the major,minor that we've ack'd */ 4637 lane_t *lpi = &ldcp->lane_in; 4638 ver_msg.ver_major = lpi->ver_major; 4639 ver_msg.ver_minor = lpi->ver_minor; 4640 } 4641 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 4642 4643 lp->lstate |= VSW_VER_INFO_SENT; 4644 lp->ver_major = ver_msg.ver_major; 4645 lp->ver_minor = ver_msg.ver_minor; 4646 4647 DUMP_TAG(ver_msg.tag); 4648 4649 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 4650 4651 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 4652 } 4653 4654 static void 4655 vsw_send_attr(vsw_ldc_t *ldcp) 4656 { 4657 vsw_t *vswp = ldcp->ldc_vswp; 4658 lane_t *lp = &ldcp->lane_out; 4659 vnet_attr_msg_t attr_msg; 4660 4661 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4662 4663 /* 4664 * Subtype is set to INFO by default 4665 */ 4666 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4667 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4668 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 4669 attr_msg.tag.vio_sid = ldcp->local_session; 4670 4671 /* payload copied from default settings for lane */ 4672 attr_msg.mtu = lp->mtu; 4673 attr_msg.addr_type = lp->addr_type; 4674 attr_msg.xfer_mode = lp->xfer_mode; 4675 attr_msg.ack_freq = lp->xfer_mode; 4676 4677 READ_ENTER(&vswp->if_lockrw); 4678 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet); 4679 RW_EXIT(&vswp->if_lockrw); 4680 4681 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 4682 4683 DUMP_TAG(attr_msg.tag); 4684 4685 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 4686 4687 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4688 } 4689 4690 /* 4691 * Create dring info msg (which also results in the creation of 4692 * a dring). 4693 */ 4694 static vio_dring_reg_msg_t * 4695 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 4696 { 4697 vio_dring_reg_msg_t *mp; 4698 dring_info_t *dp; 4699 vsw_t *vswp = ldcp->ldc_vswp; 4700 4701 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 4702 4703 /* 4704 * If we can't create a dring, obviously no point sending 4705 * a message. 
4706 */ 4707 if ((dp = vsw_create_dring(ldcp)) == NULL) 4708 return (NULL); 4709 4710 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 4711 4712 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 4713 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 4714 mp->tag.vio_subtype_env = VIO_DRING_REG; 4715 mp->tag.vio_sid = ldcp->local_session; 4716 4717 /* payload */ 4718 mp->num_descriptors = dp->num_descriptors; 4719 mp->descriptor_size = dp->descriptor_size; 4720 mp->options = dp->options; 4721 mp->ncookies = dp->ncookies; 4722 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 4723 4724 mp->dring_ident = 0; 4725 4726 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 4727 4728 return (mp); 4729 } 4730 4731 static void 4732 vsw_send_dring_info(vsw_ldc_t *ldcp) 4733 { 4734 vio_dring_reg_msg_t *dring_msg; 4735 vsw_t *vswp = ldcp->ldc_vswp; 4736 4737 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 4738 4739 dring_msg = vsw_create_dring_info_pkt(ldcp); 4740 if (dring_msg == NULL) { 4741 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 4742 vswp->instance, __func__); 4743 return; 4744 } 4745 4746 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 4747 4748 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 4749 4750 (void) vsw_send_msg(ldcp, dring_msg, 4751 sizeof (vio_dring_reg_msg_t), B_TRUE); 4752 4753 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 4754 4755 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 4756 } 4757 4758 static void 4759 vsw_send_rdx(vsw_ldc_t *ldcp) 4760 { 4761 vsw_t *vswp = ldcp->ldc_vswp; 4762 vio_rdx_msg_t rdx_msg; 4763 4764 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4765 4766 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4767 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4768 rdx_msg.tag.vio_subtype_env = VIO_RDX; 4769 rdx_msg.tag.vio_sid = ldcp->local_session; 4770 4771 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 4772 4773 DUMP_TAG(rdx_msg.tag); 4774 4775 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 4776 4777 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4778 } 4779 4780 /* 4781 * Generic routine to send message out over ldc channel. 4782 * 4783 * It is possible that when we attempt to write over the ldc channel 4784 * that we get notified that it has been reset. Depending on the value 4785 * of the handle_reset flag we either handle that event here or simply 4786 * notify the caller that the channel was reset. 
4787 */ 4788 int 4789 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset) 4790 { 4791 int rv; 4792 size_t msglen = size; 4793 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 4794 vsw_t *vswp = ldcp->ldc_vswp; 4795 vio_dring_msg_t *dmsg; 4796 vio_raw_data_msg_t *rmsg; 4797 vnet_ibnd_desc_t *imsg; 4798 boolean_t data_msg = B_FALSE; 4799 4800 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 4801 ldcp->ldc_id, size); 4802 4803 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 4804 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 4805 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 4806 4807 mutex_enter(&ldcp->ldc_txlock); 4808 4809 if (tag->vio_subtype == VIO_SUBTYPE_INFO) { 4810 if (tag->vio_subtype_env == VIO_DRING_DATA) { 4811 dmsg = (vio_dring_msg_t *)tag; 4812 dmsg->seq_num = ldcp->lane_out.seq_num; 4813 data_msg = B_TRUE; 4814 } else if (tag->vio_subtype_env == VIO_PKT_DATA) { 4815 rmsg = (vio_raw_data_msg_t *)tag; 4816 rmsg->seq_num = ldcp->lane_out.seq_num; 4817 data_msg = B_TRUE; 4818 } else if (tag->vio_subtype_env == VIO_DESC_DATA) { 4819 imsg = (vnet_ibnd_desc_t *)tag; 4820 imsg->hdr.seq_num = ldcp->lane_out.seq_num; 4821 data_msg = B_TRUE; 4822 } 4823 } 4824 4825 do { 4826 msglen = size; 4827 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 4828 } while (rv == EWOULDBLOCK && --vsw_wretries > 0); 4829 4830 if (rv == 0 && data_msg == B_TRUE) { 4831 ldcp->lane_out.seq_num++; 4832 } 4833 4834 if ((rv != 0) || (msglen != size)) { 4835 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) " 4836 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen); 4837 ldcp->ldc_stats.oerrors++; 4838 } 4839 4840 mutex_exit(&ldcp->ldc_txlock); 4841 4842 /* 4843 * If channel has been reset we either handle it here or 4844 * simply report back that it has been reset and let caller 4845 * decide what to do. 4846 */ 4847 if (rv == ECONNRESET) { 4848 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 4849 4850 /* 4851 * N.B - must never be holding the dlistrw lock when 4852 * we do a reset of the channel. 4853 */ 4854 if (handle_reset) { 4855 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 4856 } 4857 } 4858 4859 return (rv); 4860 } 4861 4862 /* 4863 * Remove the specified address from the list of address maintained 4864 * in this port node. 
4865 */ 4866 mcst_addr_t * 4867 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 4868 { 4869 vsw_t *vswp = NULL; 4870 vsw_port_t *port = NULL; 4871 mcst_addr_t *prev_p = NULL; 4872 mcst_addr_t *curr_p = NULL; 4873 4874 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 4875 __func__, devtype, addr); 4876 4877 if (devtype == VSW_VNETPORT) { 4878 port = (vsw_port_t *)arg; 4879 mutex_enter(&port->mca_lock); 4880 prev_p = curr_p = port->mcap; 4881 } else { 4882 vswp = (vsw_t *)arg; 4883 mutex_enter(&vswp->mca_lock); 4884 prev_p = curr_p = vswp->mcap; 4885 } 4886 4887 while (curr_p != NULL) { 4888 if (curr_p->addr == addr) { 4889 D2(NULL, "%s: address found", __func__); 4890 /* match found */ 4891 if (prev_p == curr_p) { 4892 /* list head */ 4893 if (devtype == VSW_VNETPORT) 4894 port->mcap = curr_p->nextp; 4895 else 4896 vswp->mcap = curr_p->nextp; 4897 } else { 4898 prev_p->nextp = curr_p->nextp; 4899 } 4900 break; 4901 } else { 4902 prev_p = curr_p; 4903 curr_p = curr_p->nextp; 4904 } 4905 } 4906 4907 if (devtype == VSW_VNETPORT) 4908 mutex_exit(&port->mca_lock); 4909 else 4910 mutex_exit(&vswp->mca_lock); 4911 4912 D1(NULL, "%s: exit", __func__); 4913 4914 return (curr_p); 4915 } 4916 4917 /* 4918 * Creates a descriptor ring (dring) and links it into the 4919 * link of outbound drings for this channel. 4920 * 4921 * Returns NULL if creation failed. 4922 */ 4923 static dring_info_t * 4924 vsw_create_dring(vsw_ldc_t *ldcp) 4925 { 4926 vsw_private_desc_t *priv_addr = NULL; 4927 vsw_t *vswp = ldcp->ldc_vswp; 4928 ldc_mem_info_t minfo; 4929 dring_info_t *dp, *tp; 4930 int i; 4931 4932 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4933 4934 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4935 4936 /* create public section of ring */ 4937 if ((ldc_mem_dring_create(vsw_ntxds, 4938 VSW_PUB_SIZE, &dp->handle)) != 0) { 4939 4940 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 4941 "failed", ldcp->ldc_id); 4942 goto create_fail_exit; 4943 } 4944 4945 ASSERT(dp->handle != NULL); 4946 4947 /* 4948 * Get the base address of the public section of the ring. 4949 */ 4950 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 4951 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 4952 ldcp->ldc_id); 4953 goto dring_fail_exit; 4954 } else { 4955 ASSERT(minfo.vaddr != 0); 4956 dp->pub_addr = minfo.vaddr; 4957 } 4958 4959 dp->num_descriptors = vsw_ntxds; 4960 dp->descriptor_size = VSW_PUB_SIZE; 4961 dp->options = VIO_TX_DRING; 4962 dp->ncookies = 1; /* guaranteed by ldc */ 4963 4964 /* 4965 * create private portion of ring 4966 */ 4967 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 4968 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 4969 4970 if (vsw_setup_ring(ldcp, dp)) { 4971 DERR(vswp, "%s: unable to setup ring", __func__); 4972 goto dring_fail_exit; 4973 } 4974 4975 /* haven't used any descriptors yet */ 4976 dp->end_idx = 0; 4977 dp->last_ack_recv = -1; 4978 4979 /* bind dring to the channel */ 4980 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 4981 LDC_SHADOW_MAP, LDC_MEM_RW, 4982 &dp->cookie[0], &dp->ncookies)) != 0) { 4983 DERR(vswp, "vsw_create_dring: unable to bind to channel " 4984 "%lld", ldcp->ldc_id); 4985 goto dring_fail_exit; 4986 } 4987 4988 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4989 dp->restart_reqd = B_TRUE; 4990 4991 /* 4992 * Only ever create rings for outgoing lane. Link it onto 4993 * end of list. 
4994 */ 4995 WRITE_ENTER(&ldcp->lane_out.dlistrw); 4996 if (ldcp->lane_out.dringp == NULL) { 4997 D2(vswp, "vsw_create_dring: adding first outbound ring"); 4998 ldcp->lane_out.dringp = dp; 4999 } else { 5000 tp = ldcp->lane_out.dringp; 5001 while (tp->next != NULL) 5002 tp = tp->next; 5003 5004 tp->next = dp; 5005 } 5006 RW_EXIT(&ldcp->lane_out.dlistrw); 5007 5008 return (dp); 5009 5010 dring_fail_exit: 5011 (void) ldc_mem_dring_destroy(dp->handle); 5012 5013 create_fail_exit: 5014 if (dp->priv_addr != NULL) { 5015 priv_addr = dp->priv_addr; 5016 for (i = 0; i < vsw_ntxds; i++) { 5017 if (priv_addr->memhandle != NULL) 5018 (void) ldc_mem_free_handle( 5019 priv_addr->memhandle); 5020 priv_addr++; 5021 } 5022 kmem_free(dp->priv_addr, 5023 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5024 } 5025 mutex_destroy(&dp->dlock); 5026 5027 kmem_free(dp, sizeof (dring_info_t)); 5028 return (NULL); 5029 } 5030 5031 /* 5032 * Create a ring consisting of just a private portion and link 5033 * it into the list of rings for the outbound lane. 5034 * 5035 * These type of rings are used primarily for temporary data 5036 * storage (i.e. as data buffers). 5037 */ 5038 void 5039 vsw_create_privring(vsw_ldc_t *ldcp) 5040 { 5041 dring_info_t *dp, *tp; 5042 vsw_t *vswp = ldcp->ldc_vswp; 5043 5044 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5045 5046 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5047 5048 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5049 5050 /* no public section */ 5051 dp->pub_addr = NULL; 5052 5053 dp->priv_addr = kmem_zalloc( 5054 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5055 5056 dp->num_descriptors = vsw_ntxds; 5057 5058 if (vsw_setup_ring(ldcp, dp)) { 5059 DERR(vswp, "%s: setup of ring failed", __func__); 5060 kmem_free(dp->priv_addr, 5061 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5062 mutex_destroy(&dp->dlock); 5063 kmem_free(dp, sizeof (dring_info_t)); 5064 return; 5065 } 5066 5067 /* haven't used any descriptors yet */ 5068 dp->end_idx = 0; 5069 5070 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5071 dp->restart_reqd = B_TRUE; 5072 5073 /* 5074 * Only ever create rings for outgoing lane. Link it onto 5075 * end of list. 5076 */ 5077 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5078 if (ldcp->lane_out.dringp == NULL) { 5079 D2(vswp, "%s: adding first outbound privring", __func__); 5080 ldcp->lane_out.dringp = dp; 5081 } else { 5082 tp = ldcp->lane_out.dringp; 5083 while (tp->next != NULL) 5084 tp = tp->next; 5085 5086 tp->next = dp; 5087 } 5088 RW_EXIT(&ldcp->lane_out.dlistrw); 5089 5090 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 5091 } 5092 5093 /* 5094 * Setup the descriptors in the dring. Returns 0 on success, 1 on 5095 * failure. 5096 */ 5097 int 5098 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 5099 { 5100 vnet_public_desc_t *pub_addr = NULL; 5101 vsw_private_desc_t *priv_addr = NULL; 5102 vsw_t *vswp = ldcp->ldc_vswp; 5103 uint64_t *tmpp; 5104 uint64_t offset = 0; 5105 uint32_t ncookies = 0; 5106 static char *name = "vsw_setup_ring"; 5107 int i, j, nc, rv; 5108 size_t data_sz; 5109 5110 priv_addr = dp->priv_addr; 5111 pub_addr = dp->pub_addr; 5112 5113 /* public section may be null but private should never be */ 5114 ASSERT(priv_addr != NULL); 5115 5116 /* 5117 * Allocate the region of memory which will be used to hold 5118 * the data the descriptors will refer to. 
5119 */ 5120 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 5121 data_sz = VNET_ROUNDUP_2K(data_sz); 5122 dp->desc_data_sz = data_sz; 5123 dp->data_sz = vsw_ntxds * data_sz; 5124 dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 5125 5126 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 5127 dp->data_sz, dp->data_addr); 5128 5129 tmpp = (uint64_t *)dp->data_addr; 5130 offset = dp->desc_data_sz/sizeof (tmpp); 5131 5132 /* 5133 * Initialise some of the private and public (if they exist) 5134 * descriptor fields. 5135 */ 5136 for (i = 0; i < vsw_ntxds; i++) { 5137 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 5138 5139 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 5140 &priv_addr->memhandle)) != 0) { 5141 DERR(vswp, "%s: alloc mem handle failed", name); 5142 goto setup_ring_cleanup; 5143 } 5144 5145 priv_addr->datap = (void *)tmpp; 5146 5147 rv = ldc_mem_bind_handle(priv_addr->memhandle, 5148 (caddr_t)priv_addr->datap, dp->desc_data_sz, 5149 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 5150 &(priv_addr->memcookie[0]), &ncookies); 5151 if (rv != 0) { 5152 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 5153 "(rv %d)", name, ldcp->ldc_id, rv); 5154 goto setup_ring_cleanup; 5155 } 5156 priv_addr->bound = 1; 5157 5158 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 5159 name, i, priv_addr->memcookie[0].addr, 5160 priv_addr->memcookie[0].size); 5161 5162 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 5163 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 5164 "invalid num of cookies (%d) for size 0x%llx", 5165 name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ); 5166 5167 goto setup_ring_cleanup; 5168 } else { 5169 for (j = 1; j < ncookies; j++) { 5170 rv = ldc_mem_nextcookie(priv_addr->memhandle, 5171 &(priv_addr->memcookie[j])); 5172 if (rv != 0) { 5173 DERR(vswp, "%s: ldc_mem_nextcookie " 5174 "failed rv (%d)", name, rv); 5175 goto setup_ring_cleanup; 5176 } 5177 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 5178 "size 0x%llx", name, j, 5179 priv_addr->memcookie[j].addr, 5180 priv_addr->memcookie[j].size); 5181 } 5182 5183 } 5184 priv_addr->ncookies = ncookies; 5185 priv_addr->dstate = VIO_DESC_FREE; 5186 5187 if (pub_addr != NULL) { 5188 5189 /* link pub and private sides */ 5190 priv_addr->descp = pub_addr; 5191 5192 pub_addr->ncookies = priv_addr->ncookies; 5193 5194 for (nc = 0; nc < pub_addr->ncookies; nc++) { 5195 bcopy(&priv_addr->memcookie[nc], 5196 &pub_addr->memcookie[nc], 5197 sizeof (ldc_mem_cookie_t)); 5198 } 5199 5200 pub_addr->hdr.dstate = VIO_DESC_FREE; 5201 pub_addr++; 5202 } 5203 5204 /* 5205 * move to next element in the dring and the next 5206 * position in the data buffer. 5207 */ 5208 priv_addr++; 5209 tmpp += offset; 5210 } 5211 5212 return (0); 5213 5214 setup_ring_cleanup: 5215 priv_addr = dp->priv_addr; 5216 5217 for (j = 0; j < i; j++) { 5218 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 5219 (void) ldc_mem_free_handle(priv_addr->memhandle); 5220 5221 mutex_destroy(&priv_addr->dstate_lock); 5222 5223 priv_addr++; 5224 } 5225 kmem_free(dp->data_addr, dp->data_sz); 5226 5227 return (1); 5228 } 5229 5230 /* 5231 * Searches the private section of a ring for a free descriptor, 5232 * starting at the location of the last free descriptor found 5233 * previously. 5234 * 5235 * Returns 0 if free descriptor is available, and updates state 5236 * of private descriptor to VIO_DESC_READY, otherwise returns 1. 
5237 * 5238 * FUTURE: might need to return contiguous range of descriptors 5239 * as dring info msg assumes all will be contiguous. 5240 */ 5241 static int 5242 vsw_dring_find_free_desc(dring_info_t *dringp, 5243 vsw_private_desc_t **priv_p, int *idx) 5244 { 5245 vsw_private_desc_t *addr = NULL; 5246 int num = vsw_ntxds; 5247 int ret = 1; 5248 5249 D1(NULL, "%s enter\n", __func__); 5250 5251 ASSERT(dringp->priv_addr != NULL); 5252 5253 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 5254 __func__, dringp, dringp->end_idx); 5255 5256 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 5257 5258 mutex_enter(&addr->dstate_lock); 5259 if (addr->dstate == VIO_DESC_FREE) { 5260 addr->dstate = VIO_DESC_READY; 5261 *priv_p = addr; 5262 *idx = dringp->end_idx; 5263 dringp->end_idx = (dringp->end_idx + 1) % num; 5264 ret = 0; 5265 5266 } 5267 mutex_exit(&addr->dstate_lock); 5268 5269 /* ring full */ 5270 if (ret == 1) { 5271 D2(NULL, "%s: no desp free: started at %d", __func__, 5272 dringp->end_idx); 5273 } 5274 5275 D1(NULL, "%s: exit\n", __func__); 5276 5277 return (ret); 5278 } 5279 5280 /* 5281 * Map from a dring identifier to the ring itself. Returns 5282 * pointer to ring or NULL if no match found. 5283 * 5284 * Should be called with dlistrw rwlock held as reader. 5285 */ 5286 static dring_info_t * 5287 vsw_ident2dring(lane_t *lane, uint64_t ident) 5288 { 5289 dring_info_t *dp = NULL; 5290 5291 if ((dp = lane->dringp) == NULL) { 5292 return (NULL); 5293 } else { 5294 if (dp->ident == ident) 5295 return (dp); 5296 5297 while (dp != NULL) { 5298 if (dp->ident == ident) 5299 break; 5300 dp = dp->next; 5301 } 5302 } 5303 5304 return (dp); 5305 } 5306 5307 /* 5308 * Set the default lane attributes. These are copied into 5309 * the attr msg we send to our peer. If they are not acceptable 5310 * then (currently) the handshake ends. 5311 */ 5312 static void 5313 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 5314 { 5315 bzero(lp, sizeof (lane_t)); 5316 5317 READ_ENTER(&vswp->if_lockrw); 5318 ether_copy(&(vswp->if_addr), &(lp->addr)); 5319 RW_EXIT(&vswp->if_lockrw); 5320 5321 lp->mtu = vswp->max_frame_size; 5322 lp->addr_type = ADDR_TYPE_MAC; 5323 lp->xfer_mode = VIO_DRING_MODE_V1_0; 5324 lp->ack_freq = 0; /* for shared mode */ 5325 lp->seq_num = VNET_ISS; 5326 } 5327 5328 /* 5329 * Verify that the attributes are acceptable. 5330 * 5331 * FUTURE: If some attributes are not acceptable, change them 5332 * our desired values. 5333 */ 5334 static int 5335 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_ldc_t *ldcp) 5336 { 5337 int ret = 0; 5338 struct ether_addr ea; 5339 vsw_port_t *port = ldcp->ldc_port; 5340 lane_t *lp = &ldcp->lane_out; 5341 5342 D1(NULL, "vsw_check_attr enter\n"); 5343 5344 if ((pkt->xfer_mode != VIO_DESC_MODE) && 5345 (pkt->xfer_mode != lp->xfer_mode)) { 5346 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode); 5347 ret = 1; 5348 } 5349 5350 /* Only support MAC addresses at moment. */ 5351 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 5352 D2(NULL, "vsw_check_attr: invalid addr_type %x, " 5353 "or address 0x%llx\n", pkt->addr_type, pkt->addr); 5354 ret = 1; 5355 } 5356 5357 /* 5358 * MAC address supplied by device should match that stored 5359 * in the vsw-port OBP node. Need to decide what to do if they 5360 * don't match, for the moment just warn but don't fail. 
5361 */ 5362 vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet); 5363 if (ether_cmp(&ea, &port->p_macaddr) != 0) { 5364 DERR(NULL, "vsw_check_attr: device supplied address " 5365 "0x%llx doesn't match node address 0x%llx\n", 5366 pkt->addr, port->p_macaddr); 5367 } 5368 5369 /* 5370 * Ack freq only makes sense in pkt mode, in shared 5371 * mode the ring descriptors say whether or not to 5372 * send back an ACK. 5373 */ 5374 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 5375 (pkt->xfer_mode & VIO_DRING_MODE_V1_2)) || 5376 (VSW_VER_LT(ldcp, 1, 2) && 5377 (pkt->xfer_mode == VIO_DRING_MODE_V1_0))) { 5378 if (pkt->ack_freq > 0) { 5379 D2(NULL, "vsw_check_attr: non zero ack freq " 5380 " in SHM mode\n"); 5381 ret = 1; 5382 } 5383 } 5384 5385 /* 5386 * Note: for the moment we only support ETHER 5387 * frames. This may change in the future. 5388 */ 5389 if ((pkt->mtu > lp->mtu) || (pkt->mtu <= 0)) { 5390 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n", 5391 pkt->mtu); 5392 ret = 1; 5393 } 5394 5395 D1(NULL, "vsw_check_attr exit\n"); 5396 5397 return (ret); 5398 } 5399 5400 /* 5401 * Returns 1 if there is a problem, 0 otherwise. 5402 */ 5403 static int 5404 vsw_check_dring_info(vio_dring_reg_msg_t *pkt) 5405 { 5406 _NOTE(ARGUNUSED(pkt)) 5407 5408 int ret = 0; 5409 5410 D1(NULL, "vsw_check_dring_info enter\n"); 5411 5412 if ((pkt->num_descriptors == 0) || 5413 (pkt->descriptor_size == 0) || 5414 (pkt->ncookies != 1)) { 5415 DERR(NULL, "vsw_check_dring_info: invalid dring msg"); 5416 ret = 1; 5417 } 5418 5419 D1(NULL, "vsw_check_dring_info exit\n"); 5420 5421 return (ret); 5422 } 5423 5424 /* 5425 * Returns 1 if two memory cookies match. Otherwise returns 0. 5426 */ 5427 static int 5428 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2) 5429 { 5430 if ((m1->addr != m2->addr) || 5431 (m2->size != m2->size)) { 5432 return (0); 5433 } else { 5434 return (1); 5435 } 5436 } 5437 5438 /* 5439 * Returns 1 if ring described in reg message matches that 5440 * described by dring_info structure. Otherwise returns 0. 5441 */ 5442 static int 5443 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 5444 { 5445 if ((msg->descriptor_size != dp->descriptor_size) || 5446 (msg->num_descriptors != dp->num_descriptors) || 5447 (msg->ncookies != dp->ncookies) || 5448 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 5449 return (0); 5450 } else { 5451 return (1); 5452 } 5453 5454 } 5455 5456 static caddr_t 5457 vsw_print_ethaddr(uint8_t *a, char *ebuf) 5458 { 5459 (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x", 5460 a[0], a[1], a[2], a[3], a[4], a[5]); 5461 return (ebuf); 5462 } 5463 5464 /* 5465 * Reset and free all the resources associated with 5466 * the channel. 
5467 */ 5468 static void 5469 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 5470 { 5471 dring_info_t *dp, *dpp; 5472 lane_t *lp = NULL; 5473 int rv = 0; 5474 5475 ASSERT(ldcp != NULL); 5476 5477 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 5478 5479 if (dir == INBOUND) { 5480 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 5481 " of channel %lld", __func__, ldcp->ldc_id); 5482 lp = &ldcp->lane_in; 5483 } else { 5484 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 5485 " of channel %lld", __func__, ldcp->ldc_id); 5486 lp = &ldcp->lane_out; 5487 } 5488 5489 lp->lstate = VSW_LANE_INACTIV; 5490 lp->seq_num = VNET_ISS; 5491 5492 if (lp->dringp) { 5493 if (dir == INBOUND) { 5494 WRITE_ENTER(&lp->dlistrw); 5495 dp = lp->dringp; 5496 while (dp != NULL) { 5497 dpp = dp->next; 5498 if (dp->handle != NULL) 5499 (void) ldc_mem_dring_unmap(dp->handle); 5500 kmem_free(dp, sizeof (dring_info_t)); 5501 dp = dpp; 5502 } 5503 RW_EXIT(&lp->dlistrw); 5504 } else { 5505 /* 5506 * unbind, destroy exported dring, free dring struct 5507 */ 5508 WRITE_ENTER(&lp->dlistrw); 5509 dp = lp->dringp; 5510 rv = vsw_free_ring(dp); 5511 RW_EXIT(&lp->dlistrw); 5512 } 5513 if (rv == 0) { 5514 lp->dringp = NULL; 5515 } 5516 } 5517 5518 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 5519 } 5520 5521 /* 5522 * Free ring and all associated resources. 5523 * 5524 * Should be called with dlistrw rwlock held as writer. 5525 */ 5526 static int 5527 vsw_free_ring(dring_info_t *dp) 5528 { 5529 vsw_private_desc_t *paddr = NULL; 5530 dring_info_t *dpp; 5531 int i, rv = 1; 5532 5533 while (dp != NULL) { 5534 mutex_enter(&dp->dlock); 5535 dpp = dp->next; 5536 if (dp->priv_addr != NULL) { 5537 /* 5538 * First unbind and free the memory handles 5539 * stored in each descriptor within the ring. 5540 */ 5541 for (i = 0; i < vsw_ntxds; i++) { 5542 paddr = (vsw_private_desc_t *) 5543 dp->priv_addr + i; 5544 if (paddr->memhandle != NULL) { 5545 if (paddr->bound == 1) { 5546 rv = ldc_mem_unbind_handle( 5547 paddr->memhandle); 5548 5549 if (rv != 0) { 5550 DERR(NULL, "error " 5551 "unbinding handle for " 5552 "ring 0x%llx at pos %d", 5553 dp, i); 5554 mutex_exit(&dp->dlock); 5555 return (rv); 5556 } 5557 paddr->bound = 0; 5558 } 5559 5560 rv = ldc_mem_free_handle( 5561 paddr->memhandle); 5562 if (rv != 0) { 5563 DERR(NULL, "error freeing " 5564 "handle for ring 0x%llx " 5565 "at pos %d", dp, i); 5566 mutex_exit(&dp->dlock); 5567 return (rv); 5568 } 5569 paddr->memhandle = NULL; 5570 } 5571 mutex_destroy(&paddr->dstate_lock); 5572 } 5573 kmem_free(dp->priv_addr, 5574 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5575 } 5576 5577 /* 5578 * Now unbind and destroy the ring itself. 5579 */ 5580 if (dp->handle != NULL) { 5581 (void) ldc_mem_dring_unbind(dp->handle); 5582 (void) ldc_mem_dring_destroy(dp->handle); 5583 } 5584 5585 if (dp->data_addr != NULL) { 5586 kmem_free(dp->data_addr, dp->data_sz); 5587 } 5588 5589 mutex_exit(&dp->dlock); 5590 mutex_destroy(&dp->dlock); 5591 mutex_destroy(&dp->restart_lock); 5592 kmem_free(dp, sizeof (dring_info_t)); 5593 5594 dp = dpp; 5595 } 5596 return (0); 5597 } 5598 5599 /* 5600 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 5601 * This thread is woken up by the LDC interrupt handler to process 5602 * LDC packets and receive data. 
5603 */ 5604 static void 5605 vsw_ldc_rx_worker(void *arg) 5606 { 5607 callb_cpr_t cprinfo; 5608 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5609 vsw_t *vswp = ldcp->ldc_vswp; 5610 5611 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5612 CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr, 5613 "vsw_rx_thread"); 5614 mutex_enter(&ldcp->rx_thr_lock); 5615 ldcp->rx_thr_flags |= VSW_WTHR_RUNNING; 5616 while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) { 5617 5618 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5619 /* 5620 * Wait until the data is received or a stop 5621 * request is received. 5622 */ 5623 while (!(ldcp->rx_thr_flags & 5624 (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) { 5625 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5626 } 5627 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock) 5628 5629 /* 5630 * First process the stop request. 5631 */ 5632 if (ldcp->rx_thr_flags & VSW_WTHR_STOP) { 5633 D2(vswp, "%s(%lld):Rx thread stopped\n", 5634 __func__, ldcp->ldc_id); 5635 break; 5636 } 5637 ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD; 5638 mutex_exit(&ldcp->rx_thr_lock); 5639 D1(vswp, "%s(%lld):calling vsw_process_pkt\n", 5640 __func__, ldcp->ldc_id); 5641 mutex_enter(&ldcp->ldc_cblock); 5642 vsw_process_pkt(ldcp); 5643 mutex_exit(&ldcp->ldc_cblock); 5644 mutex_enter(&ldcp->rx_thr_lock); 5645 } 5646 5647 /* 5648 * Update the run status and wakeup the thread that 5649 * has sent the stop request. 5650 */ 5651 ldcp->rx_thr_flags &= ~VSW_WTHR_RUNNING; 5652 cv_signal(&ldcp->rx_thr_cv); 5653 CALLB_CPR_EXIT(&cprinfo); 5654 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5655 thread_exit(); 5656 } 5657 5658 /* vsw_stop_rx_thread -- Co-ordinate with receive thread to stop it */ 5659 static void 5660 vsw_stop_rx_thread(vsw_ldc_t *ldcp) 5661 { 5662 vsw_t *vswp = ldcp->ldc_vswp; 5663 5664 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5665 /* 5666 * Send a stop request by setting the stop flag and 5667 * wait until the receive thread stops. 5668 */ 5669 mutex_enter(&ldcp->rx_thr_lock); 5670 if (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5671 ldcp->rx_thr_flags |= VSW_WTHR_STOP; 5672 cv_signal(&ldcp->rx_thr_cv); 5673 while (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5674 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5675 } 5676 } 5677 mutex_exit(&ldcp->rx_thr_lock); 5678 ldcp->rx_thread = NULL; 5679 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5680 } 5681 5682 /* 5683 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data. 5684 * This thread is woken up by the vsw_portsend to transmit 5685 * packets. 5686 */ 5687 static void 5688 vsw_ldc_tx_worker(void *arg) 5689 { 5690 callb_cpr_t cprinfo; 5691 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5692 vsw_t *vswp = ldcp->ldc_vswp; 5693 mblk_t *mp; 5694 mblk_t *tmp; 5695 5696 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5697 CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr, 5698 "vnet_tx_thread"); 5699 mutex_enter(&ldcp->tx_thr_lock); 5700 ldcp->tx_thr_flags |= VSW_WTHR_RUNNING; 5701 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) { 5702 5703 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5704 /* 5705 * Wait until the data is received or a stop 5706 * request is received. 5707 */ 5708 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) && 5709 (ldcp->tx_mhead == NULL)) { 5710 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5711 } 5712 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock) 5713 5714 /* 5715 * First process the stop request. 
5716 */ 5717 if (ldcp->tx_thr_flags & VSW_WTHR_STOP) { 5718 D2(vswp, "%s(%lld):tx thread stopped\n", 5719 __func__, ldcp->ldc_id); 5720 break; 5721 } 5722 mp = ldcp->tx_mhead; 5723 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 5724 ldcp->tx_cnt = 0; 5725 mutex_exit(&ldcp->tx_thr_lock); 5726 D2(vswp, "%s(%lld):calling vsw_ldcsend\n", 5727 __func__, ldcp->ldc_id); 5728 while (mp != NULL) { 5729 tmp = mp->b_next; 5730 mp->b_next = mp->b_prev = NULL; 5731 (void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries); 5732 mp = tmp; 5733 } 5734 mutex_enter(&ldcp->tx_thr_lock); 5735 } 5736 5737 /* 5738 * Update the run status and wakeup the thread that 5739 * has sent the stop request. 5740 */ 5741 ldcp->tx_thr_flags &= ~VSW_WTHR_RUNNING; 5742 cv_signal(&ldcp->tx_thr_cv); 5743 CALLB_CPR_EXIT(&cprinfo); 5744 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5745 thread_exit(); 5746 } 5747 5748 /* vsw_stop_tx_thread -- Co-ordinate with receive thread to stop it */ 5749 static void 5750 vsw_stop_tx_thread(vsw_ldc_t *ldcp) 5751 { 5752 vsw_t *vswp = ldcp->ldc_vswp; 5753 5754 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5755 /* 5756 * Send a stop request by setting the stop flag and 5757 * wait until the receive thread stops. 5758 */ 5759 mutex_enter(&ldcp->tx_thr_lock); 5760 if (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5761 ldcp->tx_thr_flags |= VSW_WTHR_STOP; 5762 cv_signal(&ldcp->tx_thr_cv); 5763 while (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5764 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5765 } 5766 } 5767 mutex_exit(&ldcp->tx_thr_lock); 5768 ldcp->tx_thread = NULL; 5769 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5770 } 5771 5772 /* vsw_reclaim_dring -- reclaim descriptors */ 5773 static int 5774 vsw_reclaim_dring(dring_info_t *dp, int start) 5775 { 5776 int i, j, len; 5777 vsw_private_desc_t *priv_addr; 5778 vnet_public_desc_t *pub_addr; 5779 5780 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 5781 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 5782 len = dp->num_descriptors; 5783 5784 D2(NULL, "%s: start index %ld\n", __func__, start); 5785 5786 j = 0; 5787 for (i = start; j < len; i = (i + 1) % len, j++) { 5788 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 5789 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 5790 5791 mutex_enter(&priv_addr->dstate_lock); 5792 if (pub_addr->hdr.dstate != VIO_DESC_DONE) { 5793 mutex_exit(&priv_addr->dstate_lock); 5794 break; 5795 } 5796 pub_addr->hdr.dstate = VIO_DESC_FREE; 5797 priv_addr->dstate = VIO_DESC_FREE; 5798 /* clear all the fields */ 5799 priv_addr->datalen = 0; 5800 pub_addr->hdr.ack = 0; 5801 mutex_exit(&priv_addr->dstate_lock); 5802 5803 D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx", 5804 i, pub_addr->hdr.dstate, priv_addr->dstate); 5805 } 5806 return (j); 5807 } 5808 5809 /* 5810 * Debugging routines 5811 */ 5812 static void 5813 display_state(void) 5814 { 5815 vsw_t *vswp; 5816 vsw_port_list_t *plist; 5817 vsw_port_t *port; 5818 vsw_ldc_list_t *ldcl; 5819 vsw_ldc_t *ldcp; 5820 extern vsw_t *vsw_head; 5821 5822 cmn_err(CE_NOTE, "***** system state *****"); 5823 5824 for (vswp = vsw_head; vswp; vswp = vswp->next) { 5825 plist = &vswp->plist; 5826 READ_ENTER(&plist->lockrw); 5827 cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n", 5828 vswp->instance, plist->num_ports); 5829 5830 for (port = plist->head; port != NULL; port = port->p_next) { 5831 ldcl = &port->p_ldclist; 5832 cmn_err(CE_CONT, "port %d : %d ldcs attached\n", 5833 port->p_instance, port->num_ldcs); 5834 READ_ENTER(&ldcl->lockrw); 5835 ldcp 
= ldcl->head; 5836 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 5837 cmn_err(CE_CONT, "chan %lu : dev %d : " 5838 "status %d : phase %u\n", 5839 ldcp->ldc_id, ldcp->dev_class, 5840 ldcp->ldc_status, ldcp->hphase); 5841 cmn_err(CE_CONT, "chan %lu : lsession %lu : " 5842 "psession %lu\n", ldcp->ldc_id, 5843 ldcp->local_session, ldcp->peer_session); 5844 5845 cmn_err(CE_CONT, "Inbound lane:\n"); 5846 display_lane(&ldcp->lane_in); 5847 cmn_err(CE_CONT, "Outbound lane:\n"); 5848 display_lane(&ldcp->lane_out); 5849 } 5850 RW_EXIT(&ldcl->lockrw); 5851 } 5852 RW_EXIT(&plist->lockrw); 5853 } 5854 cmn_err(CE_NOTE, "***** system state *****"); 5855 } 5856 5857 static void 5858 display_lane(lane_t *lp) 5859 { 5860 dring_info_t *drp; 5861 5862 cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n", 5863 lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu); 5864 cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n", 5865 lp->addr_type, lp->addr, lp->xfer_mode); 5866 cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp); 5867 5868 cmn_err(CE_CONT, "Dring info:\n"); 5869 for (drp = lp->dringp; drp != NULL; drp = drp->next) { 5870 cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n", 5871 drp->num_descriptors, drp->descriptor_size); 5872 cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle); 5873 cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n", 5874 (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr); 5875 cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n", 5876 drp->ident, drp->end_idx); 5877 display_ring(drp); 5878 } 5879 } 5880 5881 static void 5882 display_ring(dring_info_t *dringp) 5883 { 5884 uint64_t i; 5885 uint64_t priv_count = 0; 5886 uint64_t pub_count = 0; 5887 vnet_public_desc_t *pub_addr = NULL; 5888 vsw_private_desc_t *priv_addr = NULL; 5889 5890 for (i = 0; i < vsw_ntxds; i++) { 5891 if (dringp->pub_addr != NULL) { 5892 pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i; 5893 5894 if (pub_addr->hdr.dstate == VIO_DESC_FREE) 5895 pub_count++; 5896 } 5897 5898 if (dringp->priv_addr != NULL) { 5899 priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i; 5900 5901 if (priv_addr->dstate == VIO_DESC_FREE) 5902 priv_count++; 5903 } 5904 } 5905 cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n", 5906 i, priv_count, pub_count); 5907 } 5908 5909 static void 5910 dump_flags(uint64_t state) 5911 { 5912 int i; 5913 5914 typedef struct flag_name { 5915 int flag_val; 5916 char *flag_name; 5917 } flag_name_t; 5918 5919 flag_name_t flags[] = { 5920 VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT", 5921 VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV", 5922 VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV", 5923 VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT", 5924 VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV", 5925 VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT", 5926 VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT", 5927 VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV", 5928 VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT", 5929 VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV", 5930 VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT", 5931 VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV", 5932 VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT", 5933 VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV", 5934 VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT", 5935 VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV", 5936 VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT", 5937 VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV", 5938 VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT", 5939 VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV", 5940 VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT", 5941 VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV", 5942 VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT", 5943 
VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV", 5944 VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT", 5945 VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV", 5946 VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT", 5947 VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV", 5948 VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT", 5949 VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV", 5950 VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"}; 5951 5952 DERR(NULL, "DUMP_FLAGS: %llx\n", state); 5953 for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) { 5954 if (state & flags[i].flag_val) 5955 DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name); 5956 } 5957 } 5958