/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/vlan.h>

/* Port add/deletion/etc routines */
static int vsw_port_delete(vsw_port_t *port);
static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_init_ldcs(vsw_port_t *port);
static int vsw_uninit_ldcs(vsw_port_t *port);
static int vsw_ldc_init(vsw_ldc_t *ldcp);
static int vsw_ldc_uninit(vsw_ldc_t *ldcp);
static int vsw_drain_ldcs(vsw_port_t *port);
static int vsw_drain_port_taskq(vsw_port_t *port);
static void vsw_marker_task(void *);
static int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
int vsw_detach_ports(vsw_t *vswp);
int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
int vsw_port_detach(vsw_t *vswp, int p_instance);
int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count);
int vsw_port_attach(vsw_port_t *portp);
vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);
void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate);

/* Interrupt routines */
static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);
/* Handshake routines */
static void vsw_ldc_reinit(vsw_ldc_t *);
static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t);
static void vsw_conn_task(void *);
static int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static void vsw_next_milestone(vsw_ldc_t *);
static int vsw_supported_version(vio_ver_msg_t *);
static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp);
static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp);

/* Data processing routines */
static void vsw_process_pkt(void *);
static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *);
static void vsw_process_ctrl_pkt(void *);
static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *,
	uint32_t);
static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
static void vsw_process_pkt_data_nop(void *, void *, uint32_t);
static void vsw_process_pkt_data(void *, void *, uint32_t);
static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *);

/* Switching/data transmit routines */
static int vsw_dringsend(vsw_ldc_t *, mblk_t *);
static int vsw_descrsend(vsw_ldc_t *, mblk_t *);
static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp);
static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries);
static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);
static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);

/* Packet creation routines */
static void vsw_send_ver(void *);
static void vsw_send_attr(vsw_ldc_t *);
static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);

/* Dring routines */
static dring_info_t *vsw_create_dring(vsw_ldc_t *);
static void vsw_create_privring(vsw_ldc_t *);
static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
	int *);
static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);
static int vsw_reclaim_dring(dring_info_t *dp, int start);

static void vsw_set_lane_attr(vsw_t *, lane_t *);
static int vsw_check_attr(vnet_attr_msg_t *, vsw_ldc_t *);
static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
static int vsw_check_dring_info(vio_dring_reg_msg_t *);

/* Rcv/Tx thread routines */
static void vsw_stop_tx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_tx_worker(void *arg);
static void vsw_stop_rx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_rx_worker(void *arg);

/* Misc support routines */
static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static int vsw_free_ring(dring_info_t *);
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
static int vsw_get_same_dest_list(struct ether_header *ehp,
	mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);
static mblk_t *vsw_dupmsgchain(mblk_t *mp);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);

/*
 * Functions imported from other files.
 */
extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
extern void vsw_reconfig_hw(vsw_t *);
extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port);
extern void vsw_del_mcst_port(vsw_port_t *port);
extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern void vsw_fdbe_add(vsw_t *vswp, void *port);
extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
extern void vsw_create_vlans(void *arg, int type);
extern void vsw_destroy_vlans(void *arg, int type);
extern void vsw_vlan_add_ids(void *arg, int type);
extern void vsw_vlan_remove_ids(void *arg, int type);
extern boolean_t vsw_frame_lookup_vid(void *arg, int caller,
	struct ether_header *ehp, uint16_t *vidp);
extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
	mblk_t **npt);
extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
extern void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp);
extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp);
extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg);
extern void vsw_hio_stop_port(vsw_port_t *portp);

#define	VSW_NUM_VMPOOLS		3	/* number of vio mblk pools */

/*
 * Tunables used in this file.
 */
extern int vsw_num_handshakes;
extern int vsw_wretries;
extern int vsw_desc_delay;
extern int vsw_read_attempts;
extern int vsw_ldc_tx_delay;
extern int vsw_ldc_tx_retries;
extern boolean_t vsw_ldc_rxthr_enabled;
extern boolean_t vsw_ldc_txthr_enabled;
extern uint32_t vsw_ntxds;
extern uint32_t vsw_max_tx_qcount;
extern uint32_t vsw_chain_len;
extern uint32_t vsw_mblk_size1;
extern uint32_t vsw_mblk_size2;
extern uint32_t vsw_mblk_size3;
extern uint32_t vsw_num_mblks1;
extern uint32_t vsw_num_mblks2;
extern uint32_t vsw_num_mblks3;
extern boolean_t vsw_obp_ver_proto_workaround;

#define	LDC_ENTER_LOCK(ldcp)	\
				mutex_enter(&((ldcp)->ldc_cblock));\
				mutex_enter(&((ldcp)->ldc_rxlock));\
				mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
				mutex_exit(&((ldcp)->ldc_txlock));\
				mutex_exit(&((ldcp)->ldc_rxlock));\
				mutex_exit(&((ldcp)->ldc_cblock));

#define	VSW_VER_EQ(ldcp, major, minor)	\
	((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor == (minor))

#define	VSW_VER_LT(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major < (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor < (minor)))

#define	VSW_VER_GTEQ(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major > (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor >= (minor)))

/* supported versions */
static	ver_sup_t	vsw_versions[] = { {1, 3} };
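/*
 * Note: the VSW_VER_* macros above compare against the version negotiated
 * on the outbound lane, so protocol features can be gated on the result.
 * An illustrative (not verbatim) use, taken from the logic in
 * vsw_set_vnet_proto_ops() further below:
 *
 *	if (VSW_VER_GTEQ(ldcp, 1, 3))
 *		lp->mtu = vswp->max_frame_size;	(vlan-aware peer)
 *	else
 *		lp->mtu = ETHERMAX;		(pre-1.3 peer)
 */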
/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

#define	DUMP_TAG(tag) \
{			\
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype);	\
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env);	\
}

#define	DUMP_TAG_PTR(tag) \
{			\
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype);	\
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env);	\
}

#define	DUMP_FLAGS(flags) dump_flags(flags);
#define	DISPLAY_STATE()	display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */

/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_attach(vsw_port_t *port)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*p, **pp;
	int		i;
	int		nids = port->num_ldcs;
	uint64_t	*ldcids;

	D1(vswp, "%s: enter : port %d", __func__, port->p_instance);

	/* port already exists? */
	READ_ENTER(&plist->lockrw);
	for (p = plist->head; p != NULL; p = p->p_next) {
		if (p->p_instance == port->p_instance) {
			DWARN(vswp, "%s: port instance %d already attached",
			    __func__, p->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}
	}
	RW_EXIT(&plist->lockrw);

	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);

	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
	port->state = VSW_PORT_INIT;

	D2(vswp, "%s: %d nids", __func__, nids);
	ldcids = port->ldc_ids;
	for (i = 0; i < nids; i++) {
		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
			DERR(vswp, "%s: ldc_attach failed", __func__);

			rw_destroy(&port->p_ldclist.lockrw);

			cv_destroy(&port->state_cv);
			mutex_destroy(&port->state_lock);

			mutex_destroy(&port->tx_lock);
			mutex_destroy(&port->mca_lock);
			kmem_free(port, sizeof (vsw_port_t));
			return (1);
		}
	}

	if (vswp->switching_setup_done == B_TRUE) {
		/*
		 * If the underlying physical device has been setup,
		 * program the mac address of this port in it.
		 * Otherwise, port macaddr will be set after the physical
		 * device is successfully setup by the timeout handler.
		 */
		mutex_enter(&vswp->hw_lock);
		(void) vsw_set_hw(vswp, port, VSW_VNETPORT);
		mutex_exit(&vswp->hw_lock);
	}

	/* create the fdb entry for this port/mac address */
	vsw_fdbe_add(vswp, port);

	vsw_create_vlans(port, VSW_VNETPORT);

	WRITE_ENTER(&plist->lockrw);

	/* link it into the list of ports for this vsw instance */
	pp = (vsw_port_t **)(&plist->head);
	port->p_next = *pp;
	*pp = port;
	plist->num_ports++;

	RW_EXIT(&plist->lockrw);

	/*
	 * Initialise the port and any ldc's under it.
	 */
	(void) vsw_init_ldcs(port);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
	vsw_port_t	*port = NULL;
	vsw_port_list_t	*plist = &vswp->plist;

	D1(vswp, "%s: enter: port id %d", __func__, p_instance);

	WRITE_ENTER(&plist->lockrw);

	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	if (vsw_plist_del_node(vswp, port)) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	/* cleanup any HybridIO for this port */
	vsw_hio_stop_port(port);

	/*
	 * No longer need to hold writer lock on port list now
	 * that we have unlinked the target port from the list.
	 */
	RW_EXIT(&plist->lockrw);

	/* Remove the fdb entry for this port/mac address */
	vsw_fdbe_del(vswp, &(port->p_macaddr));
	vsw_destroy_vlans(port, VSW_VNETPORT);

	/* Remove any multicast addresses.. */
	vsw_del_mcst_port(port);

	/* Remove the address if it was programmed into HW. */
	mutex_enter(&vswp->hw_lock);

	/*
	 * Port's address may not have been set in hardware. This could
	 * happen if the underlying physical device is not yet available and
	 * vsw_setup_switching_timeout() may be in progress.
	 * We remove its addr from hardware only if it has been set before.
	 */
	if (port->addr_set != VSW_ADDR_UNSET)
		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);

	if (vswp->recfg_reqd)
		vsw_reconfig_hw(vswp);

	mutex_exit(&vswp->hw_lock);

	if (vsw_port_delete(port)) {
		return (1);
	}

	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
	return (0);
}

/*
 * Detach all active ports.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_detach_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port = NULL;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&plist->lockrw);

	while ((port = plist->head) != NULL) {
		if (vsw_plist_del_node(vswp, port)) {
			DERR(vswp, "%s: Error deleting port %d"
			    " from port list", __func__, port->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}

		/* Remove the address if it was programmed into HW. */
		mutex_enter(&vswp->hw_lock);
		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);
		mutex_exit(&vswp->hw_lock);

		/* Remove the fdb entry for this port/mac address */
		vsw_fdbe_del(vswp, &(port->p_macaddr));
		vsw_destroy_vlans(port, VSW_VNETPORT);

		/* Remove any multicast addresses.. */
		vsw_del_mcst_port(port);

		/*
		 * No longer need to hold the lock on the port list
		 * now that we have unlinked the target port from the
		 * list.
		 */
		RW_EXIT(&plist->lockrw);
		if (vsw_port_delete(port)) {
			DERR(vswp, "%s: Error deleting port %d",
			    __func__, port->p_instance);
			return (1);
		}
		WRITE_ENTER(&plist->lockrw);
	}
	RW_EXIT(&plist->lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Delete the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_delete(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl;
	vsw_t		*vswp = port->p_vswp;
	int		num_ldcs;

	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

	(void) vsw_uninit_ldcs(port);

	/*
	 * Wait for any pending ctrl msg tasks which reference this
	 * port to finish.
	 */
	if (vsw_drain_port_taskq(port))
		return (1);

	/*
	 * Wait for any active callbacks to finish
	 */
	if (vsw_drain_ldcs(port))
		return (1);

	ldcl = &port->p_ldclist;
	num_ldcs = port->num_ldcs;
	WRITE_ENTER(&ldcl->lockrw);
	while (num_ldcs > 0) {
		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {
			cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld",
			    vswp->instance, ldcl->head->ldc_id);
			RW_EXIT(&ldcl->lockrw);
			port->num_ldcs = num_ldcs;
			return (1);
		}
		num_ldcs--;
	}
	RW_EXIT(&ldcl->lockrw);

	rw_destroy(&port->p_ldclist.lockrw);

	mutex_destroy(&port->mca_lock);
	mutex_destroy(&port->tx_lock);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	if (port->num_ldcs != 0) {
		kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t));
		port->num_ldcs = 0;
	}
	kmem_free(port, sizeof (vsw_port_t));

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
574 */ 575 static int 576 vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id) 577 { 578 vsw_t *vswp = port->p_vswp; 579 vsw_ldc_list_t *ldcl = &port->p_ldclist; 580 vsw_ldc_t *ldcp = NULL; 581 ldc_attr_t attr; 582 ldc_status_t istatus; 583 int status = DDI_FAILURE; 584 int rv; 585 char kname[MAXNAMELEN]; 586 enum { PROG_init = 0x0, PROG_mblks = 0x1, 587 PROG_callback = 0x2, PROG_rx_thread = 0x4, 588 PROG_tx_thread = 0x8} 589 progress; 590 591 progress = PROG_init; 592 593 D1(vswp, "%s: enter", __func__); 594 595 ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP); 596 if (ldcp == NULL) { 597 DERR(vswp, "%s: kmem_zalloc failed", __func__); 598 return (1); 599 } 600 ldcp->ldc_id = ldc_id; 601 602 /* Allocate pools of receive mblks */ 603 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS, 604 vsw_mblk_size1, vsw_mblk_size2, vsw_mblk_size3, 605 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3); 606 if (rv) { 607 DWARN(vswp, "%s: unable to create free mblk pools for" 608 " channel %ld (rv %d)", __func__, ldc_id, rv); 609 kmem_free(ldcp, sizeof (vsw_ldc_t)); 610 return (1); 611 } 612 613 progress |= PROG_mblks; 614 615 mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL); 616 mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL); 617 mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL); 618 mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL); 619 cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL); 620 rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL); 621 rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL); 622 623 /* required for handshake with peer */ 624 ldcp->local_session = (uint64_t)ddi_get_lbolt(); 625 ldcp->peer_session = 0; 626 ldcp->session_status = 0; 627 ldcp->hss_id = 1; /* Initial handshake session id */ 628 629 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 630 631 /* only set for outbound lane, inbound set by peer */ 632 vsw_set_lane_attr(vswp, &ldcp->lane_out); 633 634 attr.devclass = LDC_DEV_NT_SVC; 635 attr.instance = ddi_get_instance(vswp->dip); 636 attr.mode = LDC_MODE_UNRELIABLE; 637 attr.mtu = VSW_LDC_MTU; 638 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); 639 if (status != 0) { 640 DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)", 641 __func__, ldc_id, status); 642 goto ldc_attach_fail; 643 } 644 645 if (vsw_ldc_rxthr_enabled) { 646 ldcp->rx_thr_flags = 0; 647 648 mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL); 649 cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL); 650 ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 651 vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 652 653 progress |= PROG_rx_thread; 654 if (ldcp->rx_thread == NULL) { 655 DWARN(vswp, "%s(%lld): Failed to create worker thread", 656 __func__, ldc_id); 657 goto ldc_attach_fail; 658 } 659 } 660 661 if (vsw_ldc_txthr_enabled) { 662 ldcp->tx_thr_flags = 0; 663 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 664 665 mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL); 666 cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL); 667 ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 668 vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 669 670 progress |= PROG_tx_thread; 671 if (ldcp->tx_thread == NULL) { 672 DWARN(vswp, "%s(%lld): Failed to create worker thread", 673 __func__, ldc_id); 674 goto ldc_attach_fail; 675 } 676 } 677 678 status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp); 679 if (status != 0) { 680 DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)", 681 __func__, ldc_id, status); 682 (void) ldc_fini(ldcp->ldc_handle); 683 
		goto ldc_attach_fail;
	}
	/*
	 * allocate a message for ldc_read()s, big enough to hold ctrl and
	 * data msgs, including raw data msgs used to recv priority frames.
	 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);

	progress |= PROG_callback;

	mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: ldc_status failed", __func__);
		mutex_destroy(&ldcp->status_lock);
		goto ldc_attach_fail;
	}

	ldcp->ldc_status = istatus;
	ldcp->ldc_port = port;
	ldcp->ldc_vswp = vswp;

	vsw_reset_vnet_proto_ops(ldcp);

	(void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance,
	    kname, &ldcp->ldc_stats);
	if (ldcp->ksp == NULL) {
		DERR(vswp, "%s: kstats setup failed", __func__);
		goto ldc_attach_fail;
	}

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldcl->lockrw);
	ldcp->ldc_next = ldcl->head;
	ldcl->head = ldcp;
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);

ldc_attach_fail:

	if (progress & PROG_callback) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}

	if (progress & PROG_rx_thread) {
		if (ldcp->rx_thread != NULL) {
			vsw_stop_rx_thread(ldcp);
		}
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}

	if (progress & PROG_tx_thread) {
		if (ldcp->tx_thread != NULL) {
			vsw_stop_tx_thread(ldcp);
		}
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
	}
	if (ldcp->ksp != NULL) {
		vgen_destroy_kstats(ldcp->ksp);
	}
	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	mutex_destroy(&ldcp->drain_cv_lock);

	cv_destroy(&ldcp->drain_cv);

	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	if (progress & PROG_mblks) {
		vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);
	}
	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (1);
}
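/*
 * A note on the cleanup pattern above: vsw_ldc_attach() initialises its
 * resources in stages and records each completed stage in the `progress'
 * bitmask (PROG_mblks, PROG_callback, PROG_rx_thread, PROG_tx_thread).
 * The ldc_attach_fail path then tears down only the stages that were
 * actually reached, so a failure part way through attach never touches
 * uninitialised locks, threads or pools.
 */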
/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_t	*ldcp, **prev_ldcp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	int		rv;

	prev_ldcp = &ldcl->head;
	for (; (ldcp = *prev_ldcp) != NULL; prev_ldcp = &ldcp->ldc_next) {
		if (ldcp->ldc_id == ldc_id) {
			break;
		}
	}

	/* specified ldc id not found */
	if (ldcp == NULL) {
		DERR(vswp, "%s: ldcp = NULL", __func__);
		return (1);
	}

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/* Stop the receive thread */
	if (ldcp->rx_thread != NULL) {
		vsw_stop_rx_thread(ldcp);
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}
	kmem_free(ldcp->ldcmsg, ldcp->msglen);

	/* Stop the tx thread */
	if (ldcp->tx_thread != NULL) {
		vsw_stop_tx_thread(ldcp);
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
		if (ldcp->tx_mhead != NULL) {
			freemsgchain(ldcp->tx_mhead);
			ldcp->tx_mhead = ldcp->tx_mtail = NULL;
			ldcp->tx_cnt = 0;
		}
	}

	/* Destroy kstats */
	vgen_destroy_kstats(ldcp->ksp);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
	 */
	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	/*
	 * If the close fails we are in serious trouble, as we won't
	 * be able to delete the parent port.
	 */
	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
		DERR(vswp, "%s: error %d closing channel %lld",
		    __func__, rv, ldcp->ldc_id);
		return (1);
	}

	(void) ldc_fini(ldcp->ldc_handle);

	ldcp->ldc_status = LDC_INIT;
	ldcp->ldc_handle = NULL;
	ldcp->ldc_vswp = NULL;

	/*
	 * Most likely some mblks are still in use and
	 * have not been returned to the pool. These mblks are
	 * added to the pool that is maintained in the device instance.
	 * Another attempt will be made to destroy the pool
	 * when the device detaches.
	 */
	vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);

	/* unlink it from the list */
	*prev_ldcp = ldcp->ldc_next;

	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	cv_destroy(&ldcp->drain_cv);
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->status_lock);
	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (0);
}

/*
 * Open and attempt to bring up the channel. Note that channel
 * can only be brought up if peer has also opened channel.
 *
 * Returns 0 if can open and bring up channel, otherwise
 * returns 1.
 */
static int
vsw_ldc_init(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	ldc_status_t	istatus = 0;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	LDC_ENTER_LOCK(ldcp);

	/* don't start at 0 in case clients don't like that */
	ldcp->next_ident = 1;

	rv = ldc_open(ldcp->ldc_handle);
	if (rv != 0) {
		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
		    __func__, ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
		    __func__, ldcp->ldc_id, istatus);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = istatus;
	mutex_exit(&ldcp->status_lock);

	rv = ldc_up(ldcp->ldc_handle);
	if (rv != 0) {
		/*
		 * Not a fatal error for ldc_up() to fail, as peer
		 * end point may simply not be ready yet.
		 */
		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
		    ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	/*
	 * ldc_up() call is non-blocking so need to explicitly
	 * check channel status to see if in fact the channel
	 * is UP.
	 */
	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	}

	if (ldcp->ldc_status == LDC_UP) {
		D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
		    ldcp->ldc_id, istatus);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		return (0);
	}

	mutex_exit(&ldcp->status_lock);
	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/* disable callbacks on the channel */
static int
vsw_ldc_uninit(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	int	rv;

	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);

	LDC_ENTER_LOCK(ldcp);

	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		DERR(vswp, "vsw_ldc_uninit(%lld): error disabling "
		    "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = LDC_INIT;
	mutex_exit(&ldcp->status_lock);

	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);

	return (0);
}

static int
vsw_init_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_init(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	return (0);
}

static int
vsw_uninit_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	D1(NULL, "vsw_uninit_ldcs: enter\n");

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_uninit(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	D1(NULL, "vsw_uninit_ldcs: exit\n");

	return (0);
}
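/*
 * Teardown ordering, as used by vsw_port_delete() above: the channels are
 * first quiesced with vsw_uninit_ldcs() (callbacks disabled), then any
 * tasks referencing the port are flushed with vsw_drain_port_taskq(),
 * then in-flight callbacks are waited for with vsw_drain_ldcs(), and only
 * then is each channel torn down with vsw_ldc_detach().
 */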
/*
 * Wait until the callback(s) associated with the ldcs under the specified
 * port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 *
 * A short explanation of what we are doing below..
 *
 * The simplest approach would be to have a reference counter in
 * the ldc structure which is incremented/decremented by the callbacks as
 * they use the channel. The drain function could then simply disable any
 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
 * there is a tiny window here - before the callback is able to get the lock
 * on the channel it is interrupted and this function gets to execute. It
 * sees that the ref count is zero and believes it's free to delete the
 * associated data structures.
 *
 * We get around this by taking advantage of the fact that before the ldc
 * framework invokes a callback it sets a flag to indicate that there is a
 * callback active (or about to become active). If we attempt to unregister
 * a callback while this active flag is set then the unregister
 * will fail with EWOULDBLOCK.
 *
 * If the unregister fails we do a cv_timedwait. We will either be signaled
 * by the callback as it is exiting (note we have to wait a short period to
 * allow the callback to return fully to the ldc framework and it to clear
 * the active flag), or by the timer expiring. In either case we again attempt
 * the unregister. We repeat this until we can successfully unregister the
 * callback.
 *
 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
 * the case where the callback has finished but the ldc framework has not yet
 * cleared the active flag. In this case we would never get a cv_signal.
 */
static int
vsw_drain_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	READ_ENTER(&ldcl->lockrw);

	ldcp = ldcl->head;

	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		/*
		 * If we can unregister the channel callback then we
		 * know that there is no callback either running or
		 * scheduled to run for this channel so move on to next
		 * channel in the list.
		 */
		mutex_enter(&ldcp->drain_cv_lock);

		/* prompt active callbacks to quit */
		ldcp->drain_state = VSW_LDC_DRAINING;

		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
			D2(vswp, "%s: unreg callback for chan %ld", __func__,
			    ldcp->ldc_id);
			mutex_exit(&ldcp->drain_cv_lock);
			continue;
		} else {
			/*
			 * If we end up here we know that either 1) a callback
			 * is currently executing, 2) is about to start (i.e.
			 * the ldc framework has set the active flag but
			 * has not actually invoked the callback yet, or 3)
			 * has finished and has returned to the ldc framework
			 * but the ldc framework has not yet cleared the
			 * active bit.
			 *
			 * Wait for it to finish.
			 */
			while (ldc_unreg_callback(ldcp->ldc_handle)
			    == EWOULDBLOCK)
				(void) cv_timedwait(&ldcp->drain_cv,
				    &ldcp->drain_cv_lock, lbolt + hz);

			mutex_exit(&ldcp->drain_cv_lock);
			D2(vswp, "%s: unreg callback for chan %ld after "
			    "timeout", __func__, ldcp->ldc_id);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Wait until all tasks which reference this port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 */
static int
vsw_drain_port_taskq(vsw_port_t *port)
{
	vsw_t	*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Mark the port as in the process of being detached, and
	 * dispatch a marker task to the queue so we know when all
	 * relevant tasks have completed.
	 */
	mutex_enter(&port->state_lock);
	port->state = VSW_PORT_DETACHING;

	if ((vswp->taskq_p == NULL) ||
	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
	    port, DDI_NOSLEEP) != DDI_SUCCESS)) {
		DERR(vswp, "%s: unable to dispatch marker task",
		    __func__);
		mutex_exit(&port->state_lock);
		return (1);
	}

	/*
	 * Wait for the marker task to finish.
	 */
	while (port->state != VSW_PORT_DETACHABLE)
		cv_wait(&port->state_cv, &port->state_lock);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

static void
vsw_marker_task(void *arg)
{
	vsw_port_t	*port = arg;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&port->state_lock);

	/*
	 * No further tasks should be dispatched which reference
	 * this port so ok to mark it as safe to detach.
	 */
	port->state = VSW_PORT_DETACHABLE;

	cv_signal(&port->state_cv);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);
}

vsw_port_t *
vsw_lookup_port(vsw_t *vswp, int p_instance)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port;

	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			D2(vswp, "vsw_lookup_port: found p_instance\n");
			return (port);
		}
	}

	return (NULL);
}

void
vsw_vlan_unaware_port_reset(vsw_port_t *portp)
{
	vsw_ldc_list_t	*ldclp;
	vsw_ldc_t	*ldcp;

	ldclp = &portp->p_ldclist;

	READ_ENTER(&ldclp->lockrw);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->head == NULL) {
		RW_EXIT(&ldclp->lockrw);
		return;
	}
	ldcp = ldclp->head;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
	 * the connection. See comments in vsw_set_vnet_proto_ops().
	 */
	if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) &&
	    portp->nvids != 0) {
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
	}

	mutex_exit(&ldcp->ldc_cblock);

	RW_EXIT(&ldclp->lockrw);
}

void
vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate)
{
	vsw_ldc_list_t	*ldclp;
	vsw_ldc_t	*ldcp;

	ldclp = &portp->p_ldclist;

	READ_ENTER(&ldclp->lockrw);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->head == NULL) {
		RW_EXIT(&ldclp->lockrw);
		return;
	}
	ldcp = ldclp->head;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If the peer is HybridIO capable (ver >= 1.3), reset channel
	 * to trigger re-negotiation, which in turn triggers HybridIO
	 * setup/cleanup.
	 */
	if ((ldcp->hphase == VSW_MILESTONE4) &&
	    (portp->p_hio_capable == B_TRUE)) {
		if (immediate == B_TRUE) {
			(void) ldc_down(ldcp->ldc_handle);
		} else {
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		}
	}

	mutex_exit(&ldcp->ldc_cblock);

	RW_EXIT(&ldclp->lockrw);
}

/*
 * Search for and remove the specified port from the port
 * list. Returns 0 if able to locate and remove port, otherwise
 * returns 1.
 */
static int
vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*curr_p, *prev_p;

	if (plist->head == NULL)
		return (1);

	curr_p = prev_p = plist->head;

	while (curr_p != NULL) {
		if (curr_p == port) {
			if (prev_p == curr_p) {
				plist->head = curr_p->p_next;
			} else {
				prev_p->p_next = curr_p->p_next;
			}
			plist->num_ports--;
			break;
		} else {
			prev_p = curr_p;
			curr_p = curr_p->p_next;
		}
	}
	return (0);
}

/*
 * Interrupt handler for ldc messages.
 */
static uint_t
vsw_ldc_cb(uint64_t event, caddr_t arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	mutex_enter(&ldcp->ldc_cblock);
	ldcp->ldc_stats.callbacks++;

	mutex_enter(&ldcp->status_lock);
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		mutex_exit(&ldcp->status_lock);
		mutex_exit(&ldcp->ldc_cblock);
		return (LDC_SUCCESS);
	}
	mutex_exit(&ldcp->status_lock);

	if (event & LDC_EVT_UP) {
		/*
		 * Channel has come up.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		/*
		 * Data available for reading.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) data READ",
		    __func__, ldcp->ldc_id, event);

		if (ldcp->rx_thread != NULL) {
			/*
			 * If the receive thread is enabled, then
			 * wakeup the receive thread to process the
			 * LDC messages.
			 */
1353 */ 1354 mutex_exit(&ldcp->ldc_cblock); 1355 mutex_enter(&ldcp->rx_thr_lock); 1356 if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) { 1357 ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD; 1358 cv_signal(&ldcp->rx_thr_cv); 1359 } 1360 mutex_exit(&ldcp->rx_thr_lock); 1361 mutex_enter(&ldcp->ldc_cblock); 1362 } else { 1363 vsw_process_pkt(ldcp); 1364 } 1365 1366 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1367 1368 goto vsw_cb_exit; 1369 } 1370 1371 if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) { 1372 D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)", 1373 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1374 1375 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 1376 } 1377 1378 /* 1379 * Catch either LDC_EVT_WRITE which we don't support or any 1380 * unknown event. 1381 */ 1382 if (event & 1383 ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) { 1384 DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 1385 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1386 } 1387 1388 vsw_cb_exit: 1389 mutex_exit(&ldcp->ldc_cblock); 1390 1391 /* 1392 * Let the drain function know we are finishing if it 1393 * is waiting. 1394 */ 1395 mutex_enter(&ldcp->drain_cv_lock); 1396 if (ldcp->drain_state == VSW_LDC_DRAINING) 1397 cv_signal(&ldcp->drain_cv); 1398 mutex_exit(&ldcp->drain_cv_lock); 1399 1400 return (LDC_SUCCESS); 1401 } 1402 1403 /* 1404 * Reinitialise data structures associated with the channel. 1405 */ 1406 static void 1407 vsw_ldc_reinit(vsw_ldc_t *ldcp) 1408 { 1409 vsw_t *vswp = ldcp->ldc_vswp; 1410 vsw_port_t *port; 1411 vsw_ldc_list_t *ldcl; 1412 1413 D1(vswp, "%s: enter", __func__); 1414 1415 port = ldcp->ldc_port; 1416 ldcl = &port->p_ldclist; 1417 1418 READ_ENTER(&ldcl->lockrw); 1419 1420 D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 1421 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 1422 1423 vsw_free_lane_resources(ldcp, INBOUND); 1424 vsw_free_lane_resources(ldcp, OUTBOUND); 1425 RW_EXIT(&ldcl->lockrw); 1426 1427 ldcp->lane_in.lstate = 0; 1428 ldcp->lane_out.lstate = 0; 1429 1430 /* Remove the fdb entry for this port/mac address */ 1431 vsw_fdbe_del(vswp, &(port->p_macaddr)); 1432 1433 /* remove the port from vlans it has been assigned to */ 1434 vsw_vlan_remove_ids(port, VSW_VNETPORT); 1435 1436 /* 1437 * Remove parent port from any multicast groups 1438 * it may have registered with. Client must resend 1439 * multicast add command after handshake completes. 1440 */ 1441 vsw_del_mcst_port(port); 1442 1443 ldcp->peer_session = 0; 1444 ldcp->session_status = 0; 1445 ldcp->hcnt = 0; 1446 ldcp->hphase = VSW_MILESTONE0; 1447 1448 vsw_reset_vnet_proto_ops(ldcp); 1449 1450 D1(vswp, "%s: exit", __func__); 1451 } 1452 1453 /* 1454 * Process a connection event. 1455 * 1456 * Note - care must be taken to ensure that this function is 1457 * not called with the dlistrw lock held. 1458 */ 1459 static void 1460 vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt) 1461 { 1462 vsw_t *vswp = ldcp->ldc_vswp; 1463 vsw_conn_evt_t *conn = NULL; 1464 1465 D1(vswp, "%s: enter", __func__); 1466 1467 /* 1468 * Check if either a reset or restart event is pending 1469 * or in progress. If so just return. 1470 * 1471 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT 1472 * being received by the callback handler, or a ECONNRESET error 1473 * code being returned from a ldc_read() or ldc_write() call. 
	/*
	 * Check if either a reset or restart event is pending
	 * or in progress. If so just return.
	 *
	 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT
	 * being received by the callback handler, or an ECONNRESET error
	 * code being returned from a ldc_read() or ldc_write() call.
	 *
	 * A VSW_CONN_RESTART event occurs when some error checking code
	 * decides that there is a problem with data from the channel,
	 * and that the handshake should be restarted.
	 */
	if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) &&
	    (ldstub((uint8_t *)&ldcp->reset_active)))
		return;

	/*
	 * If it is an LDC_UP event we first check the recorded
	 * state of the channel. If this is UP then we know that
	 * the channel moving to the UP state has already been dealt
	 * with and don't need to dispatch a new task.
	 *
	 * The reason for this check is that when we do a ldc_up(),
	 * depending on the state of the peer, we may or may not get
	 * a LDC_UP event. As we can't depend on getting a LDC_UP evt
	 * every time we do ldc_up() we explicitly check the channel
	 * status to see has it come up (ldc_up() is asynch and will
	 * complete at some undefined time), and take the appropriate
	 * action.
	 *
	 * The flip side of this is that we may get a LDC_UP event
	 * when we have already seen that the channel is up and have
	 * dealt with that.
	 */
	mutex_enter(&ldcp->status_lock);
	if (evt == VSW_CONN_UP) {
		if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) {
			mutex_exit(&ldcp->status_lock);
			return;
		}
	}
	mutex_exit(&ldcp->status_lock);

	/*
	 * The transaction group id allows us to identify and discard
	 * any tasks which are still pending on the taskq and refer
	 * to the handshake session we are about to restart or reset.
	 * These stale messages no longer have any real meaning.
	 */
	(void) atomic_inc_32(&ldcp->hss_id);

	ASSERT(vswp->taskq_p != NULL);

	if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) {
		cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for"
		    " connection event", vswp->instance);
		goto err_exit;
	}

	conn->evt = evt;
	conn->ldcp = ldcp;

	if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn,
	    DDI_NOSLEEP) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task",
		    vswp->instance);

		kmem_free(conn, sizeof (vsw_conn_evt_t));
		goto err_exit;
	}

	D1(vswp, "%s: exit", __func__);
	return;

err_exit:
	/*
	 * Have most likely failed due to memory shortage. Clear the flag so
	 * that future requests will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;
}
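/*
 * A note on the reset_active flag used above: ldstub() is an atomic
 * test-and-set (it sets the byte and returns its previous value), so the
 * first caller to report a RESET/RESTART claims the flag and dispatches
 * the connection task; any caller that loses the race sees a non-zero
 * return and simply backs off. The flag is cleared once vsw_conn_task()
 * has run (or on dispatch failure), re-arming the channel for future
 * resets.
 */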
/*
 * Deal with events relating to a connection. Invoked from a taskq.
 */
static void
vsw_conn_task(void *arg)
{
	vsw_conn_evt_t	*conn = (vsw_conn_evt_t *)arg;
	vsw_ldc_t	*ldcp = NULL;
	vsw_port_t	*portp;
	vsw_t		*vswp = NULL;
	uint16_t	evt;
	ldc_status_t	curr_status;

	ldcp = conn->ldcp;
	evt = conn->evt;
	vswp = ldcp->ldc_vswp;
	portp = ldcp->ldc_port;

	D1(vswp, "%s: enter", __func__);

	/* can safely free now that we have copied out the data */
	kmem_free(conn, sizeof (vsw_conn_evt_t));

	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	/*
	 * If we wish to restart the handshake on this channel, then if
	 * the channel is UP we bring it DOWN to flush the underlying
	 * ldc queue.
	 */
	if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP))
		(void) ldc_down(ldcp->ldc_handle);

	if ((portp->p_hio_capable) && (portp->p_hio_enabled)) {
		vsw_hio_stop(vswp, ldcp);
	}

	/*
	 * re-init all the associated data structures.
	 */
	vsw_ldc_reinit(ldcp);

	/*
	 * Bring the channel back up (note it does no harm to
	 * do this even if the channel is already UP, it just
	 * becomes effectively a no-op).
	 */
	(void) ldc_up(ldcp->ldc_handle);

	/*
	 * Check if channel is now UP. This will only happen if
	 * peer has also done a ldc_up().
	 */
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	ldcp->ldc_status = curr_status;

	/* channel UP so restart handshake by sending version info */
	if (curr_status == LDC_UP) {
		if (ldcp->hcnt++ > vsw_num_handshakes) {
			cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted"
			    " handshake attempts (%d) on channel %ld",
			    vswp->instance, ldcp->hcnt, ldcp->ldc_id);
			mutex_exit(&ldcp->status_lock);
			return;
		}

		if (vsw_obp_ver_proto_workaround == B_FALSE &&
		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
		    DDI_NOSLEEP) != DDI_SUCCESS)) {
			cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task",
			    vswp->instance);

			/*
			 * Don't count as valid restart attempt if couldn't
			 * send version msg.
			 */
			if (ldcp->hcnt > 0)
				ldcp->hcnt--;
		}
	}

	/*
	 * Mark that the process is complete by clearing the flag.
	 *
	 * Note it is possible that the taskq dispatch above may have failed,
	 * most likely due to memory shortage. We still clear the flag so
	 * future attempts will at least be tried and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;

	mutex_exit(&ldcp->status_lock);

	D1(vswp, "%s: exit", __func__);
}
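/*
 * Summary of the restart sequence driven by vsw_conn_task() above: for a
 * VSW_CONN_RESTART on an UP channel the ldc queue is first flushed with
 * ldc_down(), any HybridIO sharing is stopped, the per-channel state is
 * re-initialised via vsw_ldc_reinit(), the channel is brought back up
 * with ldc_up(), and if the peer is also up the handshake is restarted
 * by dispatching vsw_send_ver().
 */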
/*
 * Returns 0 if it is legal for the event signified by flag to have
 * occurred at the time it did. Otherwise returns 1.
 */
int
vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	uint64_t	state;
	uint64_t	phase;

	if (dir == INBOUND)
		state = ldcp->lane_in.lstate;
	else
		state = ldcp->lane_out.lstate;

	phase = ldcp->hphase;

	switch (flag) {
	case VSW_VER_INFO_RECV:
		if (phase > VSW_MILESTONE0) {
			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_VER_ACK_RECV:
	case VSW_VER_NACK_RECV:
		if (!(state & VSW_VER_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or "
			    "VER_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_VER_INFO_SENT;
		break;

	case VSW_ATTR_INFO_RECV:
		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_ATTR_ACK_RECV:
	case VSW_ATTR_NACK_RECV:
		if (!(state & VSW_ATTR_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
			    " or ATTR_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_ATTR_INFO_SENT;
		break;

	case VSW_DRING_INFO_RECV:
		if (phase < VSW_MILESTONE1) {
			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_DRING_ACK_RECV:
	case VSW_DRING_NACK_RECV:
		if (!(state & VSW_DRING_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK "
			    " or DRING_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_DRING_INFO_SENT;
		break;

	case VSW_RDX_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_RDX_ACK_RECV:
	case VSW_RDX_NACK_RECV:
		if (!(state & VSW_RDX_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or "
			    "RDX_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_RDX_INFO_SENT;
		break;

	case VSW_MCST_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	default:
		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
		    ldcp->ldc_id, flag);
		return (1);
	}

	if (dir == INBOUND)
		ldcp->lane_in.lstate = state;
	else
		ldcp->lane_out.lstate = state;

	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);

	return (0);
}
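/*
 * Handshake overview (derived from the milestone logic below):
 *
 *	VSW_MILESTONE0	version negotiation (VER_INFO/ACK exchanged)
 *	VSW_MILESTONE1	attribute exchange (ATTR_INFO/ACK)
 *	VSW_MILESTONE2	dring registration (DRING_REG/ACK), if the peer
 *			negotiated descriptor ring mode
 *	VSW_MILESTONE3	RDX exchanged in both directions
 *	VSW_MILESTONE4	handshake complete; outbound lane marked active
 */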
void
vsw_next_milestone(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_port_t	*portp = ldcp->ldc_port;

	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
	    ldcp->ldc_id, ldcp->hphase);

	DUMP_FLAGS(ldcp->lane_in.lstate);
	DUMP_FLAGS(ldcp->lane_out.lstate);

	switch (ldcp->hphase) {

	case VSW_MILESTONE0:
		/*
		 * If we haven't started to handshake with our peer,
		 * start to do so now.
		 */
		if (ldcp->lane_out.lstate == 0) {
			D2(vswp, "%s: (chan %lld) starting handshake "
			    "with peer", __func__, ldcp->ldc_id);
			vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		}

		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated version info.
		 */
		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
		    (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 0",
			    __func__, ldcp->ldc_id);

			vsw_set_vnet_proto_ops(ldcp);

			/*
			 * Next milestone is passed when attribute
			 * information has been successfully exchanged.
			 */
			ldcp->hphase = VSW_MILESTONE1;
			vsw_send_attr(ldcp);

		}
		break;

	case VSW_MILESTONE1:
		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated attribute information.
		 */
		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {

			ldcp->hphase = VSW_MILESTONE2;

			/*
			 * If the peer device has said it wishes to
			 * use descriptor rings then we send it our ring
			 * info, otherwise we just set up a private ring
			 * which uses an internal buffer.
			 */
			if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
			    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
			    (VSW_VER_LT(ldcp, 1, 2) &&
			    (ldcp->lane_in.xfer_mode ==
			    VIO_DRING_MODE_V1_0))) {
				vsw_send_dring_info(ldcp);
			}
		}
		break;

	case VSW_MILESTONE2:
		/*
		 * If peer has indicated in its attribute message that
		 * it wishes to use descriptor rings then the only way
		 * to pass this milestone is for us to have received
		 * valid dring info.
		 *
		 * If peer is not using descriptor rings then just fall
		 * through.
		 */
		if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
		    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    (VSW_VER_LT(ldcp, 1, 2) &&
		    (ldcp->lane_in.xfer_mode ==
		    VIO_DRING_MODE_V1_0))) {
			if (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))
				break;
		}

		D2(vswp, "%s: (chan %lld) leaving milestone 2",
		    __func__, ldcp->ldc_id);

		ldcp->hphase = VSW_MILESTONE3;
		vsw_send_rdx(ldcp);
		break;

	case VSW_MILESTONE3:
		/*
		 * Pass this milestone when all parameters have been
		 * successfully exchanged and RDX sent in both directions.
		 *
		 * Mark the outbound lane as available to transmit data.
		 */
		if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) &&
		    (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 3",
			    __func__, ldcp->ldc_id);
			D2(vswp, "%s: ** handshake complete (0x%llx : "
			    "0x%llx) **", __func__, ldcp->lane_in.lstate,
			    ldcp->lane_out.lstate);
			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
			ldcp->hphase = VSW_MILESTONE4;
			ldcp->hcnt = 0;
			DISPLAY_STATE();
			/* Start HIO if enabled and capable */
			if ((portp->p_hio_enabled) && (portp->p_hio_capable)) {
				D2(vswp, "%s: start HybridIO setup", __func__);
				vsw_hio_start(vswp, ldcp);
			}
		} else {
			D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)",
			    __func__, ldcp->lane_in.lstate,
			    ldcp->lane_out.lstate);
		}
		break;

	case VSW_MILESTONE4:
		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
		    ldcp->ldc_id);
		break;

	default:
		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
		    ldcp->ldc_id, ldcp->hphase);
	}

	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
	    ldcp->hphase);
}

/*
 * Check if major version is supported.
 *
 * Returns 0 if it finds a supported major number, and if necessary
 * adjusts the minor field.
 *
 * Returns 1 if it can't match the major number exactly. Sets major/minor
 * to the next lowest supported values, or to zero if no other values are
 * possible.
 */
static int
vsw_supported_version(vio_ver_msg_t *vp)
{
	int	i;

	D1(NULL, "vsw_supported_version: enter");

	for (i = 0; i < VSW_NUM_VER; i++) {
		if (vsw_versions[i].ver_major == vp->ver_major) {
			/*
			 * Matching or lower major version found. Update
			 * minor number if necessary.
			 */
			if (vp->ver_minor > vsw_versions[i].ver_minor) {
				D2(NULL, "%s: adjusting minor value from %d "
				    "to %d", __func__, vp->ver_minor,
				    vsw_versions[i].ver_minor);
				vp->ver_minor = vsw_versions[i].ver_minor;
			}

			return (0);
		}

		/*
		 * If the message contains a higher major version number, set
		 * the message's major/minor versions to the current values
		 * and return false, so this message will get resent with
		 * these values.
		 */
		if (vsw_versions[i].ver_major < vp->ver_major) {
			D2(NULL, "%s: adjusting major and minor "
			    "values to %d, %d\n",
			    __func__, vsw_versions[i].ver_major,
			    vsw_versions[i].ver_minor);
			vp->ver_major = vsw_versions[i].ver_major;
			vp->ver_minor = vsw_versions[i].ver_minor;
			return (1);
		}
	}

	/* No match was possible, zero out fields */
	vp->ver_major = 0;
	vp->ver_minor = 0;

	D1(NULL, "vsw_supported_version: exit");

	return (1);
}

/*
 * Set vnet-protocol-version dependent functions based on version.
 */
static void
vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	lane_t	*lp = &ldcp->lane_out;

	if (VSW_VER_GTEQ(ldcp, 1, 3)) {
		/*
		 * If the version negotiated with peer is >= 1.3,
		 * set the mtu in our attributes to max_frame_size.
		 */
		lp->mtu = vswp->max_frame_size;
	} else {
		vsw_port_t	*portp = ldcp->ldc_port;
2006 * We can negotiate that size with those peers provided the
2007 * following conditions are true:
2008 * - Our max_frame_size is greater than ETHERMAX only by VLAN_TAGSZ (4).
2009 * - Only pvid is defined for our peer and there are no vids.
2010 * If the above conditions are true, then we can send/recv only
2011 * untagged frames of max size ETHERMAX. Note that pvid of the
2012 * peer can be different, as vsw has to serve the vnet in that
2013 * vlan even if vsw itself is not assigned to that vlan.
2014 */
2015 if ((vswp->max_frame_size == ETHERMAX + VLAN_TAGSZ) &&
2016 portp->nvids == 0) {
2017 lp->mtu = ETHERMAX;
2018 }
2019 }
2020
2021 if (VSW_VER_GTEQ(ldcp, 1, 2)) {
2022 /* Versions >= 1.2 */
2023
2024 if (VSW_PRI_ETH_DEFINED(vswp)) {
2025 /*
2026 * Enable priority routines and pkt mode only if
2027 * at least one pri-eth-type is specified in the MD.
2028 */
2029 ldcp->tx = vsw_ldctx_pri;
2030 ldcp->rx_pktdata = vsw_process_pkt_data;
2031
2032 /* set xfer mode for vsw_send_attr() */
2033 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
2034 } else {
2035 /* no priority eth types defined in MD */
2036
2037 ldcp->tx = vsw_ldctx;
2038 ldcp->rx_pktdata = vsw_process_pkt_data_nop;
2039
2040 /* set xfer mode for vsw_send_attr() */
2041 lp->xfer_mode = VIO_DRING_MODE_V1_2;
2042 }
2043
2044 } else {
2045 /* Versions prior to 1.2 */
2046
2047 vsw_reset_vnet_proto_ops(ldcp);
2048 }
2049 }
2050
2051 /*
2052 * Reset vnet-protocol-version dependent functions to v1.0.
2053 */
2054 static void
2055 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp)
2056 {
2057 lane_t *lp = &ldcp->lane_out;
2058
2059 ldcp->tx = vsw_ldctx;
2060 ldcp->rx_pktdata = vsw_process_pkt_data_nop;
2061
2062 /* set xfer mode for vsw_send_attr() */
2063 lp->xfer_mode = VIO_DRING_MODE_V1_0;
2064 }
2065
2066 /*
2067 * Main routine for processing messages received over LDC.
2068 */
2069 static void
2070 vsw_process_pkt(void *arg)
2071 {
2072 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
2073 vsw_t *vswp = ldcp->ldc_vswp;
2074 size_t msglen;
2075 vio_msg_tag_t *tagp;
2076 uint64_t *ldcmsg;
2077 int rv = 0;
2078
2079
2080 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
2081
2082 ASSERT(MUTEX_HELD(&ldcp->ldc_cblock));
2083
2084 ldcmsg = ldcp->ldcmsg;
2085 /*
2086 * If channel is up read messages until channel is empty.
2087 */
2088 do {
2089 msglen = ldcp->msglen;
2090 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
2091
2092 if (rv != 0) {
2093 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n",
2094 __func__, ldcp->ldc_id, rv, msglen);
2095 }
2096
2097 /* channel has been reset */
2098 if (rv == ECONNRESET) {
2099 vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
2100 break;
2101 }
2102
2103 if (msglen == 0) {
2104 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__,
2105 ldcp->ldc_id);
2106 break;
2107 }
2108
2109 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__,
2110 ldcp->ldc_id, msglen);
2111
2112 /*
2113 * Figure out what sort of packet we have gotten by
2114 * examining the msg tag, and then switch it appropriately.
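 *
 * Every VIO message begins with a vio_msg_tag_t; its vio_msgtype
 * field selects between control (VIO_TYPE_CTRL), data (VIO_TYPE_DATA)
 * and error (VIO_TYPE_ERR) handling below, while the subtype and
 * subtype_env fields are decoded by the lower-level routines.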
2115 */
2116 tagp = (vio_msg_tag_t *)ldcmsg;
2117
2118 switch (tagp->vio_msgtype) {
2119 case VIO_TYPE_CTRL:
2120 vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp);
2121 break;
2122 case VIO_TYPE_DATA:
2123 vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen);
2124 break;
2125 case VIO_TYPE_ERR:
2126 vsw_process_err_pkt(ldcp, ldcmsg, tagp);
2127 break;
2128 default:
2129 DERR(vswp, "%s: Unknown tag(%lx) id(%lx)\n", __func__,
2130 tagp->vio_msgtype, ldcp->ldc_id);
2131 break;
2132 }
2133 } while (msglen);
2134
2135 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id);
2136 }
2137
2138 /*
2139 * Dispatch a task to process a VIO control message.
2140 */
2141 static void
2142 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp)
2143 {
2144 vsw_ctrl_task_t *ctaskp = NULL;
2145 vsw_port_t *port = ldcp->ldc_port;
2146 vsw_t *vswp = port->p_vswp;
2147
2148 D1(vswp, "%s: enter", __func__);
2149
2150 /*
2151 * We need to handle RDX ACK messages in-band as once they
2152 * are exchanged it is possible that we will get an
2153 * immediate (legitimate) data packet.
2154 */
2155 if ((tagp->vio_subtype_env == VIO_RDX) &&
2156 (tagp->vio_subtype == VIO_SUBTYPE_ACK)) {
2157
2158 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV))
2159 return;
2160
2161 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV;
2162 D2(vswp, "%s (%ld) handling RDX_ACK in place "
2163 "(ostate 0x%llx : hphase %d)", __func__,
2164 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase);
2165 vsw_next_milestone(ldcp);
2166 return;
2167 }
2168
2169 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP);
2170
2171 if (ctaskp == NULL) {
2172 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__);
2173 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2174 return;
2175 }
2176
2177 ctaskp->ldcp = ldcp;
2178 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t));
2179 ctaskp->hss_id = ldcp->hss_id;
2180
2181 /*
2182 * Dispatch task to processing taskq if port is not in
2183 * the process of being detached.
2184 */
2185 mutex_enter(&port->state_lock);
2186 if (port->state == VSW_PORT_INIT) {
2187 if ((vswp->taskq_p == NULL) ||
2188 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt,
2189 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) {
2190 DERR(vswp, "%s: unable to dispatch task to taskq",
2191 __func__);
2192 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2193 mutex_exit(&port->state_lock);
2194 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2195 return;
2196 }
2197 } else {
2198 DWARN(vswp, "%s: port %d detaching, not dispatching "
2199 "task", __func__, port->p_instance);
2200 }
2201
2202 mutex_exit(&port->state_lock);
2203
2204 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__,
2205 ldcp->ldc_id);
2206 D1(vswp, "%s: exit", __func__);
2207 }
2208
2209 /*
2210 * Process a VIO ctrl message. Invoked from taskq.
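 *
 * The vsw_ctrl_task_t passed in was allocated (KM_NOSLEEP) by
 * vsw_dispatch_ctrl_task() and carries a copy of the message plus
 * the handshake session id that was current at dispatch time; it
 * must be freed before this function returns.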
2211 */
2212 static void
2213 vsw_process_ctrl_pkt(void *arg)
2214 {
2215 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg;
2216 vsw_ldc_t *ldcp = ctaskp->ldcp;
2217 vsw_t *vswp = ldcp->ldc_vswp;
2218 vio_msg_tag_t tag;
2219 uint16_t env;
2220
2221 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
2222
2223 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t));
2224 env = tag.vio_subtype_env;
2225
2226 /* stale pkt check */
2227 if (ctaskp->hss_id < ldcp->hss_id) {
2228 DWARN(vswp, "%s: discarding stale packet belonging to earlier"
2229 " (%ld) handshake session", __func__, ctaskp->hss_id);
kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2230 return;
2231 }
2232
2233 /* session id check */
2234 if (ldcp->session_status & VSW_PEER_SESSION) {
2235 if (ldcp->peer_session != tag.vio_sid) {
2236 DERR(vswp, "%s (chan %d): invalid session id (%llx)",
2237 __func__, ldcp->ldc_id, tag.vio_sid);
2238 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2239 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2240 return;
2241 }
2242 }
2243
2244 /*
2245 * Switch on vio_subtype envelope, then let lower routines
2246 * decide if it's an INFO, ACK or NACK packet.
2247 */
2248 switch (env) {
2249 case VIO_VER_INFO:
2250 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp);
2251 break;
2252 case VIO_DRING_REG:
2253 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp);
2254 break;
2255 case VIO_DRING_UNREG:
2256 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp);
2257 break;
2258 case VIO_ATTR_INFO:
2259 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp);
2260 break;
2261 case VNET_MCAST_INFO:
2262 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp);
2263 break;
2264 case VIO_RDX:
2265 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp);
2266 break;
2267 case VIO_DDS_INFO:
2268 vsw_process_dds_msg(vswp, ldcp, &ctaskp->pktp);
2269 break;
2270 default:
2271 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env);
2272 }
2273
2274 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2275 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
2276 }
2277
2278 /*
2279 * Version negotiation. We can end up here either because our peer
2280 * has responded to a handshake message we have sent it, or our peer
2281 * has initiated a handshake with us. If it's the former then it can
2282 * only be an ACK or NACK; if it's the latter it can only be an INFO.
2283 *
2284 * If it's an ACK we move to the next stage of the handshake, namely
2285 * attribute exchange. If it's a NACK we see if we can specify another
2286 * version; if we can't, we stop.
2287 *
2288 * If it is an INFO we reset all params associated with communication
2289 * in that direction over this channel (remember the connection is
2290 * essentially 2 independent simplex channels).
2291 */
2292 void
2293 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt)
2294 {
2295 vio_ver_msg_t *ver_pkt;
2296 vsw_t *vswp = ldcp->ldc_vswp;
2297
2298 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
2299
2300 /*
2301 * We know this is a ctrl/version packet so
2302 * cast it into the correct structure.
2303 */
2304 ver_pkt = (vio_ver_msg_t *)pkt;
2305
2306 switch (ver_pkt->tag.vio_subtype) {
2307 case VIO_SUBTYPE_INFO:
2308 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n");
2309
2310 /*
2311 * Record the session id, which we will use from now
2312 * until we see another VER_INFO msg. Even then the
2313 * session id in most cases will be unchanged, except
2314 * if the channel was reset.
2315 */
2316 if ((ldcp->session_status & VSW_PEER_SESSION) &&
2317 (ldcp->peer_session != ver_pkt->tag.vio_sid)) {
2318 DERR(vswp, "%s: updating session id for chan %lld "
2319 "from %llx to %llx", __func__, ldcp->ldc_id,
2320 ldcp->peer_session, ver_pkt->tag.vio_sid);
2321 }
2322
2323 ldcp->peer_session = ver_pkt->tag.vio_sid;
2324 ldcp->session_status |= VSW_PEER_SESSION;
2325
2326 /* Legal message at this time? */
2327 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV))
2328 return;
2329
2330 /*
2331 * First check the device class. Currently we only expect
2332 * to be talking to a network device. In the future we may
2333 * also talk to another switch.
2334 */
2335 if (ver_pkt->dev_class != VDEV_NETWORK) {
2336 DERR(vswp, "%s: illegal device class %d", __func__,
2337 ver_pkt->dev_class);
2338
2339 ver_pkt->tag.vio_sid = ldcp->local_session;
2340 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
2341
2342 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
2343
2344 (void) vsw_send_msg(ldcp, (void *)ver_pkt,
2345 sizeof (vio_ver_msg_t), B_TRUE);
2346
2347 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
2348 vsw_next_milestone(ldcp);
2349 return;
2350 } else {
2351 ldcp->dev_class = ver_pkt->dev_class;
2352 }
2353
2354 /*
2355 * Now check the version.
2356 */
2357 if (vsw_supported_version(ver_pkt) == 0) {
2358 /*
2359 * We support this major version and possibly an
2360 * adjusted minor version.
2361 */
2362
2363 D2(vswp, "%s: accepted ver %d:%d", __func__,
2364 ver_pkt->ver_major, ver_pkt->ver_minor);
2365
2366 /* Store accepted values */
2367 ldcp->lane_in.ver_major = ver_pkt->ver_major;
2368 ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
2369
2370 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
2371
2372 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT;
2373
2374 if (vsw_obp_ver_proto_workaround == B_TRUE) {
2375 /*
2376 * Send a version info message
2377 * using the accepted version that
2378 * we are about to ack. Also note that
2379 * we send our ver info before we ack.
2380 * Otherwise, as soon as receiving the
2381 * ack, obp sends attr info msg, which
2382 * breaks vsw_check_flag() invoked
2383 * from vsw_process_ctrl_attr_pkt();
2384 * as we also need VSW_VER_ACK_RECV to
2385 * be set in lane_out.lstate, before
2386 * we can receive attr info.
2387 */
2388 vsw_send_ver(ldcp);
2389 }
2390 } else {
2391 /*
2392 * NACK back with the next lower major/minor
2393 * pairing we support (if we don't support any more
2394 * versions they will be set to zero).
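 *
 * For example (versions here are hypothetical): if our highest
 * supported version is 1.3 and the peer proposes 2.0,
 * vsw_supported_version() rewrites the message to 1.3 and we
 * NACK; a conforming peer then re-sends its INFO with 1.3 (or
 * lower), which we can ACK.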
2395 */
2396
2397 D2(vswp, "%s: replying with ver %d:%d", __func__,
2398 ver_pkt->ver_major, ver_pkt->ver_minor);
2399
2400 /* Store updated values */
2401 ldcp->lane_in.ver_major = ver_pkt->ver_major;
2402 ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
2403
2404 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
2405
2406 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
2407 }
2408
2409 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
2410 ver_pkt->tag.vio_sid = ldcp->local_session;
2411 (void) vsw_send_msg(ldcp, (void *)ver_pkt,
2412 sizeof (vio_ver_msg_t), B_TRUE);
2413
2414 vsw_next_milestone(ldcp);
2415 break;
2416
2417 case VIO_SUBTYPE_ACK:
2418 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__);
2419
2420 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV))
2421 return;
2422
2423 /* Store updated values */
2424 ldcp->lane_out.ver_major = ver_pkt->ver_major;
2425 ldcp->lane_out.ver_minor = ver_pkt->ver_minor;
2426
2427 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV;
2428 vsw_next_milestone(ldcp);
2429
2430 break;
2431
2432 case VIO_SUBTYPE_NACK:
2433 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__);
2434
2435 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV))
2436 return;
2437
2438 /*
2439 * If our peer sent us a NACK with the ver fields set to
2440 * zero then there is nothing more we can do. Otherwise see
2441 * if we support either the version suggested, or a lesser
2442 * one.
2443 */
2444 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
2445 DERR(vswp, "%s: peer unable to negotiate any "
2446 "further.", __func__);
2447 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
2448 vsw_next_milestone(ldcp);
2449 return;
2450 }
2451
2452 /*
2453 * Check to see if we support this major version or
2454 * a lower one. If we don't then maj/min will be set
2455 * to zero.
2456 */
2457 (void) vsw_supported_version(ver_pkt);
2458 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
2459 /* Nothing more we can do */
2460 DERR(vswp, "%s: version negotiation failed.\n",
2461 __func__);
2462 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
2463 vsw_next_milestone(ldcp);
2464 } else {
2465 /* found a supported major version */
2466 ldcp->lane_out.ver_major = ver_pkt->ver_major;
2467 ldcp->lane_out.ver_minor = ver_pkt->ver_minor;
2468
2469 D2(vswp, "%s: resending with updated values (%x, %x)",
2470 __func__, ver_pkt->ver_major, ver_pkt->ver_minor);
2471
2472 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT;
2473 ver_pkt->tag.vio_sid = ldcp->local_session;
2474 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
2475
2476 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
2477
2478 (void) vsw_send_msg(ldcp, (void *)ver_pkt,
2479 sizeof (vio_ver_msg_t), B_TRUE);
2480
2481 vsw_next_milestone(ldcp);
2482
2483 }
2484 break;
2485
2486 default:
2487 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
2488 ver_pkt->tag.vio_subtype);
2489 }
2490
2491 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
2492 }
2493
2494 /*
2495 * Process an attribute packet. We can end up here either because our peer
2496 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our
2497 * peer has sent us an attribute INFO message.
2498 *
2499 * If it's an ACK we then move to the next stage of the handshake which
2500 * is to send our descriptor ring info to our peer. If it's a NACK then
2501 * there is nothing more we can (currently) do.
2502 *
2503 * If we get a valid/acceptable INFO packet (and we have already negotiated
2504 * a version) we ACK back and set the channel state to ATTR_RECV, otherwise
2505 * we NACK back and reset the channel state to INACTIVE.
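 *
 * As elsewhere in the handshake, the two directions are handled
 * independently: an INFO from the peer updates lane_in (and
 * lane_in.lstate), while an ACK/NACK of our own earlier ATTR msg
 * updates lane_out.lstate.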
2506 *
2507 * FUTURE: in time we will probably negotiate over attributes, but for
2508 * the moment unacceptable attributes are regarded as a fatal error.
2509 *
2510 */
2511 void
2512 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt)
2513 {
2514 vnet_attr_msg_t *attr_pkt;
2515 vsw_t *vswp = ldcp->ldc_vswp;
2516 vsw_port_t *port = ldcp->ldc_port;
2517 uint64_t macaddr = 0;
2518 int i;
2519
2520 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
2521
2522 /*
2523 * We know this is a ctrl/attr packet so
2524 * cast it into the correct structure.
2525 */
2526 attr_pkt = (vnet_attr_msg_t *)pkt;
2527
2528 switch (attr_pkt->tag.vio_subtype) {
2529 case VIO_SUBTYPE_INFO:
2530 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
2531
2532 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV))
2533 return;
2534
2535 /*
2536 * If the attributes are unacceptable then we NACK back.
2537 */
2538 if (vsw_check_attr(attr_pkt, ldcp)) {
2539
2540 DERR(vswp, "%s (chan %d): invalid attributes",
2541 __func__, ldcp->ldc_id);
2542
2543 vsw_free_lane_resources(ldcp, INBOUND);
2544
2545 attr_pkt->tag.vio_sid = ldcp->local_session;
2546 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
2547
2548 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
2549 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT;
2550 (void) vsw_send_msg(ldcp, (void *)attr_pkt,
2551 sizeof (vnet_attr_msg_t), B_TRUE);
2552
2553 vsw_next_milestone(ldcp);
2554 return;
2555 }
2556
2557 /*
2558 * Otherwise store attributes for this lane and update
2559 * lane state.
2560 */
2561 ldcp->lane_in.mtu = attr_pkt->mtu;
2562 ldcp->lane_in.addr = attr_pkt->addr;
2563 ldcp->lane_in.addr_type = attr_pkt->addr_type;
2564 ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode;
2565 ldcp->lane_in.ack_freq = attr_pkt->ack_freq;
2566
2567 macaddr = ldcp->lane_in.addr;
2568 for (i = ETHERADDRL - 1; i >= 0; i--) {
2569 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF;
2570 macaddr >>= 8;
2571 }
2572
2573 /* create the fdb entry for this port/mac address */
2574 vsw_fdbe_add(vswp, port);
2575
2576 /* add the port to the specified vlans */
2577 vsw_vlan_add_ids(port, VSW_VNETPORT);
2578
2579 /* set up device-specific xmit routines */
2580 mutex_enter(&port->tx_lock);
2581 if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
2582 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
2583 (VSW_VER_LT(ldcp, 1, 2) &&
2584 (ldcp->lane_in.xfer_mode == VIO_DRING_MODE_V1_0))) {
2585 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__);
2586 port->transmit = vsw_dringsend;
2587 } else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) {
2588 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__);
2589 vsw_create_privring(ldcp);
2590 port->transmit = vsw_descrsend;
2591 ldcp->lane_out.xfer_mode = VIO_DESC_MODE;
2592 }
2593
2594 /*
2595 * HybridIO is supported only by vnet, not by OBP.
2596 * So, set hio_capable to true only when in DRING mode.
2597 */ 2598 if (VSW_VER_GTEQ(ldcp, 1, 3) && 2599 (ldcp->lane_in.xfer_mode != VIO_DESC_MODE)) { 2600 (void) atomic_swap_32(&port->p_hio_capable, B_TRUE); 2601 } else { 2602 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 2603 } 2604 2605 mutex_exit(&port->tx_lock); 2606 2607 attr_pkt->tag.vio_sid = ldcp->local_session; 2608 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2609 2610 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2611 2612 ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT; 2613 2614 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2615 sizeof (vnet_attr_msg_t), B_TRUE); 2616 2617 vsw_next_milestone(ldcp); 2618 break; 2619 2620 case VIO_SUBTYPE_ACK: 2621 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2622 2623 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 2624 return; 2625 2626 ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV; 2627 vsw_next_milestone(ldcp); 2628 break; 2629 2630 case VIO_SUBTYPE_NACK: 2631 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2632 2633 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2634 return; 2635 2636 ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV; 2637 vsw_next_milestone(ldcp); 2638 break; 2639 2640 default: 2641 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2642 attr_pkt->tag.vio_subtype); 2643 } 2644 2645 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2646 } 2647 2648 /* 2649 * Process a dring info packet. We can end up here either because our peer 2650 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 2651 * peer has sent us a dring INFO message. 2652 * 2653 * If we get a valid/acceptable INFO packet (and we have already negotiated 2654 * a version) we ACK back and update the lane state, otherwise we NACK back. 2655 * 2656 * FUTURE: nothing to stop client from sending us info on multiple dring's 2657 * but for the moment we will just use the first one we are given. 2658 * 2659 */ 2660 void 2661 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 2662 { 2663 vio_dring_reg_msg_t *dring_pkt; 2664 vsw_t *vswp = ldcp->ldc_vswp; 2665 ldc_mem_info_t minfo; 2666 dring_info_t *dp, *dbp; 2667 int dring_found = 0; 2668 2669 /* 2670 * We know this is a ctrl/dring packet so 2671 * cast it into the correct structure. 2672 */ 2673 dring_pkt = (vio_dring_reg_msg_t *)pkt; 2674 2675 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2676 2677 switch (dring_pkt->tag.vio_subtype) { 2678 case VIO_SUBTYPE_INFO: 2679 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2680 2681 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 2682 return; 2683 2684 /* 2685 * If the dring params are unacceptable then we NACK back. 2686 */ 2687 if (vsw_check_dring_info(dring_pkt)) { 2688 2689 DERR(vswp, "%s (%lld): invalid dring info", 2690 __func__, ldcp->ldc_id); 2691 2692 vsw_free_lane_resources(ldcp, INBOUND); 2693 2694 dring_pkt->tag.vio_sid = ldcp->local_session; 2695 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2696 2697 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2698 2699 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2700 2701 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2702 sizeof (vio_dring_reg_msg_t), B_TRUE); 2703 2704 vsw_next_milestone(ldcp); 2705 return; 2706 } 2707 2708 /* 2709 * Otherwise, attempt to map in the dring using the 2710 * cookie. If that succeeds we send back a unique dring 2711 * identifier that the sending side will use in future 2712 * to refer to this descriptor ring. 
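 *
 * The import sequence below is: allocate a dring_info_t and copy in
 * the peer's parameters and cookie, ldc_mem_dring_map() the ring
 * into our address space, ldc_mem_dring_info() to obtain its mapped
 * address and mtype, link it onto lane_in.dringp, and finally ACK
 * back with the newly assigned ident.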
2713 */ 2714 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 2715 2716 dp->num_descriptors = dring_pkt->num_descriptors; 2717 dp->descriptor_size = dring_pkt->descriptor_size; 2718 dp->options = dring_pkt->options; 2719 dp->ncookies = dring_pkt->ncookies; 2720 2721 /* 2722 * Note: should only get one cookie. Enforced in 2723 * the ldc layer. 2724 */ 2725 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 2726 sizeof (ldc_mem_cookie_t)); 2727 2728 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 2729 dp->num_descriptors, dp->descriptor_size); 2730 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 2731 dp->options, dp->ncookies); 2732 2733 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 2734 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 2735 LDC_DIRECT_MAP, &(dp->handle))) != 0) { 2736 2737 DERR(vswp, "%s: dring_map failed\n", __func__); 2738 2739 kmem_free(dp, sizeof (dring_info_t)); 2740 vsw_free_lane_resources(ldcp, INBOUND); 2741 2742 dring_pkt->tag.vio_sid = ldcp->local_session; 2743 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2744 2745 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2746 2747 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2748 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2749 sizeof (vio_dring_reg_msg_t), B_TRUE); 2750 2751 vsw_next_milestone(ldcp); 2752 return; 2753 } 2754 2755 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 2756 2757 DERR(vswp, "%s: dring_addr failed\n", __func__); 2758 2759 kmem_free(dp, sizeof (dring_info_t)); 2760 vsw_free_lane_resources(ldcp, INBOUND); 2761 2762 dring_pkt->tag.vio_sid = ldcp->local_session; 2763 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2764 2765 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2766 2767 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2768 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2769 sizeof (vio_dring_reg_msg_t), B_TRUE); 2770 2771 vsw_next_milestone(ldcp); 2772 return; 2773 } else { 2774 /* store the address of the pub part of ring */ 2775 dp->pub_addr = minfo.vaddr; 2776 2777 /* cache the dring mtype */ 2778 dp->dring_mtype = minfo.mtype; 2779 } 2780 2781 /* no private section as we are importing */ 2782 dp->priv_addr = NULL; 2783 2784 /* 2785 * Using simple mono increasing int for ident at 2786 * the moment. 2787 */ 2788 dp->ident = ldcp->next_ident; 2789 ldcp->next_ident++; 2790 2791 dp->end_idx = 0; 2792 dp->next = NULL; 2793 2794 /* 2795 * Link it onto the end of the list of drings 2796 * for this lane. 2797 */ 2798 if (ldcp->lane_in.dringp == NULL) { 2799 D2(vswp, "%s: adding first INBOUND dring", __func__); 2800 ldcp->lane_in.dringp = dp; 2801 } else { 2802 dbp = ldcp->lane_in.dringp; 2803 2804 while (dbp->next != NULL) 2805 dbp = dbp->next; 2806 2807 dbp->next = dp; 2808 } 2809 2810 /* acknowledge it */ 2811 dring_pkt->tag.vio_sid = ldcp->local_session; 2812 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2813 dring_pkt->dring_ident = dp->ident; 2814 2815 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2816 sizeof (vio_dring_reg_msg_t), B_TRUE); 2817 2818 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 2819 vsw_next_milestone(ldcp); 2820 break; 2821 2822 case VIO_SUBTYPE_ACK: 2823 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2824 2825 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 2826 return; 2827 2828 /* 2829 * Peer is acknowledging our dring info and will have 2830 * sent us a dring identifier which we will use to 2831 * refer to this ring w.r.t. our peer. 
2832 */ 2833 dp = ldcp->lane_out.dringp; 2834 if (dp != NULL) { 2835 /* 2836 * Find the ring this ident should be associated 2837 * with. 2838 */ 2839 if (vsw_dring_match(dp, dring_pkt)) { 2840 dring_found = 1; 2841 2842 } else while (dp != NULL) { 2843 if (vsw_dring_match(dp, dring_pkt)) { 2844 dring_found = 1; 2845 break; 2846 } 2847 dp = dp->next; 2848 } 2849 2850 if (dring_found == 0) { 2851 DERR(NULL, "%s: unrecognised ring cookie", 2852 __func__); 2853 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2854 return; 2855 } 2856 2857 } else { 2858 DERR(vswp, "%s: DRING ACK received but no drings " 2859 "allocated", __func__); 2860 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2861 return; 2862 } 2863 2864 /* store ident */ 2865 dp->ident = dring_pkt->dring_ident; 2866 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 2867 vsw_next_milestone(ldcp); 2868 break; 2869 2870 case VIO_SUBTYPE_NACK: 2871 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2872 2873 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 2874 return; 2875 2876 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 2877 vsw_next_milestone(ldcp); 2878 break; 2879 2880 default: 2881 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2882 dring_pkt->tag.vio_subtype); 2883 } 2884 2885 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2886 } 2887 2888 /* 2889 * Process a request from peer to unregister a dring. 2890 * 2891 * For the moment we just restart the handshake if our 2892 * peer endpoint attempts to unregister a dring. 2893 */ 2894 void 2895 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 2896 { 2897 vsw_t *vswp = ldcp->ldc_vswp; 2898 vio_dring_unreg_msg_t *dring_pkt; 2899 2900 /* 2901 * We know this is a ctrl/dring packet so 2902 * cast it into the correct structure. 2903 */ 2904 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 2905 2906 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2907 2908 switch (dring_pkt->tag.vio_subtype) { 2909 case VIO_SUBTYPE_INFO: 2910 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2911 2912 DWARN(vswp, "%s: restarting handshake..", __func__); 2913 break; 2914 2915 case VIO_SUBTYPE_ACK: 2916 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2917 2918 DWARN(vswp, "%s: restarting handshake..", __func__); 2919 break; 2920 2921 case VIO_SUBTYPE_NACK: 2922 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2923 2924 DWARN(vswp, "%s: restarting handshake..", __func__); 2925 break; 2926 2927 default: 2928 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2929 dring_pkt->tag.vio_subtype); 2930 } 2931 2932 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2933 2934 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2935 } 2936 2937 #define SND_MCST_NACK(ldcp, pkt) \ 2938 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 2939 pkt->tag.vio_sid = ldcp->local_session; \ 2940 (void) vsw_send_msg(ldcp, (void *)pkt, \ 2941 sizeof (vnet_mcast_msg_t), B_TRUE); 2942 2943 /* 2944 * Process a multicast request from a vnet. 2945 * 2946 * Vnet's specify a multicast address that they are interested in. This 2947 * address is used as a key into the hash table which forms the multicast 2948 * forwarding database (mFDB). 2949 * 2950 * The table keys are the multicast addresses, while the table entries 2951 * are pointers to lists of ports which wish to receive packets for the 2952 * specified multicast address. 2953 * 2954 * When a multicast packet is being switched we use the address as a key 2955 * into the hash table, and then walk the appropriate port list forwarding 2956 * the pkt to each port in turn. 
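 *
 * For example, if two vnets have both registered interest in the
 * same multicast address, the mFDB entry keyed by that address holds
 * a list containing both ports, and a packet switched to that
 * address is forwarded to each of them in turn.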
2957 * 2958 * If a vnet is no longer interested in a particular multicast grouping 2959 * we simply find the correct location in the hash table and then delete 2960 * the relevant port from the port list. 2961 * 2962 * To deal with the case whereby a port is being deleted without first 2963 * removing itself from the lists in the hash table, we maintain a list 2964 * of multicast addresses the port has registered an interest in, within 2965 * the port structure itself. We then simply walk that list of addresses 2966 * using them as keys into the hash table and remove the port from the 2967 * appropriate lists. 2968 */ 2969 static void 2970 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 2971 { 2972 vnet_mcast_msg_t *mcst_pkt; 2973 vsw_port_t *port = ldcp->ldc_port; 2974 vsw_t *vswp = ldcp->ldc_vswp; 2975 int i; 2976 2977 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2978 2979 /* 2980 * We know this is a ctrl/mcast packet so 2981 * cast it into the correct structure. 2982 */ 2983 mcst_pkt = (vnet_mcast_msg_t *)pkt; 2984 2985 switch (mcst_pkt->tag.vio_subtype) { 2986 case VIO_SUBTYPE_INFO: 2987 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2988 2989 /* 2990 * Check if in correct state to receive a multicast 2991 * message (i.e. handshake complete). If not reset 2992 * the handshake. 2993 */ 2994 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 2995 return; 2996 2997 /* 2998 * Before attempting to add or remove address check 2999 * that they are valid multicast addresses. 3000 * If not, then NACK back. 3001 */ 3002 for (i = 0; i < mcst_pkt->count; i++) { 3003 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3004 DERR(vswp, "%s: invalid multicast address", 3005 __func__); 3006 SND_MCST_NACK(ldcp, mcst_pkt); 3007 return; 3008 } 3009 } 3010 3011 /* 3012 * Now add/remove the addresses. If this fails we 3013 * NACK back. 3014 */ 3015 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3016 SND_MCST_NACK(ldcp, mcst_pkt); 3017 return; 3018 } 3019 3020 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3021 mcst_pkt->tag.vio_sid = ldcp->local_session; 3022 3023 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3024 3025 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 3026 sizeof (vnet_mcast_msg_t), B_TRUE); 3027 break; 3028 3029 case VIO_SUBTYPE_ACK: 3030 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3031 3032 /* 3033 * We shouldn't ever get a multicast ACK message as 3034 * at the moment we never request multicast addresses 3035 * to be set on some other device. This may change in 3036 * the future if we have cascading switches. 3037 */ 3038 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3039 return; 3040 3041 /* Do nothing */ 3042 break; 3043 3044 case VIO_SUBTYPE_NACK: 3045 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3046 3047 /* 3048 * We shouldn't get a multicast NACK packet for the 3049 * same reasons as we shouldn't get a ACK packet. 3050 */ 3051 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3052 return; 3053 3054 /* Do nothing */ 3055 break; 3056 3057 default: 3058 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3059 mcst_pkt->tag.vio_subtype); 3060 } 3061 3062 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3063 } 3064 3065 static void 3066 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 3067 { 3068 vio_rdx_msg_t *rdx_pkt; 3069 vsw_t *vswp = ldcp->ldc_vswp; 3070 3071 /* 3072 * We know this is a ctrl/rdx packet so 3073 * cast it into the correct structure. 
3074 */ 3075 rdx_pkt = (vio_rdx_msg_t *)pkt; 3076 3077 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3078 3079 switch (rdx_pkt->tag.vio_subtype) { 3080 case VIO_SUBTYPE_INFO: 3081 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3082 3083 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 3084 return; 3085 3086 rdx_pkt->tag.vio_sid = ldcp->local_session; 3087 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3088 3089 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3090 3091 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 3092 3093 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 3094 sizeof (vio_rdx_msg_t), B_TRUE); 3095 3096 vsw_next_milestone(ldcp); 3097 break; 3098 3099 case VIO_SUBTYPE_ACK: 3100 /* 3101 * Should be handled in-band by callback handler. 3102 */ 3103 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 3104 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3105 break; 3106 3107 case VIO_SUBTYPE_NACK: 3108 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3109 3110 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 3111 return; 3112 3113 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 3114 vsw_next_milestone(ldcp); 3115 break; 3116 3117 default: 3118 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3119 rdx_pkt->tag.vio_subtype); 3120 } 3121 3122 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3123 } 3124 3125 static void 3126 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp, 3127 uint32_t msglen) 3128 { 3129 uint16_t env = tagp->vio_subtype_env; 3130 vsw_t *vswp = ldcp->ldc_vswp; 3131 3132 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3133 3134 /* session id check */ 3135 if (ldcp->session_status & VSW_PEER_SESSION) { 3136 if (ldcp->peer_session != tagp->vio_sid) { 3137 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3138 __func__, ldcp->ldc_id, tagp->vio_sid); 3139 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3140 return; 3141 } 3142 } 3143 3144 /* 3145 * It is an error for us to be getting data packets 3146 * before the handshake has completed. 3147 */ 3148 if (ldcp->hphase != VSW_MILESTONE4) { 3149 DERR(vswp, "%s: got data packet before handshake complete " 3150 "hphase %d (%x: %x)", __func__, ldcp->hphase, 3151 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3152 DUMP_FLAGS(ldcp->lane_in.lstate); 3153 DUMP_FLAGS(ldcp->lane_out.lstate); 3154 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3155 return; 3156 } 3157 3158 /* 3159 * To reduce the locking contention, release the 3160 * ldc_cblock here and re-acquire it once we are done 3161 * receiving packets. 3162 */ 3163 mutex_exit(&ldcp->ldc_cblock); 3164 mutex_enter(&ldcp->ldc_rxlock); 3165 3166 /* 3167 * Switch on vio_subtype envelope, then let lower routines 3168 * decide if its an INFO, ACK or NACK packet. 
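 *
 * Three data paths are possible here: VIO_DRING_DATA (descriptor
 * ring mode), VIO_PKT_DATA (raw priority frames, handled via the
 * version-dependent rx_pktdata routine) and VIO_DESC_DATA (in-band
 * descriptors, typically from OBP).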
3169 */
3170 if (env == VIO_DRING_DATA) {
3171 vsw_process_data_dring_pkt(ldcp, dpkt);
3172 } else if (env == VIO_PKT_DATA) {
3173 ldcp->rx_pktdata(ldcp, dpkt, msglen);
3174 } else if (env == VIO_DESC_DATA) {
3175 vsw_process_data_ibnd_pkt(ldcp, dpkt);
3176 } else {
3177 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env);
3178 }
3179
3180 mutex_exit(&ldcp->ldc_rxlock);
3181 mutex_enter(&ldcp->ldc_cblock);
3182
3183 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
3184 }
3185
3186 #define SND_DRING_NACK(ldcp, pkt) \
3187 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
3188 pkt->tag.vio_sid = ldcp->local_session; \
3189 (void) vsw_send_msg(ldcp, (void *)pkt, \
3190 sizeof (vio_dring_msg_t), B_TRUE);
3191
3192 static void
3193 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt)
3194 {
3195 vio_dring_msg_t *dring_pkt;
3196 vnet_public_desc_t desc, *pub_addr = NULL;
3197 vsw_private_desc_t *priv_addr = NULL;
3198 dring_info_t *dp = NULL;
3199 vsw_t *vswp = ldcp->ldc_vswp;
3200 mblk_t *mp = NULL;
3201 mblk_t *bp = NULL;
3202 mblk_t *bpt = NULL;
3203 size_t nbytes = 0;
3204 uint64_t chain = 0;
3205 uint64_t len;
3206 uint32_t pos, start;
3207 uint32_t range_start, range_end;
3208 int32_t end, num, cnt = 0;
3209 int i, rv, rng_rv = 0, msg_rv = 0;
3210 boolean_t prev_desc_ack = B_FALSE;
3211 int read_attempts = 0;
3212 struct ether_header *ehp;
3213
3214 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
3215
3216 /*
3217 * We know this is a data/dring packet so
3218 * cast it into the correct structure.
3219 */
3220 dring_pkt = (vio_dring_msg_t *)dpkt;
3221
3222 /*
3223 * Switch on the vio_subtype. If it's INFO then we need to
3224 * process the data. If it's an ACK we need to make sure
3225 * it makes sense (i.e. did we send an earlier data/info msg),
3226 * and if it's a NACK then we may attempt a retry.
3227 */
3228 switch (dring_pkt->tag.vio_subtype) {
3229 case VIO_SUBTYPE_INFO:
3230 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);
3231
3232 READ_ENTER(&ldcp->lane_in.dlistrw);
3233 if ((dp = vsw_ident2dring(&ldcp->lane_in,
3234 dring_pkt->dring_ident)) == NULL) {
3235 RW_EXIT(&ldcp->lane_in.dlistrw);
3236
3237 DERR(vswp, "%s(%lld): unable to find dring from "
3238 "ident 0x%llx", __func__, ldcp->ldc_id,
3239 dring_pkt->dring_ident);
3240
3241 SND_DRING_NACK(ldcp, dring_pkt);
3242 return;
3243 }
3244
3245 start = pos = dring_pkt->start_idx;
3246 end = dring_pkt->end_idx;
3247 len = dp->num_descriptors;
3248
3249 range_start = range_end = pos;
3250
3251 D2(vswp, "%s(%lld): start index %ld : end %ld\n",
3252 __func__, ldcp->ldc_id, start, end);
3253
3254 if (end == -1) {
3255 num = -1;
3256 } else if (end >= 0) {
3257 num = end >= pos ?
end - pos + 1: (len - pos + 1) + end;
3258
3259 /* basic sanity check */
3260 if (end > len) {
3261 RW_EXIT(&ldcp->lane_in.dlistrw);
3262 DERR(vswp, "%s(%lld): endpoint %lld outside "
3263 "ring length %lld", __func__,
3264 ldcp->ldc_id, end, len);
3265
3266 SND_DRING_NACK(ldcp, dring_pkt);
3267 return;
3268 }
3269 } else {
3270 RW_EXIT(&ldcp->lane_in.dlistrw);
3271 DERR(vswp, "%s(%lld): invalid endpoint %lld",
3272 __func__, ldcp->ldc_id, end);
3273 SND_DRING_NACK(ldcp, dring_pkt);
3274 return;
3275 }
3276
3277 while (cnt != num) {
3278 vsw_recheck_desc:
3279 pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos;
3280
3281 if ((rng_rv = vnet_dring_entry_copy(pub_addr,
3282 &desc, dp->dring_mtype, dp->handle,
3283 pos, pos)) != 0) {
3284 DERR(vswp, "%s(%lld): unable to copy "
3285 "descriptor at pos %d: err %d",
3286 __func__, ldcp->ldc_id, pos, rng_rv);
3287 ldcp->ldc_stats.ierrors++;
3288 break;
3289 }
3290
3291 /*
3292 * When given a bounded range of descriptors
3293 * to process, it's an error to hit a descriptor
3294 * which is not ready. In the non-bounded case
3295 * (end_idx == -1) this simply indicates we have
3296 * reached the end of the current active range.
3297 */
3298 if (desc.hdr.dstate != VIO_DESC_READY) {
3299 /* unbound - no error */
3300 if (end == -1) {
3301 if (read_attempts == vsw_read_attempts)
3302 break;
3303
3304 delay(drv_usectohz(vsw_desc_delay));
3305 read_attempts++;
3306 goto vsw_recheck_desc;
3307 }
3308
3309 /* bounded - error - so NACK back */
3310 RW_EXIT(&ldcp->lane_in.dlistrw);
3311 DERR(vswp, "%s(%lld): descriptor not READY "
3312 "(%d)", __func__, ldcp->ldc_id,
3313 desc.hdr.dstate);
3314 SND_DRING_NACK(ldcp, dring_pkt);
3315 return;
3316 }
3317
3318 DTRACE_PROBE1(read_attempts, int, read_attempts);
3319
3320 range_end = pos;
3321
3322 /*
3323 * If we ACK'd the previous descriptor then now
3324 * record the new range start position for later
3325 * ACK's.
3326 */
3327 if (prev_desc_ack) {
3328 range_start = pos;
3329
3330 D2(vswp, "%s(%lld): updating range start to be "
3331 "%d", __func__, ldcp->ldc_id, range_start);
3332
3333 prev_desc_ack = B_FALSE;
3334 }
3335
3336 D2(vswp, "%s(%lld): processing desc %lld at pos"
3337 " 0x%llx : dstate 0x%lx : datalen 0x%lx",
3338 __func__, ldcp->ldc_id, pos, &desc,
3339 desc.hdr.dstate, desc.nbytes);
3340
3341 /*
3342 * Ensure that we ask ldc for an aligned
3343 * number of bytes. Data is padded to align on an 8
3344 * byte boundary, desc.nbytes is the actual data length,
3345 * i.e. minus that padding.
3346 */
3347 nbytes = (desc.nbytes + VNET_IPALIGN + 7) & ~7;
3348
3349 mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
3350 if (mp == NULL) {
3351 ldcp->ldc_stats.rx_vio_allocb_fail++;
3352 /*
3353 * No free receive buffers available, so
3354 * fallback onto allocb(9F). Make sure that
3355 * we get a data buffer which is a multiple
3356 * of 8 as this is required by ldc_mem_copy.
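 * (nbytes was already rounded up above to the next multiple of 8;
 * allocating desc.nbytes + VNET_IPALIGN + 8 below is always at
 * least that rounded length.)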
3357 */
3358 DTRACE_PROBE(allocb);
3359 if ((mp = allocb(desc.nbytes + VNET_IPALIGN + 8,
3360 BPRI_MED)) == NULL) {
3361 DERR(vswp, "%s(%ld): allocb failed",
3362 __func__, ldcp->ldc_id);
3363 rng_rv = vnet_dring_entry_set_dstate(
3364 pub_addr, dp->dring_mtype,
3365 dp->handle, pos, pos,
3366 VIO_DESC_DONE);
3367 ldcp->ldc_stats.ierrors++;
3368 ldcp->ldc_stats.rx_allocb_fail++;
3369 break;
3370 }
3371 }
3372
3373 rv = ldc_mem_copy(ldcp->ldc_handle,
3374 (caddr_t)mp->b_rptr, 0, &nbytes,
3375 desc.memcookie, desc.ncookies, LDC_COPY_IN);
3376 if (rv != 0) {
3377 DERR(vswp, "%s(%d): unable to copy in data "
3378 "from %d cookies in desc %d (rv %d)",
3379 __func__, ldcp->ldc_id, desc.ncookies,
3380 pos, rv);
3381 freemsg(mp);
3382
3383 rng_rv = vnet_dring_entry_set_dstate(pub_addr,
3384 dp->dring_mtype, dp->handle, pos, pos,
3385 VIO_DESC_DONE);
3386 ldcp->ldc_stats.ierrors++;
3387 break;
3388 } else {
3389 D2(vswp, "%s(%d): copied in %ld bytes"
3390 " using %d cookies", __func__,
3391 ldcp->ldc_id, nbytes, desc.ncookies);
3392 }
3393
3394 /* adjust the read pointer to skip over the padding */
3395 mp->b_rptr += VNET_IPALIGN;
3396
3397 /* point to the actual end of data */
3398 mp->b_wptr = mp->b_rptr + desc.nbytes;
3399
3400 /* update statistics */
3401 ehp = (struct ether_header *)mp->b_rptr;
3402 if (IS_BROADCAST(ehp))
3403 ldcp->ldc_stats.brdcstrcv++;
3404 else if (IS_MULTICAST(ehp))
3405 ldcp->ldc_stats.multircv++;
3406
3407 ldcp->ldc_stats.ipackets++;
3408 ldcp->ldc_stats.rbytes += desc.nbytes;
3409
3410 /*
3411 * IPALIGN space can be used for VLAN_TAG
3412 */
3413 (void) vsw_vlan_frame_pretag(ldcp->ldc_port,
3414 VSW_VNETPORT, mp);
3415
3416 /* build a chain of received packets */
3417 if (bp == NULL) {
3418 /* first pkt */
3419 bp = mp;
3420 bp->b_next = bp->b_prev = NULL;
3421 bpt = bp;
3422 chain = 1;
3423 } else {
3424 mp->b_next = mp->b_prev = NULL;
3425 bpt->b_next = mp;
3426 bpt = mp;
3427 chain++;
3428 }
3429
3430 /* mark we are finished with this descriptor */
3431 if ((rng_rv = vnet_dring_entry_set_dstate(pub_addr,
3432 dp->dring_mtype, dp->handle, pos, pos,
3433 VIO_DESC_DONE)) != 0) {
3434 DERR(vswp, "%s(%lld): unable to update "
3435 "dstate at pos %d: err %d",
3436 __func__, ldcp->ldc_id, pos, rng_rv);
3437 ldcp->ldc_stats.ierrors++;
3438 break;
3439 }
3440
3441 /*
3442 * Send an ACK back to peer if requested.
3443 */
3444 if (desc.hdr.ack) {
3445 dring_pkt->start_idx = range_start;
3446 dring_pkt->end_idx = range_end;
3447
3448 DERR(vswp, "%s(%lld): processed %d %d, ACK"
3449 " requested", __func__, ldcp->ldc_id,
3450 dring_pkt->start_idx, dring_pkt->end_idx);
3451
3452 dring_pkt->dring_process_state = VIO_DP_ACTIVE;
3453 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
3454 dring_pkt->tag.vio_sid = ldcp->local_session;
3455
3456 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
3457 sizeof (vio_dring_msg_t), B_FALSE);
3458
3459 /*
3460 * Check if ACK was successfully sent. If not
3461 * we break and deal with that below.
3462 */
3463 if (msg_rv != 0)
3464 break;
3465
3466 prev_desc_ack = B_TRUE;
3467 range_start = pos;
3468 }
3469
3470 /* next descriptor */
3471 pos = (pos + 1) % len;
3472 cnt++;
3473
3474 /*
3475 * Break out of loop here and stop processing to
3476 * allow some other network device (or disk) to
3477 * get access to the cpu.
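 * Barring errors, we still fall through after an early break to
 * send the VIO_DP_STOPPED ACK below, so the peer knows where we
 * stopped and can prompt us with a fresh INFO msg to resume.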
3478 */
3479 if (chain > vsw_chain_len) {
3480 D3(vswp, "%s(%lld): switching chain of %d "
3481 "msgs", __func__, ldcp->ldc_id, chain);
3482 break;
3483 }
3484 }
3485 RW_EXIT(&ldcp->lane_in.dlistrw);
3486
3487 /* send the chain of packets to be switched */
3488 if (bp != NULL) {
3489 DTRACE_PROBE1(vsw_rcv_msgs, int, chain);
3490 D3(vswp, "%s(%lld): switching chain of %d msgs",
3491 __func__, ldcp->ldc_id, chain);
3492 vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT,
3493 ldcp->ldc_port, NULL);
3494 }
3495
3496 /*
3497 * If we encountered an error when attempting to
3498 * access an imported dring, initiate a connection reset.
3499 */
3500 if (rng_rv != 0) {
3501 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
3502 break;
3503 }
3504
3505 /*
3506 * If, when we attempted to send the ACK, we found that the
3507 * channel had been reset then handle this now. We deal with
3508 * it here as we cannot reset the channel while holding the
3509 * dlistrw lock, and we don't want to acquire/release it
3510 * continuously in the above loop, as a channel reset should
3511 * be a rare event.
3512 */
3513 if (msg_rv == ECONNRESET) {
3514 vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
3515 break;
3516 }
3517
3518 DTRACE_PROBE1(msg_cnt, int, cnt);
3519
3520 /*
3521 * We are now finished so ACK back with the state
3522 * set to STOPPED so our peer knows we are finished.
3523 */
3524 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
3525 dring_pkt->tag.vio_sid = ldcp->local_session;
3526
3527 dring_pkt->dring_process_state = VIO_DP_STOPPED;
3528
3529 DTRACE_PROBE(stop_process_sent);
3530
3531 /*
3532 * We have not processed any more descriptors beyond
3533 * the last one we ACK'd.
3534 */
3535 if (prev_desc_ack)
3536 range_start = range_end;
3537
3538 dring_pkt->start_idx = range_start;
3539 dring_pkt->end_idx = range_end;
3540
3541 D2(vswp, "%s(%lld) processed : %d : %d, now stopping",
3542 __func__, ldcp->ldc_id, dring_pkt->start_idx,
3543 dring_pkt->end_idx);
3544
3545 (void) vsw_send_msg(ldcp, (void *)dring_pkt,
3546 sizeof (vio_dring_msg_t), B_TRUE);
3547 break;
3548
3549 case VIO_SUBTYPE_ACK:
3550 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id);
3551 /*
3552 * Verify that the relevant descriptors are all
3553 * marked as DONE.
3554 */
3555 READ_ENTER(&ldcp->lane_out.dlistrw);
3556 if ((dp = vsw_ident2dring(&ldcp->lane_out,
3557 dring_pkt->dring_ident)) == NULL) {
3558 RW_EXIT(&ldcp->lane_out.dlistrw);
3559 DERR(vswp, "%s: unknown ident in ACK", __func__);
3560 return;
3561 }
3562
3563 start = end = 0;
3564 start = dring_pkt->start_idx;
3565 end = dring_pkt->end_idx;
3566 len = dp->num_descriptors;
3567
3568
3569 mutex_enter(&dp->dlock);
3570 dp->last_ack_recv = end;
3571 ldcp->ldc_stats.dring_data_acks++;
3572 mutex_exit(&dp->dlock);
3573
3574 (void) vsw_reclaim_dring(dp, start);
3575
3576 /*
3577 * If our peer is stopping processing descriptors then
3578 * we check to make sure it has processed all the descriptors
3579 * we have updated. If not then we send it a new message
3580 * to prompt it to restart.
3581 */
3582 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) {
3583 DTRACE_PROBE(stop_process_recv);
3584 D2(vswp, "%s(%lld): got stopping msg : %d : %d",
3585 __func__, ldcp->ldc_id, dring_pkt->start_idx,
3586 dring_pkt->end_idx);
3587
3588 /*
3589 * Check the next descriptor in the public section of
3590 * the ring. If it's marked as READY then we need to
3591 * prompt our peer to start processing the ring again.
3592 */
3593 i = (end + 1) % len;
3594 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
3595 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
3596
3597 /*
3598 * Hold the restart lock across all of this to
3599 * make sure that it is not possible for us to
3600 * decide that a msg needs to be sent in the future
3601 * while the sending code, having already checked,
3602 * is about to exit.
3603 */
3604 mutex_enter(&dp->restart_lock);
3605 ldcp->ldc_stats.dring_stopped_acks++;
3606 mutex_enter(&priv_addr->dstate_lock);
3607 if (pub_addr->hdr.dstate == VIO_DESC_READY) {
3608
3609 mutex_exit(&priv_addr->dstate_lock);
3610
3611 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
3612 dring_pkt->tag.vio_sid = ldcp->local_session;
3613
3614 dring_pkt->start_idx = (end + 1) % len;
3615 dring_pkt->end_idx = -1;
3616
3617 D2(vswp, "%s(%lld) : sending restart msg:"
3618 " %d : %d", __func__, ldcp->ldc_id,
3619 dring_pkt->start_idx, dring_pkt->end_idx);
3620
3621 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
3622 sizeof (vio_dring_msg_t), B_FALSE);
3623 ldcp->ldc_stats.dring_data_msgs++;
3624
3625 } else {
3626 mutex_exit(&priv_addr->dstate_lock);
3627 dp->restart_reqd = B_TRUE;
3628 }
3629 mutex_exit(&dp->restart_lock);
3630 }
3631 RW_EXIT(&ldcp->lane_out.dlistrw);
3632
3633 /* only do channel reset after dropping dlistrw lock */
3634 if (msg_rv == ECONNRESET)
3635 vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
3636
3637 break;
3638
3639 case VIO_SUBTYPE_NACK:
3640 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK",
3641 __func__, ldcp->ldc_id);
3642 /*
3643 * Something is badly wrong if we are getting NACKs
3644 * for our data pkts. So reset the channel.
3645 */
3646 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
3647
3648 break;
3649
3650 default:
3651 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
3652 ldcp->ldc_id, dring_pkt->tag.vio_subtype);
3653 }
3654
3655 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
3656 }
3657
3658 /*
3659 * Dummy pkt data handler function for vnet protocol version 1.0.
3660 */
3661 static void
3662 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
3663 {
3664 _NOTE(ARGUNUSED(arg1, arg2, msglen))
3665 }
3666
3667 /*
3668 * This function handles raw pkt data messages received over the channel.
3669 * Currently, only priority-eth-type frames are received through this
3670 * mechanism. In this case, the frame (data) is present within the message
3671 * itself and is copied into an mblk before being switched.
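 *
 * VLAN_TAGSZ bytes of headroom are reserved in front of the copied
 * frame so that vsw_vlan_frame_pretag() can insert a tag in place
 * should the frame need one.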
3672 */ 3673 static void 3674 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 3675 { 3676 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 3677 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 3678 uint32_t size; 3679 mblk_t *mp; 3680 vsw_t *vswp = ldcp->ldc_vswp; 3681 vgen_stats_t *statsp = &ldcp->ldc_stats; 3682 lane_t *lp = &ldcp->lane_out; 3683 3684 size = msglen - VIO_PKT_DATA_HDRSIZE; 3685 if (size < ETHERMIN || size > lp->mtu) { 3686 (void) atomic_inc_32(&statsp->rx_pri_fail); 3687 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3688 ldcp->ldc_id, size); 3689 return; 3690 } 3691 3692 mp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 3693 if (mp == NULL) { 3694 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 3695 if (mp == NULL) { 3696 (void) atomic_inc_32(&statsp->rx_pri_fail); 3697 DWARN(vswp, "%s(%lld) allocb failure, " 3698 "unable to process priority frame\n", __func__, 3699 ldcp->ldc_id); 3700 return; 3701 } 3702 } 3703 3704 /* skip over the extra space for vlan tag */ 3705 mp->b_rptr += VLAN_TAGSZ; 3706 3707 /* copy the frame from the payload of raw data msg into the mblk */ 3708 bcopy(dpkt->data, mp->b_rptr, size); 3709 mp->b_wptr = mp->b_rptr + size; 3710 3711 /* update stats */ 3712 (void) atomic_inc_64(&statsp->rx_pri_packets); 3713 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 3714 3715 /* 3716 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 3717 */ 3718 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3719 3720 /* switch the frame to destination */ 3721 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 3722 } 3723 3724 /* 3725 * Process an in-band descriptor message (most likely from 3726 * OBP). 3727 */ 3728 static void 3729 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3730 { 3731 vnet_ibnd_desc_t *ibnd_desc; 3732 dring_info_t *dp = NULL; 3733 vsw_private_desc_t *priv_addr = NULL; 3734 vsw_t *vswp = ldcp->ldc_vswp; 3735 mblk_t *mp = NULL; 3736 size_t nbytes = 0; 3737 size_t off = 0; 3738 uint64_t idx = 0; 3739 uint32_t num = 1, len, datalen = 0; 3740 uint64_t ncookies = 0; 3741 int i, rv; 3742 int j = 0; 3743 3744 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3745 3746 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3747 3748 switch (ibnd_desc->hdr.tag.vio_subtype) { 3749 case VIO_SUBTYPE_INFO: 3750 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3751 3752 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3753 return; 3754 3755 /* 3756 * Data is padded to align on a 8 byte boundary, 3757 * nbytes is actual data length, i.e. minus that 3758 * padding. 3759 */ 3760 datalen = ibnd_desc->nbytes; 3761 3762 D2(vswp, "%s(%lld): processing inband desc : " 3763 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 3764 3765 ncookies = ibnd_desc->ncookies; 3766 3767 /* 3768 * allocb(9F) returns an aligned data block. We 3769 * need to ensure that we ask ldc for an aligned 3770 * number of bytes also. 
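 *
 * For example, a hypothetical datalen of 60 gives 60 & 0x7 == 4,
 * so off = 4 and we ask ldc_mem_copy() for nbytes = 64; the pad
 * bytes are excluded again when b_wptr is set from datalen below.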
3771 */
3772 nbytes = datalen;
3773 if (nbytes & 0x7) {
3774 off = 8 - (nbytes & 0x7);
3775 nbytes += off;
3776 }
3777
3778 /* alloc extra space for VLAN_TAG */
3779 mp = allocb(datalen + 8, BPRI_MED);
3780 if (mp == NULL) {
3781 DERR(vswp, "%s(%lld): allocb failed",
3782 __func__, ldcp->ldc_id);
3783 ldcp->ldc_stats.rx_allocb_fail++;
3784 return;
3785 }
3786
3787 /* skip over the extra space for VLAN_TAG */
3788 mp->b_rptr += 8;
3789
3790 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr,
3791 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies,
3792 LDC_COPY_IN);
3793
3794 if (rv != 0) {
3795 DERR(vswp, "%s(%d): unable to copy in data from "
3796 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies);
3797 freemsg(mp);
3798 ldcp->ldc_stats.ierrors++;
3799 return;
3800 }
3801
3802 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies",
3803 __func__, ldcp->ldc_id, nbytes, ncookies);
3804
3805 /* point to the actual end of data */
3806 mp->b_wptr = mp->b_rptr + datalen;
3807 ldcp->ldc_stats.ipackets++;
3808 ldcp->ldc_stats.rbytes += datalen;
3809
3810 /*
3811 * We ACK back every in-band descriptor message we process.
3812 */
3813 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK;
3814 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session;
3815 (void) vsw_send_msg(ldcp, (void *)ibnd_desc,
3816 sizeof (vnet_ibnd_desc_t), B_TRUE);
3817
3818 /*
3819 * there is extra space alloc'd for VLAN_TAG
3820 */
3821 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp);
3822
3823 /* send the packet to be switched */
3824 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT,
3825 ldcp->ldc_port, NULL);
3826
3827 break;
3828
3829 case VIO_SUBTYPE_ACK:
3830 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
3831
3832 /* Verify the ACK is valid */
3833 idx = ibnd_desc->hdr.desc_handle;
3834
3835 if (idx >= vsw_ntxds) {
3836 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received "
3837 "(idx %ld)", vswp->instance, idx);
3838 return;
3839 }
3840
3841 if ((dp = ldcp->lane_out.dringp) == NULL) {
3842 DERR(vswp, "%s: no dring found", __func__);
3843 return;
3844 }
3845
3846 len = dp->num_descriptors;
3847 /*
3848 * If the descriptor we are being ACK'ed for is not the
3849 * one we expected, then pkts were lost somewhere, either
3850 * in a msg we tried to send, or in a previous ACK msg from
3851 * our peer. In either case we now reclaim the descriptors
3852 * in the range from the last ACK we received up to the
3853 * current ACK.
3854 */
3855 if (idx != dp->last_ack_recv) {
3856 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)",
3857 __func__, dp->last_ack_recv, idx);
3858 num = idx >= dp->last_ack_recv ?
3859 idx - dp->last_ack_recv + 1:
3860 (len - dp->last_ack_recv + 1) + idx;
3861 }
3862
3863 /*
3864 * When we sent the in-band message to our peer we
3865 * marked the copy in our private ring as READY. We now
3866 * check that the descriptor we are being ACK'ed for is in
3867 * fact READY, i.e. it is one we have shared with our peer.
3868 *
3869 * If it's not we flag an error, but still reset the descriptor
3870 * back to FREE.
3871 */
3872 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) {
3873 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
3874 mutex_enter(&priv_addr->dstate_lock);
3875 if (priv_addr->dstate != VIO_DESC_READY) {
3876 DERR(vswp, "%s: (%ld) desc at index %ld not "
3877 "READY (0x%lx)", __func__,
3878 ldcp->ldc_id, idx, priv_addr->dstate);
3879 DERR(vswp, "%s: bound %d: ncookies %ld : "
3880 "datalen %ld", __func__,
3881 priv_addr->bound, priv_addr->ncookies,
3882 priv_addr->datalen);
3883 }
3884 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__,
3885 ldcp->ldc_id, idx);
3886 /* release resources associated with sent msg */
3887 priv_addr->datalen = 0;
3888 priv_addr->dstate = VIO_DESC_FREE;
3889 mutex_exit(&priv_addr->dstate_lock);
3890 }
3891 /* update to next expected value */
3892 dp->last_ack_recv = (idx + 1) % dp->num_descriptors;
3893
3894 break;
3895
3896 case VIO_SUBTYPE_NACK:
3897 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
3898
3899 /*
3900 * We should only get a NACK if our peer doesn't like
3901 * something about a message we have sent it. If this
3902 * happens we just release the resources associated with
3903 * the message. (We are relying on higher layers to decide
3904 * whether or not to resend.)
3905 */
3906
3907 /* limit check */
3908 idx = ibnd_desc->hdr.desc_handle;
3909
3910 if (idx >= vsw_ntxds) {
3911 DERR(vswp, "%s: corrupted NACK received (idx %lld)",
3912 __func__, idx);
3913 return;
3914 }
3915
3916 if ((dp = ldcp->lane_out.dringp) == NULL) {
3917 DERR(vswp, "%s: no dring found", __func__);
3918 return;
3919 }
3920
3921 priv_addr = (vsw_private_desc_t *)dp->priv_addr;
3922
3923 /* move to correct location in ring */
3924 priv_addr += idx;
3925
3926 /* release resources associated with sent msg */
3927 mutex_enter(&priv_addr->dstate_lock);
3928 priv_addr->datalen = 0;
3929 priv_addr->dstate = VIO_DESC_FREE;
3930 mutex_exit(&priv_addr->dstate_lock);
3931
3932 break;
3933
3934 default:
3935 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
3936 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype);
3937 }
3938
3939 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
3940 }
3941
3942 static void
3943 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp)
3944 {
3945 _NOTE(ARGUNUSED(epkt))
3946
3947 vsw_t *vswp = ldcp->ldc_vswp;
3948 uint16_t env = tagp->vio_subtype_env;
3949
3950 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
3951
3952 /*
3953 * Error vio_subtypes have yet to be defined. So for
3954 * the moment we can't do anything.
3955 */
3956 D2(vswp, "%s: (%x) vio_subtype env", __func__, env);
3957
3958 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
3959 }
3960
3961 /* transmit the packet over the given port */
3962 int
3963 vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count)
3964 {
3965 vsw_ldc_list_t *ldcl = &port->p_ldclist;
3966 vsw_ldc_t *ldcp;
3967 int status = 0;
3968 uint32_t n;
3969
3970 READ_ENTER(&ldcl->lockrw);
3971 /*
3972 * Note for now, we have a single channel.
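 *
 * vsw_vlan_frame_untag() below adjusts the chain for this port's
 * vlan configuration and returns the number of frames it removed,
 * which is deducted from count before the remainder is handed to
 * the channel's version-dependent tx routine.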
3973 */
3974 ldcp = ldcl->head;
3975 if (ldcp == NULL) {
3976 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n");
3977 freemsgchain(mp);
3978 RW_EXIT(&ldcl->lockrw);
3979 return (1);
3980 }
3981
3982 n = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt);
3983
3984 count -= n;
3985 if (count == 0) {
3986 goto vsw_portsend_exit;
3987 }
3988
3989 status = ldcp->tx(ldcp, mp, mpt, count);
3990
3991 vsw_portsend_exit:
3992 RW_EXIT(&ldcl->lockrw);
3993
3994 return (status);
3995 }
3996
3997 /*
3998 * Break up frames into two separate chains: normal and
3999 * priority, based on the frame type. The number of
4000 * priority frames is also counted and returned.
4001 *
4002 * Params:
4003 * vswp: pointer to the instance of vsw
4004 * np: head of packet chain to be broken
4005 * npt: tail of packet chain to be broken
4006 *
4007 * Returns:
4008 * np: head of normal data packets
4009 * npt: tail of normal data packets
4010 * hp: head of high priority packets
4011 * hpt: tail of high priority packets
4012 */
4013 static uint32_t
4014 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt,
4015 mblk_t **hp, mblk_t **hpt)
4016 {
4017 mblk_t *tmp = NULL;
4018 mblk_t *smp = NULL;
4019 mblk_t *hmp = NULL; /* high prio pkts head */
4020 mblk_t *hmpt = NULL; /* high prio pkts tail */
4021 mblk_t *nmp = NULL; /* normal pkts head */
4022 mblk_t *nmpt = NULL; /* normal pkts tail */
4023 uint32_t count = 0;
4024 int i;
4025 struct ether_header *ehp;
4026 uint32_t num_types;
4027 uint16_t *types;
4028
4029 tmp = *np;
4030 while (tmp != NULL) {
4031
4032 smp = tmp;
4033 tmp = tmp->b_next;
4034 smp->b_next = NULL;
4035 smp->b_prev = NULL;
4036
4037 ehp = (struct ether_header *)smp->b_rptr;
4038 num_types = vswp->pri_num_types;
4039 types = vswp->pri_types;
4040 for (i = 0; i < num_types; i++) {
4041 if (ehp->ether_type == types[i]) {
4042 /* high priority frame */
4043
4044 if (hmp != NULL) {
4045 hmpt->b_next = smp;
4046 hmpt = smp;
4047 } else {
4048 hmp = hmpt = smp;
4049 }
4050 count++;
4051 break;
4052 }
4053 }
4054 if (i == num_types) {
4055 /* normal data frame */
4056
4057 if (nmp != NULL) {
4058 nmpt->b_next = smp;
4059 nmpt = smp;
4060 } else {
4061 nmp = nmpt = smp;
4062 }
4063 }
4064 }
4065
4066 *hp = hmp;
4067 *hpt = hmpt;
4068 *np = nmp;
4069 *npt = nmpt;
4070
4071 return (count);
4072 }
4073
4074 /*
4075 * Wrapper function to transmit normal and/or priority frames over the channel.
4076 */
4077 static int
4078 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count)
4079 {
4080 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
4081 mblk_t *tmp;
4082 mblk_t *smp;
4083 mblk_t *hmp; /* high prio pkts head */
4084 mblk_t *hmpt; /* high prio pkts tail */
4085 mblk_t *nmp; /* normal pkts head */
4086 mblk_t *nmpt; /* normal pkts tail */
4087 uint32_t n = 0;
4088 vsw_t *vswp = ldcp->ldc_vswp;
4089
4090 ASSERT(VSW_PRI_ETH_DEFINED(vswp));
4091 ASSERT(count != 0);
4092
4093 nmp = mp;
4094 nmpt = mpt;
4095
4096 /* gather any priority frames from the chain of packets */
4097 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt);
4098
4099 /* transmit priority frames */
4100 tmp = hmp;
4101 while (tmp != NULL) {
4102 smp = tmp;
4103 tmp = tmp->b_next;
4104 smp->b_next = NULL;
4105 vsw_ldcsend_pkt(ldcp, smp);
4106 }
4107
4108 count -= n;
4109
4110 if (count == 0) {
4111 /* no normal data frames to process */
4112 return (0);
4113 }
4114
4115 return (vsw_ldctx(ldcp, nmp, nmpt, count));
4116 }
4117
4118 /*
4119 * Wrapper function to transmit normal frames over the channel.
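 *
 * A sketch of the hand-off implemented below: if a dedicated TX
 * thread exists, frames are appended to the tx_mhead/tx_mtail
 * chain under tx_thr_lock and vsw_ldc_tx_worker() is woken via
 * tx_thr_cv; otherwise the chain is sent synchronously, one mblk
 * at a time, via vsw_ldcsend().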
4120 */ 4121 static int 4122 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4123 { 4124 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4125 mblk_t *tmp = NULL; 4126 4127 ASSERT(count != 0); 4128 /* 4129 * If the TX thread is enabled, then queue the 4130 * ordinary frames and signal the tx thread. 4131 */ 4132 if (ldcp->tx_thread != NULL) { 4133 4134 mutex_enter(&ldcp->tx_thr_lock); 4135 4136 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 4137 /* 4138 * If we reached queue limit, 4139 * do not queue new packets, 4140 * drop them. 4141 */ 4142 ldcp->ldc_stats.tx_qfull += count; 4143 mutex_exit(&ldcp->tx_thr_lock); 4144 freemsgchain(mp); 4145 goto exit; 4146 } 4147 if (ldcp->tx_mhead == NULL) { 4148 ldcp->tx_mhead = mp; 4149 ldcp->tx_mtail = mpt; 4150 cv_signal(&ldcp->tx_thr_cv); 4151 } else { 4152 ldcp->tx_mtail->b_next = mp; 4153 ldcp->tx_mtail = mpt; 4154 } 4155 ldcp->tx_cnt += count; 4156 mutex_exit(&ldcp->tx_thr_lock); 4157 } else { 4158 while (mp != NULL) { 4159 tmp = mp->b_next; 4160 mp->b_next = mp->b_prev = NULL; 4161 (void) vsw_ldcsend(ldcp, mp, 1); 4162 mp = tmp; 4163 } 4164 } 4165 4166 exit: 4167 return (0); 4168 } 4169 4170 /* 4171 * This function transmits the frame in the payload of a raw data 4172 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 4173 * send special frames with high priorities, without going through 4174 * the normal data path which uses descriptor ring mechanism. 4175 */ 4176 static void 4177 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp) 4178 { 4179 vio_raw_data_msg_t *pkt; 4180 mblk_t *bp; 4181 mblk_t *nmp = NULL; 4182 caddr_t dst; 4183 uint32_t mblksz; 4184 uint32_t size; 4185 uint32_t nbytes; 4186 int rv; 4187 vsw_t *vswp = ldcp->ldc_vswp; 4188 vgen_stats_t *statsp = &ldcp->ldc_stats; 4189 4190 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4191 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4192 (void) atomic_inc_32(&statsp->tx_pri_fail); 4193 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4194 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4195 ldcp->lane_out.lstate); 4196 goto send_pkt_exit; 4197 } 4198 4199 size = msgsize(mp); 4200 4201 /* frame size bigger than available payload len of raw data msg ? 
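 * (A raw data message is a vio_raw_data_msg_t header followed by
 * the frame, so the usable payload is msglen - VIO_PKT_DATA_HDRSIZE
 * bytes, as checked below.)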
*/
4202 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
4203 (void) atomic_inc_32(&statsp->tx_pri_fail);
4204 DWARN(vswp, "%s(%lld) invalid size (%d)\n", __func__,
4205 ldcp->ldc_id, size);
4206 goto send_pkt_exit;
4207 }
4208
4209 if (size < ETHERMIN)
4210 size = ETHERMIN;
4211
4212 /* alloc space for a raw data message */
4213 nmp = vio_allocb(vswp->pri_tx_vmp);
4214 if (nmp == NULL) {
4215 (void) atomic_inc_32(&statsp->tx_pri_fail);
4216 DWARN(vswp, "vio_allocb failed\n");
4217 goto send_pkt_exit;
4218 }
4219 pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
4220
4221 /* copy frame into the payload of raw data message */
4222 dst = (caddr_t)pkt->data;
4223 for (bp = mp; bp != NULL; bp = bp->b_cont) {
4224 mblksz = MBLKL(bp);
4225 bcopy(bp->b_rptr, dst, mblksz);
4226 dst += mblksz;
4227 }
4228
4229 /* setup the raw data msg */
4230 pkt->tag.vio_msgtype = VIO_TYPE_DATA;
4231 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
4232 pkt->tag.vio_subtype_env = VIO_PKT_DATA;
4233 pkt->tag.vio_sid = ldcp->local_session;
4234 nbytes = VIO_PKT_DATA_HDRSIZE + size;
4235
4236 /* send the msg over ldc */
4237 rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE);
4238 if (rv != 0) {
4239 (void) atomic_inc_32(&statsp->tx_pri_fail);
4240 DWARN(vswp, "%s(%lld) Error sending priority frame\n", __func__,
4241 ldcp->ldc_id);
4242 goto send_pkt_exit;
4243 }
4244
4245 /* update stats */
4246 (void) atomic_inc_64(&statsp->tx_pri_packets);
4247 (void) atomic_add_64(&statsp->tx_pri_bytes, size);
4248
4249 send_pkt_exit:
4250 if (nmp != NULL)
4251 freemsg(nmp);
4252 freemsg(mp);
4253 }
4254
4255 /*
4256 * Transmit the packet over the given LDC channel.
4257 *
4258 * The 'retries' argument indicates how many times a packet
4259 * is retried before it is dropped. Note, the retry is done
4260 * only for a resource related failure, for all other failures
4261 * the packet is dropped immediately.
4262 */
4263 static int
4264 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries)
4265 {
4266 int i;
4267 int rc;
4268 int status = 0;
4269 vsw_port_t *port = ldcp->ldc_port;
4270 dring_info_t *dp = NULL;
4271
4272
4273 for (i = 0; i < retries; ) {
4274 /*
4275 * Send the message out using the appropriate
4276 * transmit function, which will free the mblk when it
4277 * is finished with it.
4278 */
4279 mutex_enter(&port->tx_lock);
4280 if (port->transmit != NULL) {
4281 status = (*port->transmit)(ldcp, mp);
4282 }
4283 if (status == LDC_TX_SUCCESS) {
4284 mutex_exit(&port->tx_lock);
4285 break;
4286 }
4287 i++; /* increment the counter here */
4288
4289 /* If it's the last retry, then update the oerrors count */
4290 if ((i == retries) && (status == LDC_TX_NORESOURCES)) {
4291 ldcp->ldc_stats.oerrors++;
4292 }
4293 mutex_exit(&port->tx_lock);
4294
4295 if (status != LDC_TX_NORESOURCES) {
4296 /*
4297 * No retrying required for errors unrelated
4298 * to resources.
4299 */
4300 break;
4301 }
4302 READ_ENTER(&ldcp->lane_out.dlistrw);
4303 if (((dp = ldcp->lane_out.dringp) != NULL) &&
4304 ((VSW_VER_GTEQ(ldcp, 1, 2) &&
4305 (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) ||
4306 ((VSW_VER_LT(ldcp, 1, 2) &&
4307 (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) {
4308 rc = vsw_reclaim_dring(dp, dp->end_idx);
4309 } else {
4310 /*
4311 * If there is no dring or the xfer_mode is
4312 * set to DESC_MODE (i.e., OBP), then simply break here.
4313 */
4314 RW_EXIT(&ldcp->lane_out.dlistrw);
4315 break;
4316 }
4317 RW_EXIT(&ldcp->lane_out.dlistrw);
4318
4319 /*
4320 * Delay only if none were reclaimed
4321 * and it's not the last retry.
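 *
 * (vsw_ldc_tx_delay is in microseconds, so the worst-case stall
 * for a single packet is roughly retries * vsw_ldc_tx_delay.)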
4322 */ 4323 if ((rc == 0) && (i < retries)) { 4324 delay(drv_usectohz(vsw_ldc_tx_delay)); 4325 } 4326 } 4327 freemsg(mp); 4328 return (status); 4329 } 4330 4331 /* 4332 * Send packet out via descriptor ring to a logical device. 4333 */ 4334 static int 4335 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 4336 { 4337 vio_dring_msg_t dring_pkt; 4338 dring_info_t *dp = NULL; 4339 vsw_private_desc_t *priv_desc = NULL; 4340 vnet_public_desc_t *pub = NULL; 4341 vsw_t *vswp = ldcp->ldc_vswp; 4342 mblk_t *bp; 4343 size_t n, size; 4344 caddr_t bufp; 4345 int idx; 4346 int status = LDC_TX_SUCCESS; 4347 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 4348 lane_t *lp = &ldcp->lane_out; 4349 4350 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 4351 4352 /* TODO: make test a macro */ 4353 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4354 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4355 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4356 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4357 ldcp->lane_out.lstate); 4358 ldcp->ldc_stats.oerrors++; 4359 return (LDC_TX_FAILURE); 4360 } 4361 4362 /* 4363 * Note - using first ring only, this may change 4364 * in the future. 4365 */ 4366 READ_ENTER(&ldcp->lane_out.dlistrw); 4367 if ((dp = ldcp->lane_out.dringp) == NULL) { 4368 RW_EXIT(&ldcp->lane_out.dlistrw); 4369 DERR(vswp, "%s(%lld): no dring for outbound lane on" 4370 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 4371 ldcp->ldc_stats.oerrors++; 4372 return (LDC_TX_FAILURE); 4373 } 4374 4375 size = msgsize(mp); 4376 if (size > (size_t)lp->mtu) { 4377 RW_EXIT(&ldcp->lane_out.dlistrw); 4378 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4379 ldcp->ldc_id, size); 4380 ldcp->ldc_stats.oerrors++; 4381 return (LDC_TX_FAILURE); 4382 } 4383 4384 /* 4385 * Find a free descriptor 4386 * 4387 * Note: for the moment we are assuming that we will only 4388 * have one dring going from the switch to each of its 4389 * peers. This may change in the future. 4390 */ 4391 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4392 D2(vswp, "%s(%lld): no descriptor available for ring " 4393 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4394 4395 /* nothing more we can do */ 4396 status = LDC_TX_NORESOURCES; 4397 ldcp->ldc_stats.tx_no_desc++; 4398 goto vsw_dringsend_free_exit; 4399 } else { 4400 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 4401 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 4402 } 4403 4404 /* copy data into the descriptor */ 4405 bufp = priv_desc->datap; 4406 bufp += VNET_IPALIGN; 4407 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4408 n = MBLKL(bp); 4409 bcopy(bp->b_rptr, bufp, n); 4410 bufp += n; 4411 } 4412 4413 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4414 4415 pub = priv_desc->descp; 4416 pub->nbytes = priv_desc->datalen; 4417 4418 /* update statistics */ 4419 if (IS_BROADCAST(ehp)) 4420 ldcp->ldc_stats.brdcstxmt++; 4421 else if (IS_MULTICAST(ehp)) 4422 ldcp->ldc_stats.multixmt++; 4423 ldcp->ldc_stats.opackets++; 4424 ldcp->ldc_stats.obytes += priv_desc->datalen; 4425 4426 mutex_enter(&priv_desc->dstate_lock); 4427 pub->hdr.dstate = VIO_DESC_READY; 4428 mutex_exit(&priv_desc->dstate_lock); 4429 4430 /* 4431 * Determine whether or not we need to send a message to our 4432 * peer prompting them to read our newly updated descriptor(s). 
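 *
 * In effect restart_reqd throttles these messages: it is set again
 * only when the peer indicates it has stopped processing the ring,
 * so at most one unsolicited dring data message is outstanding;
 * until then the peer picks up newly READY descriptors on its
 * current sweep.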
4433 */
4434 mutex_enter(&dp->restart_lock);
4435 if (dp->restart_reqd) {
4436 dp->restart_reqd = B_FALSE;
4437 ldcp->ldc_stats.dring_data_msgs++;
4438 mutex_exit(&dp->restart_lock);
4439
4440 /*
4441 * Send a vio_dring_msg to peer to prompt them to read
4442 * the updated descriptor ring.
4443 */
4444 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA;
4445 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
4446 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA;
4447 dring_pkt.tag.vio_sid = ldcp->local_session;
4448
4449 /* Note - for now using first ring */
4450 dring_pkt.dring_ident = dp->ident;
4451
4452 /*
4453 * If last_ack_recv is -1 then we know we've not
4454 * received any ACKs yet, so this must be the first
4455 * msg sent, so set the start to the beginning of the ring.
4456 */
4457 mutex_enter(&dp->dlock);
4458 if (dp->last_ack_recv == -1) {
4459 dring_pkt.start_idx = 0;
4460 } else {
4461 dring_pkt.start_idx =
4462 (dp->last_ack_recv + 1) % dp->num_descriptors;
4463 }
4464 dring_pkt.end_idx = -1;
4465 mutex_exit(&dp->dlock);
4466
4467 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__,
4468 ldcp->ldc_id, dp, dring_pkt.dring_ident);
4469 D3(vswp, "%s(%lld): start %lld : end %lld :\n",
4470 __func__, ldcp->ldc_id, dring_pkt.start_idx,
4471 dring_pkt.end_idx);
4472
4473 RW_EXIT(&ldcp->lane_out.dlistrw);
4474
4475 (void) vsw_send_msg(ldcp, (void *)&dring_pkt,
4476 sizeof (vio_dring_msg_t), B_TRUE);
4477
4478 return (status);
4479
4480 } else {
4481 mutex_exit(&dp->restart_lock);
4482 D2(vswp, "%s(%lld): updating descp %d", __func__,
4483 ldcp->ldc_id, idx);
4484 }
4485
4486 vsw_dringsend_free_exit:
4487
4488 RW_EXIT(&ldcp->lane_out.dlistrw);
4489
4490 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
4491 return (status);
4492 }
4493
4494 /*
4495 * Send an in-band descriptor message over ldc.
4496 */
4497 static int
4498 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp)
4499 {
4500 vsw_t *vswp = ldcp->ldc_vswp;
4501 vnet_ibnd_desc_t ibnd_msg;
4502 vsw_private_desc_t *priv_desc = NULL;
4503 dring_info_t *dp = NULL;
4504 size_t n, size = 0;
4505 caddr_t bufp;
4506 mblk_t *bp;
4507 int idx, i;
4508 int status = LDC_TX_SUCCESS;
4509 static int warn_msg = 1;
4510 lane_t *lp = &ldcp->lane_out;
4511
4512 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
4513
4514 ASSERT(mp != NULL);
4515
4516 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
4517 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
4518 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt",
4519 __func__, ldcp->ldc_id, ldcp->ldc_status,
4520 ldcp->lane_out.lstate);
4521 ldcp->ldc_stats.oerrors++;
4522 return (LDC_TX_FAILURE);
4523 }
4524
4525 /*
4526 * Only expect a single dring to exist, which we use
4527 * as an internal buffer, rather than a transfer channel.
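 *
 * In this in-band mode the peer never maps the ring; each message
 * instead carries the LDC memory cookies for the buffer (copied in
 * below) and the peer pulls the data across with ldc_mem_copy().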
4528 */ 4529 READ_ENTER(&ldcp->lane_out.dlistrw); 4530 if ((dp = ldcp->lane_out.dringp) == NULL) { 4531 DERR(vswp, "%s(%lld): no dring for outbound lane", 4532 __func__, ldcp->ldc_id); 4533 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 4534 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 4535 RW_EXIT(&ldcp->lane_out.dlistrw); 4536 ldcp->ldc_stats.oerrors++; 4537 return (LDC_TX_FAILURE); 4538 } 4539 4540 size = msgsize(mp); 4541 if (size > (size_t)lp->mtu) { 4542 RW_EXIT(&ldcp->lane_out.dlistrw); 4543 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4544 ldcp->ldc_id, size); 4545 ldcp->ldc_stats.oerrors++; 4546 return (LDC_TX_FAILURE); 4547 } 4548 4549 /* 4550 * Find a free descriptor in our buffer ring 4551 */ 4552 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4553 RW_EXIT(&ldcp->lane_out.dlistrw); 4554 if (warn_msg) { 4555 DERR(vswp, "%s(%lld): no descriptor available for ring " 4556 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4557 warn_msg = 0; 4558 } 4559 4560 /* nothing more we can do */ 4561 status = LDC_TX_NORESOURCES; 4562 goto vsw_descrsend_free_exit; 4563 } else { 4564 D2(vswp, "%s(%lld): free private descriptor found at pos " 4565 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4566 warn_msg = 1; 4567 } 4568 4569 /* copy data into the descriptor */ 4570 bufp = priv_desc->datap; 4571 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4572 n = MBLKL(bp); 4573 bcopy(bp->b_rptr, bufp, n); 4574 bufp += n; 4575 } 4576 4577 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4578 4579 /* create and send the in-band descp msg */ 4580 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4581 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4582 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4583 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4584 4585 /* 4586 * Copy the mem cookies describing the data from the 4587 * private region of the descriptor ring into the inband 4588 * descriptor. 
4589 */
4590 for (i = 0; i < priv_desc->ncookies; i++) {
4591 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i],
4592 sizeof (ldc_mem_cookie_t));
4593 }
4594
4595 ibnd_msg.hdr.desc_handle = idx;
4596 ibnd_msg.ncookies = priv_desc->ncookies;
4597 ibnd_msg.nbytes = size;
4598
4599 ldcp->ldc_stats.opackets++;
4600 ldcp->ldc_stats.obytes += size;
4601
4602 RW_EXIT(&ldcp->lane_out.dlistrw);
4603
4604 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg,
4605 sizeof (vnet_ibnd_desc_t), B_TRUE);
4606
4607 vsw_descrsend_free_exit:
4608
4609 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
4610 return (status);
4611 }
4612
4613 static void
4614 vsw_send_ver(void *arg)
4615 {
4616 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
4617 vsw_t *vswp = ldcp->ldc_vswp;
4618 lane_t *lp = &ldcp->lane_out;
4619 vio_ver_msg_t ver_msg;
4620
4621 D1(vswp, "%s enter", __func__);
4622
4623 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
4624 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
4625 ver_msg.tag.vio_subtype_env = VIO_VER_INFO;
4626 ver_msg.tag.vio_sid = ldcp->local_session;
4627
4628 if (vsw_obp_ver_proto_workaround == B_FALSE) {
4629 ver_msg.ver_major = vsw_versions[0].ver_major;
4630 ver_msg.ver_minor = vsw_versions[0].ver_minor;
4631 } else {
4632 /* use the major/minor that we've ACKed */
4633 lane_t *lpi = &ldcp->lane_in;
4634 ver_msg.ver_major = lpi->ver_major;
4635 ver_msg.ver_minor = lpi->ver_minor;
4636 }
4637 ver_msg.dev_class = VDEV_NETWORK_SWITCH;
4638
4639 lp->lstate |= VSW_VER_INFO_SENT;
4640 lp->ver_major = ver_msg.ver_major;
4641 lp->ver_minor = ver_msg.ver_minor;
4642
4643 DUMP_TAG(ver_msg.tag);
4644
4645 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE);
4646
4647 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id);
4648 }
4649
4650 static void
4651 vsw_send_attr(vsw_ldc_t *ldcp)
4652 {
4653 vsw_t *vswp = ldcp->ldc_vswp;
4654 lane_t *lp = &ldcp->lane_out;
4655 vnet_attr_msg_t attr_msg;
4656
4657 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
4658
4659 /*
4660 * Subtype is set to INFO by default
4661 */
4662 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
4663 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
4664 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO;
4665 attr_msg.tag.vio_sid = ldcp->local_session;
4666
4667 /* payload copied from default settings for lane */
4668 attr_msg.mtu = lp->mtu;
4669 attr_msg.addr_type = lp->addr_type;
4670 attr_msg.xfer_mode = lp->xfer_mode;
4671 attr_msg.ack_freq = lp->ack_freq;
4672
4673 READ_ENTER(&vswp->if_lockrw);
4674 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet);
4675 RW_EXIT(&vswp->if_lockrw);
4676
4677 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT;
4678
4679 DUMP_TAG(attr_msg.tag);
4680
4681 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE);
4682
4683 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
4684 }
4685
4686 /*
4687 * Create dring info msg (which also results in the creation of
4688 * a dring).
4689 */
4690 static vio_dring_reg_msg_t *
4691 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp)
4692 {
4693 vio_dring_reg_msg_t *mp;
4694 dring_info_t *dp;
4695 vsw_t *vswp = ldcp->ldc_vswp;
4696
4697 D1(vswp, "vsw_create_dring_info_pkt enter\n");
4698
4699 /*
4700 * If we can't create a dring, obviously no point sending
4701 * a message.
4702 */ 4703 if ((dp = vsw_create_dring(ldcp)) == NULL) 4704 return (NULL); 4705 4706 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 4707 4708 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 4709 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 4710 mp->tag.vio_subtype_env = VIO_DRING_REG; 4711 mp->tag.vio_sid = ldcp->local_session; 4712 4713 /* payload */ 4714 mp->num_descriptors = dp->num_descriptors; 4715 mp->descriptor_size = dp->descriptor_size; 4716 mp->options = dp->options; 4717 mp->ncookies = dp->ncookies; 4718 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 4719 4720 mp->dring_ident = 0; 4721 4722 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 4723 4724 return (mp); 4725 } 4726 4727 static void 4728 vsw_send_dring_info(vsw_ldc_t *ldcp) 4729 { 4730 vio_dring_reg_msg_t *dring_msg; 4731 vsw_t *vswp = ldcp->ldc_vswp; 4732 4733 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 4734 4735 dring_msg = vsw_create_dring_info_pkt(ldcp); 4736 if (dring_msg == NULL) { 4737 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 4738 vswp->instance, __func__); 4739 return; 4740 } 4741 4742 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 4743 4744 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 4745 4746 (void) vsw_send_msg(ldcp, dring_msg, 4747 sizeof (vio_dring_reg_msg_t), B_TRUE); 4748 4749 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 4750 4751 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 4752 } 4753 4754 static void 4755 vsw_send_rdx(vsw_ldc_t *ldcp) 4756 { 4757 vsw_t *vswp = ldcp->ldc_vswp; 4758 vio_rdx_msg_t rdx_msg; 4759 4760 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4761 4762 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4763 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4764 rdx_msg.tag.vio_subtype_env = VIO_RDX; 4765 rdx_msg.tag.vio_sid = ldcp->local_session; 4766 4767 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 4768 4769 DUMP_TAG(rdx_msg.tag); 4770 4771 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 4772 4773 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4774 } 4775 4776 /* 4777 * Generic routine to send message out over ldc channel. 4778 * 4779 * It is possible that when we attempt to write over the ldc channel 4780 * that we get notified that it has been reset. Depending on the value 4781 * of the handle_reset flag we either handle that event here or simply 4782 * notify the caller that the channel was reset. 
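 *
 * Callers holding locks that the reset path may also need (e.g.
 * dlistrw, per the note below) should pass handle_reset as B_FALSE
 * and deal with an ECONNRESET return themselves.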
4783 */
4784 int
4785 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset)
4786 {
4787 int rv, retries = vsw_wretries;
4788 size_t msglen = size;
4789 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp;
4790 vsw_t *vswp = ldcp->ldc_vswp;
4791 vio_dring_msg_t *dmsg;
4792 vio_raw_data_msg_t *rmsg;
4793 vnet_ibnd_desc_t *imsg;
4794 boolean_t data_msg = B_FALSE;
4795
4796 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes",
4797 ldcp->ldc_id, size);
4798
4799 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype);
4800 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype);
4801 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env);
4802
4803 mutex_enter(&ldcp->ldc_txlock);
4804
4805 if (tag->vio_subtype == VIO_SUBTYPE_INFO) {
4806 if (tag->vio_subtype_env == VIO_DRING_DATA) {
4807 dmsg = (vio_dring_msg_t *)tag;
4808 dmsg->seq_num = ldcp->lane_out.seq_num;
4809 data_msg = B_TRUE;
4810 } else if (tag->vio_subtype_env == VIO_PKT_DATA) {
4811 rmsg = (vio_raw_data_msg_t *)tag;
4812 rmsg->seq_num = ldcp->lane_out.seq_num;
4813 data_msg = B_TRUE;
4814 } else if (tag->vio_subtype_env == VIO_DESC_DATA) {
4815 imsg = (vnet_ibnd_desc_t *)tag;
4816 imsg->hdr.seq_num = ldcp->lane_out.seq_num;
4817 data_msg = B_TRUE;
4818 }
4819 }
4820
4821 /* retry up to vsw_wretries times, using a local counter */
4822 do {
4823 msglen = size;
4824 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen);
4825 } while (rv == EWOULDBLOCK && --retries > 0);
4826
4827 if (rv == 0 && data_msg == B_TRUE) {
4828 ldcp->lane_out.seq_num++;
4829 }
4830
4831 if ((rv != 0) || (msglen != size)) {
4832 DERR(vswp, "vsw_send_msg: ldc_write failed: chan(%lld) rv(%d) "
4833 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen);
4834 ldcp->ldc_stats.oerrors++;
4835 }
4836
4837 mutex_exit(&ldcp->ldc_txlock);
4838
4839 /*
4840 * If the channel has been reset we either handle it here or
4841 * simply report back that it has been reset and let the caller
4842 * decide what to do.
4843 */
4844 if (rv == ECONNRESET) {
4845 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id);
4846
4847 /*
4848 * N.B. - must never be holding the dlistrw lock when
4849 * we do a reset of the channel.
4850 */
4851 if (handle_reset) {
4852 vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
4853 }
4854 }
4855
4856 return (rv);
4857 }
4858
4859 /*
4860 * Remove the specified address from the list of addresses maintained
 * in this port node.
4861 */
4862 mcst_addr_t *
4863 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr)
4864 {
4865 vsw_t *vswp = NULL;
4866 vsw_port_t *port = NULL;
4867 mcst_addr_t *prev_p = NULL;
4868 mcst_addr_t *curr_p = NULL;
4869
4870 D1(NULL, "%s: enter : devtype %d : addr 0x%llx",
4871 __func__, devtype, addr);
4872
4873 if (devtype == VSW_VNETPORT) {
4874 port = (vsw_port_t *)arg;
4875 mutex_enter(&port->mca_lock);
4876 prev_p = curr_p = port->mcap;
4877 } else {
4878 vswp = (vsw_t *)arg;
4879 mutex_enter(&vswp->mca_lock);
4880 prev_p = curr_p = vswp->mcap;
4881 }
4882
4883 while (curr_p != NULL) {
4884 if (curr_p->addr == addr) {
4885 D2(NULL, "%s: address found", __func__);
4886 /* match found */
4887 if (prev_p == curr_p) {
4888 /* list head */
4889 if (devtype == VSW_VNETPORT)
4890 port->mcap = curr_p->nextp;
4891 else
4892 vswp->mcap = curr_p->nextp;
4893 } else {
4894 prev_p->nextp = curr_p->nextp;
4895 }
4896 break;
4897 } else {
4898 prev_p = curr_p;
4899 curr_p = curr_p->nextp;
4900 }
4901 }
4902
4903 if (devtype == VSW_VNETPORT)
4904 mutex_exit(&port->mca_lock);
4905 else
4906 mutex_exit(&vswp->mca_lock);
4907
4908 D1(NULL, "%s: exit", __func__);
4909
4910 return (curr_p);
4911 }
4912
4913 /*
4914 * Creates a descriptor ring (dring) and links it into the
4915 * list of outbound drings for this channel.
4916 *
4917 * Returns NULL if creation failed.
4918 */
4919 static dring_info_t *
4920 vsw_create_dring(vsw_ldc_t *ldcp)
4921 {
4922 vsw_private_desc_t *priv_addr = NULL;
4923 vsw_t *vswp = ldcp->ldc_vswp;
4924 ldc_mem_info_t minfo;
4925 dring_info_t *dp, *tp;
4926 int i;
4927
4928 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
4929
4930 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
4931
4932 /* create public section of ring */
4933 if ((ldc_mem_dring_create(vsw_ntxds,
4934 VSW_PUB_SIZE, &dp->handle)) != 0) {
4935
4936 DERR(vswp, "vsw_create_dring(%lld): ldc dring create "
4937 "failed", ldcp->ldc_id);
4938 goto create_fail_exit;
4939 }
4940
4941 ASSERT(dp->handle != NULL);
4942
4943 /*
4944 * Get the base address of the public section of the ring.
4945 */
4946 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
4947 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n",
4948 ldcp->ldc_id);
4949 goto dring_fail_exit;
4950 } else {
4951 ASSERT(minfo.vaddr != 0);
4952 dp->pub_addr = minfo.vaddr;
4953 }
4954
4955 dp->num_descriptors = vsw_ntxds;
4956 dp->descriptor_size = VSW_PUB_SIZE;
4957 dp->options = VIO_TX_DRING;
4958 dp->ncookies = 1; /* guaranteed by ldc */
4959
4960 /*
4961 * create private portion of ring
4962 */
4963 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc(
4964 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP);
4965
4966 if (vsw_setup_ring(ldcp, dp)) {
4967 DERR(vswp, "%s: unable to setup ring", __func__);
4968 goto dring_fail_exit;
4969 }
4970
4971 /* haven't used any descriptors yet */
4972 dp->end_idx = 0;
4973 dp->last_ack_recv = -1;
4974
4975 /* bind dring to the channel */
4976 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle,
4977 LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
4978 &dp->cookie[0], &dp->ncookies)) != 0) {
4979 DERR(vswp, "vsw_create_dring: unable to bind to channel "
4980 "%lld", ldcp->ldc_id);
4981 goto dring_fail_exit;
4982 }
4983
4984 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
4985 dp->restart_reqd = B_TRUE;
4986
4987 /*
4988 * Only ever create rings for outgoing lane. Link it onto
4989 * the end of the list.
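 * (The inbound lane's dring list is populated separately, when we
 * map the rings that a peer has exported to us.)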
4990 */ 4991 WRITE_ENTER(&ldcp->lane_out.dlistrw); 4992 if (ldcp->lane_out.dringp == NULL) { 4993 D2(vswp, "vsw_create_dring: adding first outbound ring"); 4994 ldcp->lane_out.dringp = dp; 4995 } else { 4996 tp = ldcp->lane_out.dringp; 4997 while (tp->next != NULL) 4998 tp = tp->next; 4999 5000 tp->next = dp; 5001 } 5002 RW_EXIT(&ldcp->lane_out.dlistrw); 5003 5004 return (dp); 5005 5006 dring_fail_exit: 5007 (void) ldc_mem_dring_destroy(dp->handle); 5008 5009 create_fail_exit: 5010 if (dp->priv_addr != NULL) { 5011 priv_addr = dp->priv_addr; 5012 for (i = 0; i < vsw_ntxds; i++) { 5013 if (priv_addr->memhandle != NULL) 5014 (void) ldc_mem_free_handle( 5015 priv_addr->memhandle); 5016 priv_addr++; 5017 } 5018 kmem_free(dp->priv_addr, 5019 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5020 } 5021 mutex_destroy(&dp->dlock); 5022 5023 kmem_free(dp, sizeof (dring_info_t)); 5024 return (NULL); 5025 } 5026 5027 /* 5028 * Create a ring consisting of just a private portion and link 5029 * it into the list of rings for the outbound lane. 5030 * 5031 * These type of rings are used primarily for temporary data 5032 * storage (i.e. as data buffers). 5033 */ 5034 void 5035 vsw_create_privring(vsw_ldc_t *ldcp) 5036 { 5037 dring_info_t *dp, *tp; 5038 vsw_t *vswp = ldcp->ldc_vswp; 5039 5040 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5041 5042 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5043 5044 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5045 5046 /* no public section */ 5047 dp->pub_addr = NULL; 5048 5049 dp->priv_addr = kmem_zalloc( 5050 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5051 5052 dp->num_descriptors = vsw_ntxds; 5053 5054 if (vsw_setup_ring(ldcp, dp)) { 5055 DERR(vswp, "%s: setup of ring failed", __func__); 5056 kmem_free(dp->priv_addr, 5057 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5058 mutex_destroy(&dp->dlock); 5059 kmem_free(dp, sizeof (dring_info_t)); 5060 return; 5061 } 5062 5063 /* haven't used any descriptors yet */ 5064 dp->end_idx = 0; 5065 5066 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5067 dp->restart_reqd = B_TRUE; 5068 5069 /* 5070 * Only ever create rings for outgoing lane. Link it onto 5071 * end of list. 5072 */ 5073 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5074 if (ldcp->lane_out.dringp == NULL) { 5075 D2(vswp, "%s: adding first outbound privring", __func__); 5076 ldcp->lane_out.dringp = dp; 5077 } else { 5078 tp = ldcp->lane_out.dringp; 5079 while (tp->next != NULL) 5080 tp = tp->next; 5081 5082 tp->next = dp; 5083 } 5084 RW_EXIT(&ldcp->lane_out.dlistrw); 5085 5086 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 5087 } 5088 5089 /* 5090 * Setup the descriptors in the dring. Returns 0 on success, 1 on 5091 * failure. 5092 */ 5093 int 5094 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 5095 { 5096 vnet_public_desc_t *pub_addr = NULL; 5097 vsw_private_desc_t *priv_addr = NULL; 5098 vsw_t *vswp = ldcp->ldc_vswp; 5099 uint64_t *tmpp; 5100 uint64_t offset = 0; 5101 uint32_t ncookies = 0; 5102 static char *name = "vsw_setup_ring"; 5103 int i, j, nc, rv; 5104 size_t data_sz; 5105 5106 priv_addr = dp->priv_addr; 5107 pub_addr = dp->pub_addr; 5108 5109 /* public section may be null but private should never be */ 5110 ASSERT(priv_addr != NULL); 5111 5112 /* 5113 * Allocate the region of memory which will be used to hold 5114 * the data the descriptors will refer to. 
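 *
 * Sizing sketch (assuming default tunables): with a max_frame_size
 * of roughly 1514 bytes, adding VNET_IPALIGN and VNET_LDCALIGN and
 * rounding with VNET_ROUNDUP_2K() gives a 2048-byte slice per
 * descriptor, i.e. vsw_ntxds * 2048 bytes overall.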
5115 */
5116 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
5117 data_sz = VNET_ROUNDUP_2K(data_sz);
5118 dp->desc_data_sz = data_sz;
5119 dp->data_sz = vsw_ntxds * data_sz;
5120 dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP);
5121
5122 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name,
5123 dp->data_sz, dp->data_addr);
5124
5125 tmpp = (uint64_t *)dp->data_addr;
5126 offset = dp->desc_data_sz / sizeof (*tmpp);
5127
5128 /*
5129 * Initialise some of the private and public (if they exist)
5130 * descriptor fields.
5131 */
5132 for (i = 0; i < vsw_ntxds; i++) {
5133 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL);
5134
5135 if ((ldc_mem_alloc_handle(ldcp->ldc_handle,
5136 &priv_addr->memhandle)) != 0) {
5137 DERR(vswp, "%s: alloc mem handle failed", name);
5138 goto setup_ring_cleanup;
5139 }
5140
5141 priv_addr->datap = (void *)tmpp;
5142
5143 rv = ldc_mem_bind_handle(priv_addr->memhandle,
5144 (caddr_t)priv_addr->datap, dp->desc_data_sz,
5145 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W,
5146 &(priv_addr->memcookie[0]), &ncookies);
5147 if (rv != 0) {
5148 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed "
5149 "(rv %d)", name, ldcp->ldc_id, rv);
5150 goto setup_ring_cleanup;
5151 }
5152 priv_addr->bound = 1;
5153
5154 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx",
5155 name, i, priv_addr->memcookie[0].addr,
5156 priv_addr->memcookie[0].size);
5157
5158 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) {
5159 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned "
5160 "invalid number of cookies (%d) for size 0x%llx",
5161 name, ldcp->ldc_id, ncookies, dp->desc_data_sz);
5162
5163 goto setup_ring_cleanup;
5164 } else {
5165 for (j = 1; j < ncookies; j++) {
5166 rv = ldc_mem_nextcookie(priv_addr->memhandle,
5167 &(priv_addr->memcookie[j]));
5168 if (rv != 0) {
5169 DERR(vswp, "%s: ldc_mem_nextcookie "
5170 "failed rv (%d)", name, rv);
5171 goto setup_ring_cleanup;
5172 }
5173 D3(vswp, "%s: memcookie %d : addr 0x%llx : "
5174 "size 0x%llx", name, j,
5175 priv_addr->memcookie[j].addr,
5176 priv_addr->memcookie[j].size);
5177 }
5178
5179 }
5180 priv_addr->ncookies = ncookies;
5181 priv_addr->dstate = VIO_DESC_FREE;
5182
5183 if (pub_addr != NULL) {
5184
5185 /* link pub and private sides */
5186 priv_addr->descp = pub_addr;
5187
5188 pub_addr->ncookies = priv_addr->ncookies;
5189
5190 for (nc = 0; nc < pub_addr->ncookies; nc++) {
5191 bcopy(&priv_addr->memcookie[nc],
5192 &pub_addr->memcookie[nc],
5193 sizeof (ldc_mem_cookie_t));
5194 }
5195
5196 pub_addr->hdr.dstate = VIO_DESC_FREE;
5197 pub_addr++;
5198 }
5199
5200 /*
5201 * move to the next element in the dring and the next
5202 * position in the data buffer.
5203 */
5204 priv_addr++;
5205 tmpp += offset;
5206 }
5207
5208 return (0);
5209
5210 setup_ring_cleanup:
5211 priv_addr = dp->priv_addr;
5212
5213 for (j = 0; j < i; j++) {
5214 (void) ldc_mem_unbind_handle(priv_addr->memhandle);
5215 (void) ldc_mem_free_handle(priv_addr->memhandle);
5216
5217 mutex_destroy(&priv_addr->dstate_lock);
5218
5219 priv_addr++;
5220 }
5221 kmem_free(dp->data_addr, dp->data_sz);
5222
5223 return (1);
5224 }
5225
5226 /*
5227 * Searches the private section of a ring for a free descriptor,
5228 * starting at the location of the last free descriptor found
5229 * previously.
5230 *
5231 * Returns 0 if a free descriptor is available, and updates the state
5232 * of the private descriptor to VIO_DESC_READY; otherwise returns 1.
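 *
 * Note that only the slot at end_idx is examined, so the ring is
 * used strictly as a FIFO: a descriptor at end_idx that is still
 * READY blocks allocation even if later slots are FREE.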
5233 *
5234 * FUTURE: might need to return contiguous range of descriptors
5235 * as dring info msg assumes all will be contiguous.
5236 */
5237 static int
5238 vsw_dring_find_free_desc(dring_info_t *dringp,
5239 vsw_private_desc_t **priv_p, int *idx)
5240 {
5241 vsw_private_desc_t *addr = NULL;
5242 int num = vsw_ntxds;
5243 int ret = 1;
5244
5245 D1(NULL, "%s enter\n", __func__);
5246
5247 ASSERT(dringp->priv_addr != NULL);
5248
5249 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld",
5250 __func__, dringp, dringp->end_idx);
5251
5252 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx;
5253
5254 mutex_enter(&addr->dstate_lock);
5255 if (addr->dstate == VIO_DESC_FREE) {
5256 addr->dstate = VIO_DESC_READY;
5257 *priv_p = addr;
5258 *idx = dringp->end_idx;
5259 dringp->end_idx = (dringp->end_idx + 1) % num;
5260 ret = 0;
5261
5262 }
5263 mutex_exit(&addr->dstate_lock);
5264
5265 /* ring full */
5266 if (ret == 1) {
5267 D2(NULL, "%s: no desc free: started at %d", __func__,
5268 dringp->end_idx);
5269 }
5270
5271 D1(NULL, "%s: exit\n", __func__);
5272
5273 return (ret);
5274 }
5275
5276 /*
5277 * Map from a dring identifier to the ring itself. Returns
5278 * a pointer to the ring or NULL if no match is found.
5279 *
5280 * Should be called with the dlistrw rwlock held as reader.
5281 */
5282 static dring_info_t *
5283 vsw_ident2dring(lane_t *lane, uint64_t ident)
5284 {
5285 dring_info_t *dp = NULL;
5286
5287 if ((dp = lane->dringp) == NULL) {
5288 return (NULL);
5289 } else {
5290 if (dp->ident == ident)
5291 return (dp);
5292
5293 while (dp != NULL) {
5294 if (dp->ident == ident)
5295 break;
5296 dp = dp->next;
5297 }
5298 }
5299
5300 return (dp);
5301 }
5302
5303 /*
5304 * Set the default lane attributes. These are copied into
5305 * the attr msg we send to our peer. If they are not acceptable
5306 * then (currently) the handshake ends.
5307 */
5308 static void
5309 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp)
5310 {
5311 bzero(lp, sizeof (lane_t));
5312
5313 READ_ENTER(&vswp->if_lockrw);
5314 ether_copy(&(vswp->if_addr), &(lp->addr));
5315 RW_EXIT(&vswp->if_lockrw);
5316
5317 lp->mtu = vswp->max_frame_size;
5318 lp->addr_type = ADDR_TYPE_MAC;
5319 lp->xfer_mode = VIO_DRING_MODE_V1_0;
5320 lp->ack_freq = 0; /* for shared mode */
5321 lp->seq_num = VNET_ISS;
5322 }
5323
5324 /*
5325 * Verify that the attributes are acceptable.
5326 *
5327 * FUTURE: If some attributes are not acceptable, change them
5328 * to our desired values.
5329 */
5330 static int
5331 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_ldc_t *ldcp)
5332 {
5333 int ret = 0;
5334 struct ether_addr ea;
5335 vsw_port_t *port = ldcp->ldc_port;
5336 lane_t *lp = &ldcp->lane_out;
5337
5338 D1(NULL, "vsw_check_attr enter\n");
5339
5340 if ((pkt->xfer_mode != VIO_DESC_MODE) &&
5341 (pkt->xfer_mode != lp->xfer_mode)) {
5342 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode);
5343 ret = 1;
5344 }
5345
5346 /* Only support MAC addresses at the moment. */
5347 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) {
5348 D2(NULL, "vsw_check_attr: invalid addr_type %x, "
5349 "or address 0x%llx\n", pkt->addr_type, pkt->addr);
5350 ret = 1;
5351 }
5352
5353 /*
5354 * The MAC address supplied by the device should match that stored
5355 * in the vsw-port OBP node. Need to decide what to do if they
5356 * don't match; for the moment just warn but don't fail.
5357 */
5358 vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet);
5359 if (ether_cmp(&ea, &port->p_macaddr) != 0) {
5360 DERR(NULL, "vsw_check_attr: device supplied address "
5361 "0x%llx doesn't match node address 0x%llx\n",
5362 pkt->addr, port->p_macaddr);
5363 }
5364
5365 /*
5366 * Ack freq only makes sense in pkt mode; in shared
5367 * mode the ring descriptors say whether or not to
5368 * send back an ACK.
5369 */
5370 if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
5371 (pkt->xfer_mode & VIO_DRING_MODE_V1_2)) ||
5372 (VSW_VER_LT(ldcp, 1, 2) &&
5373 (pkt->xfer_mode == VIO_DRING_MODE_V1_0))) {
5374 if (pkt->ack_freq > 0) {
5375 D2(NULL, "vsw_check_attr: non-zero ack freq "
5376 "in SHM mode\n");
5377 ret = 1;
5378 }
5379 }
5380
5381 /*
5382 * Note: for the moment we only support ETHER
5383 * frames. This may change in the future.
5384 */
5385 if ((pkt->mtu > lp->mtu) || (pkt->mtu <= 0)) {
5386 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n",
5387 pkt->mtu);
5388 ret = 1;
5389 }
5390
5391 D1(NULL, "vsw_check_attr exit\n");
5392
5393 return (ret);
5394 }
5395
5396 /*
5397 * Returns 1 if there is a problem, 0 otherwise.
5398 */
5399 static int
5400 vsw_check_dring_info(vio_dring_reg_msg_t *pkt)
5401 {
5404 int ret = 0;
5405
5406 D1(NULL, "vsw_check_dring_info enter\n");
5407
5408 if ((pkt->num_descriptors == 0) ||
5409 (pkt->descriptor_size == 0) ||
5410 (pkt->ncookies != 1)) {
5411 DERR(NULL, "vsw_check_dring_info: invalid dring msg");
5412 ret = 1;
5413 }
5414
5415 D1(NULL, "vsw_check_dring_info exit\n");
5416
5417 return (ret);
5418 }
5419
5420 /*
5421 * Returns 1 if two memory cookies match. Otherwise returns 0.
5422 */
5423 static int
5424 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2)
5425 {
5426 if ((m1->addr != m2->addr) ||
5427 (m1->size != m2->size)) {
5428 return (0);
5429 } else {
5430 return (1);
5431 }
5432 }
5433
5434 /*
5435 * Returns 1 if the ring described in the reg message matches that
5436 * described by the dring_info structure. Otherwise returns 0.
5437 */
5438 static int
5439 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg)
5440 {
5441 if ((msg->descriptor_size != dp->descriptor_size) ||
5442 (msg->num_descriptors != dp->num_descriptors) ||
5443 (msg->ncookies != dp->ncookies) ||
5444 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) {
5445 return (0);
5446 } else {
5447 return (1);
5448 }
5449
5450 }
5451
5452 static caddr_t
5453 vsw_print_ethaddr(uint8_t *a, char *ebuf)
5454 {
5455 (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
5456 a[0], a[1], a[2], a[3], a[4], a[5]);
5457 return (ebuf);
5458 }
5459
5460 /*
5461 * Reset and free all the resources associated with
5462 * the channel.
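 *
 * For the INBOUND lane the drings were imported from the peer and
 * are simply unmapped; for the OUTBOUND lane we created and
 * exported them, so they are unbound and destroyed via
 * vsw_free_ring().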
5463 */ 5464 static void 5465 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 5466 { 5467 dring_info_t *dp, *dpp; 5468 lane_t *lp = NULL; 5469 int rv = 0; 5470 5471 ASSERT(ldcp != NULL); 5472 5473 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 5474 5475 if (dir == INBOUND) { 5476 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 5477 " of channel %lld", __func__, ldcp->ldc_id); 5478 lp = &ldcp->lane_in; 5479 } else { 5480 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 5481 " of channel %lld", __func__, ldcp->ldc_id); 5482 lp = &ldcp->lane_out; 5483 } 5484 5485 lp->lstate = VSW_LANE_INACTIV; 5486 lp->seq_num = VNET_ISS; 5487 5488 if (lp->dringp) { 5489 if (dir == INBOUND) { 5490 WRITE_ENTER(&lp->dlistrw); 5491 dp = lp->dringp; 5492 while (dp != NULL) { 5493 dpp = dp->next; 5494 if (dp->handle != NULL) 5495 (void) ldc_mem_dring_unmap(dp->handle); 5496 kmem_free(dp, sizeof (dring_info_t)); 5497 dp = dpp; 5498 } 5499 RW_EXIT(&lp->dlistrw); 5500 } else { 5501 /* 5502 * unbind, destroy exported dring, free dring struct 5503 */ 5504 WRITE_ENTER(&lp->dlistrw); 5505 dp = lp->dringp; 5506 rv = vsw_free_ring(dp); 5507 RW_EXIT(&lp->dlistrw); 5508 } 5509 if (rv == 0) { 5510 lp->dringp = NULL; 5511 } 5512 } 5513 5514 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 5515 } 5516 5517 /* 5518 * Free ring and all associated resources. 5519 * 5520 * Should be called with dlistrw rwlock held as writer. 5521 */ 5522 static int 5523 vsw_free_ring(dring_info_t *dp) 5524 { 5525 vsw_private_desc_t *paddr = NULL; 5526 dring_info_t *dpp; 5527 int i, rv = 1; 5528 5529 while (dp != NULL) { 5530 mutex_enter(&dp->dlock); 5531 dpp = dp->next; 5532 if (dp->priv_addr != NULL) { 5533 /* 5534 * First unbind and free the memory handles 5535 * stored in each descriptor within the ring. 5536 */ 5537 for (i = 0; i < vsw_ntxds; i++) { 5538 paddr = (vsw_private_desc_t *) 5539 dp->priv_addr + i; 5540 if (paddr->memhandle != NULL) { 5541 if (paddr->bound == 1) { 5542 rv = ldc_mem_unbind_handle( 5543 paddr->memhandle); 5544 5545 if (rv != 0) { 5546 DERR(NULL, "error " 5547 "unbinding handle for " 5548 "ring 0x%llx at pos %d", 5549 dp, i); 5550 mutex_exit(&dp->dlock); 5551 return (rv); 5552 } 5553 paddr->bound = 0; 5554 } 5555 5556 rv = ldc_mem_free_handle( 5557 paddr->memhandle); 5558 if (rv != 0) { 5559 DERR(NULL, "error freeing " 5560 "handle for ring 0x%llx " 5561 "at pos %d", dp, i); 5562 mutex_exit(&dp->dlock); 5563 return (rv); 5564 } 5565 paddr->memhandle = NULL; 5566 } 5567 mutex_destroy(&paddr->dstate_lock); 5568 } 5569 kmem_free(dp->priv_addr, 5570 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5571 } 5572 5573 /* 5574 * Now unbind and destroy the ring itself. 5575 */ 5576 if (dp->handle != NULL) { 5577 (void) ldc_mem_dring_unbind(dp->handle); 5578 (void) ldc_mem_dring_destroy(dp->handle); 5579 } 5580 5581 if (dp->data_addr != NULL) { 5582 kmem_free(dp->data_addr, dp->data_sz); 5583 } 5584 5585 mutex_exit(&dp->dlock); 5586 mutex_destroy(&dp->dlock); 5587 mutex_destroy(&dp->restart_lock); 5588 kmem_free(dp, sizeof (dring_info_t)); 5589 5590 dp = dpp; 5591 } 5592 return (0); 5593 } 5594 5595 /* 5596 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 5597 * This thread is woken up by the LDC interrupt handler to process 5598 * LDC packets and receive data. 
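 *
 * The worker handshake is flag based: this thread sets
 * VSW_WTHR_RUNNING while it is alive, the LDC callback (presumably)
 * sets VSW_WTHR_DATARCVD and signals rx_thr_cv when work arrives,
 * and vsw_stop_rx_thread() sets VSW_WTHR_STOP and waits on the
 * same cv for RUNNING to clear.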
5599 */
5600 static void
5601 vsw_ldc_rx_worker(void *arg)
5602 {
5603 callb_cpr_t cprinfo;
5604 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
5605 vsw_t *vswp = ldcp->ldc_vswp;
5606
5607 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
5608 CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr,
5609 "vsw_rx_thread");
5610 mutex_enter(&ldcp->rx_thr_lock);
5611 ldcp->rx_thr_flags |= VSW_WTHR_RUNNING;
5612 while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) {
5613
5614 CALLB_CPR_SAFE_BEGIN(&cprinfo);
5615 /*
5616 * Wait until data is received or a stop
5617 * request is received.
5618 */
5619 while (!(ldcp->rx_thr_flags &
5620 (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) {
5621 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock);
5622 }
5623 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock)
5624
5625 /*
5626 * First process the stop request.
5627 */
5628 if (ldcp->rx_thr_flags & VSW_WTHR_STOP) {
5629 D2(vswp, "%s(%lld):Rx thread stopped\n",
5630 __func__, ldcp->ldc_id);
5631 break;
5632 }
5633 ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD;
5634 mutex_exit(&ldcp->rx_thr_lock);
5635 D1(vswp, "%s(%lld):calling vsw_process_pkt\n",
5636 __func__, ldcp->ldc_id);
5637 mutex_enter(&ldcp->ldc_cblock);
5638 vsw_process_pkt(ldcp);
5639 mutex_exit(&ldcp->ldc_cblock);
5640 mutex_enter(&ldcp->rx_thr_lock);
5641 }
5642
5643 /*
5644 * Update the run status and wakeup the thread that
5645 * has sent the stop request.
5646 */
5647 ldcp->rx_thr_flags &= ~VSW_WTHR_RUNNING;
5648 cv_signal(&ldcp->rx_thr_cv);
5649 CALLB_CPR_EXIT(&cprinfo);
5650 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
5651 thread_exit();
5652 }
5653
5654 /* vsw_stop_rx_thread -- Co-ordinate with the receive thread to stop it */
5655 static void
5656 vsw_stop_rx_thread(vsw_ldc_t *ldcp)
5657 {
5658 vsw_t *vswp = ldcp->ldc_vswp;
5659
5660 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
5661 /*
5662 * Send a stop request by setting the stop flag and
5663 * wait until the receive thread stops.
5664 */
5665 mutex_enter(&ldcp->rx_thr_lock);
5666 if (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) {
5667 ldcp->rx_thr_flags |= VSW_WTHR_STOP;
5668 cv_signal(&ldcp->rx_thr_cv);
5669 while (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) {
5670 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock);
5671 }
5672 }
5673 mutex_exit(&ldcp->rx_thr_lock);
5674 ldcp->rx_thread = NULL;
5675 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
5676 }
5677
5678 /*
5679 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data.
5680 * This thread is woken up by vsw_portsend() to transmit
5681 * packets.
5682 */
5683 static void
5684 vsw_ldc_tx_worker(void *arg)
5685 {
5686 callb_cpr_t cprinfo;
5687 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
5688 vsw_t *vswp = ldcp->ldc_vswp;
5689 mblk_t *mp;
5690 mblk_t *tmp;
5691
5692 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
5693 CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr,
5694 "vsw_tx_thread");
5695 mutex_enter(&ldcp->tx_thr_lock);
5696 ldcp->tx_thr_flags |= VSW_WTHR_RUNNING;
5697 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) {
5698
5699 CALLB_CPR_SAFE_BEGIN(&cprinfo);
5700 /*
5701 * Wait until there is data to be transmitted or a stop
5702 * request is received.
5703 */
5704 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) &&
5705 (ldcp->tx_mhead == NULL)) {
5706 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock);
5707 }
5708 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock)
5709
5710 /*
5711 * First process the stop request.
5712 */
5713 if (ldcp->tx_thr_flags & VSW_WTHR_STOP) {
5714 D2(vswp, "%s(%lld):tx thread stopped\n",
5715 __func__, ldcp->ldc_id);
5716 break;
5717 }
5718 mp = ldcp->tx_mhead;
5719 ldcp->tx_mhead = ldcp->tx_mtail = NULL;
5720 ldcp->tx_cnt = 0;
5721 mutex_exit(&ldcp->tx_thr_lock);
5722 D2(vswp, "%s(%lld):calling vsw_ldcsend\n",
5723 __func__, ldcp->ldc_id);
5724 while (mp != NULL) {
5725 tmp = mp->b_next;
5726 mp->b_next = mp->b_prev = NULL;
5727 (void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries);
5728 mp = tmp;
5729 }
5730 mutex_enter(&ldcp->tx_thr_lock);
5731 }
5732
5733 /*
5734 * Update the run status and wakeup the thread that
5735 * has sent the stop request.
5736 */
5737 ldcp->tx_thr_flags &= ~VSW_WTHR_RUNNING;
5738 cv_signal(&ldcp->tx_thr_cv);
5739 CALLB_CPR_EXIT(&cprinfo);
5740 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
5741 thread_exit();
5742 }
5743
5744 /* vsw_stop_tx_thread -- Co-ordinate with the transmit thread to stop it */
5745 static void
5746 vsw_stop_tx_thread(vsw_ldc_t *ldcp)
5747 {
5748 vsw_t *vswp = ldcp->ldc_vswp;
5749
5750 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
5751 /*
5752 * Send a stop request by setting the stop flag and
5753 * wait until the transmit thread stops.
5754 */
5755 mutex_enter(&ldcp->tx_thr_lock);
5756 if (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) {
5757 ldcp->tx_thr_flags |= VSW_WTHR_STOP;
5758 cv_signal(&ldcp->tx_thr_cv);
5759 while (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) {
5760 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock);
5761 }
5762 }
5763 mutex_exit(&ldcp->tx_thr_lock);
5764 ldcp->tx_thread = NULL;
5765 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
5766 }
5767
5768 /* vsw_reclaim_dring -- reclaim descriptors marked DONE by the peer */
5769 static int
5770 vsw_reclaim_dring(dring_info_t *dp, int start)
5771 {
5772 int i, j, len;
5773 vsw_private_desc_t *priv_addr;
5774 vnet_public_desc_t *pub_addr;
5775
5776 pub_addr = (vnet_public_desc_t *)dp->pub_addr;
5777 priv_addr = (vsw_private_desc_t *)dp->priv_addr;
5778 len = dp->num_descriptors;
5779
5780 D2(NULL, "%s: start index %ld\n", __func__, start);
5781
5782 j = 0;
5783 for (i = start; j < len; i = (i + 1) % len, j++) {
5784 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
5785 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
5786
5787 mutex_enter(&priv_addr->dstate_lock);
5788 if (pub_addr->hdr.dstate != VIO_DESC_DONE) {
5789 mutex_exit(&priv_addr->dstate_lock);
5790 break;
5791 }
5792 pub_addr->hdr.dstate = VIO_DESC_FREE;
5793 priv_addr->dstate = VIO_DESC_FREE;
5794 /* clear all the fields */
5795 priv_addr->datalen = 0;
5796 pub_addr->hdr.ack = 0;
5797 mutex_exit(&priv_addr->dstate_lock);
5798
5799 D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx",
5800 i, pub_addr->hdr.dstate, priv_addr->dstate);
5801 }
5802 return (j);
5803 }
5804
5805 /*
5806 * Debugging routines
5807 */
5808 static void
5809 display_state(void)
5810 {
5811 vsw_t *vswp;
5812 vsw_port_list_t *plist;
5813 vsw_port_t *port;
5814 vsw_ldc_list_t *ldcl;
5815 vsw_ldc_t *ldcp;
5816 extern vsw_t *vsw_head;
5817
5818 cmn_err(CE_NOTE, "***** system state *****");
5819
5820 for (vswp = vsw_head; vswp; vswp = vswp->next) {
5821 plist = &vswp->plist;
5822 READ_ENTER(&plist->lockrw);
5823 cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
5824 vswp->instance, plist->num_ports);
5825
5826 for (port = plist->head; port != NULL; port = port->p_next) {
5827 ldcl = &port->p_ldclist;
5828 cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
5829 port->p_instance, port->num_ldcs);
5830 READ_ENTER(&ldcl->lockrw);
5831 ldcp
= ldcl->head; 5832 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 5833 cmn_err(CE_CONT, "chan %lu : dev %d : " 5834 "status %d : phase %u\n", 5835 ldcp->ldc_id, ldcp->dev_class, 5836 ldcp->ldc_status, ldcp->hphase); 5837 cmn_err(CE_CONT, "chan %lu : lsession %lu : " 5838 "psession %lu\n", ldcp->ldc_id, 5839 ldcp->local_session, ldcp->peer_session); 5840 5841 cmn_err(CE_CONT, "Inbound lane:\n"); 5842 display_lane(&ldcp->lane_in); 5843 cmn_err(CE_CONT, "Outbound lane:\n"); 5844 display_lane(&ldcp->lane_out); 5845 } 5846 RW_EXIT(&ldcl->lockrw); 5847 } 5848 RW_EXIT(&plist->lockrw); 5849 } 5850 cmn_err(CE_NOTE, "***** system state *****"); 5851 } 5852 5853 static void 5854 display_lane(lane_t *lp) 5855 { 5856 dring_info_t *drp; 5857 5858 cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n", 5859 lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu); 5860 cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n", 5861 lp->addr_type, lp->addr, lp->xfer_mode); 5862 cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp); 5863 5864 cmn_err(CE_CONT, "Dring info:\n"); 5865 for (drp = lp->dringp; drp != NULL; drp = drp->next) { 5866 cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n", 5867 drp->num_descriptors, drp->descriptor_size); 5868 cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle); 5869 cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n", 5870 (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr); 5871 cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n", 5872 drp->ident, drp->end_idx); 5873 display_ring(drp); 5874 } 5875 } 5876 5877 static void 5878 display_ring(dring_info_t *dringp) 5879 { 5880 uint64_t i; 5881 uint64_t priv_count = 0; 5882 uint64_t pub_count = 0; 5883 vnet_public_desc_t *pub_addr = NULL; 5884 vsw_private_desc_t *priv_addr = NULL; 5885 5886 for (i = 0; i < vsw_ntxds; i++) { 5887 if (dringp->pub_addr != NULL) { 5888 pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i; 5889 5890 if (pub_addr->hdr.dstate == VIO_DESC_FREE) 5891 pub_count++; 5892 } 5893 5894 if (dringp->priv_addr != NULL) { 5895 priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i; 5896 5897 if (priv_addr->dstate == VIO_DESC_FREE) 5898 priv_count++; 5899 } 5900 } 5901 cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n", 5902 i, priv_count, pub_count); 5903 } 5904 5905 static void 5906 dump_flags(uint64_t state) 5907 { 5908 int i; 5909 5910 typedef struct flag_name { 5911 int flag_val; 5912 char *flag_name; 5913 } flag_name_t; 5914 5915 flag_name_t flags[] = { 5916 VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT", 5917 VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV", 5918 VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV", 5919 VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT", 5920 VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV", 5921 VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT", 5922 VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT", 5923 VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV", 5924 VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT", 5925 VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV", 5926 VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT", 5927 VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV", 5928 VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT", 5929 VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV", 5930 VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT", 5931 VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV", 5932 VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT", 5933 VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV", 5934 VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT", 5935 VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV", 5936 VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT", 5937 VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV", 5938 VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT", 5939 
VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV", 5940 VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT", 5941 VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV", 5942 VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT", 5943 VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV", 5944 VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT", 5945 VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV", 5946 VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"}; 5947 5948 DERR(NULL, "DUMP_FLAGS: %llx\n", state); 5949 for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) { 5950 if (state & flags[i].flag_val) 5951 DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name); 5952 } 5953 } 5954